Diffstat (limited to 'compiler/GHC/CmmToAsm/Reg')
22 files changed, 6203 insertions, 0 deletions
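The allocator added below iterates a build/color/spill loop until the conflict graph colors, or until `maxSpinCount` iterations have passed. As a minimal standalone sketch of that control flow (toy types and stand-in functions, not the GHC API):

    data Outcome graph = Colored graph | NeedSpill [Int]

    allocLoop
        :: Int                       -- iterations remaining (cf. maxSpinCount)
        -> (code -> graph)           -- build the conflict graph (cf. buildGraph)
        -> (graph -> Outcome graph)  -- try to color it (cf. Color.colorGraph)
        -> ([Int] -> code -> code)   -- insert spill code (cf. regSpill)
        -> code
        -> Either String code
    allocLoop 0 _     _     _     _    = Left "max build/spill cycle count exceeded"
    allocLoop n build color spill code =
        case color (build code) of
            Colored _       -> Right code   -- then rewrite vregs to real regs
            NeedSpill vregs -> allocLoop (n - 1) build color spill (spill vregs code)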
diff --git a/compiler/GHC/CmmToAsm/Reg/Graph.hs b/compiler/GHC/CmmToAsm/Reg/Graph.hs
new file mode 100644
index 0000000000..6dfe84cf95
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/Reg/Graph.hs
@@ -0,0 +1,472 @@
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+
+-- | Graph coloring register allocator.
+module GHC.CmmToAsm.Reg.Graph (
+        regAlloc
+) where
+import GhcPrelude
+
+import qualified GraphColor as Color
+import GHC.CmmToAsm.Reg.Liveness
+import GHC.CmmToAsm.Reg.Graph.Spill
+import GHC.CmmToAsm.Reg.Graph.SpillClean
+import GHC.CmmToAsm.Reg.Graph.SpillCost
+import GHC.CmmToAsm.Reg.Graph.Stats
+import GHC.CmmToAsm.Reg.Graph.TrivColorable
+import GHC.CmmToAsm.Instr
+import GHC.CmmToAsm.Reg.Target
+import GHC.Platform.Reg.Class
+import GHC.Platform.Reg
+
+import Bag
+import GHC.Driver.Session
+import Outputable
+import GHC.Platform
+import UniqFM
+import UniqSet
+import UniqSupply
+import Util (seqList)
+import GHC.CmmToAsm.CFG
+
+import Data.Maybe
+import Control.Monad
+
+
+-- | The maximum number of build\/spill cycles we'll allow.
+--
+--   It should only take 3 or 4 cycles for the allocator to converge.
+--   If it takes any longer than this it's probably in an infinite loop,
+--   so it's better just to bail out and report a bug.
+maxSpinCount :: Int
+maxSpinCount = 10
+
+
+-- | The top level of the graph coloring register allocator.
+regAlloc
+        :: (Outputable statics, Outputable instr, Instruction instr)
+        => DynFlags
+        -> UniqFM (UniqSet RealReg)     -- ^ registers we can use for allocation
+        -> UniqSet Int                  -- ^ set of available spill slots.
+        -> Int                          -- ^ current number of spill slots
+        -> [LiveCmmDecl statics instr]  -- ^ code annotated with liveness information.
+        -> Maybe CFG                    -- ^ CFG of basic blocks if available
+        -> UniqSM ( [NatCmmDecl statics instr]
+                  , Maybe Int, [RegAllocStats statics instr] )
+           -- ^ code with registers allocated, additional stack slots required,
+           --   and stats for each stage of allocation
+
+regAlloc dflags regsFree slotsFree slotsCount code cfg
+ = do
+        -- TODO: the regClass function is currently hard coded to the default
+        --       target architecture. Would prefer to determine this from dflags.
+        --       There are other uses of targetRegClass later in this module.
+        let platform = targetPlatform dflags
+            triv = trivColorable platform
+                        (targetVirtualRegSqueeze platform)
+                        (targetRealRegSqueeze platform)
+
+        (code_final, debug_codeGraphs, slotsCount', _)
+                <- regAlloc_spin dflags 0
+                        triv
+                        regsFree slotsFree slotsCount [] code cfg
+
+        let needStack
+                | slotsCount == slotsCount'
+                = Nothing
+                | otherwise
+                = Just slotsCount'
+
+        return  ( code_final
+                , needStack
+                , reverse debug_codeGraphs )
+
+
+-- | Perform solver iterations for the graph coloring allocator.
+--
+--   We extract a register conflict graph from the provided cmm code,
+--   and try to colour it. If that works then we use the solution to rewrite
+--   the code with real hregs. If coloring doesn't work we add spill code
+--   and try to colour it again. After `maxSpinCount` iterations we give up.
+--
+regAlloc_spin
+        :: forall instr statics.
+           (Instruction instr,
+            Outputable instr,
+            Outputable statics)
+        => DynFlags
+        -> Int  -- ^ Number of solver iterations we've already performed.
+        -> Color.Triv VirtualReg RegClass RealReg
+                -- ^ Function for calculating whether a register is trivially
+                --   colourable.
+        -> UniqFM (UniqSet RealReg)     -- ^ Free registers that we can allocate.
+        -> UniqSet Int                  -- ^ Free stack slots that we can use.
+ -> Int -- ^ Number of spill slots in use + -> [RegAllocStats statics instr] -- ^ Current regalloc stats to add to. + -> [LiveCmmDecl statics instr] -- ^ Liveness annotated code to allocate. + -> Maybe CFG + -> UniqSM ( [NatCmmDecl statics instr] + , [RegAllocStats statics instr] + , Int -- Slots in use + , Color.Graph VirtualReg RegClass RealReg) + +regAlloc_spin dflags spinCount triv regsFree slotsFree slotsCount debug_codeGraphs code cfg + = do + let platform = targetPlatform dflags + + -- If any of these dump flags are turned on we want to hang on to + -- intermediate structures in the allocator - otherwise tell the + -- allocator to ditch them early so we don't end up creating space leaks. + let dump = or + [ dopt Opt_D_dump_asm_regalloc_stages dflags + , dopt Opt_D_dump_asm_stats dflags + , dopt Opt_D_dump_asm_conflicts dflags ] + + -- Check that we're not running off down the garden path. + when (spinCount > maxSpinCount) + $ pprPanic "regAlloc_spin: max build/spill cycle count exceeded." + ( text "It looks like the register allocator is stuck in an infinite loop." + $$ text "max cycles = " <> int maxSpinCount + $$ text "regsFree = " <> (hcat $ punctuate space $ map ppr + $ nonDetEltsUniqSet $ unionManyUniqSets + $ nonDetEltsUFM regsFree) + -- This is non-deterministic but we do not + -- currently support deterministic code-generation. + -- See Note [Unique Determinism and code generation] + $$ text "slotsFree = " <> ppr (sizeUniqSet slotsFree)) + + -- Build the register conflict graph from the cmm code. + (graph :: Color.Graph VirtualReg RegClass RealReg) + <- {-# SCC "BuildGraph" #-} buildGraph code + + -- VERY IMPORTANT: + -- We really do want the graph to be fully evaluated _before_ we + -- start coloring. If we don't do this now then when the call to + -- Color.colorGraph forces bits of it, the heap will be filled with + -- half evaluated pieces of graph and zillions of apply thunks. + seqGraph graph `seq` return () + + -- Build a map of the cost of spilling each instruction. + -- This is a lazy binding, so the map will only be computed if we + -- actually have to spill to the stack. + let spillCosts = foldl' plusSpillCostInfo zeroSpillCostInfo + $ map (slurpSpillCostInfo platform cfg) code + + -- The function to choose regs to leave uncolored. + let spill = chooseSpill spillCosts + + -- Record startup state in our log. + let stat1 + = if spinCount == 0 + then Just $ RegAllocStatsStart + { raLiveCmm = code + , raGraph = graph + , raSpillCosts = spillCosts } + else Nothing + + -- Try and color the graph. + let (graph_colored, rsSpill, rmCoalesce) + = {-# SCC "ColorGraph" #-} + Color.colorGraph + (gopt Opt_RegsIterative dflags) + spinCount + regsFree triv spill graph + + -- Rewrite registers in the code that have been coalesced. + let patchF reg + | RegVirtual vr <- reg + = case lookupUFM rmCoalesce vr of + Just vr' -> patchF (RegVirtual vr') + Nothing -> reg + + | otherwise + = reg + + let (code_coalesced :: [LiveCmmDecl statics instr]) + = map (patchEraseLive patchF) code + + -- Check whether we've found a coloring. + if isEmptyUniqSet rsSpill + + -- Coloring was successful because no registers needed to be spilled. + then do + -- if -fasm-lint is turned on then validate the graph. + -- This checks for bugs in the graph allocator itself. + let graph_colored_lint = + if gopt Opt_DoAsmLinting dflags + then Color.validateGraph (text "") + True -- Require all nodes to be colored. 
+ graph_colored + else graph_colored + + -- Rewrite the code to use real hregs, using the colored graph. + let code_patched + = map (patchRegsFromGraph platform graph_colored_lint) + code_coalesced + + -- Clean out unneeded SPILL/RELOAD meta instructions. + -- The spill code generator just spills the entire live range + -- of a vreg, but it might not need to be on the stack for + -- its entire lifetime. + let code_spillclean + = map (cleanSpills platform) code_patched + + -- Strip off liveness information from the allocated code. + -- Also rewrite SPILL/RELOAD meta instructions into real machine + -- instructions along the way + let code_final + = map (stripLive dflags) code_spillclean + + -- Record what happened in this stage for debugging + let stat + = RegAllocStatsColored + { raCode = code + , raGraph = graph + , raGraphColored = graph_colored_lint + , raCoalesced = rmCoalesce + , raCodeCoalesced = code_coalesced + , raPatched = code_patched + , raSpillClean = code_spillclean + , raFinal = code_final + , raSRMs = foldl' addSRM (0, 0, 0) + $ map countSRMs code_spillclean } + + -- Bundle up all the register allocator statistics. + -- .. but make sure to drop them on the floor if they're not + -- needed, otherwise we'll get a space leak. + let statList = + if dump then [stat] ++ maybeToList stat1 ++ debug_codeGraphs + else [] + + -- Ensure all the statistics are evaluated, to avoid space leaks. + seqList statList (return ()) + + return ( code_final + , statList + , slotsCount + , graph_colored_lint) + + -- Coloring was unsuccessful. We need to spill some register to the + -- stack, make a new graph, and try to color it again. + else do + -- if -fasm-lint is turned on then validate the graph + let graph_colored_lint = + if gopt Opt_DoAsmLinting dflags + then Color.validateGraph (text "") + False -- don't require nodes to be colored + graph_colored + else graph_colored + + -- Spill uncolored regs to the stack. + (code_spilled, slotsFree', slotsCount', spillStats) + <- regSpill platform code_coalesced slotsFree slotsCount rsSpill + + -- Recalculate liveness information. + -- NOTE: we have to reverse the SCCs here to get them back into + -- the reverse-dependency order required by computeLiveness. + -- If they're not in the correct order that function will panic. + code_relive <- mapM (regLiveness platform . reverseBlocksInTops) + code_spilled + + -- Record what happened in this stage for debugging. + let stat = + RegAllocStatsSpill + { raCode = code + , raGraph = graph_colored_lint + , raCoalesced = rmCoalesce + , raSpillStats = spillStats + , raSpillCosts = spillCosts + , raSpilled = code_spilled } + + -- Bundle up all the register allocator statistics. + -- .. but make sure to drop them on the floor if they're not + -- needed, otherwise we'll get a space leak. + let statList = + if dump + then [stat] ++ maybeToList stat1 ++ debug_codeGraphs + else [] + + -- Ensure all the statistics are evaluated, to avoid space leaks. + seqList statList (return ()) + + regAlloc_spin dflags (spinCount + 1) triv regsFree slotsFree' + slotsCount' statList code_relive cfg + + +-- | Build a graph from the liveness and coalesce information in this code. +buildGraph + :: Instruction instr + => [LiveCmmDecl statics instr] + -> UniqSM (Color.Graph VirtualReg RegClass RealReg) + +buildGraph code + = do + -- Slurp out the conflicts and reg->reg moves from this code. + let (conflictList, moveList) = + unzip $ map slurpConflicts code + + -- Slurp out the spill/reload coalesces. 
+        let moveList2   = map slurpReloadCoalesce code
+
+        -- Add the reg-reg conflicts to the graph.
+        let conflictBag = unionManyBags conflictList
+        let graph_conflict
+                = foldr graphAddConflictSet Color.initGraph conflictBag
+
+        -- Add the coalescence edges to the graph.
+        let moveBag
+                = unionBags (unionManyBags moveList2)
+                            (unionManyBags moveList)
+
+        let graph_coalesce
+                = foldr graphAddCoalesce graph_conflict moveBag
+
+        return  graph_coalesce
+
+
+-- | Add some conflict edges to the graph.
+--   Conflicts between virtual and real regs are recorded as exclusions.
+graphAddConflictSet
+        :: UniqSet Reg
+        -> Color.Graph VirtualReg RegClass RealReg
+        -> Color.Graph VirtualReg RegClass RealReg
+
+graphAddConflictSet set graph
+ = let  virtuals = mkUniqSet
+                        [ vr | RegVirtual vr <- nonDetEltsUniqSet set ]
+
+        graph1  = Color.addConflicts virtuals classOfVirtualReg graph
+
+        graph2  = foldr (\(r1, r2) -> Color.addExclusion r1 classOfVirtualReg r2)
+                        graph1
+                        [ (vr, rr)
+                        | RegVirtual vr <- nonDetEltsUniqSet set
+                        , RegReal    rr <- nonDetEltsUniqSet set]
+                        -- See Note [Unique Determinism and code generation]
+
+   in   graph2
+
+
+-- | Add some coalescence edges to the graph.
+--   Coalescences between virtual and real regs are recorded as preferences.
+graphAddCoalesce
+        :: (Reg, Reg)
+        -> Color.Graph VirtualReg RegClass RealReg
+        -> Color.Graph VirtualReg RegClass RealReg
+
+graphAddCoalesce (r1, r2) graph
+        | RegReal rr    <- r1
+        , RegVirtual vr <- r2
+        = Color.addPreference (vr, classOfVirtualReg vr) rr graph
+
+        | RegReal rr    <- r2
+        , RegVirtual vr <- r1
+        = Color.addPreference (vr, classOfVirtualReg vr) rr graph
+
+        | RegVirtual vr1 <- r1
+        , RegVirtual vr2 <- r2
+        = Color.addCoalesce
+                (vr1, classOfVirtualReg vr1)
+                (vr2, classOfVirtualReg vr2)
+                graph
+
+        -- We can't coalesce two real regs, but there could well be existing
+        --      hreg,hreg moves in the input code. We'll just ignore these
+        --      for coalescing purposes.
+        | RegReal _ <- r1
+        , RegReal _ <- r2
+        = graph
+
+#if __GLASGOW_HASKELL__ <= 810
+        | otherwise
+        = panic "graphAddCoalesce"
+#endif
+
+
+-- | Patch registers in code using the reg -> reg mapping in this graph.
+patchRegsFromGraph
+        :: (Outputable statics, Outputable instr, Instruction instr)
+        => Platform -> Color.Graph VirtualReg RegClass RealReg
+        -> LiveCmmDecl statics instr -> LiveCmmDecl statics instr
+
+patchRegsFromGraph platform graph code
+ = patchEraseLive patchF code
+ where
+        -- Function to lookup the hardreg for a virtual reg from the graph.
+        patchF reg
+                -- leave real regs alone.
+                | RegReal{} <- reg
+                = reg
+
+                -- this virtual has a regular node in the graph.
+                | RegVirtual vr <- reg
+                , Just node     <- Color.lookupNode graph vr
+                = case Color.nodeColor node of
+                        Just color -> RegReal    color
+                        Nothing    -> RegVirtual vr
+
+                -- no node in the graph for this virtual, bad news.
+                | otherwise
+                = pprPanic "patchRegsFromGraph: register mapping failed."
+                        (  text "There is no node in the graph for register "
+                                <> ppr reg
+                        $$ ppr code
+                        $$ Color.dotGraph
+                                (\_ -> text "white")
+                                (trivColorable platform
+                                        (targetVirtualRegSqueeze platform)
+                                        (targetRealRegSqueeze platform))
+                                graph)
+
+
+-----
+-- for when laziness just isn't what you wanted...
+--      We need to deepSeq the whole graph before trying to colour it to avoid
+--      space leaks.
+seqGraph :: Color.Graph VirtualReg RegClass RealReg -> ()
+seqGraph graph = seqNodes (nonDetEltsUFM (Color.graphMap graph))
+   -- See Note [Unique Determinism and code generation]
+
+seqNodes :: [Color.Node VirtualReg RegClass RealReg] -> ()
+seqNodes ns
+ = case ns of
+        []       -> ()
+        (n : ns) -> seqNode n `seq` seqNodes ns
+
+seqNode :: Color.Node VirtualReg RegClass RealReg -> ()
+seqNode node
+        =     seqVirtualReg   (Color.nodeId node)
+        `seq` seqRegClass     (Color.nodeClass node)
+        `seq` seqMaybeRealReg (Color.nodeColor node)
+        `seq` (seqVirtualRegList (nonDetEltsUniqSet (Color.nodeConflicts node)))
+        `seq` (seqRealRegList    (nonDetEltsUniqSet (Color.nodeExclusions node)))
+        `seq` (seqRealRegList    (Color.nodePreference node))
+        `seq` (seqVirtualRegList (nonDetEltsUniqSet (Color.nodeCoalesce node)))
+              -- It's OK to use nonDetEltsUniqSet for seq
+
+seqVirtualReg :: VirtualReg -> ()
+seqVirtualReg reg = reg `seq` ()
+
+seqRealReg :: RealReg -> ()
+seqRealReg reg = reg `seq` ()
+
+seqRegClass :: RegClass -> ()
+seqRegClass c = c `seq` ()
+
+seqMaybeRealReg :: Maybe RealReg -> ()
+seqMaybeRealReg mr
+ = case mr of
+        Nothing -> ()
+        Just r  -> seqRealReg r
+
+seqVirtualRegList :: [VirtualReg] -> ()
+seqVirtualRegList rs
+ = case rs of
+        []       -> ()
+        (r : rs) -> seqVirtualReg r `seq` seqVirtualRegList rs
+
+seqRealRegList :: [RealReg] -> ()
+seqRealRegList rs
+ = case rs of
+        []       -> ()
+        (r : rs) -> seqRealReg r `seq` seqRealRegList rs
diff --git a/compiler/GHC/CmmToAsm/Reg/Graph/Base.hs b/compiler/GHC/CmmToAsm/Reg/Graph/Base.hs
new file mode 100644
index 0000000000..95fa174415
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/Reg/Graph/Base.hs
@@ -0,0 +1,163 @@
+
+-- | Utils for calculating general worst, bound, squeese and free functions.
+--
+--   as per: "A Generalized Algorithm for Graph-Coloring Register Allocation"
+--           Michael Smith, Norman Ramsey, Glenn Holloway.
+--           PLDI 2004
+--
+--   These general versions are not used in GHC proper because they are too slow.
+--   Instead, hand written optimised versions are provided for each architecture
+--   in MachRegs*.hs
+--
+--   This code is here because we can test the architecture specific code
+--   against it.
+--
+module GHC.CmmToAsm.Reg.Graph.Base (
+        RegClass(..),
+        Reg(..),
+        RegSub(..),
+
+        worst,
+        bound,
+        squeese
+) where
+
+import GhcPrelude
+
+import UniqSet
+import UniqFM
+import Unique
+import MonadUtils (concatMapM)
+
+
+-- Some basic register classes.
+--      These aren't necessarily in 1-to-1 correspondence with the allocatable
+--      RegClasses in MachRegs.hs
+data RegClass
+        -- general purpose regs
+        = ClassG32      -- 32 bit GPRs
+        | ClassG16      -- 16 bit GPRs
+        | ClassG8       -- 8  bit GPRs
+
+        -- floating point regs
+        | ClassF64      -- 64 bit FPRs
+        deriving (Show, Eq, Enum)
+
+
+-- | A register of some class
+data Reg
+        -- a register of some class
+        = Reg RegClass Int
+
+        -- a sub-component of one of the other regs
+        | RegSub RegSub Reg
+        deriving (Show, Eq)
+
+
+-- | so we can put regs in UniqSets
+instance Uniquable Reg where
+        getUnique (Reg c i)
+         = mkRegSingleUnique
+         $ fromEnum c * 1000 + i
+
+        getUnique (RegSub s (Reg c i))
+         = mkRegSubUnique
+         $ fromEnum s * 10000 + fromEnum c * 1000 + i
+
+        getUnique (RegSub _ (RegSub _ _))
+         = error "RegArchBase.getUnique: can't have a sub-reg of a sub-reg."
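+
+-- For example, an architecture-specific squeeze function can be
+-- cross-checked against the general versions below with a property
+-- like this sketch, where regsOfClass, regAlias and archSqueese are
+-- placeholders describing the architecture under test:
+--
+--      prop_squeese classN countCs
+--        = squeese regsOfClass regAlias classN countCs
+--            == archSqueese classN countCs
+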
+
+
+-- | A subcomponent of another register
+data RegSub
+        = SubL16        -- lowest 16 bits
+        | SubL8         -- lowest  8 bits
+        | SubL8H        -- second lowest 8 bits
+        deriving (Show, Enum, Ord, Eq)
+
+
+-- | Worst case displacement
+--
+--      a node N of classN has some number of neighbors,
+--      all of which are from classC.
+--
+--      (worst neighbors classN classC) is the maximum number of potential
+--      colors for N that can be lost by coloring its neighbors.
+--
+--      This should be hand coded/cached for each particular architecture,
+--      because the compute time is very long..
+worst   :: (RegClass    -> UniqSet Reg)
+        -> (Reg         -> UniqSet Reg)
+        -> Int -> RegClass -> RegClass -> Int
+
+worst regsOfClass regAlias neighbors classN classC
+ = let  regAliasS regs  = unionManyUniqSets
+                        $ map regAlias
+                        $ nonDetEltsUniqSet regs
+                        -- This is non-deterministic but we do not
+                        -- currently support deterministic code-generation.
+                        -- See Note [Unique Determinism and code generation]
+
+        -- all the regs in classes N, C
+        regsN   = regsOfClass classN
+        regsC   = regsOfClass classC
+
+        -- all the possible subsets of c which have size < m
+        regsS   = filter (\s -> sizeUniqSet s >= 1
+                             && sizeUniqSet s <= neighbors)
+                $ powersetLS regsC
+
+        -- for each of the subsets of C, the regs which conflict
+        -- with possibilities for N
+        regsS_conflict
+                = map (\s -> intersectUniqSets regsN (regAliasS s)) regsS
+
+   in   maximum $ map sizeUniqSet $ regsS_conflict
+
+
+-- | For a node N of classN and neighbors of classesC
+--      (bound classN classesC) is the maximum number of potential
+--      colors for N that can be lost by coloring its neighbors.
+bound   :: (RegClass    -> UniqSet Reg)
+        -> (Reg         -> UniqSet Reg)
+        -> RegClass -> [RegClass] -> Int
+
+bound regsOfClass regAlias classN classesC
+ = let  regAliasS regs  = unionManyUniqSets
+                        $ map regAlias
+                        $ nonDetEltsUFM regs
+                        -- See Note [Unique Determinism and code generation]
+
+        regsC_aliases
+                = unionManyUniqSets
+                $ map (regAliasS . getUniqSet . regsOfClass) classesC
+
+        overlap = intersectUniqSets (regsOfClass classN) regsC_aliases
+
+   in   sizeUniqSet overlap
+
+
+-- | The total squeese on a particular node with a list of neighbors.
+--
+--   A version of this should be constructed for each particular architecture,
+--   possibly including uses of bound, so that aliased registers don't get
+--   counted twice, as per the paper.
+squeese :: (RegClass    -> UniqSet Reg)
+        -> (Reg         -> UniqSet Reg)
+        -> RegClass -> [(Int, RegClass)] -> Int
+
+squeese regsOfClass regAlias classN countCs
+        = sum
+        $ map (\(i, classC) -> worst regsOfClass regAlias i classN classC)
+        $ countCs
+
+
+-- | powerset (for lists)
+powersetL :: [a] -> [[a]]
+powersetL = concatMapM (\x -> [[],[x]])
+
+
+-- | powersetLS (list of sets)
+powersetLS :: Uniquable a => UniqSet a -> [UniqSet a]
+powersetLS s = map mkUniqSet $ powersetL $ nonDetEltsUniqSet s
+   -- See Note [Unique Determinism and code generation]
diff --git a/compiler/GHC/CmmToAsm/Reg/Graph/Coalesce.hs b/compiler/GHC/CmmToAsm/Reg/Graph/Coalesce.hs
new file mode 100644
index 0000000000..d223137dd0
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/Reg/Graph/Coalesce.hs
@@ -0,0 +1,99 @@
+-- | Register coalescing.
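+--
+--   E.g. (an illustrative sketch, with made-up vregs): in
+--
+--      mov v1, v2      -- v1 dies here, v2 is born here
+--      use v2
+--
+--   the move only joins the live ranges of v1 and v2, so v2 can be
+--   renamed to v1 and the mov erased, leaving just
+--
+--      use v1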
+module GHC.CmmToAsm.Reg.Graph.Coalesce (
+        regCoalesce,
+        slurpJoinMovs
+) where
+import GhcPrelude
+
+import GHC.CmmToAsm.Reg.Liveness
+import GHC.CmmToAsm.Instr
+import GHC.Platform.Reg
+
+import GHC.Cmm
+import Bag
+import Digraph
+import UniqFM
+import UniqSet
+import UniqSupply
+
+
+-- | Do register coalescing on this top level thing
+--
+--   For Reg -> Reg moves, if the first reg dies at the same time the
+--   second reg is born then the mov only serves to join live ranges.
+--   The two regs can be renamed to be the same and the move instruction
+--   safely erased.
+regCoalesce
+        :: Instruction instr
+        => [LiveCmmDecl statics instr]
+        -> UniqSM [LiveCmmDecl statics instr]
+
+regCoalesce code
+ = do
+        let joins       = foldl' unionBags emptyBag
+                        $ map slurpJoinMovs code
+
+        let alloc       = foldl' buildAlloc emptyUFM
+                        $ bagToList joins
+
+        let patched     = map (patchEraseLive (sinkReg alloc)) code
+
+        return patched
+
+
+-- | Add a v1 = v2 register renaming to the map.
+--   The register with the lowest lexical name is set as the
+--   canonical version.
+buildAlloc :: UniqFM Reg -> (Reg, Reg) -> UniqFM Reg
+buildAlloc fm (r1, r2)
+ = let  rmin    = min r1 r2
+        rmax    = max r1 r2
+   in   addToUFM fm rmax rmin
+
+
+-- | Determine the canonical name for a register by following
+--   v1 = v2 renamings in this map.
+sinkReg :: UniqFM Reg -> Reg -> Reg
+sinkReg fm r
+ = case lookupUFM fm r of
+        Nothing -> r
+        Just r' -> sinkReg fm r'
+
+
+-- | Slurp out mov instructions that only serve to join live ranges.
+--
+--   During a mov, if the source reg dies and the destination reg is
+--   born then we can rename the two regs to the same thing and
+--   eliminate the move.
+slurpJoinMovs
+        :: Instruction instr
+        => LiveCmmDecl statics instr
+        -> Bag (Reg, Reg)
+
+slurpJoinMovs live
+        = slurpCmm emptyBag live
+ where
+        slurpCmm rs CmmData{}
+         = rs
+
+        slurpCmm rs (CmmProc _ _ _ sccs)
+         = foldl' slurpBlock rs (flattenSCCs sccs)
+
+        slurpBlock rs (BasicBlock _ instrs)
+         = foldl' slurpLI rs instrs
+
+        slurpLI rs (LiveInstr _ Nothing) = rs
+        slurpLI rs (LiveInstr instr (Just live))
+                | Just (r1, r2) <- takeRegRegMoveInstr instr
+                , elementOfUniqSet r1 $ liveDieRead live
+                , elementOfUniqSet r2 $ liveBorn live
+
+                -- only coalesce movs between two virtuals for now,
+                -- else we end up with allocatable regs in the live
+                -- regs list..
+                , isVirtualReg r1 && isVirtualReg r2
+                = consBag (r1, r2) rs
+
+                | otherwise
+                = rs
+
diff --git a/compiler/GHC/CmmToAsm/Reg/Graph/Spill.hs b/compiler/GHC/CmmToAsm/Reg/Graph/Spill.hs
new file mode 100644
index 0000000000..a0e11433f7
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/Reg/Graph/Spill.hs
@@ -0,0 +1,382 @@
+
+-- | When there aren't enough registers to hold all the vregs we have to spill
+--   some of those vregs to slots on the stack. This module is used to modify
+--   the code to use those slots.
+module GHC.CmmToAsm.Reg.Graph.Spill (
+        regSpill,
+        SpillStats(..),
+        accSpillSL
+) where
+import GhcPrelude
+
+import GHC.CmmToAsm.Reg.Liveness
+import GHC.CmmToAsm.Instr
+import GHC.Platform.Reg
+import GHC.Cmm hiding (RegSet)
+import GHC.Cmm.BlockId
+import GHC.Cmm.Dataflow.Collections
+
+import MonadUtils
+import State
+import Unique
+import UniqFM
+import UniqSet
+import UniqSupply
+import Outputable
+import GHC.Platform
+
+import Data.List
+import Data.Maybe
+import Data.IntSet              (IntSet)
+import qualified Data.IntSet    as IntSet
+
+
+-- | Spill all these virtual regs to stack slots.
+--
+--   Bumps the number of required stack slots if necessary.
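+--
+--   E.g. (an illustrative sketch, with made-up names): if @v1@ is chosen
+--   for spilling and assigned SLOT(0), its uses are rewritten roughly as
+--
+--      SPILL  v1a, SLOT(0)     -- after each write, via a fresh vreg
+--      ...
+--      RELOAD SLOT(0), v1b     -- before each read, via a fresh vreg
+--
+--   with the fresh vregs substituted into the adjacent instructions.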
+-- +-- +-- TODO: See if we can split some of the live ranges instead of just globally +-- spilling the virtual reg. This might make the spill cleaner's job easier. +-- +-- TODO: On CISCy x86 and x86_64 we don't necessarily have to add a mov instruction +-- when making spills. If an instr is using a spilled virtual we may be able to +-- address the spill slot directly. +-- +regSpill + :: Instruction instr + => Platform + -> [LiveCmmDecl statics instr] -- ^ the code + -> UniqSet Int -- ^ available stack slots + -> Int -- ^ current number of spill slots. + -> UniqSet VirtualReg -- ^ the regs to spill + -> UniqSM + ([LiveCmmDecl statics instr] + -- code with SPILL and RELOAD meta instructions added. + , UniqSet Int -- left over slots + , Int -- slot count in use now. + , SpillStats ) -- stats about what happened during spilling + +regSpill platform code slotsFree slotCount regs + + -- Not enough slots to spill these regs. + | sizeUniqSet slotsFree < sizeUniqSet regs + = -- pprTrace "Bumping slot count:" (ppr slotCount <> text " -> " <> ppr (slotCount+512)) $ + let slotsFree' = (addListToUniqSet slotsFree [slotCount+1 .. slotCount+512]) + in regSpill platform code slotsFree' (slotCount+512) regs + + | otherwise + = do + -- Allocate a slot for each of the spilled regs. + let slots = take (sizeUniqSet regs) $ nonDetEltsUniqSet slotsFree + let regSlotMap = listToUFM + $ zip (nonDetEltsUniqSet regs) slots + -- This is non-deterministic but we do not + -- currently support deterministic code-generation. + -- See Note [Unique Determinism and code generation] + + -- Grab the unique supply from the monad. + us <- getUniqueSupplyM + + -- Run the spiller on all the blocks. + let (code', state') = + runState (mapM (regSpill_top platform regSlotMap) code) + (initSpillS us) + + return ( code' + , minusUniqSet slotsFree (mkUniqSet slots) + , slotCount + , makeSpillStats state') + + +-- | Spill some registers to stack slots in a top-level thing. +regSpill_top + :: Instruction instr + => Platform + -> RegMap Int + -- ^ map of vregs to slots they're being spilled to. + -> LiveCmmDecl statics instr + -- ^ the top level thing. + -> SpillM (LiveCmmDecl statics instr) + +regSpill_top platform regSlotMap cmm + = case cmm of + CmmData{} + -> return cmm + + CmmProc info label live sccs + | LiveInfo static firstId liveVRegsOnEntry liveSlotsOnEntry <- info + -> do + -- The liveVRegsOnEntry contains the set of vregs that are live + -- on entry to each basic block. If we spill one of those vregs + -- we remove it from that set and add the corresponding slot + -- number to the liveSlotsOnEntry set. The spill cleaner needs + -- this information to erase unneeded spill and reload instructions + -- after we've done a successful allocation. + let liveSlotsOnEntry' :: BlockMap IntSet + liveSlotsOnEntry' + = mapFoldlWithKey patchLiveSlot + liveSlotsOnEntry liveVRegsOnEntry + + let info' + = LiveInfo static firstId + liveVRegsOnEntry + liveSlotsOnEntry' + + -- Apply the spiller to all the basic blocks in the CmmProc. + sccs' <- mapM (mapSCCM (regSpill_block platform regSlotMap)) sccs + + return $ CmmProc info' label live sccs' + + where -- Given a BlockId and the set of registers live in it, + -- if registers in this block are being spilled to stack slots, + -- then record the fact that these slots are now live in those blocks + -- in the given slotmap. 
+        patchLiveSlot
+                :: BlockMap IntSet -> BlockId -> RegSet -> BlockMap IntSet
+
+        patchLiveSlot slotMap blockId regsLive
+         = let
+                -- Slots that are already recorded as being live.
+                curSlotsLive    = fromMaybe IntSet.empty
+                                $ mapLookup blockId slotMap
+
+                moreSlotsLive   = IntSet.fromList
+                                $ catMaybes
+                                $ map (lookupUFM regSlotMap)
+                                $ nonDetEltsUniqSet regsLive
+                                -- See Note [Unique Determinism and code generation]
+
+                slotMap'
+                 = mapInsert blockId (IntSet.union curSlotsLive moreSlotsLive)
+                             slotMap
+
+           in   slotMap'
+
+
+-- | Spill some registers to stack slots in a basic block.
+regSpill_block
+        :: Instruction instr
+        => Platform
+        -> UniqFM Int   -- ^ map of vregs to slots they're being spilled to.
+        -> LiveBasicBlock instr
+        -> SpillM (LiveBasicBlock instr)
+
+regSpill_block platform regSlotMap (BasicBlock i instrs)
+ = do   instrss'        <- mapM (regSpill_instr platform regSlotMap) instrs
+        return  $ BasicBlock i (concat instrss')
+
+
+-- | Spill some registers to stack slots in a single instruction.
+--   If the instruction uses registers that need to be spilled, then it is
+--   prefixed (or postfixed) with the appropriate RELOAD or SPILL meta
+--   instructions.
+regSpill_instr
+        :: Instruction instr
+        => Platform
+        -> UniqFM Int   -- ^ map of vregs to slots they're being spilled to.
+        -> LiveInstr instr
+        -> SpillM [LiveInstr instr]
+
+regSpill_instr _ _ li@(LiveInstr _ Nothing)
+ = do   return [li]
+
+regSpill_instr platform regSlotMap
+        (LiveInstr instr (Just _))
+ = do
+        -- work out which regs are read and written in this instr
+        let RU rlRead rlWritten = regUsageOfInstr platform instr
+
+        -- sometimes a register is listed as being read more than once,
+        --      nub this so we don't end up inserting two lots of spill code.
+        let rsRead_     = nub rlRead
+        let rsWritten_  = nub rlWritten
+
+        -- if a reg is modified, it appears in both lists; we want to undo this.
+        let rsRead      = rsRead_    \\ rsWritten_
+        let rsWritten   = rsWritten_ \\ rsRead_
+        let rsModify    = intersect rsRead_ rsWritten_
+
+        -- work out if any of the regs being used are currently being spilled.
+        let rsSpillRead    = filter (\r -> elemUFM r regSlotMap) rsRead
+        let rsSpillWritten = filter (\r -> elemUFM r regSlotMap) rsWritten
+        let rsSpillModify  = filter (\r -> elemUFM r regSlotMap) rsModify
+
+        -- rewrite the instr and work out spill code.
+        (instr1, prepost1) <- mapAccumLM (spillRead   regSlotMap) instr  rsSpillRead
+        (instr2, prepost2) <- mapAccumLM (spillWrite  regSlotMap) instr1 rsSpillWritten
+        (instr3, prepost3) <- mapAccumLM (spillModify regSlotMap) instr2 rsSpillModify
+
+        let (mPrefixes, mPostfixes) = unzip (prepost1 ++ prepost2 ++ prepost3)
+        let prefixes    = concat mPrefixes
+        let postfixes   = concat mPostfixes
+
+        -- final code
+        let instrs'     =  prefixes
+                        ++ [LiveInstr instr3 Nothing]
+                        ++ postfixes
+
+        return $ instrs'
+
+
+-- | Add a RELOAD meta instruction to load a value for an instruction that
+--   reads from a vreg that is being spilled.
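+--
+--   E.g. (an illustrative sketch, with made-up names): with @v1@ spilled
+--   to SLOT(3), an instruction reading @v1@ is rewritten to read a fresh
+--   vreg @v1'@ and is prefixed with a reload:
+--
+--      RELOAD SLOT(3), v1'
+--      <the instruction, now using v1'>
+--
+--   Using a fresh vreg keeps the reloaded value live only briefly, which
+--   is what gives the next build/color pass a chance to succeed.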
+spillRead
+        :: Instruction instr
+        => UniqFM Int
+        -> instr
+        -> Reg
+        -> SpillM (instr, ([LiveInstr instr'], [LiveInstr instr']))
+
+spillRead regSlotMap instr reg
+        | Just slot     <- lookupUFM regSlotMap reg
+        = do    (instr', nReg)  <- patchInstr reg instr
+
+                modify $ \s -> s
+                  { stateSpillSL = addToUFM_C accSpillSL (stateSpillSL s) reg (reg, 0, 1) }
+
+                return  ( instr'
+                        , ( [LiveInstr (RELOAD slot nReg) Nothing]
+                          , []) )
+
+        | otherwise = panic "RegSpill.spillRead: no slot defined for spilled reg"
+
+
+-- | Add a SPILL meta instruction to store a value for an instruction that
+--   writes to a vreg that is being spilled.
+spillWrite
+        :: Instruction instr
+        => UniqFM Int
+        -> instr
+        -> Reg
+        -> SpillM (instr, ([LiveInstr instr'], [LiveInstr instr']))
+
+spillWrite regSlotMap instr reg
+        | Just slot     <- lookupUFM regSlotMap reg
+        = do    (instr', nReg)  <- patchInstr reg instr
+
+                modify $ \s -> s
+                  { stateSpillSL = addToUFM_C accSpillSL (stateSpillSL s) reg (reg, 1, 0) }
+
+                return  ( instr'
+                        , ( []
+                          , [LiveInstr (SPILL nReg slot) Nothing]))
+
+        | otherwise = panic "RegSpill.spillWrite: no slot defined for spilled reg"
+
+
+-- | Add both RELOAD and SPILL meta instructions for an instruction that
+--   both reads and writes to a vreg that is being spilled.
+spillModify
+        :: Instruction instr
+        => UniqFM Int
+        -> instr
+        -> Reg
+        -> SpillM (instr, ([LiveInstr instr'], [LiveInstr instr']))
+
+spillModify regSlotMap instr reg
+        | Just slot     <- lookupUFM regSlotMap reg
+        = do    (instr', nReg)  <- patchInstr reg instr
+
+                modify $ \s -> s
+                  { stateSpillSL = addToUFM_C accSpillSL (stateSpillSL s) reg (reg, 1, 1) }
+
+                return  ( instr'
+                        , ( [LiveInstr (RELOAD slot nReg) Nothing]
+                          , [LiveInstr (SPILL nReg slot) Nothing]))
+
+        | otherwise = panic "RegSpill.spillModify: no slot defined for spilled reg"
+
+
+-- | Rewrite uses of this virtual reg in an instr to use a different
+--   virtual reg.
+patchInstr
+        :: Instruction instr
+        => Reg -> instr -> SpillM (instr, Reg)
+
+patchInstr reg instr
+ = do   nUnique <- newUnique
+
+        -- The register we're rewriting is supposed to be virtual.
+        -- If it's not then something has gone horribly wrong.
+        let nReg
+             = case reg of
+                RegVirtual vr
+                 -> RegVirtual (renameVirtualReg nUnique vr)
+
+                RegReal{}
+                 -> panic "RegAlloc.Graph.Spill.patchInstr: not patching real reg"
+
+        let instr'      = patchReg1 reg nReg instr
+        return (instr', nReg)
+
+
+patchReg1
+        :: Instruction instr
+        => Reg -> Reg -> instr -> instr
+
+patchReg1 old new instr
+ = let  patchF r
+                | r == old      = new
+                | otherwise     = r
+   in   patchRegsOfInstr instr patchF
+
+
+-- Spiller monad --------------------------------------------------------------
+-- | State monad for the spill code generator.
+type SpillM a
+        = State SpillS a
+
+-- | Spill code generator state.
+data SpillS
+        = SpillS
+        { -- | Unique supply for generating fresh vregs.
+          stateUS       :: UniqSupply
+
+          -- | Spilled vreg vs the number of times it was loaded, stored.
+        , stateSpillSL  :: UniqFM (Reg, Int, Int) }
+
+
+-- | Create a new spiller state.
+initSpillS :: UniqSupply -> SpillS
+initSpillS uniqueSupply
+        = SpillS
+        { stateUS       = uniqueSupply
+        , stateSpillSL  = emptyUFM }
+
+
+-- | Allocate a new unique in the spiller monad.
+newUnique :: SpillM Unique
+newUnique
+ = do   us      <- gets stateUS
+        case takeUniqFromSupply us of
+         (uniq, us')
+          -> do modify $ \s -> s { stateUS = us' }
+                return uniq
+
+
+-- | Add a spill/reload count to a stats record for a register.
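+--
+--   E.g. accSpillSL (v1, 1, 0) (v1, 0, 1) == (v1, 1, 1), keeping the
+--   register from the first record and summing the store and load counts.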
+accSpillSL :: (Reg, Int, Int) -> (Reg, Int, Int) -> (Reg, Int, Int) +accSpillSL (r1, s1, l1) (_, s2, l2) + = (r1, s1 + s2, l1 + l2) + + +-- Spiller stats -------------------------------------------------------------- +-- | Spiller statistics. +-- Tells us what registers were spilled. +data SpillStats + = SpillStats + { spillStoreLoad :: UniqFM (Reg, Int, Int) } + + +-- | Extract spiller statistics from the spiller state. +makeSpillStats :: SpillS -> SpillStats +makeSpillStats s + = SpillStats + { spillStoreLoad = stateSpillSL s } + + +instance Outputable SpillStats where + ppr stats + = pprUFM (spillStoreLoad stats) + (vcat . map (\(r, s, l) -> ppr r <+> int s <+> int l)) diff --git a/compiler/GHC/CmmToAsm/Reg/Graph/SpillClean.hs b/compiler/GHC/CmmToAsm/Reg/Graph/SpillClean.hs new file mode 100644 index 0000000000..6d14c7194b --- /dev/null +++ b/compiler/GHC/CmmToAsm/Reg/Graph/SpillClean.hs @@ -0,0 +1,616 @@ +{-# LANGUAGE CPP #-} + +-- | Clean out unneeded spill\/reload instructions. +-- +-- Handling of join points +-- ~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- B1: B2: +-- ... ... +-- RELOAD SLOT(0), %r1 RELOAD SLOT(0), %r1 +-- ... A ... ... B ... +-- jump B3 jump B3 +-- +-- B3: ... C ... +-- RELOAD SLOT(0), %r1 +-- ... +-- +-- The Plan +-- ~~~~~~~~ +-- As long as %r1 hasn't been written to in A, B or C then we don't need +-- the reload in B3. +-- +-- What we really care about here is that on the entry to B3, %r1 will +-- always have the same value that is in SLOT(0) (ie, %r1 is _valid_) +-- +-- This also works if the reloads in B1\/B2 were spills instead, because +-- spilling %r1 to a slot makes that slot have the same value as %r1. +-- +module GHC.CmmToAsm.Reg.Graph.SpillClean ( + cleanSpills +) where +import GhcPrelude + +import GHC.CmmToAsm.Reg.Liveness +import GHC.CmmToAsm.Instr +import GHC.Platform.Reg + +import GHC.Cmm.BlockId +import GHC.Cmm +import UniqSet +import UniqFM +import Unique +import State +import Outputable +import GHC.Platform +import GHC.Cmm.Dataflow.Collections + +import Data.List +import Data.Maybe +import Data.IntSet (IntSet) +import qualified Data.IntSet as IntSet + + +-- | The identification number of a spill slot. +-- A value is stored in a spill slot when we don't have a free +-- register to hold it. +type Slot = Int + + +-- | Clean out unneeded spill\/reloads from this top level thing. +cleanSpills + :: Instruction instr + => Platform + -> LiveCmmDecl statics instr + -> LiveCmmDecl statics instr + +cleanSpills platform cmm + = evalState (cleanSpin platform 0 cmm) initCleanS + + +-- | Do one pass of cleaning. +cleanSpin + :: Instruction instr + => Platform + -> Int -- ^ Iteration number for the cleaner. + -> LiveCmmDecl statics instr -- ^ Liveness annotated code to clean. + -> CleanM (LiveCmmDecl statics instr) + +cleanSpin platform spinCount code + = do + -- Initialise count of cleaned spill and reload instructions. + modify $ \s -> s + { sCleanedSpillsAcc = 0 + , sCleanedReloadsAcc = 0 + , sReloadedBy = emptyUFM } + + code_forward <- mapBlockTopM (cleanBlockForward platform) code + code_backward <- cleanTopBackward code_forward + + -- During the cleaning of each block we collected information about + -- what regs were valid across each jump. Based on this, work out + -- whether it will be safe to erase reloads after join points for + -- the next pass. + collateJoinPoints + + -- Remember how many spill and reload instructions we cleaned in this pass. 
+        spills          <- gets sCleanedSpillsAcc
+        reloads         <- gets sCleanedReloadsAcc
+        modify $ \s -> s
+                { sCleanedCount = (spills, reloads) : sCleanedCount s }
+
+        -- If nothing was cleaned in this pass or the last one
+        --      then we're done and it's time to bail out.
+        cleanedCount    <- gets sCleanedCount
+        if take 2 cleanedCount == [(0, 0), (0, 0)]
+           then return code
+
+        -- otherwise go around again
+           else cleanSpin platform (spinCount + 1) code_backward
+
+
+-------------------------------------------------------------------------------
+-- | Clean out unneeded reload instructions,
+--   while walking forward over the code.
+cleanBlockForward
+        :: Instruction instr
+        => Platform
+        -> LiveBasicBlock instr
+        -> CleanM (LiveBasicBlock instr)
+
+cleanBlockForward platform (BasicBlock blockId instrs)
+ = do
+        -- See if we have a valid association for the entry to this block.
+        jumpValid       <- gets sJumpValid
+        let assoc       = case lookupUFM jumpValid blockId of
+                                Just assoc      -> assoc
+                                Nothing         -> emptyAssoc
+
+        instrs_reload   <- cleanForward platform blockId assoc [] instrs
+        return  $ BasicBlock blockId instrs_reload
+
+
+
+-- | Clean out unneeded reload instructions.
+--
+--   Walking forwards across the code:
+--     on a reload, if we know a reg already has the same value as a slot
+--     then we don't need to do the reload.
+--
+cleanForward
+        :: Instruction instr
+        => Platform
+        -> BlockId                      -- ^ the block that we're currently in
+        -> Assoc Store                  -- ^ two store locations are associated if
+                                        --   they have the same value
+        -> [LiveInstr instr]            -- ^ acc
+        -> [LiveInstr instr]            -- ^ instrs to clean (in forward order)
+        -> CleanM [LiveInstr instr]     -- ^ cleaned instrs (in reverse order)
+
+cleanForward _ _ _ acc []
+        = return acc
+
+-- Rewrite live range joins via spill slots to just a spill and a reg-reg move;
+--      hopefully the spill will also be cleaned in the next pass.
+cleanForward platform blockId assoc acc (li1 : li2 : instrs)
+
+        | LiveInstr (SPILL  reg1 slot1) _ <- li1
+        , LiveInstr (RELOAD slot2 reg2) _ <- li2
+        , slot1 == slot2
+        = do
+                modify $ \s -> s { sCleanedReloadsAcc = sCleanedReloadsAcc s + 1 }
+                cleanForward platform blockId assoc acc
+                        $ li1 : LiveInstr (mkRegRegMoveInstr platform reg1 reg2) Nothing
+                        : instrs
+
+cleanForward platform blockId assoc acc (li@(LiveInstr i1 _) : instrs)
+        | Just (r1, r2) <- takeRegRegMoveInstr i1
+        = if r1 == r2
+                -- Erase any left over nop reg reg moves while we're here;
+                --      this will also catch any nop moves that the previous case
+                --      happens to add.
+                then cleanForward platform blockId assoc acc instrs
+
+                -- If r1 has the same value as some slots and we copy r1 to r2,
+                --      then r2 is now associated with those slots instead.
+                else do let assoc'      = addAssoc (SReg r1) (SReg r2)
+                                        $ delAssoc (SReg r2)
+                                        $ assoc
+
+                        cleanForward platform blockId assoc' (li : acc) instrs
+
+
+cleanForward platform blockId assoc acc (li : instrs)
+
+        -- Update association due to the spill.
+        | LiveInstr (SPILL reg slot) _ <- li
+        = let   assoc'  = addAssoc (SReg reg)  (SSlot slot)
+                        $ delAssoc (SSlot slot)
+                        $ assoc
+          in    cleanForward platform blockId assoc' (li : acc) instrs
+
+        -- Clean a reload instr.
+        | LiveInstr (RELOAD{}) _ <- li
+        = do    (assoc', mli)   <- cleanReload platform blockId assoc li
+                case mli of
+                 Nothing  -> cleanForward platform blockId assoc' acc
+                                instrs
+
+                 Just li' -> cleanForward platform blockId assoc' (li' : acc)
+                                instrs
+
+        -- Remember the association over a jump.
+        | LiveInstr instr _     <- li
+        , targets               <- jumpDestsOfInstr instr
+        , not $ null targets
+        = do    mapM_ (accJumpValid assoc) targets
+                cleanForward platform blockId assoc (li : acc) instrs
+
+        -- Writing to a reg changes its value.
+        | LiveInstr instr _     <- li
+        , RU _ written          <- regUsageOfInstr platform instr
+        = let   assoc'  = foldr delAssoc assoc (map SReg $ nub written)
+          in    cleanForward platform blockId assoc' (li : acc) instrs
+
+
+
+-- | Try and rewrite a reload instruction to something more pleasing
+cleanReload
+        :: Instruction instr
+        => Platform
+        -> BlockId
+        -> Assoc Store
+        -> LiveInstr instr
+        -> CleanM (Assoc Store, Maybe (LiveInstr instr))
+
+cleanReload platform blockId assoc li@(LiveInstr (RELOAD slot reg) _)
+
+        -- If the reg we're reloading already has the same value as the slot
+        --      then we can erase the instruction outright.
+        | elemAssoc (SSlot slot) (SReg reg) assoc
+        = do    modify $ \s -> s { sCleanedReloadsAcc = sCleanedReloadsAcc s + 1 }
+                return  (assoc, Nothing)
+
+        -- If we can find another reg with the same value as this slot then
+        --      do a move instead of a reload.
+        | Just reg2 <- findRegOfSlot assoc slot
+        = do    modify $ \s -> s { sCleanedReloadsAcc = sCleanedReloadsAcc s + 1 }
+
+                let assoc'      = addAssoc (SReg reg) (SReg reg2)
+                                $ delAssoc (SReg reg)
+                                $ assoc
+
+                return  ( assoc'
+                        , Just $ LiveInstr (mkRegRegMoveInstr platform reg2 reg) Nothing)
+
+        -- Gotta keep this instr.
+        | otherwise
+        = do    -- Update the association.
+                let assoc'
+                        = addAssoc (SReg reg) (SSlot slot)
+                          -- doing the reload makes reg and slot the same value
+                        $ delAssoc (SReg reg)
+                          -- reg value changes on reload
+                        $ assoc
+
+                -- Remember that this block reloads from this slot.
+                accBlockReloadsSlot blockId slot
+
+                return  (assoc', Just li)
+
+cleanReload _ _ _ _
+        = panic "RegSpillClean.cleanReload: unhandled instr"
+
+
+-------------------------------------------------------------------------------
+-- | Clean out unneeded spill instructions,
+--   while walking backwards over the code.
+--
+--   If there were no reloads from a slot between one spill to it and the next,
+--   then the first spill was never read and we don't need it.
+--
+--      SPILL   r0 -> s1
+--      RELOAD  s1 -> r2
+--      SPILL   r3 -> s1        <--- don't need this spill
+--      SPILL   r4 -> s1
+--      RELOAD  s1 -> r5
+--
+--   Maintain a set of
+--      "slots which were spilled to but not reloaded from yet"
+--
+--   Walking backwards across the code:
+--    a) On a reload from a slot, remove it from the set.
+--
+--    b) On a spill to a slot:
+--       If the slot is in the set then we can erase the spill,
+--          because it won't be reloaded from until after the next spill.
+--
+--       otherwise
+--          keep the spill and add the slot to the set
+--
+--   TODO: This is mostly intra-block;
+--         we should really be updating the noReloads set as we cross jumps also.
+--
+--   TODO: generate noReloads from liveSlotsOnEntry
+--
+cleanTopBackward
+        :: Instruction instr
+        => LiveCmmDecl statics instr
+        -> CleanM (LiveCmmDecl statics instr)
+
+cleanTopBackward cmm
+ = case cmm of
+        CmmData{}
+         -> return cmm
+
+        CmmProc info label live sccs
+         | LiveInfo _ _ _ liveSlotsOnEntry <- info
+         -> do  sccs'   <- mapM (mapSCCM (cleanBlockBackward liveSlotsOnEntry)) sccs
+                return  $ CmmProc info label live sccs'
+
+
+cleanBlockBackward
+        :: Instruction instr
+        => BlockMap IntSet
+        -> LiveBasicBlock instr
+        -> CleanM (LiveBasicBlock instr)
+
+cleanBlockBackward liveSlotsOnEntry (BasicBlock blockId instrs)
+ = do   instrs_spill    <- cleanBackward liveSlotsOnEntry emptyUniqSet [] instrs
+        return  $ BasicBlock blockId instrs_spill
+
+
+
+cleanBackward
+        :: Instruction instr
+        => BlockMap IntSet          -- ^ Slots live on entry to each block
+        -> UniqSet Int              -- ^ Slots that have been spilled, but not
+                                    --   reloaded from yet.
+        -> [LiveInstr instr]        -- ^ acc
+        -> [LiveInstr instr]        -- ^ Instrs to clean (in reverse order)
+        -> CleanM [LiveInstr instr] -- ^ Cleaned instrs (in forward order)
+
+cleanBackward liveSlotsOnEntry noReloads acc lis
+ = do   reloadedBy      <- gets sReloadedBy
+        cleanBackward' liveSlotsOnEntry reloadedBy noReloads acc lis
+
+
+cleanBackward'
+        :: Instruction instr
+        => BlockMap IntSet
+        -> UniqFM [BlockId]
+        -> UniqSet Int
+        -> [LiveInstr instr]
+        -> [LiveInstr instr]
+        -> State CleanS [LiveInstr instr]
+
+cleanBackward' _ _ _ acc []
+        = return acc
+
+cleanBackward' liveSlotsOnEntry reloadedBy noReloads acc (li : instrs)
+
+        -- If nothing ever reloads from this slot then we don't need the spill.
+        | LiveInstr (SPILL _ slot) _    <- li
+        , Nothing                       <- lookupUFM reloadedBy (SSlot slot)
+        = do    modify $ \s -> s { sCleanedSpillsAcc = sCleanedSpillsAcc s + 1 }
+                cleanBackward liveSlotsOnEntry noReloads acc instrs
+
+        | LiveInstr (SPILL _ slot) _    <- li
+        = if elementOfUniqSet slot noReloads
+
+           -- We can erase this spill because the slot won't be read until
+           -- after the next one.
+           then do
+                modify $ \s -> s { sCleanedSpillsAcc = sCleanedSpillsAcc s + 1 }
+                cleanBackward liveSlotsOnEntry noReloads acc instrs
+
+           else do
+                -- This slot is being spilled to, but we haven't seen any
+                -- reloads yet.
+                let noReloads'  = addOneToUniqSet noReloads slot
+                cleanBackward liveSlotsOnEntry noReloads' (li : acc) instrs
+
+        -- if we reload from a slot then it's no longer unused
+        | LiveInstr (RELOAD slot _) _   <- li
+        , noReloads'                    <- delOneFromUniqSet noReloads slot
+        = cleanBackward liveSlotsOnEntry noReloads' (li : acc) instrs
+
+        -- If a slot is live in a jump target then assume it's reloaded there.
+        --
+        -- TODO: A real dataflow analysis would do a better job here.
+        --       If the target block _ever_ used the slot then we assume
+        --       it always does, but if those reloads are cleaned the slot
+        --       liveness map doesn't get updated.
+        | LiveInstr instr _     <- li
+        , targets               <- jumpDestsOfInstr instr
+        = do
+                let slotsReloadedByTargets
+                        = IntSet.unions
+                        $ catMaybes
+                        $ map (flip mapLookup liveSlotsOnEntry)
+                        $ targets
+
+                let noReloads'
+                        = foldl' delOneFromUniqSet noReloads
+                        $ IntSet.toList slotsReloadedByTargets
+
+                cleanBackward liveSlotsOnEntry noReloads' (li : acc) instrs
+
+#if __GLASGOW_HASKELL__ <= 810
+        -- some other instruction
+        | otherwise
+        = cleanBackward liveSlotsOnEntry noReloads (li : acc) instrs
+#endif
+
+
+-- | Combine the associations from all the inward control flow edges.
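+--
+--   E.g. (an illustrative sketch): if block C has predecessors A and B,
+--   where A jumps off with { %r1 = SLOT(0), %r2 = SLOT(1) } valid and B
+--   jumps off with only { %r1 = SLOT(0) } valid, then the intersection
+--   { %r1 = SLOT(0) } is what may be assumed on entry to C, so only
+--   reloads of SLOT(0) into %r1 can be erased there.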
+--
+collateJoinPoints :: CleanM ()
+collateJoinPoints
+ = modify $ \s -> s
+        { sJumpValid    = mapUFM intersects (sJumpValidAcc s)
+        , sJumpValidAcc = emptyUFM }
+
+intersects :: [Assoc Store] -> Assoc Store
+intersects []           = emptyAssoc
+intersects assocs       = foldl1' intersectAssoc assocs
+
+
+-- | See if we have a reg with the same value as this slot in the association
+--   table.
+findRegOfSlot :: Assoc Store -> Int -> Maybe Reg
+findRegOfSlot assoc slot
+        | close                 <- closeAssoc (SSlot slot) assoc
+        , Just (SReg reg)       <- find isStoreReg $ nonDetEltsUniqSet close
+           -- See Note [Unique Determinism and code generation]
+        = Just reg
+
+        | otherwise
+        = Nothing
+
+
+-------------------------------------------------------------------------------
+-- | Cleaner monad.
+type CleanM
+        = State CleanS
+
+-- | Cleaner state.
+data CleanS
+        = CleanS
+        { -- | Regs which are valid at the start of each block.
+          sJumpValid            :: UniqFM (Assoc Store)
+
+          -- | Collecting up what regs were valid across each jump.
+          --   In the next pass we can collate these and write the results
+          --   to sJumpValid.
+        , sJumpValidAcc         :: UniqFM [Assoc Store]
+
+          -- | Map of (slot -> blocks which reload from this slot),
+          --   used to decide whether a slot spilled to will ever be
+          --   reloaded from on this path.
+        , sReloadedBy           :: UniqFM [BlockId]
+
+          -- | Spills and reloads cleaned each pass (latest at front)
+        , sCleanedCount         :: [(Int, Int)]
+
+          -- | Spills and reloads that have been cleaned in this pass so far.
+        , sCleanedSpillsAcc     :: Int
+        , sCleanedReloadsAcc    :: Int }
+
+
+-- | Construct the initial cleaner state.
+initCleanS :: CleanS
+initCleanS
+        = CleanS
+        { sJumpValid            = emptyUFM
+        , sJumpValidAcc         = emptyUFM
+
+        , sReloadedBy           = emptyUFM
+
+        , sCleanedCount         = []
+
+        , sCleanedSpillsAcc     = 0
+        , sCleanedReloadsAcc    = 0 }
+
+
+-- | Remember the associations before a jump.
+accJumpValid :: Assoc Store -> BlockId -> CleanM ()
+accJumpValid assocs target
+ = modify $ \s -> s {
+        sJumpValid = addToUFM_C (++)
+                        (sJumpValidAcc s)
+                        target
+                        [assocs] }
+
+
+accBlockReloadsSlot :: BlockId -> Slot -> CleanM ()
+accBlockReloadsSlot blockId slot
+ = modify $ \s -> s {
+        sReloadedBy = addToUFM_C (++)
+                        (sReloadedBy s)
+                        (SSlot slot)
+                        [blockId] }
+
+
+-------------------------------------------------------------------------------
+-- A store location can be a stack slot or a register
+data Store
+        = SSlot Int
+        | SReg  Reg
+
+
+-- | Check if this is a reg store.
+isStoreReg :: Store -> Bool
+isStoreReg ss
+ = case ss of
+        SSlot _ -> False
+        SReg  _ -> True
+
+
+-- Spill cleaning is only done once all virtuals have been allocated to
+-- realRegs.
+instance Uniquable Store where
+    getUnique (SReg r)
+        | RegReal (RealRegSingle i)     <- r
+        = mkRegSingleUnique i
+
+        | RegReal (RealRegPair r1 r2)   <- r
+        = mkRegPairUnique (r1 * 65535 + r2)
+
+        | otherwise
+        = error $ "RegSpillClean.getUnique: found virtual reg during spill clean, "
+                ++ "only real regs expected."
+
+    getUnique (SSlot i) = mkRegSubUnique i    -- [SLPJ] I hope "SubUnique" is ok
+
+
+instance Outputable Store where
+        ppr (SSlot i)   = text "slot" <> int i
+        ppr (SReg  r)   = ppr r
+
+
+-------------------------------------------------------------------------------
+-- Association graphs.
+--      In the spill cleaner, two store locations are associated if they are
+--      known to hold the same value.
+--
+type Assoc a    = UniqFM (UniqSet a)
+
+-- | An empty association
+emptyAssoc :: Assoc a
+emptyAssoc      = emptyUFM
+
+
+-- | Add an association between these two things.
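+--   The association is recorded symmetrically, so e.g.
+--
+--      elemAssoc (SSlot 0) (SReg r) (addAssoc (SReg r) (SSlot 0) emptyAssoc)
+--
+--   is True, as is the same query with the two stores swapped.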
+addAssoc :: Uniquable a + => a -> a -> Assoc a -> Assoc a + +addAssoc a b m + = let m1 = addToUFM_C unionUniqSets m a (unitUniqSet b) + m2 = addToUFM_C unionUniqSets m1 b (unitUniqSet a) + in m2 + + +-- | Delete all associations to a node. +delAssoc :: (Uniquable a) + => a -> Assoc a -> Assoc a + +delAssoc a m + | Just aSet <- lookupUFM m a + , m1 <- delFromUFM m a + = nonDetFoldUniqSet (\x m -> delAssoc1 x a m) m1 aSet + -- It's OK to use nonDetFoldUFM here because deletion is commutative + + | otherwise = m + + +-- | Delete a single association edge (a -> b). +delAssoc1 :: Uniquable a + => a -> a -> Assoc a -> Assoc a + +delAssoc1 a b m + | Just aSet <- lookupUFM m a + = addToUFM m a (delOneFromUniqSet aSet b) + + | otherwise = m + + +-- | Check if these two things are associated. +elemAssoc :: (Uniquable a) + => a -> a -> Assoc a -> Bool + +elemAssoc a b m + = elementOfUniqSet b (closeAssoc a m) + + +-- | Find the refl. trans. closure of the association from this point. +closeAssoc :: (Uniquable a) + => a -> Assoc a -> UniqSet a + +closeAssoc a assoc + = closeAssoc' assoc emptyUniqSet (unitUniqSet a) + where + closeAssoc' assoc visited toVisit + = case nonDetEltsUniqSet toVisit of + -- See Note [Unique Determinism and code generation] + + -- nothing else to visit, we're done + [] -> visited + + (x:_) + -- we've already seen this node + | elementOfUniqSet x visited + -> closeAssoc' assoc visited (delOneFromUniqSet toVisit x) + + -- haven't seen this node before, + -- remember to visit all its neighbors + | otherwise + -> let neighbors + = case lookupUFM assoc x of + Nothing -> emptyUniqSet + Just set -> set + + in closeAssoc' assoc + (addOneToUniqSet visited x) + (unionUniqSets toVisit neighbors) + +-- | Intersect two associations. +intersectAssoc :: Assoc a -> Assoc a -> Assoc a +intersectAssoc a b + = intersectUFM_C (intersectUniqSets) a b diff --git a/compiler/GHC/CmmToAsm/Reg/Graph/SpillCost.hs b/compiler/GHC/CmmToAsm/Reg/Graph/SpillCost.hs new file mode 100644 index 0000000000..e3e456e98d --- /dev/null +++ b/compiler/GHC/CmmToAsm/Reg/Graph/SpillCost.hs @@ -0,0 +1,317 @@ +{-# LANGUAGE ScopedTypeVariables, GADTs, BangPatterns #-} +module GHC.CmmToAsm.Reg.Graph.SpillCost ( + SpillCostRecord, + plusSpillCostRecord, + pprSpillCostRecord, + + SpillCostInfo, + zeroSpillCostInfo, + plusSpillCostInfo, + + slurpSpillCostInfo, + chooseSpill, + + lifeMapFromSpillCostInfo +) where +import GhcPrelude + +import GHC.CmmToAsm.Reg.Liveness +import GHC.CmmToAsm.Instr +import GHC.Platform.Reg.Class +import GHC.Platform.Reg + +import GraphBase + +import GHC.Cmm.Dataflow.Collections (mapLookup) +import GHC.Cmm.Dataflow.Label +import GHC.Cmm +import UniqFM +import UniqSet +import Digraph (flattenSCCs) +import Outputable +import GHC.Platform +import State +import GHC.CmmToAsm.CFG + +import Data.List (nub, minimumBy) +import Data.Maybe +import Control.Monad (join) + + +-- | Records the expected cost to spill some register. +type SpillCostRecord + = ( VirtualReg -- register name + , Int -- number of writes to this reg + , Int -- number of reads from this reg + , Int) -- number of instrs this reg was live on entry to + + +-- | Map of `SpillCostRecord` +type SpillCostInfo + = UniqFM SpillCostRecord + +type SpillCostState = State (UniqFM SpillCostRecord) () + +-- | An empty map of spill costs. +zeroSpillCostInfo :: SpillCostInfo +zeroSpillCostInfo = emptyUFM + + +-- | Add two spill cost infos. 
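+--   Counts are summed pointwise per register, e.g.
+--
+--      plusSpillCostRecord (v1, 1, 2, 10) (v1, 0, 3, 5) == (v1, 1, 5, 15)
+--
+--   sums the write, read and lifetime counts for v1.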
+plusSpillCostInfo :: SpillCostInfo -> SpillCostInfo -> SpillCostInfo +plusSpillCostInfo sc1 sc2 + = plusUFM_C plusSpillCostRecord sc1 sc2 + + +-- | Add two spill cost records. +plusSpillCostRecord :: SpillCostRecord -> SpillCostRecord -> SpillCostRecord +plusSpillCostRecord (r1, a1, b1, c1) (r2, a2, b2, c2) + | r1 == r2 = (r1, a1 + a2, b1 + b2, c1 + c2) + | otherwise = error "RegSpillCost.plusRegInt: regs don't match" + + +-- | Slurp out information used for determining spill costs. +-- +-- For each vreg, the number of times it was written to, read from, +-- and the number of instructions it was live on entry to (lifetime) +-- +slurpSpillCostInfo :: forall instr statics. (Outputable instr, Instruction instr) + => Platform + -> Maybe CFG + -> LiveCmmDecl statics instr + -> SpillCostInfo + +slurpSpillCostInfo platform cfg cmm + = execState (countCmm cmm) zeroSpillCostInfo + where + countCmm CmmData{} = return () + countCmm (CmmProc info _ _ sccs) + = mapM_ (countBlock info freqMap) + $ flattenSCCs sccs + where + LiveInfo _ entries _ _ = info + freqMap = (fst . mkGlobalWeights (head entries)) <$> cfg + + -- Lookup the regs that are live on entry to this block in + -- the info table from the CmmProc. + countBlock info freqMap (BasicBlock blockId instrs) + | LiveInfo _ _ blockLive _ <- info + , Just rsLiveEntry <- mapLookup blockId blockLive + , rsLiveEntry_virt <- takeVirtuals rsLiveEntry + = countLIs (ceiling $ blockFreq freqMap blockId) rsLiveEntry_virt instrs + + | otherwise + = error "RegAlloc.SpillCost.slurpSpillCostInfo: bad block" + + + countLIs :: Int -> UniqSet VirtualReg -> [LiveInstr instr] -> SpillCostState + countLIs _ _ [] + = return () + + -- Skip over comment and delta pseudo instrs. + countLIs scale rsLive (LiveInstr instr Nothing : lis) + | isMetaInstr instr + = countLIs scale rsLive lis + + | otherwise + = pprPanic "RegSpillCost.slurpSpillCostInfo" + $ text "no liveness information on instruction " <> ppr instr + + countLIs scale rsLiveEntry (LiveInstr instr (Just live) : lis) + = do + -- Increment the lifetime counts for regs live on entry to this instr. + mapM_ incLifetime $ nonDetEltsUniqSet rsLiveEntry + -- This is non-deterministic but we do not + -- currently support deterministic code-generation. + -- See Note [Unique Determinism and code generation] + + -- Increment counts for what regs were read/written from. + let (RU read written) = regUsageOfInstr platform instr + mapM_ (incUses scale) $ catMaybes $ map takeVirtualReg $ nub read + mapM_ (incDefs scale) $ catMaybes $ map takeVirtualReg $ nub written + + -- Compute liveness for entry to next instruction. + let liveDieRead_virt = takeVirtuals (liveDieRead live) + let liveDieWrite_virt = takeVirtuals (liveDieWrite live) + let liveBorn_virt = takeVirtuals (liveBorn live) + + let rsLiveAcross + = rsLiveEntry `minusUniqSet` liveDieRead_virt + + let rsLiveNext + = (rsLiveAcross `unionUniqSets` liveBorn_virt) + `minusUniqSet` liveDieWrite_virt + + countLIs scale rsLiveNext lis + + incDefs count reg = modify $ \s -> addToUFM_C plusSpillCostRecord s reg (reg, count, 0, 0) + incUses count reg = modify $ \s -> addToUFM_C plusSpillCostRecord s reg (reg, 0, count, 0) + incLifetime reg = modify $ \s -> addToUFM_C plusSpillCostRecord s reg (reg, 0, 0, 1) + + blockFreq :: Maybe (LabelMap Double) -> Label -> Double + blockFreq freqs bid + | Just freq <- join (mapLookup bid <$> freqs) + = max 1.0 (10000 * freq) + | otherwise + = 1.0 -- Only if no cfg given + +-- | Take all the virtual registers from this set. 
+takeVirtuals :: UniqSet Reg -> UniqSet VirtualReg
+takeVirtuals set = mkUniqSet
+  [ vr | RegVirtual vr <- nonDetEltsUniqSet set ]
+  -- See Note [Unique Determinism and code generation]
+
+
+-- | Choose a node to spill from this graph.
+chooseSpill
+  :: SpillCostInfo
+  -> Graph VirtualReg RegClass RealReg
+  -> VirtualReg
+
+chooseSpill info graph
+  = let cost = spillCost_length info graph
+        node = minimumBy (\n1 n2 -> compare (cost $ nodeId n1) (cost $ nodeId n2))
+             $ nonDetEltsUFM $ graphMap graph
+             -- See Note [Unique Determinism and code generation]
+
+    in nodeId node
+
+
+-------------------------------------------------------------------------------
+-- | Chaitin's spill cost function is:
+--
+--   cost = sum loadCost * freq (u)  +  sum storeCost * freq (d)
+--          u <- uses (v)               d <- defs (v)
+--
+-- There are no loops in our code at the moment, so we can set the freqs to 1.
+--
+-- If we don't have live range splitting then Chaitin's function performs
+-- badly if we have lots of nested live ranges and very few registers.
+--
+--                v1   v2   v3
+--   def v1       .
+--   use v1       .
+--   def v2       .    .
+--   def v3       .    .    .
+--   use v1       .    .    .
+--   use v3       .    .    .
+--   use v2       .    .
+--   use v1       .
+--
+--        defs   uses   degree   cost
+--   v1:  1      3      3        1.5
+--   v2:  1      2      3        1.0
+--   v3:  1      1      3        0.666
+--
+-- v3 has the lowest cost, but if we only have 2 hardregs and we insert
+-- spill code for v3 then this isn't going to improve the colorability of
+-- the graph.
+--
+-- When compiling SHA1, which has very long basic blocks and some vregs
+-- with very long live ranges, the allocator seems to try to spill from
+-- the inside out and eventually runs out of stack slots.
+--
+-- Without live range splitting, it's better to spill from the outside in,
+-- so we set the cost of very long live ranges to zero.
+--
+
+-- spillCost_chaitin
+--   :: SpillCostInfo
+--   -> Graph VirtualReg RegClass RealReg
+--   -> VirtualReg
+--   -> Float
+
+-- spillCost_chaitin info graph reg
+--   -- Spilling a live range that only lives for 1 instruction
+--   -- isn't going to help us at all - and we definitely want to avoid
+--   -- trying to re-spill previously inserted spill code.
+--   | lifetime <= 1 = 1/0
+
+--   -- It's unlikely that we'll find a reg for a live range this long;
+--   -- better to spill it straight up and not risk trying to keep it
+--   -- around and having to go through the build/color cycle again.
+
+--   -- To facilitate this we scale down the spill cost of long ranges.
+--   -- This makes sure long ranges are still spilled first.
+--   -- But this way spill cost remains relevant for long live
+--   -- ranges.
+--   | lifetime >= 128
+--   = (spillCost / conflicts) / 10.0
+
+--   -- Otherwise revert to Chaitin's regular cost function.
+--   | otherwise = (spillCost / conflicts)
+--   where
+--     !spillCost = fromIntegral (uses + defs) :: Float
+--     conflicts  = fromIntegral (nodeDegree classOfVirtualReg graph reg)
+--     (_, defs, uses, lifetime)
+--                = fromMaybe (reg, 0, 0, 0) $ lookupUFM info reg
+
+
+-- Just spill the longest live range.
+spillCost_length
+  :: SpillCostInfo
+  -> Graph VirtualReg RegClass RealReg
+  -> VirtualReg
+  -> Float
+
+spillCost_length info _ reg
+  | lifetime <= 1 = 1/0
+  | otherwise     = 1 / fromIntegral lifetime
+  where (_, _, _, lifetime)
+          = fromMaybe (reg, 0, 0, 0)
+          $ lookupUFM info reg
+
+
+-- | Extract a map of register lifetimes from a `SpillCostInfo`.
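+--
+-- Each entry maps a vreg to a (vreg, lifetime) pair, dropping the write
+-- and read counts. A sketch with a hypothetical vreg v1 of lifetime 7:
+--
+--   lifeMapFromSpillCostInfo (listToUFM [(v1, (v1, 2, 3, 7))])
+--     == listToUFM [(v1, (v1, 7))]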
+lifeMapFromSpillCostInfo :: SpillCostInfo -> UniqFM (VirtualReg, Int) +lifeMapFromSpillCostInfo info + = listToUFM + $ map (\(r, _, _, life) -> (r, (r, life))) + $ nonDetEltsUFM info + -- See Note [Unique Determinism and code generation] + + +-- | Determine the degree (number of neighbors) of this node which +-- have the same class. +nodeDegree + :: (VirtualReg -> RegClass) + -> Graph VirtualReg RegClass RealReg + -> VirtualReg + -> Int + +nodeDegree classOfVirtualReg graph reg + | Just node <- lookupUFM (graphMap graph) reg + + , virtConflicts + <- length + $ filter (\r -> classOfVirtualReg r == classOfVirtualReg reg) + $ nonDetEltsUniqSet + -- See Note [Unique Determinism and code generation] + $ nodeConflicts node + + = virtConflicts + sizeUniqSet (nodeExclusions node) + + | otherwise + = 0 + + +-- | Show a spill cost record, including the degree from the graph +-- and final calculated spill cost. +pprSpillCostRecord + :: (VirtualReg -> RegClass) + -> (Reg -> SDoc) + -> Graph VirtualReg RegClass RealReg + -> SpillCostRecord + -> SDoc + +pprSpillCostRecord regClass pprReg graph (reg, uses, defs, life) + = hsep + [ pprReg (RegVirtual reg) + , ppr uses + , ppr defs + , ppr life + , ppr $ nodeDegree regClass graph reg + , text $ show $ (fromIntegral (uses + defs) + / fromIntegral (nodeDegree regClass graph reg) :: Float) ] + diff --git a/compiler/GHC/CmmToAsm/Reg/Graph/Stats.hs b/compiler/GHC/CmmToAsm/Reg/Graph/Stats.hs new file mode 100644 index 0000000000..05d2e814af --- /dev/null +++ b/compiler/GHC/CmmToAsm/Reg/Graph/Stats.hs @@ -0,0 +1,346 @@ +{-# LANGUAGE BangPatterns, CPP #-} + +{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-} + +-- | Carries interesting info for debugging / profiling of the +-- graph coloring register allocator. +module GHC.CmmToAsm.Reg.Graph.Stats ( + RegAllocStats (..), + + pprStats, + pprStatsSpills, + pprStatsLifetimes, + pprStatsConflict, + pprStatsLifeConflict, + + countSRMs, addSRM +) where + +import GhcPrelude + +import qualified GraphColor as Color +import GHC.CmmToAsm.Reg.Liveness +import GHC.CmmToAsm.Reg.Graph.Spill +import GHC.CmmToAsm.Reg.Graph.SpillCost +import GHC.CmmToAsm.Reg.Graph.TrivColorable +import GHC.CmmToAsm.Instr +import GHC.Platform.Reg.Class +import GHC.Platform.Reg +import GHC.CmmToAsm.Reg.Target + +import Outputable +import UniqFM +import UniqSet +import State + +-- | Holds interesting statistics from the register allocator. +data RegAllocStats statics instr + + -- Information about the initial conflict graph. + = RegAllocStatsStart + { -- | Initial code, with liveness. + raLiveCmm :: [LiveCmmDecl statics instr] + + -- | The initial, uncolored graph. + , raGraph :: Color.Graph VirtualReg RegClass RealReg + + -- | Information to help choose which regs to spill. + , raSpillCosts :: SpillCostInfo } + + + -- Information about an intermediate graph. + -- This is one that we couldn't color, so had to insert spill code + -- instruction stream. + | RegAllocStatsSpill + { -- | Code we tried to allocate registers for. + raCode :: [LiveCmmDecl statics instr] + + -- | Partially colored graph. + , raGraph :: Color.Graph VirtualReg RegClass RealReg + + -- | The regs that were coalesced. + , raCoalesced :: UniqFM VirtualReg + + -- | Spiller stats. + , raSpillStats :: SpillStats + + -- | Number of instructions each reg lives for. + , raSpillCosts :: SpillCostInfo + + -- | Code with spill instructions added. 
+ , raSpilled :: [LiveCmmDecl statics instr] } + + + -- a successful coloring + | RegAllocStatsColored + { -- | Code we tried to allocate registers for. + raCode :: [LiveCmmDecl statics instr] + + -- | Uncolored graph. + , raGraph :: Color.Graph VirtualReg RegClass RealReg + + -- | Coalesced and colored graph. + , raGraphColored :: Color.Graph VirtualReg RegClass RealReg + + -- | Regs that were coalesced. + , raCoalesced :: UniqFM VirtualReg + + -- | Code with coalescings applied. + , raCodeCoalesced :: [LiveCmmDecl statics instr] + + -- | Code with vregs replaced by hregs. + , raPatched :: [LiveCmmDecl statics instr] + + -- | Code with unneeded spill\/reloads cleaned out. + , raSpillClean :: [LiveCmmDecl statics instr] + + -- | Final code. + , raFinal :: [NatCmmDecl statics instr] + + -- | Spill\/reload\/reg-reg moves present in this code. + , raSRMs :: (Int, Int, Int) } + + +instance (Outputable statics, Outputable instr) + => Outputable (RegAllocStats statics instr) where + + ppr (s@RegAllocStatsStart{}) = sdocWithPlatform $ \platform -> + text "# Start" + $$ text "# Native code with liveness information." + $$ ppr (raLiveCmm s) + $$ text "" + $$ text "# Initial register conflict graph." + $$ Color.dotGraph + (targetRegDotColor platform) + (trivColorable platform + (targetVirtualRegSqueeze platform) + (targetRealRegSqueeze platform)) + (raGraph s) + + + ppr (s@RegAllocStatsSpill{}) = + text "# Spill" + + $$ text "# Code with liveness information." + $$ ppr (raCode s) + $$ text "" + + $$ (if (not $ isNullUFM $ raCoalesced s) + then text "# Registers coalesced." + $$ pprUFMWithKeys (raCoalesced s) (vcat . map ppr) + $$ text "" + else empty) + + $$ text "# Spills inserted." + $$ ppr (raSpillStats s) + $$ text "" + + $$ text "# Code with spills inserted." + $$ ppr (raSpilled s) + + + ppr (s@RegAllocStatsColored { raSRMs = (spills, reloads, moves) }) + = sdocWithPlatform $ \platform -> + text "# Colored" + + $$ text "# Code with liveness information." + $$ ppr (raCode s) + $$ text "" + + $$ text "# Register conflict graph (colored)." + $$ Color.dotGraph + (targetRegDotColor platform) + (trivColorable platform + (targetVirtualRegSqueeze platform) + (targetRealRegSqueeze platform)) + (raGraphColored s) + $$ text "" + + $$ (if (not $ isNullUFM $ raCoalesced s) + then text "# Registers coalesced." + $$ pprUFMWithKeys (raCoalesced s) (vcat . map ppr) + $$ text "" + else empty) + + $$ text "# Native code after coalescings applied." + $$ ppr (raCodeCoalesced s) + $$ text "" + + $$ text "# Native code after register allocation." + $$ ppr (raPatched s) + $$ text "" + + $$ text "# Clean out unneeded spill/reloads." + $$ ppr (raSpillClean s) + $$ text "" + + $$ text "# Final code, after rewriting spill/rewrite pseudo instrs." + $$ ppr (raFinal s) + $$ text "" + $$ text "# Score:" + $$ (text "# spills inserted: " <> int spills) + $$ (text "# reloads inserted: " <> int reloads) + $$ (text "# reg-reg moves remaining: " <> int moves) + $$ text "" + + +-- | Do all the different analysis on this list of RegAllocStats +pprStats + :: [RegAllocStats statics instr] + -> Color.Graph VirtualReg RegClass RealReg + -> SDoc + +pprStats stats graph + = let outSpills = pprStatsSpills stats + outLife = pprStatsLifetimes stats + outConflict = pprStatsConflict stats + outScatter = pprStatsLifeConflict stats graph + + in vcat [outSpills, outLife, outConflict, outScatter] + + +-- | Dump a table of how many spill loads \/ stores were inserted for each vreg. 
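+--
+-- The output has the following shape (numbers illustrative only):
+--
+-- @
+--   -- spills-added-total
+--   -- (stores, loads, reg_reg_moves_remaining)
+--   (10, 14, 3)
+-- @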
+pprStatsSpills + :: [RegAllocStats statics instr] -> SDoc + +pprStatsSpills stats + = let + finals = [ s | s@RegAllocStatsColored{} <- stats] + + -- sum up how many stores\/loads\/reg-reg-moves were left in the code + total = foldl' addSRM (0, 0, 0) + $ map raSRMs finals + + in ( text "-- spills-added-total" + $$ text "-- (stores, loads, reg_reg_moves_remaining)" + $$ ppr total + $$ text "") + + +-- | Dump a table of how long vregs tend to live for in the initial code. +pprStatsLifetimes + :: [RegAllocStats statics instr] -> SDoc + +pprStatsLifetimes stats + = let info = foldl' plusSpillCostInfo zeroSpillCostInfo + [ raSpillCosts s + | s@RegAllocStatsStart{} <- stats ] + + lifeBins = binLifetimeCount $ lifeMapFromSpillCostInfo info + + in ( text "-- vreg-population-lifetimes" + $$ text "-- (instruction_count, number_of_vregs_that_lived_that_long)" + $$ pprUFM lifeBins (vcat . map ppr) + $$ text "\n") + + +binLifetimeCount :: UniqFM (VirtualReg, Int) -> UniqFM (Int, Int) +binLifetimeCount fm + = let lifes = map (\l -> (l, (l, 1))) + $ map snd + $ nonDetEltsUFM fm + -- See Note [Unique Determinism and code generation] + + in addListToUFM_C + (\(l1, c1) (_, c2) -> (l1, c1 + c2)) + emptyUFM + lifes + + +-- | Dump a table of how many conflicts vregs tend to have in the initial code. +pprStatsConflict + :: [RegAllocStats statics instr] -> SDoc + +pprStatsConflict stats + = let confMap = foldl' (plusUFM_C (\(c1, n1) (_, n2) -> (c1, n1 + n2))) + emptyUFM + $ map Color.slurpNodeConflictCount + [ raGraph s | s@RegAllocStatsStart{} <- stats ] + + in ( text "-- vreg-conflicts" + $$ text "-- (conflict_count, number_of_vregs_that_had_that_many_conflicts)" + $$ pprUFM confMap (vcat . map ppr) + $$ text "\n") + + +-- | For every vreg, dump how many conflicts it has, and its lifetime. +-- Good for making a scatter plot. +pprStatsLifeConflict + :: [RegAllocStats statics instr] + -> Color.Graph VirtualReg RegClass RealReg -- ^ global register conflict graph + -> SDoc + +pprStatsLifeConflict stats graph + = let lifeMap = lifeMapFromSpillCostInfo + $ foldl' plusSpillCostInfo zeroSpillCostInfo + $ [ raSpillCosts s | s@RegAllocStatsStart{} <- stats ] + + scatter = map (\r -> let lifetime = case lookupUFM lifeMap r of + Just (_, l) -> l + Nothing -> 0 + Just node = Color.lookupNode graph r + in parens $ hcat $ punctuate (text ", ") + [ doubleQuotes $ ppr $ Color.nodeId node + , ppr $ sizeUniqSet (Color.nodeConflicts node) + , ppr $ lifetime ]) + $ map Color.nodeId + $ nonDetEltsUFM + -- See Note [Unique Determinism and code generation] + $ Color.graphMap graph + + in ( text "-- vreg-conflict-lifetime" + $$ text "-- (vreg, vreg_conflicts, vreg_lifetime)" + $$ (vcat scatter) + $$ text "\n") + + +-- | Count spill/reload/reg-reg moves. +-- Lets us see how well the register allocator has done. 
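+--
+-- The result triple is (spill count, reload count, reg-reg move count),
+-- accumulated over every instruction in the procedure; countSRM_instr
+-- below decides which component each instruction bumps.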
+countSRMs + :: Instruction instr + => LiveCmmDecl statics instr -> (Int, Int, Int) + +countSRMs cmm + = execState (mapBlockTopM countSRM_block cmm) (0, 0, 0) + + +countSRM_block + :: Instruction instr + => GenBasicBlock (LiveInstr instr) + -> State (Int, Int, Int) (GenBasicBlock (LiveInstr instr)) + +countSRM_block (BasicBlock i instrs) + = do instrs' <- mapM countSRM_instr instrs + return $ BasicBlock i instrs' + + +countSRM_instr + :: Instruction instr + => LiveInstr instr -> State (Int, Int, Int) (LiveInstr instr) + +countSRM_instr li + | LiveInstr SPILL{} _ <- li + = do modify $ \(s, r, m) -> (s + 1, r, m) + return li + + | LiveInstr RELOAD{} _ <- li + = do modify $ \(s, r, m) -> (s, r + 1, m) + return li + + | LiveInstr instr _ <- li + , Just _ <- takeRegRegMoveInstr instr + = do modify $ \(s, r, m) -> (s, r, m + 1) + return li + + | otherwise + = return li + + +-- sigh.. +addSRM :: (Int, Int, Int) -> (Int, Int, Int) -> (Int, Int, Int) +addSRM (s1, r1, m1) (s2, r2, m2) + = let !s = s1 + s2 + !r = r1 + r2 + !m = m1 + m2 + in (s, r, m) + diff --git a/compiler/GHC/CmmToAsm/Reg/Graph/TrivColorable.hs b/compiler/GHC/CmmToAsm/Reg/Graph/TrivColorable.hs new file mode 100644 index 0000000000..ec7c5ad13e --- /dev/null +++ b/compiler/GHC/CmmToAsm/Reg/Graph/TrivColorable.hs @@ -0,0 +1,274 @@ +{-# LANGUAGE CPP #-} + +module GHC.CmmToAsm.Reg.Graph.TrivColorable ( + trivColorable, +) + +where + +#include "HsVersions.h" + +import GhcPrelude + +import GHC.Platform.Reg.Class +import GHC.Platform.Reg + +import GraphBase + +import UniqSet +import GHC.Platform +import Panic + +-- trivColorable --------------------------------------------------------------- + +-- trivColorable function for the graph coloring allocator +-- +-- This gets hammered by scanGraph during register allocation, +-- so needs to be fairly efficient. +-- +-- NOTE: This only works for architectures with just RcInteger and RcDouble +-- (which are disjoint) ie. x86, x86_64 and ppc +-- +-- The number of allocatable regs is hard coded in here so we can do +-- a fast comparison in trivColorable. +-- +-- It's ok if these numbers are _less_ than the actual number of free +-- regs, but they can't be more or the register conflict +-- graph won't color. +-- +-- If the graph doesn't color then the allocator will panic, but it won't +-- generate bad object code or anything nasty like that. +-- +-- There is an allocatableRegsInClass :: RegClass -> Int, but doing +-- the unboxing is too slow for us here. +-- TODO: Is that still true? Could we use allocatableRegsInClass +-- without losing performance now? +-- +-- Look at includes/stg/MachRegs.h to get the numbers. +-- + + +-- Disjoint registers ---------------------------------------------------------- +-- +-- The definition has been unfolded into individual cases for speed. +-- Each architecture has a different register setup, so we use a +-- different regSqueeze function for each. +-- +accSqueeze + :: Int + -> Int + -> (reg -> Int) + -> UniqSet reg + -> Int + +accSqueeze count maxCount squeeze us = acc count (nonDetEltsUniqSet us) + -- See Note [Unique Determinism and code generation] + where acc count [] = count + acc count _ | count >= maxCount = count + acc count (r:rs) = acc (count + squeeze r) rs + +{- Note [accSqueeze] +~~~~~~~~~~~~~~~~~~~~ +BL 2007/09 +Doing a nice fold over the UniqSet makes trivColorable use +32% of total compile time and 42% of total alloc when compiling SHA1.hs from darcs. +Therefore the UniqFM is made non-abstract and we use custom fold. 
+ +MS 2010/04 +When converting UniqFM to use Data.IntMap, the fold cannot use UniqFM internal +representation any more. But it is imperative that the accSqueeze stops +the folding if the count gets greater or equal to maxCount. We thus convert +UniqFM to a (lazy) list, do the fold and stops if necessary, which was +the most efficient variant tried. Benchmark compiling 10-times SHA1.hs follows. +(original = previous implementation, folding = fold of the whole UFM, + lazyFold = the current implementation, + hackFold = using internal representation of Data.IntMap) + + original folding hackFold lazyFold + -O -fasm (used everywhere) 31.509s 30.387s 30.791s 30.603s + 100.00% 96.44% 97.72% 97.12% + -fregs-graph 67.938s 74.875s 62.673s 64.679s + 100.00% 110.21% 92.25% 95.20% + -fregs-iterative 89.761s 143.913s 81.075s 86.912s + 100.00% 160.33% 90.32% 96.83% + -fnew-codegen 38.225s 37.142s 37.551s 37.119s + 100.00% 97.17% 98.24% 97.11% + -fnew-codegen -fregs-graph 91.786s 91.51s 87.368s 86.88s + 100.00% 99.70% 95.19% 94.65% + -fnew-codegen -fregs-iterative 206.72s 343.632s 194.694s 208.677s + 100.00% 166.23% 94.18% 100.95% +-} + +trivColorable + :: Platform + -> (RegClass -> VirtualReg -> Int) + -> (RegClass -> RealReg -> Int) + -> Triv VirtualReg RegClass RealReg + +trivColorable platform virtualRegSqueeze realRegSqueeze RcInteger conflicts exclusions + | let cALLOCATABLE_REGS_INTEGER + = (case platformArch platform of + ArchX86 -> 3 + ArchX86_64 -> 5 + ArchPPC -> 16 + ArchSPARC -> 14 + ArchSPARC64 -> panic "trivColorable ArchSPARC64" + ArchPPC_64 _ -> 15 + ArchARM _ _ _ -> panic "trivColorable ArchARM" + ArchARM64 -> panic "trivColorable ArchARM64" + ArchAlpha -> panic "trivColorable ArchAlpha" + ArchMipseb -> panic "trivColorable ArchMipseb" + ArchMipsel -> panic "trivColorable ArchMipsel" + ArchS390X -> panic "trivColorable ArchS390X" + ArchJavaScript-> panic "trivColorable ArchJavaScript" + ArchUnknown -> panic "trivColorable ArchUnknown") + , count2 <- accSqueeze 0 cALLOCATABLE_REGS_INTEGER + (virtualRegSqueeze RcInteger) + conflicts + + , count3 <- accSqueeze count2 cALLOCATABLE_REGS_INTEGER + (realRegSqueeze RcInteger) + exclusions + + = count3 < cALLOCATABLE_REGS_INTEGER + +trivColorable platform virtualRegSqueeze realRegSqueeze RcFloat conflicts exclusions + | let cALLOCATABLE_REGS_FLOAT + = (case platformArch platform of + -- On x86_64 and x86, Float and RcDouble + -- use the same registers, + -- so we only use RcDouble to represent the + -- register allocation problem on those types. + ArchX86 -> 0 + ArchX86_64 -> 0 + ArchPPC -> 0 + ArchSPARC -> 22 + ArchSPARC64 -> panic "trivColorable ArchSPARC64" + ArchPPC_64 _ -> 0 + ArchARM _ _ _ -> panic "trivColorable ArchARM" + ArchARM64 -> panic "trivColorable ArchARM64" + ArchAlpha -> panic "trivColorable ArchAlpha" + ArchMipseb -> panic "trivColorable ArchMipseb" + ArchMipsel -> panic "trivColorable ArchMipsel" + ArchS390X -> panic "trivColorable ArchS390X" + ArchJavaScript-> panic "trivColorable ArchJavaScript" + ArchUnknown -> panic "trivColorable ArchUnknown") + , count2 <- accSqueeze 0 cALLOCATABLE_REGS_FLOAT + (virtualRegSqueeze RcFloat) + conflicts + + , count3 <- accSqueeze count2 cALLOCATABLE_REGS_FLOAT + (realRegSqueeze RcFloat) + exclusions + + = count3 < cALLOCATABLE_REGS_FLOAT + +trivColorable platform virtualRegSqueeze realRegSqueeze RcDouble conflicts exclusions + | let cALLOCATABLE_REGS_DOUBLE + = (case platformArch platform of + ArchX86 -> 8 + -- in x86 32bit mode sse2 there are only + -- 8 XMM registers xmm0 ... 
xmm7 + ArchX86_64 -> 10 + -- in x86_64 there are 16 XMM registers + -- xmm0 .. xmm15, here 10 is a + -- "dont need to solve conflicts" count that + -- was chosen at some point in the past. + ArchPPC -> 26 + ArchSPARC -> 11 + ArchSPARC64 -> panic "trivColorable ArchSPARC64" + ArchPPC_64 _ -> 20 + ArchARM _ _ _ -> panic "trivColorable ArchARM" + ArchARM64 -> panic "trivColorable ArchARM64" + ArchAlpha -> panic "trivColorable ArchAlpha" + ArchMipseb -> panic "trivColorable ArchMipseb" + ArchMipsel -> panic "trivColorable ArchMipsel" + ArchS390X -> panic "trivColorable ArchS390X" + ArchJavaScript-> panic "trivColorable ArchJavaScript" + ArchUnknown -> panic "trivColorable ArchUnknown") + , count2 <- accSqueeze 0 cALLOCATABLE_REGS_DOUBLE + (virtualRegSqueeze RcDouble) + conflicts + + , count3 <- accSqueeze count2 cALLOCATABLE_REGS_DOUBLE + (realRegSqueeze RcDouble) + exclusions + + = count3 < cALLOCATABLE_REGS_DOUBLE + + + + +-- Specification Code ---------------------------------------------------------- +-- +-- The trivColorable function for each particular architecture should +-- implement the following function, but faster. +-- + +{- +trivColorable :: RegClass -> UniqSet Reg -> UniqSet Reg -> Bool +trivColorable classN conflicts exclusions + = let + + acc :: Reg -> (Int, Int) -> (Int, Int) + acc r (cd, cf) + = case regClass r of + RcInteger -> (cd+1, cf) + RcFloat -> (cd, cf+1) + _ -> panic "Regs.trivColorable: reg class not handled" + + tmp = nonDetFoldUFM acc (0, 0) conflicts + (countInt, countFloat) = nonDetFoldUFM acc tmp exclusions + + squeese = worst countInt classN RcInteger + + worst countFloat classN RcFloat + + in squeese < allocatableRegsInClass classN + +-- | Worst case displacement +-- node N of classN has n neighbors of class C. +-- +-- We currently only have RcInteger and RcDouble, which don't conflict at all. +-- This is a bit boring compared to what's in RegArchX86. +-- +worst :: Int -> RegClass -> RegClass -> Int +worst n classN classC + = case classN of + RcInteger + -> case classC of + RcInteger -> min n (allocatableRegsInClass RcInteger) + RcFloat -> 0 + + RcDouble + -> case classC of + RcFloat -> min n (allocatableRegsInClass RcFloat) + RcInteger -> 0 + +-- allocatableRegs is allMachRegNos with the fixed-use regs removed. +-- i.e., these are the regs for which we are prepared to allow the +-- register allocator to attempt to map VRegs to. +allocatableRegs :: [RegNo] +allocatableRegs + = let isFree i = freeReg i + in filter isFree allMachRegNos + + +-- | The number of regs in each class. +-- We go via top level CAFs to ensure that we're not recomputing +-- the length of these lists each time the fn is called. +allocatableRegsInClass :: RegClass -> Int +allocatableRegsInClass cls + = case cls of + RcInteger -> allocatableRegsInteger + RcFloat -> allocatableRegsDouble + +allocatableRegsInteger :: Int +allocatableRegsInteger + = length $ filter (\r -> regClass r == RcInteger) + $ map RealReg allocatableRegs + +allocatableRegsFloat :: Int +allocatableRegsFloat + = length $ filter (\r -> regClass r == RcFloat + $ map RealReg allocatableRegs +-} diff --git a/compiler/GHC/CmmToAsm/Reg/Graph/X86.hs b/compiler/GHC/CmmToAsm/Reg/Graph/X86.hs new file mode 100644 index 0000000000..0d4c56ba21 --- /dev/null +++ b/compiler/GHC/CmmToAsm/Reg/Graph/X86.hs @@ -0,0 +1,161 @@ + +-- | A description of the register set of the X86. +-- +-- This isn't used directly in GHC proper. +-- +-- See RegArchBase.hs for the reference. +-- See MachRegs.hs for the actual trivColorable function used in GHC. 
+-- +module GHC.CmmToAsm.Reg.Graph.X86 ( + classOfReg, + regsOfClass, + regName, + regAlias, + worst, + squeese, +) where + +import GhcPrelude + +import GHC.CmmToAsm.Reg.Graph.Base (Reg(..), RegSub(..), RegClass(..)) +import UniqSet + +import qualified Data.Array as A + + +-- | Determine the class of a register +classOfReg :: Reg -> RegClass +classOfReg reg + = case reg of + Reg c _ -> c + + RegSub SubL16 _ -> ClassG16 + RegSub SubL8 _ -> ClassG8 + RegSub SubL8H _ -> ClassG8 + + +-- | Determine all the regs that make up a certain class. +regsOfClass :: RegClass -> UniqSet Reg +regsOfClass c + = case c of + ClassG32 + -> mkUniqSet [ Reg ClassG32 i + | i <- [0..7] ] + + ClassG16 + -> mkUniqSet [ RegSub SubL16 (Reg ClassG32 i) + | i <- [0..7] ] + + ClassG8 + -> unionUniqSets + (mkUniqSet [ RegSub SubL8 (Reg ClassG32 i) | i <- [0..3] ]) + (mkUniqSet [ RegSub SubL8H (Reg ClassG32 i) | i <- [0..3] ]) + + ClassF64 + -> mkUniqSet [ Reg ClassF64 i + | i <- [0..5] ] + + +-- | Determine the common name of a reg +-- returns Nothing if this reg is not part of the machine. +regName :: Reg -> Maybe String +regName reg + = case reg of + Reg ClassG32 i + | i <= 7 -> + let names = A.listArray (0,8) + [ "eax", "ebx", "ecx", "edx" + , "ebp", "esi", "edi", "esp" ] + in Just $ names A.! i + + RegSub SubL16 (Reg ClassG32 i) + | i <= 7 -> + let names = A.listArray (0,8) + [ "ax", "bx", "cx", "dx" + , "bp", "si", "di", "sp"] + in Just $ names A.! i + + RegSub SubL8 (Reg ClassG32 i) + | i <= 3 -> + let names = A.listArray (0,4) [ "al", "bl", "cl", "dl"] + in Just $ names A.! i + + RegSub SubL8H (Reg ClassG32 i) + | i <= 3 -> + let names = A.listArray (0,4) [ "ah", "bh", "ch", "dh"] + in Just $ names A.! i + + _ -> Nothing + + +-- | Which regs alias what other regs. +regAlias :: Reg -> UniqSet Reg +regAlias reg + = case reg of + + -- 32 bit regs alias all of the subregs + Reg ClassG32 i + + -- for eax, ebx, ecx, eds + | i <= 3 + -> mkUniqSet + $ [ Reg ClassG32 i, RegSub SubL16 reg + , RegSub SubL8 reg, RegSub SubL8H reg ] + + -- for esi, edi, esp, ebp + | 4 <= i && i <= 7 + -> mkUniqSet + $ [ Reg ClassG32 i, RegSub SubL16 reg ] + + -- 16 bit subregs alias the whole reg + RegSub SubL16 r@(Reg ClassG32 _) + -> regAlias r + + -- 8 bit subregs alias the 32 and 16, but not the other 8 bit subreg + RegSub SubL8 r@(Reg ClassG32 _) + -> mkUniqSet $ [ r, RegSub SubL16 r, RegSub SubL8 r ] + + RegSub SubL8H r@(Reg ClassG32 _) + -> mkUniqSet $ [ r, RegSub SubL16 r, RegSub SubL8H r ] + + -- fp + Reg ClassF64 _ + -> unitUniqSet reg + + _ -> error "regAlias: invalid register" + + +-- | Optimised versions of RegColorBase.{worst, squeese} specific to x86 +worst :: Int -> RegClass -> RegClass -> Int +worst n classN classC + = case classN of + ClassG32 + -> case classC of + ClassG32 -> min n 8 + ClassG16 -> min n 8 + ClassG8 -> min n 4 + ClassF64 -> 0 + + ClassG16 + -> case classC of + ClassG32 -> min n 8 + ClassG16 -> min n 8 + ClassG8 -> min n 4 + ClassF64 -> 0 + + ClassG8 + -> case classC of + ClassG32 -> min (n*2) 8 + ClassG16 -> min (n*2) 8 + ClassG8 -> min n 8 + ClassF64 -> 0 + + ClassF64 + -> case classC of + ClassF64 -> min n 6 + _ -> 0 + +squeese :: RegClass -> [(Int, RegClass)] -> Int +squeese classN countCs + = sum (map (\(i, classC) -> worst i classN classC) countCs) + diff --git a/compiler/GHC/CmmToAsm/Reg/Linear.hs b/compiler/GHC/CmmToAsm/Reg/Linear.hs new file mode 100644 index 0000000000..9b263889d8 --- /dev/null +++ b/compiler/GHC/CmmToAsm/Reg/Linear.hs @@ -0,0 +1,920 @@ +{-# LANGUAGE BangPatterns, CPP, 
ScopedTypeVariables #-} + +{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-} + +----------------------------------------------------------------------------- +-- +-- The register allocator +-- +-- (c) The University of Glasgow 2004 +-- +----------------------------------------------------------------------------- + +{- +The algorithm is roughly: + + 1) Compute strongly connected components of the basic block list. + + 2) Compute liveness (mapping from pseudo register to + point(s) of death?). + + 3) Walk instructions in each basic block. We keep track of + (a) Free real registers (a bitmap?) + (b) Current assignment of temporaries to machine registers and/or + spill slots (call this the "assignment"). + (c) Partial mapping from basic block ids to a virt-to-loc mapping. + When we first encounter a branch to a basic block, + we fill in its entry in this table with the current mapping. + + For each instruction: + (a) For each temporary *read* by the instruction: + If the temporary does not have a real register allocation: + - Allocate a real register from the free list. If + the list is empty: + - Find a temporary to spill. Pick one that is + not used in this instruction (ToDo: not + used for a while...) + - generate a spill instruction + - If the temporary was previously spilled, + generate an instruction to read the temp from its spill loc. + (optimisation: if we can see that a real register is going to + be used soon, then don't use it for allocation). + + (b) For each real register clobbered by this instruction: + If a temporary resides in it, + If the temporary is live after this instruction, + Move the temporary to another (non-clobbered & free) reg, + or spill it to memory. Mark the temporary as residing + in both memory and a register if it was spilled (it might + need to be read by this instruction). + + (ToDo: this is wrong for jump instructions?) + + We do this after step (a), because if we start with + movq v1, %rsi + which is an instruction that clobbers %rsi, if v1 currently resides + in %rsi we want to get + movq %rsi, %freereg + movq %rsi, %rsi -- will disappear + instead of + movq %rsi, %freereg + movq %freereg, %rsi + + (c) Update the current assignment + + (d) If the instruction is a branch: + if the destination block already has a register assignment, + Generate a new block with fixup code and redirect the + jump to the new block. + else, + Update the block id->assignment mapping with the current + assignment. + + (e) Delete all register assignments for temps which are read + (only) and die here. Update the free register list. + + (f) Mark all registers clobbered by this instruction as not free, + and mark temporaries which have been spilled due to clobbering + as in memory (step (a) marks then as in both mem & reg). + + (g) For each temporary *written* by this instruction: + Allocate a real register as for (b), spilling something + else if necessary. + - except when updating the assignment, drop any memory + locations that the temporary was previously in, since + they will be no longer valid after this instruction. + + (h) Delete all register assignments for temps which are + written and die here (there should rarely be any). Update + the free register list. + + (i) Rewrite the instruction with the new mapping. + + (j) For each spilled reg known to be now dead, re-add its stack slot + to the free list. 
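+
+ As a rough sketch (simplified; Loc and RegMap are the real types from
+ GHC.CmmToAsm.Reg.Linear.Base, but allocBlock is a hypothetical name),
+ the state threaded through step 3 for a single block has the shape:
+
+     allocBlock :: freeRegs -> RegMap Loc -> [LiveInstr instr]
+                -> ([instr], freeRegs, RegMap Loc)
+
+ Each instruction consumes and produces the free-register set and the
+ vreg-to-location assignment, emitting spill and reload code on the side.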
+ +-} + +module GHC.CmmToAsm.Reg.Linear ( + regAlloc, + module GHC.CmmToAsm.Reg.Linear.Base, + module GHC.CmmToAsm.Reg.Linear.Stats + ) where + +#include "HsVersions.h" + + +import GhcPrelude + +import GHC.CmmToAsm.Reg.Linear.State +import GHC.CmmToAsm.Reg.Linear.Base +import GHC.CmmToAsm.Reg.Linear.StackMap +import GHC.CmmToAsm.Reg.Linear.FreeRegs +import GHC.CmmToAsm.Reg.Linear.Stats +import GHC.CmmToAsm.Reg.Linear.JoinToTargets +import qualified GHC.CmmToAsm.Reg.Linear.PPC as PPC +import qualified GHC.CmmToAsm.Reg.Linear.SPARC as SPARC +import qualified GHC.CmmToAsm.Reg.Linear.X86 as X86 +import qualified GHC.CmmToAsm.Reg.Linear.X86_64 as X86_64 +import GHC.CmmToAsm.Reg.Target +import GHC.CmmToAsm.Reg.Liveness +import GHC.CmmToAsm.Instr +import GHC.Platform.Reg + +import GHC.Cmm.BlockId +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm hiding (RegSet) + +import Digraph +import GHC.Driver.Session +import Unique +import UniqSet +import UniqFM +import UniqSupply +import Outputable +import GHC.Platform + +import Data.Maybe +import Data.List +import Control.Monad + +-- ----------------------------------------------------------------------------- +-- Top level of the register allocator + +-- Allocate registers +regAlloc + :: (Outputable instr, Instruction instr) + => DynFlags + -> LiveCmmDecl statics instr + -> UniqSM ( NatCmmDecl statics instr + , Maybe Int -- number of extra stack slots required, + -- beyond maxSpillSlots + , Maybe RegAllocStats + ) + +regAlloc _ (CmmData sec d) + = return + ( CmmData sec d + , Nothing + , Nothing ) + +regAlloc _ (CmmProc (LiveInfo info _ _ _) lbl live []) + = return ( CmmProc info lbl live (ListGraph []) + , Nothing + , Nothing ) + +regAlloc dflags (CmmProc static lbl live sccs) + | LiveInfo info entry_ids@(first_id:_) block_live _ <- static + = do + -- do register allocation on each component. + (final_blocks, stats, stack_use) + <- linearRegAlloc dflags entry_ids block_live sccs + + -- make sure the block that was first in the input list + -- stays at the front of the output + let ((first':_), rest') + = partition ((== first_id) . blockId) final_blocks + + let max_spill_slots = maxSpillSlots dflags + extra_stack + | stack_use > max_spill_slots + = Just (stack_use - max_spill_slots) + | otherwise + = Nothing + + return ( CmmProc info lbl live (ListGraph (first' : rest')) + , extra_stack + , Just stats) + +-- bogus. to make non-exhaustive match warning go away. +regAlloc _ (CmmProc _ _ _ _) + = panic "RegAllocLinear.regAlloc: no match" + + +-- ----------------------------------------------------------------------------- +-- Linear sweep to allocate registers + + +-- | Do register allocation on some basic blocks. +-- But be careful to allocate a block in an SCC only if it has +-- an entry in the block map or it is the first block. 
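+-- (The reason for this ordering constraint is explained in the note from
+-- John Dias's patch further down, next to 'process'.)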
+-- +linearRegAlloc + :: (Outputable instr, Instruction instr) + => DynFlags + -> [BlockId] -- ^ entry points + -> BlockMap RegSet + -- ^ live regs on entry to each basic block + -> [SCC (LiveBasicBlock instr)] + -- ^ instructions annotated with "deaths" + -> UniqSM ([NatBasicBlock instr], RegAllocStats, Int) + +linearRegAlloc dflags entry_ids block_live sccs + = case platformArch platform of + ArchX86 -> go $ (frInitFreeRegs platform :: X86.FreeRegs) + ArchX86_64 -> go $ (frInitFreeRegs platform :: X86_64.FreeRegs) + ArchS390X -> panic "linearRegAlloc ArchS390X" + ArchSPARC -> go $ (frInitFreeRegs platform :: SPARC.FreeRegs) + ArchSPARC64 -> panic "linearRegAlloc ArchSPARC64" + ArchPPC -> go $ (frInitFreeRegs platform :: PPC.FreeRegs) + ArchARM _ _ _ -> panic "linearRegAlloc ArchARM" + ArchARM64 -> panic "linearRegAlloc ArchARM64" + ArchPPC_64 _ -> go $ (frInitFreeRegs platform :: PPC.FreeRegs) + ArchAlpha -> panic "linearRegAlloc ArchAlpha" + ArchMipseb -> panic "linearRegAlloc ArchMipseb" + ArchMipsel -> panic "linearRegAlloc ArchMipsel" + ArchJavaScript -> panic "linearRegAlloc ArchJavaScript" + ArchUnknown -> panic "linearRegAlloc ArchUnknown" + where + go f = linearRegAlloc' dflags f entry_ids block_live sccs + platform = targetPlatform dflags + +linearRegAlloc' + :: (FR freeRegs, Outputable instr, Instruction instr) + => DynFlags + -> freeRegs + -> [BlockId] -- ^ entry points + -> BlockMap RegSet -- ^ live regs on entry to each basic block + -> [SCC (LiveBasicBlock instr)] -- ^ instructions annotated with "deaths" + -> UniqSM ([NatBasicBlock instr], RegAllocStats, Int) + +linearRegAlloc' dflags initFreeRegs entry_ids block_live sccs + = do us <- getUniqueSupplyM + let (_, stack, stats, blocks) = + runR dflags mapEmpty initFreeRegs emptyRegMap (emptyStackMap dflags) us + $ linearRA_SCCs entry_ids block_live [] sccs + return (blocks, stats, getStackUse stack) + + +linearRA_SCCs :: (FR freeRegs, Instruction instr, Outputable instr) + => [BlockId] + -> BlockMap RegSet + -> [NatBasicBlock instr] + -> [SCC (LiveBasicBlock instr)] + -> RegM freeRegs [NatBasicBlock instr] + +linearRA_SCCs _ _ blocksAcc [] + = return $ reverse blocksAcc + +linearRA_SCCs entry_ids block_live blocksAcc (AcyclicSCC block : sccs) + = do blocks' <- processBlock block_live block + linearRA_SCCs entry_ids block_live + ((reverse blocks') ++ blocksAcc) + sccs + +linearRA_SCCs entry_ids block_live blocksAcc (CyclicSCC blocks : sccs) + = do + blockss' <- process entry_ids block_live blocks [] (return []) False + linearRA_SCCs entry_ids block_live + (reverse (concat blockss') ++ blocksAcc) + sccs + +{- from John Dias's patch 2008/10/16: + The linear-scan allocator sometimes allocates a block + before allocating one of its predecessors, which could lead to + inconsistent allocations. Make it so a block is only allocated + if a predecessor has set the "incoming" assignments for the block, or + if it's the procedure's entry block. + + BL 2009/02: Careful. If the assignment for a block doesn't get set for + some reason then this function will loop. We should probably do some + more sanity checking to guard against this eventuality. 
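+
+ For example, with blocks B1 -> B2 -> B1 where B1 is an entry point:
+ on the first round B1 is allocated (it is in entry_ids) and records the
+ incoming assignment for B2; B2 is deferred to the next round until that
+ entry exists. The madeProgress flag below stops us from looping forever
+ on rounds where no block could be allocated at all.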
+-} + +process :: (FR freeRegs, Instruction instr, Outputable instr) + => [BlockId] + -> BlockMap RegSet + -> [GenBasicBlock (LiveInstr instr)] + -> [GenBasicBlock (LiveInstr instr)] + -> [[NatBasicBlock instr]] + -> Bool + -> RegM freeRegs [[NatBasicBlock instr]] + +process _ _ [] [] accum _ + = return $ reverse accum + +process entry_ids block_live [] next_round accum madeProgress + | not madeProgress + + {- BUGS: There are so many unreachable blocks in the code the warnings are overwhelming. + pprTrace "RegAlloc.Linear.Main.process: no progress made, bailing out." + ( text "Unreachable blocks:" + $$ vcat (map ppr next_round)) -} + = return $ reverse accum + + | otherwise + = process entry_ids block_live + next_round [] accum False + +process entry_ids block_live (b@(BasicBlock id _) : blocks) + next_round accum madeProgress + = do + block_assig <- getBlockAssigR + + if isJust (mapLookup id block_assig) + || id `elem` entry_ids + then do + b' <- processBlock block_live b + process entry_ids block_live blocks + next_round (b' : accum) True + + else process entry_ids block_live blocks + (b : next_round) accum madeProgress + + +-- | Do register allocation on this basic block +-- +processBlock + :: (FR freeRegs, Outputable instr, Instruction instr) + => BlockMap RegSet -- ^ live regs on entry to each basic block + -> LiveBasicBlock instr -- ^ block to do register allocation on + -> RegM freeRegs [NatBasicBlock instr] -- ^ block with registers allocated + +processBlock block_live (BasicBlock id instrs) + = do initBlock id block_live + (instrs', fixups) + <- linearRA block_live [] [] id instrs + return $ BasicBlock id instrs' : fixups + + +-- | Load the freeregs and current reg assignment into the RegM state +-- for the basic block with this BlockId. +initBlock :: FR freeRegs + => BlockId -> BlockMap RegSet -> RegM freeRegs () +initBlock id block_live + = do dflags <- getDynFlags + let platform = targetPlatform dflags + block_assig <- getBlockAssigR + case mapLookup id block_assig of + -- no prior info about this block: we must consider + -- any fixed regs to be allocated, but we can ignore + -- virtual regs (presumably this is part of a loop, + -- and we'll iterate again). The assignment begins + -- empty. + Nothing + -> do -- pprTrace "initFreeRegs" (text $ show initFreeRegs) (return ()) + case mapLookup id block_live of + Nothing -> + setFreeRegsR (frInitFreeRegs platform) + Just live -> + setFreeRegsR $ foldl' (flip $ frAllocateReg platform) (frInitFreeRegs platform) + [ r | RegReal r <- nonDetEltsUniqSet live ] + -- See Note [Unique Determinism and code generation] + setAssigR emptyRegMap + + -- load info about register assignments leading into this block. + Just (freeregs, assig) + -> do setFreeRegsR freeregs + setAssigR assig + + +-- | Do allocation for a sequence of instructions. +linearRA + :: (FR freeRegs, Outputable instr, Instruction instr) + => BlockMap RegSet -- ^ map of what vregs are live on entry to each block. + -> [instr] -- ^ accumulator for instructions already processed. + -> [NatBasicBlock instr] -- ^ accumulator for blocks of fixup code. + -> BlockId -- ^ id of the current block, for debugging. + -> [LiveInstr instr] -- ^ liveness annotated instructions in this block. + + -> RegM freeRegs + ( [instr] -- instructions after register allocation + , [NatBasicBlock instr]) -- fresh blocks of fixup code. + + +linearRA _ accInstr accFixup _ [] + = return + ( reverse accInstr -- instrs need to be returned in the correct order. 
+ , accFixup) -- it doesn't matter what order the fixup blocks are returned in. + + +linearRA block_live accInstr accFixups id (instr:instrs) + = do + (accInstr', new_fixups) <- raInsn block_live accInstr id instr + + linearRA block_live accInstr' (new_fixups ++ accFixups) id instrs + + +-- | Do allocation for a single instruction. +raInsn + :: (FR freeRegs, Outputable instr, Instruction instr) + => BlockMap RegSet -- ^ map of what vregs are love on entry to each block. + -> [instr] -- ^ accumulator for instructions already processed. + -> BlockId -- ^ the id of the current block, for debugging + -> LiveInstr instr -- ^ the instr to have its regs allocated, with liveness info. + -> RegM freeRegs + ( [instr] -- new instructions + , [NatBasicBlock instr]) -- extra fixup blocks + +raInsn _ new_instrs _ (LiveInstr ii Nothing) + | Just n <- takeDeltaInstr ii + = do setDeltaR n + return (new_instrs, []) + +raInsn _ new_instrs _ (LiveInstr ii@(Instr i) Nothing) + | isMetaInstr ii + = return (i : new_instrs, []) + + +raInsn block_live new_instrs id (LiveInstr (Instr instr) (Just live)) + = do + assig <- getAssigR + + -- If we have a reg->reg move between virtual registers, where the + -- src register is not live after this instruction, and the dst + -- register does not already have an assignment, + -- and the source register is assigned to a register, not to a spill slot, + -- then we can eliminate the instruction. + -- (we can't eliminate it if the source register is on the stack, because + -- we do not want to use one spill slot for different virtual registers) + case takeRegRegMoveInstr instr of + Just (src,dst) | src `elementOfUniqSet` (liveDieRead live), + isVirtualReg dst, + not (dst `elemUFM` assig), + isRealReg src || isInReg src assig -> do + case src of + (RegReal rr) -> setAssigR (addToUFM assig dst (InReg rr)) + -- if src is a fixed reg, then we just map dest to this + -- reg in the assignment. src must be an allocatable reg, + -- otherwise it wouldn't be in r_dying. + _virt -> case lookupUFM assig src of + Nothing -> panic "raInsn" + Just loc -> + setAssigR (addToUFM (delFromUFM assig src) dst loc) + + -- we have eliminated this instruction + {- + freeregs <- getFreeRegsR + assig <- getAssigR + pprTrace "raInsn" (text "ELIMINATED: " <> docToSDoc (pprInstr instr) + $$ ppr r_dying <+> ppr w_dying $$ text (show freeregs) $$ ppr assig) $ do + -} + return (new_instrs, []) + + _ -> genRaInsn block_live new_instrs id instr + (nonDetEltsUniqSet $ liveDieRead live) + (nonDetEltsUniqSet $ liveDieWrite live) + -- See Note [Unique Determinism and code generation] + +raInsn _ _ _ instr + = pprPanic "raInsn" (text "no match for:" <> ppr instr) + +-- ToDo: what can we do about +-- +-- R1 = x +-- jump I64[x] // [R1] +-- +-- where x is mapped to the same reg as R1. We want to coalesce x and +-- R1, but the register allocator doesn't know whether x will be +-- assigned to again later, in which case x and R1 should be in +-- different registers. Right now we assume the worst, and the +-- assignment to R1 will clobber x, so we'll spill x into another reg, +-- generating another reg->reg move. 
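+
+{- A sketch of the move-elimination test in raInsn above, written out as a
+   standalone predicate. This is illustrative only: the real code also
+   rewrites the assignment when it drops the move. 'assig' is the current
+   RegMap Loc and 'dieRead' is liveDieRead of the instruction.
+
+     canElimMove :: Instruction instr
+                 => instr -> UniqSet Reg -> RegMap Loc -> Bool
+     canElimMove instr dieRead assig
+       | Just (src, dst) <- takeRegRegMoveInstr instr
+       = src `elementOfUniqSet` dieRead
+         && isVirtualReg dst
+         && not (dst `elemUFM` assig)
+         && (isRealReg src || isInReg src assig)
+       | otherwise
+       = False
+-}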
+ + +isInReg :: Reg -> RegMap Loc -> Bool +isInReg src assig | Just (InReg _) <- lookupUFM assig src = True + | otherwise = False + + +genRaInsn :: (FR freeRegs, Instruction instr, Outputable instr) + => BlockMap RegSet + -> [instr] + -> BlockId + -> instr + -> [Reg] + -> [Reg] + -> RegM freeRegs ([instr], [NatBasicBlock instr]) + +genRaInsn block_live new_instrs block_id instr r_dying w_dying = do + dflags <- getDynFlags + let platform = targetPlatform dflags + case regUsageOfInstr platform instr of { RU read written -> + do + let real_written = [ rr | (RegReal rr) <- written ] + let virt_written = [ vr | (RegVirtual vr) <- written ] + + -- we don't need to do anything with real registers that are + -- only read by this instr. (the list is typically ~2 elements, + -- so using nub isn't a problem). + let virt_read = nub [ vr | (RegVirtual vr) <- read ] + + -- debugging +{- freeregs <- getFreeRegsR + assig <- getAssigR + pprDebugAndThen (defaultDynFlags Settings{ sTargetPlatform=platform } undefined) trace "genRaInsn" + (ppr instr + $$ text "r_dying = " <+> ppr r_dying + $$ text "w_dying = " <+> ppr w_dying + $$ text "virt_read = " <+> ppr virt_read + $$ text "virt_written = " <+> ppr virt_written + $$ text "freeregs = " <+> text (show freeregs) + $$ text "assig = " <+> ppr assig) + $ do +-} + + -- (a), (b) allocate real regs for all regs read by this instruction. + (r_spills, r_allocd) <- + allocateRegsAndSpill True{-reading-} virt_read [] [] virt_read + + -- (c) save any temporaries which will be clobbered by this instruction + clobber_saves <- saveClobberedTemps real_written r_dying + + -- (d) Update block map for new destinations + -- NB. do this before removing dead regs from the assignment, because + -- these dead regs might in fact be live in the jump targets (they're + -- only dead in the code that follows in the current basic block). + (fixup_blocks, adjusted_instr) + <- joinToTargets block_live block_id instr + + -- Debugging - show places where the reg alloc inserted + -- assignment fixup blocks. + -- when (not $ null fixup_blocks) $ + -- pprTrace "fixup_blocks" (ppr fixup_blocks) (return ()) + + -- (e) Delete all register assignments for temps which are read + -- (only) and die here. Update the free register list. + releaseRegs r_dying + + -- (f) Mark regs which are clobbered as unallocatable + clobberRegs real_written + + -- (g) Allocate registers for temporaries *written* (only) + (w_spills, w_allocd) <- + allocateRegsAndSpill False{-writing-} virt_written [] [] virt_written + + -- (h) Release registers for temps which are written here and not + -- used again. + releaseRegs w_dying + + let + -- (i) Patch the instruction + patch_map + = listToUFM + [ (t, RegReal r) + | (t, r) <- zip virt_read r_allocd + ++ zip virt_written w_allocd ] + + patched_instr + = patchRegsOfInstr adjusted_instr patchLookup + + patchLookup x + = case lookupUFM patch_map x of + Nothing -> x + Just y -> y + + + -- (j) free up stack slots for dead spilled regs + -- TODO (can't be bothered right now) + + -- erase reg->reg moves where the source and destination are the same. + -- If the src temp didn't die in this instr but happened to be allocated + -- to the same real reg as the destination, then we can erase the move anyway. + let squashed_instr = case takeRegRegMoveInstr patched_instr of + Just (src, dst) + | src == dst -> [] + _ -> [patched_instr] + + let code = squashed_instr ++ w_spills ++ reverse r_spills + ++ clobber_saves ++ new_instrs + +-- pprTrace "patched-code" ((vcat $ map (docToSDoc . 
pprInstr) code)) $ do +-- pprTrace "pached-fixup" ((ppr fixup_blocks)) $ do + + return (code, fixup_blocks) + + } + +-- ----------------------------------------------------------------------------- +-- releaseRegs + +releaseRegs :: FR freeRegs => [Reg] -> RegM freeRegs () +releaseRegs regs = do + dflags <- getDynFlags + let platform = targetPlatform dflags + assig <- getAssigR + free <- getFreeRegsR + let loop assig !free [] = do setAssigR assig; setFreeRegsR free; return () + loop assig !free (RegReal rr : rs) = loop assig (frReleaseReg platform rr free) rs + loop assig !free (r:rs) = + case lookupUFM assig r of + Just (InBoth real _) -> loop (delFromUFM assig r) + (frReleaseReg platform real free) rs + Just (InReg real) -> loop (delFromUFM assig r) + (frReleaseReg platform real free) rs + _ -> loop (delFromUFM assig r) free rs + loop assig free regs + + +-- ----------------------------------------------------------------------------- +-- Clobber real registers + +-- For each temp in a register that is going to be clobbered: +-- - if the temp dies after this instruction, do nothing +-- - otherwise, put it somewhere safe (another reg if possible, +-- otherwise spill and record InBoth in the assignment). +-- - for allocateRegs on the temps *read*, +-- - clobbered regs are allocatable. +-- +-- for allocateRegs on the temps *written*, +-- - clobbered regs are not allocatable. +-- + +saveClobberedTemps + :: (Instruction instr, FR freeRegs) + => [RealReg] -- real registers clobbered by this instruction + -> [Reg] -- registers which are no longer live after this insn + -> RegM freeRegs [instr] -- return: instructions to spill any temps that will + -- be clobbered. + +saveClobberedTemps [] _ + = return [] + +saveClobberedTemps clobbered dying + = do + assig <- getAssigR + let to_spill + = [ (temp,reg) + | (temp, InReg reg) <- nonDetUFMToList assig + -- This is non-deterministic but we do not + -- currently support deterministic code-generation. + -- See Note [Unique Determinism and code generation] + , any (realRegsAlias reg) clobbered + , temp `notElem` map getUnique dying ] + + (instrs,assig') <- clobber assig [] to_spill + setAssigR assig' + return instrs + + where + clobber assig instrs [] + = return (instrs, assig) + + clobber assig instrs ((temp, reg) : rest) + = do dflags <- getDynFlags + let platform = targetPlatform dflags + + freeRegs <- getFreeRegsR + let regclass = targetClassOfRealReg platform reg + freeRegs_thisClass = frGetFreeRegs platform regclass freeRegs + + case filter (`notElem` clobbered) freeRegs_thisClass of + + -- (1) we have a free reg of the right class that isn't + -- clobbered by this instruction; use it to save the + -- clobbered value. + (my_reg : _) -> do + setFreeRegsR (frAllocateReg platform my_reg freeRegs) + + let new_assign = addToUFM assig temp (InReg my_reg) + let instr = mkRegRegMoveInstr platform + (RegReal reg) (RegReal my_reg) + + clobber new_assign (instr : instrs) rest + + -- (2) no free registers: spill the value + [] -> do + (spill, slot) <- spillR (RegReal reg) temp + + -- record why this reg was spilled for profiling + recordSpill (SpillClobber temp) + + let new_assign = addToUFM assig temp (InBoth reg slot) + + clobber new_assign (spill : instrs) rest + + + +-- | Mark all these real regs as allocated, +-- and kick out their vreg assignments. 
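+--
+-- For example, if temp t is currently InBoth r slot and r is clobbered,
+-- t's assignment is downgraded to InMem slot: the register copy is no
+-- longer valid, but the spill slot still holds the value.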
+-- +clobberRegs :: FR freeRegs => [RealReg] -> RegM freeRegs () +clobberRegs [] + = return () + +clobberRegs clobbered + = do dflags <- getDynFlags + let platform = targetPlatform dflags + + freeregs <- getFreeRegsR + setFreeRegsR $! foldl' (flip $ frAllocateReg platform) freeregs clobbered + + assig <- getAssigR + setAssigR $! clobber assig (nonDetUFMToList assig) + -- This is non-deterministic but we do not + -- currently support deterministic code-generation. + -- See Note [Unique Determinism and code generation] + + where + -- if the temp was InReg and clobbered, then we will have + -- saved it in saveClobberedTemps above. So the only case + -- we have to worry about here is InBoth. Note that this + -- also catches temps which were loaded up during allocation + -- of read registers, not just those saved in saveClobberedTemps. + + clobber assig [] + = assig + + clobber assig ((temp, InBoth reg slot) : rest) + | any (realRegsAlias reg) clobbered + = clobber (addToUFM assig temp (InMem slot)) rest + + clobber assig (_:rest) + = clobber assig rest + +-- ----------------------------------------------------------------------------- +-- allocateRegsAndSpill + +-- Why are we performing a spill? +data SpillLoc = ReadMem StackSlot -- reading from register only in memory + | WriteNew -- writing to a new variable + | WriteMem -- writing to register only in memory +-- Note that ReadNew is not valid, since you don't want to be reading +-- from an uninitialized register. We also don't need the location of +-- the register in memory, since that will be invalidated by the write. +-- Technically, we could coalesce WriteNew and WriteMem into a single +-- entry as well. -- EZY + +-- This function does several things: +-- For each temporary referred to by this instruction, +-- we allocate a real register (spilling another temporary if necessary). +-- We load the temporary up from memory if necessary. +-- We also update the register assignment in the process, and +-- the list of free registers and free stack slots. + +allocateRegsAndSpill + :: (FR freeRegs, Outputable instr, Instruction instr) + => Bool -- True <=> reading (load up spilled regs) + -> [VirtualReg] -- don't push these out + -> [instr] -- spill insns + -> [RealReg] -- real registers allocated (accum.) + -> [VirtualReg] -- temps to allocate + -> RegM freeRegs ( [instr] , [RealReg]) + +allocateRegsAndSpill _ _ spills alloc [] + = return (spills, reverse alloc) + +allocateRegsAndSpill reading keep spills alloc (r:rs) + = do assig <- getAssigR + let doSpill = allocRegsAndSpill_spill reading keep spills alloc r rs assig + case lookupUFM assig r of + -- case (1a): already in a register + Just (InReg my_reg) -> + allocateRegsAndSpill reading keep spills (my_reg:alloc) rs + + -- case (1b): already in a register (and memory) + -- NB1. if we're writing this register, update its assignment to be + -- InReg, because the memory value is no longer valid. + -- NB2. This is why we must process written registers here, even if they + -- are also read by the same instruction. + Just (InBoth my_reg _) + -> do when (not reading) (setAssigR (addToUFM assig r (InReg my_reg))) + allocateRegsAndSpill reading keep spills (my_reg:alloc) rs + + -- Not already in a register, so we need to find a free one... 
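+        -- The SpillLoc passed to doSpill records why we are spilling:
+        -- ReadMem means the value must be reloaded from its slot, while
+        -- WriteMem/WriteNew mean any old memory copy can be forgotten
+        -- because we are about to overwrite the register.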
+ Just (InMem slot) | reading -> doSpill (ReadMem slot) + | otherwise -> doSpill WriteMem + Nothing | reading -> + pprPanic "allocateRegsAndSpill: Cannot read from uninitialized register" (ppr r) + -- NOTE: if the input to the NCG contains some + -- unreachable blocks with junk code, this panic + -- might be triggered. Make sure you only feed + -- sensible code into the NCG. In GHC.Cmm.Pipeline we + -- call removeUnreachableBlocks at the end for this + -- reason. + + | otherwise -> doSpill WriteNew + + +-- reading is redundant with reason, but we keep it around because it's +-- convenient and it maintains the recursive structure of the allocator. -- EZY +allocRegsAndSpill_spill :: (FR freeRegs, Instruction instr, Outputable instr) + => Bool + -> [VirtualReg] + -> [instr] + -> [RealReg] + -> VirtualReg + -> [VirtualReg] + -> UniqFM Loc + -> SpillLoc + -> RegM freeRegs ([instr], [RealReg]) +allocRegsAndSpill_spill reading keep spills alloc r rs assig spill_loc + = do dflags <- getDynFlags + let platform = targetPlatform dflags + freeRegs <- getFreeRegsR + let freeRegs_thisClass = frGetFreeRegs platform (classOfVirtualReg r) freeRegs + + case freeRegs_thisClass of + + -- case (2): we have a free register + (my_reg : _) -> + do spills' <- loadTemp r spill_loc my_reg spills + + setAssigR (addToUFM assig r $! newLocation spill_loc my_reg) + setFreeRegsR $ frAllocateReg platform my_reg freeRegs + + allocateRegsAndSpill reading keep spills' (my_reg : alloc) rs + + + -- case (3): we need to push something out to free up a register + [] -> + do let inRegOrBoth (InReg _) = True + inRegOrBoth (InBoth _ _) = True + inRegOrBoth _ = False + let candidates' = + flip delListFromUFM keep $ + filterUFM inRegOrBoth $ + assig + -- This is non-deterministic but we do not + -- currently support deterministic code-generation. + -- See Note [Unique Determinism and code generation] + let candidates = nonDetUFMToList candidates' + + -- the vregs we could kick out that are already in a slot + let candidates_inBoth + = [ (temp, reg, mem) + | (temp, InBoth reg mem) <- candidates + , targetClassOfRealReg platform reg == classOfVirtualReg r ] + + -- the vregs we could kick out that are only in a reg + -- this would require writing the reg to a new slot before using it. + let candidates_inReg + = [ (temp, reg) + | (temp, InReg reg) <- candidates + , targetClassOfRealReg platform reg == classOfVirtualReg r ] + + let result + + -- we have a temporary that is in both register and mem, + -- just free up its register for use. + | (temp, my_reg, slot) : _ <- candidates_inBoth + = do spills' <- loadTemp r spill_loc my_reg spills + let assig1 = addToUFM assig temp (InMem slot) + let assig2 = addToUFM assig1 r $! newLocation spill_loc my_reg + + setAssigR assig2 + allocateRegsAndSpill reading keep spills' (my_reg:alloc) rs + + -- otherwise, we need to spill a temporary that currently + -- resides in a register. + | (temp_to_push_out, (my_reg :: RealReg)) : _ + <- candidates_inReg + = do + (spill_insn, slot) <- spillR (RegReal my_reg) temp_to_push_out + let spill_store = (if reading then id else reverse) + [ -- COMMENT (fsLit "spill alloc") + spill_insn ] + + -- record that this temp was spilled + recordSpill (SpillAlloc temp_to_push_out) + + -- update the register assignment + let assig1 = addToUFM assig temp_to_push_out (InMem slot) + let assig2 = addToUFM assig1 r $! newLocation spill_loc my_reg + setAssigR assig2 + + -- if need be, load up a spilled temp into the reg we've just freed up. 
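+ -- (loadTemp below emits a load instruction only in the ReadMem
+ -- case; when we are writing there is nothing to fetch.)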
+                        spills' <- loadTemp r spill_loc my_reg spills
+
+                        allocateRegsAndSpill reading keep
+                                (spill_store ++ spills')
+                                (my_reg:alloc) rs
+
+
+                -- there wasn't anything to spill, so we're screwed.
+                | otherwise
+                = pprPanic ("RegAllocLinear.allocRegsAndSpill: no spill candidates\n")
+                $ vcat
+                        [ text "allocating vreg:  " <> text (show r)
+                        , text "assignment:       " <> ppr assig
+                        , text "freeRegs:         " <> text (show freeRegs)
+                        , text "initFreeRegs:     " <> text (show (frInitFreeRegs platform `asTypeOf` freeRegs)) ]
+
+        result
+
+
+-- | Calculate a new location after a register has been loaded.
+newLocation :: SpillLoc -> RealReg -> Loc
+-- if the tmp was read from a slot, then now it's in a reg as well
+newLocation (ReadMem slot) my_reg = InBoth my_reg slot
+-- writes will always result in only the register being available
+newLocation _ my_reg = InReg my_reg
+
+-- | Load up a spilled temporary if we need to (read from memory).
+loadTemp
+        :: (Instruction instr)
+        => VirtualReg   -- the temp being loaded
+        -> SpillLoc     -- the current location of this temp
+        -> RealReg      -- the hreg to load the temp into
+        -> [instr]
+        -> RegM freeRegs [instr]
+
+loadTemp vreg (ReadMem slot) hreg spills
+ = do
+        insn <- loadR (RegReal hreg) slot
+        recordSpill (SpillLoad $ getUnique vreg)
+        return $ {- COMMENT (fsLit "spill load") : -} insn : spills
+
+loadTemp _ _ _ spills =
+        return spills
+
diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/Base.hs b/compiler/GHC/CmmToAsm/Reg/Linear/Base.hs
new file mode 100644
index 0000000000..43dbab843b
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/Reg/Linear/Base.hs
@@ -0,0 +1,141 @@
+
+-- | Put common type definitions here to break recursive module dependencies.
+
+module GHC.CmmToAsm.Reg.Linear.Base (
+        BlockAssignment,
+
+        Loc(..),
+        regsOfLoc,
+
+        -- for stats
+        SpillReason(..),
+        RegAllocStats(..),
+
+        -- the allocator monad
+        RA_State(..),
+)
+
+where
+
+import GhcPrelude
+
+import GHC.CmmToAsm.Reg.Linear.StackMap
+import GHC.CmmToAsm.Reg.Liveness
+import GHC.Platform.Reg
+
+import GHC.Driver.Session
+import Outputable
+import Unique
+import UniqFM
+import UniqSupply
+import GHC.Cmm.BlockId
+
+
+-- | Used to store the register assignment on entry to a basic block.
+--      We use this to handle join points, where multiple branch instructions
+--      target a particular label. We have to insert fixup code to make
+--      the register assignments from the different sources match up.
+--
+type BlockAssignment freeRegs
+        = BlockMap (freeRegs, RegMap Loc)
+
+
+-- | Where a vreg is currently stored.
+--      A temporary can be marked as living in both a register and memory
+--      (InBoth), for example if it was recently loaded from a spill location.
+--      This makes it cheap to spill (no save instruction required), but we
+--      have to be careful to turn this into InReg if the value in the
+--      register is changed.
+
+--      This is also useful when a temporary is about to be clobbered. We
+--      save it in a spill location, but mark it as InBoth because the current
+--      instruction might still want to read it.
+--
+data Loc
+        -- | vreg is in a register
+        = InReg   !RealReg
+
+        -- | vreg is held in a stack slot
+        | InMem   {-# UNPACK #-} !StackSlot
+
+
+        -- | vreg is held in both a register and a stack slot
+        | InBoth  !RealReg
+                  {-# UNPACK #-} !StackSlot
+        deriving (Eq, Show, Ord)
+
+instance Outputable Loc where
+        ppr l = text (show l)
+
+
+-- | Get the reg numbers stored in this Loc.
+regsOfLoc :: Loc -> [RealReg]
+regsOfLoc (InReg r)    = [r]
+regsOfLoc (InBoth r _) = [r]
+regsOfLoc (InMem _)    = []
+
+
+-- | Reasons why instructions might be inserted by the spiller.
+--      Used when generating stats for -ddump-asm-stats.
+--
+data SpillReason
+        -- | vreg was spilled to a slot so we could use its
+        --      current hreg for another vreg
+        = SpillAlloc    !Unique
+
+        -- | vreg was moved because its hreg was clobbered
+        | SpillClobber  !Unique
+
+        -- | vreg was loaded from a spill slot
+        | SpillLoad     !Unique
+
+        -- | reg-reg move inserted during join to targets
+        | SpillJoinRR   !Unique
+
+        -- | reg-mem move inserted during join to targets
+        | SpillJoinRM   !Unique
+
+
+-- | Used to carry interesting stats out of the register allocator.
+data RegAllocStats
+        = RegAllocStats
+        { ra_spillInstrs        :: UniqFM [Int]
+        , ra_fixupList          :: [(BlockId,BlockId,BlockId)]
+        -- ^ (from,fixup,to) : We inserted fixup code between from and to
+        }
+
+
+-- | The register allocator state
+data RA_State freeRegs
+        = RA_State
+
+        {
+        -- | the current mapping from basic blocks to
+        --      the register assignments at the beginning of that block.
+          ra_blockassig :: BlockAssignment freeRegs
+
+        -- | free machine registers
+        , ra_freeregs   :: !freeRegs
+
+        -- | assignment of temps to locations
+        , ra_assig      :: RegMap Loc
+
+        -- | current stack delta
+        , ra_delta      :: Int
+
+        -- | free stack slots for spilling
+        , ra_stack      :: StackMap
+
+        -- | unique supply for generating names for join point fixup blocks.
+        , ra_us         :: UniqSupply
+
+        -- | Record why things were spilled, for -ddump-asm-stats.
+        --      Just keep a list here instead of a map of regs -> reasons.
+        --      We don't want to slow down the allocator if we're not going to emit the stats.
+        , ra_spills     :: [SpillReason]
+        , ra_DynFlags   :: DynFlags
+
+        -- | (from,fixup,to) : We inserted fixup code between from and to
+        , ra_fixups     :: [(BlockId,BlockId,BlockId)] }
+
+
diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs b/compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs
new file mode 100644
index 0000000000..0d72d8b6e9
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs
@@ -0,0 +1,89 @@
+{-# LANGUAGE CPP #-}
+
+module GHC.CmmToAsm.Reg.Linear.FreeRegs (
+        FR(..),
+        maxSpillSlots
+)
+
+#include "HsVersions.h"
+
+where
+
+import GhcPrelude
+
+import GHC.Platform.Reg
+import GHC.Platform.Reg.Class
+
+import GHC.Driver.Session
+import Panic
+import GHC.Platform
+
+-- -----------------------------------------------------------------------------
+-- The free register set
+-- This needs to be *efficient*.
+-- Here's an inefficient 'executable specification' of the FreeRegs data type:
+--
+--      type FreeRegs = [RegNo]
+--      noFreeRegs = []
+--      releaseReg n f = if n `elem` f then f else (n : f)
+--      initFreeRegs = allocatableRegs
+--      getFreeRegs cls f = filter ( (==cls) . regClass . 
RealReg ) f +-- allocateReg f r = filter (/= r) f + +import qualified GHC.CmmToAsm.Reg.Linear.PPC as PPC +import qualified GHC.CmmToAsm.Reg.Linear.SPARC as SPARC +import qualified GHC.CmmToAsm.Reg.Linear.X86 as X86 +import qualified GHC.CmmToAsm.Reg.Linear.X86_64 as X86_64 + +import qualified GHC.CmmToAsm.PPC.Instr as PPC.Instr +import qualified GHC.CmmToAsm.SPARC.Instr as SPARC.Instr +import qualified GHC.CmmToAsm.X86.Instr as X86.Instr + +class Show freeRegs => FR freeRegs where + frAllocateReg :: Platform -> RealReg -> freeRegs -> freeRegs + frGetFreeRegs :: Platform -> RegClass -> freeRegs -> [RealReg] + frInitFreeRegs :: Platform -> freeRegs + frReleaseReg :: Platform -> RealReg -> freeRegs -> freeRegs + +instance FR X86.FreeRegs where + frAllocateReg = \_ -> X86.allocateReg + frGetFreeRegs = X86.getFreeRegs + frInitFreeRegs = X86.initFreeRegs + frReleaseReg = \_ -> X86.releaseReg + +instance FR X86_64.FreeRegs where + frAllocateReg = \_ -> X86_64.allocateReg + frGetFreeRegs = X86_64.getFreeRegs + frInitFreeRegs = X86_64.initFreeRegs + frReleaseReg = \_ -> X86_64.releaseReg + +instance FR PPC.FreeRegs where + frAllocateReg = \_ -> PPC.allocateReg + frGetFreeRegs = \_ -> PPC.getFreeRegs + frInitFreeRegs = PPC.initFreeRegs + frReleaseReg = \_ -> PPC.releaseReg + +instance FR SPARC.FreeRegs where + frAllocateReg = SPARC.allocateReg + frGetFreeRegs = \_ -> SPARC.getFreeRegs + frInitFreeRegs = SPARC.initFreeRegs + frReleaseReg = SPARC.releaseReg + +maxSpillSlots :: DynFlags -> Int +maxSpillSlots dflags + = case platformArch (targetPlatform dflags) of + ArchX86 -> X86.Instr.maxSpillSlots dflags + ArchX86_64 -> X86.Instr.maxSpillSlots dflags + ArchPPC -> PPC.Instr.maxSpillSlots dflags + ArchS390X -> panic "maxSpillSlots ArchS390X" + ArchSPARC -> SPARC.Instr.maxSpillSlots dflags + ArchSPARC64 -> panic "maxSpillSlots ArchSPARC64" + ArchARM _ _ _ -> panic "maxSpillSlots ArchARM" + ArchARM64 -> panic "maxSpillSlots ArchARM64" + ArchPPC_64 _ -> PPC.Instr.maxSpillSlots dflags + ArchAlpha -> panic "maxSpillSlots ArchAlpha" + ArchMipseb -> panic "maxSpillSlots ArchMipseb" + ArchMipsel -> panic "maxSpillSlots ArchMipsel" + ArchJavaScript-> panic "maxSpillSlots ArchJavaScript" + ArchUnknown -> panic "maxSpillSlots ArchUnknown" + diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/JoinToTargets.hs b/compiler/GHC/CmmToAsm/Reg/Linear/JoinToTargets.hs new file mode 100644 index 0000000000..b4ad1b948c --- /dev/null +++ b/compiler/GHC/CmmToAsm/Reg/Linear/JoinToTargets.hs @@ -0,0 +1,378 @@ +{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-} + +-- | Handles joining of a jump instruction to its targets. + +-- The first time we encounter a jump to a particular basic block, we +-- record the assignment of temporaries. The next time we encounter a +-- jump to the same block, we compare our current assignment to the +-- stored one. They might be different if spilling has occurred in one +-- branch; so some fixup code will be required to match up the assignments. 
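+--
+-- As a sketch (register and slot names invented for illustration):
+-- suppose the first branch to reach block L arrives with the assignment
+--
+--      v1 -> InReg r1,   v2 -> InReg r2
+--
+-- while a later branch arrives with
+--
+--      v1 -> InReg r1,   v2 -> InMem s1
+--
+-- The second branch is then redirected through a fresh fixup block that
+-- reloads s1 into r2 before jumping on to L.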
+-- +module GHC.CmmToAsm.Reg.Linear.JoinToTargets (joinToTargets) where + +import GhcPrelude + +import GHC.CmmToAsm.Reg.Linear.State +import GHC.CmmToAsm.Reg.Linear.Base +import GHC.CmmToAsm.Reg.Linear.FreeRegs +import GHC.CmmToAsm.Reg.Liveness +import GHC.CmmToAsm.Instr +import GHC.Platform.Reg + +import GHC.Cmm.BlockId +import GHC.Cmm.Dataflow.Collections +import Digraph +import GHC.Driver.Session +import Outputable +import Unique +import UniqFM +import UniqSet + +-- | For a jump instruction at the end of a block, generate fixup code so its +-- vregs are in the correct regs for its destination. +-- +joinToTargets + :: (FR freeRegs, Instruction instr, Outputable instr) + => BlockMap RegSet -- ^ maps the unique of the blockid to the set of vregs + -- that are known to be live on the entry to each block. + + -> BlockId -- ^ id of the current block + -> instr -- ^ branch instr on the end of the source block. + + -> RegM freeRegs ([NatBasicBlock instr] -- fresh blocks of fixup code. + , instr) -- the original branch + -- instruction, but maybe + -- patched to jump + -- to a fixup block first. + +joinToTargets block_live id instr + + -- we only need to worry about jump instructions. + | not $ isJumpishInstr instr + = return ([], instr) + + | otherwise + = joinToTargets' block_live [] id instr (jumpDestsOfInstr instr) + +----- +joinToTargets' + :: (FR freeRegs, Instruction instr, Outputable instr) + => BlockMap RegSet -- ^ maps the unique of the blockid to the set of vregs + -- that are known to be live on the entry to each block. + + -> [NatBasicBlock instr] -- ^ acc blocks of fixup code. + + -> BlockId -- ^ id of the current block + -> instr -- ^ branch instr on the end of the source block. + + -> [BlockId] -- ^ branch destinations still to consider. + + -> RegM freeRegs ([NatBasicBlock instr], instr) + +-- no more targets to consider. all done. +joinToTargets' _ new_blocks _ instr [] + = return (new_blocks, instr) + +-- handle a branch target. +joinToTargets' block_live new_blocks block_id instr (dest:dests) + = do + -- get the map of where the vregs are stored on entry to each basic block. + block_assig <- getBlockAssigR + + -- get the assignment on entry to the branch instruction. + assig <- getAssigR + + -- adjust the current assignment to remove any vregs that are not live + -- on entry to the destination block. + let Just live_set = mapLookup dest block_live + let still_live uniq _ = uniq `elemUniqSet_Directly` live_set + let adjusted_assig = filterUFM_Directly still_live assig + + -- and free up those registers which are now free. + let to_free = + [ r | (reg, loc) <- nonDetUFMToList assig + -- This is non-deterministic but we do not + -- currently support deterministic code-generation. + -- See Note [Unique Determinism and code generation] + , not (elemUniqSet_Directly reg live_set) + , r <- regsOfLoc loc ] + + case mapLookup dest block_assig of + Nothing + -> joinToTargets_first + block_live new_blocks block_id instr dest dests + block_assig adjusted_assig to_free + + Just (_, dest_assig) + -> joinToTargets_again + block_live new_blocks block_id instr dest dests + adjusted_assig dest_assig + + +-- this is the first time we jumped to this block. 
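+-- Before recording, we release the registers holding vregs that are
+-- dead on entry to the destination, so the stored pair of free set and
+-- assignment describes only the live vregs. Later jumps to the same
+-- block are then compared against this recorded state.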
+joinToTargets_first :: (FR freeRegs, Instruction instr, Outputable instr)
+                    => BlockMap RegSet
+                    -> [NatBasicBlock instr]
+                    -> BlockId
+                    -> instr
+                    -> BlockId
+                    -> [BlockId]
+                    -> BlockAssignment freeRegs
+                    -> RegMap Loc
+                    -> [RealReg]
+                    -> RegM freeRegs ([NatBasicBlock instr], instr)
+joinToTargets_first block_live new_blocks block_id instr dest dests
+        block_assig src_assig
+        to_free
+
+ = do   dflags <- getDynFlags
+        let platform = targetPlatform dflags
+
+        -- free up the regs that are not live on entry to this block.
+        freeregs        <- getFreeRegsR
+        let freeregs'   = foldl' (flip $ frReleaseReg platform) freeregs to_free
+
+        -- remember the current assignment on entry to this block.
+        setBlockAssigR (mapInsert dest (freeregs', src_assig) block_assig)
+
+        joinToTargets' block_live new_blocks block_id instr dests
+
+
+-- we've jumped to this block before
+joinToTargets_again :: (Instruction instr, FR freeRegs, Outputable instr)
+                    => BlockMap RegSet
+                    -> [NatBasicBlock instr]
+                    -> BlockId
+                    -> instr
+                    -> BlockId
+                    -> [BlockId]
+                    -> UniqFM Loc
+                    -> UniqFM Loc
+                    -> RegM freeRegs ([NatBasicBlock instr], instr)
+joinToTargets_again
+        block_live new_blocks block_id instr dest dests
+        src_assig dest_assig
+
+        -- the assignments already match, no problem.
+        | nonDetUFMToList dest_assig == nonDetUFMToList src_assig
+        -- This is non-deterministic but we do not
+        -- currently support deterministic code-generation.
+        -- See Note [Unique Determinism and code generation]
+        = joinToTargets' block_live new_blocks block_id instr dests
+
+        -- assignments don't match, need fixup code
+        | otherwise
+        = do
+
+                -- make a graph of what things need to be moved where.
+                let graph = makeRegMovementGraph src_assig dest_assig
+
+                -- look for cycles in the graph. This can happen if regs need to be swapped.
+                -- Note that we depend on the fact that this function does a
+                --      bottom up traversal of the tree-like portions of the graph.
+                --
+                --      eg, if we have
+                --              R1 -> R2 -> R3
+                --
+                --      ie move value in R1 to R2 and value in R2 to R3.
+                --
+                --      We need to do the R2 -> R3 move before R1 -> R2.
+                --
+                let sccs = stronglyConnCompFromEdgedVerticesOrdR graph
+
+                -- debugging
+                {-
+                pprTrace
+                        ("joinToTargets: making fixup code")
+                        (vcat   [ text "        in block: "     <> ppr block_id
+                                , text " jmp instruction: "     <> ppr instr
+                                , text "  src assignment: "     <> ppr src_assig
+                                , text " dest assignment: "     <> ppr dest_assig
+                                , text "  movement graph: "     <> ppr graph
+                                , text "   sccs of graph: "     <> ppr sccs
+                                , text ""])
+                        (return ())
+                -}
+                delta           <- getDeltaR
+                fixUpInstrs_    <- mapM (handleComponent delta instr) sccs
+                let fixUpInstrs = concat fixUpInstrs_
+
+                -- make a new basic block containing the fixup code.
+                --      At the end of the current block we will jump to the fixup one,
+                --      then that will jump to our original destination.
+                fixup_block_id <- mkBlockId <$> getUniqueR
+                let block = BasicBlock fixup_block_id
+                                $ fixUpInstrs ++ mkJumpInstr dest
+
+                -- if we didn't need any fixups, then don't include the block
+                case fixUpInstrs of
+                 []     -> joinToTargets' block_live new_blocks block_id instr dests
+
+                 -- patch the original branch instruction so it goes to our
+                 --     fixup block instead.
+                 _      -> let instr'  =  patchJumpInstr instr
+                                                (\bid -> if bid == dest
+                                                        then fixup_block_id
+                                                        else bid) -- no change!
+
+                           in do
+                                {- --debugging
+                                pprTrace "FixUpEdge info:"
+                                    (
+                                        text "inBlock:" <> ppr block_id $$
+                                        text "instr:" <> ppr instr $$
+                                        text "instr':" <> ppr instr' $$
+                                        text "fixup_block_id':" <>
+                                            ppr fixup_block_id $$
+                                        text "dest:" <> ppr dest
+                                    ) (return ())
+                                -}
+                                recordFixupBlock block_id fixup_block_id dest
+                                joinToTargets' block_live (block : new_blocks)
+                                               block_id instr' dests
+
+
+-- | Construct a graph of register\/spill movements.
+--
+--      Cyclic components seem to occur only very rarely.
+--
+--      We cut some corners by not handling memory-to-memory moves.
+--      This shouldn't happen because every temporary gets its own stack slot.
+--
+makeRegMovementGraph :: RegMap Loc -> RegMap Loc -> [Node Loc Unique]
+makeRegMovementGraph adjusted_assig dest_assig
+ = [ node       | (vreg, src) <- nonDetUFMToList adjusted_assig
+                -- This is non-deterministic but we do not
+                -- currently support deterministic code-generation.
+                -- See Note [Unique Determinism and code generation]
+                -- source reg might not be needed at the dest:
+                , Just loc <- [lookupUFM_Directly dest_assig vreg]
+                , node <- expandNode vreg src loc ]
+
+
+-- | Expand out the destination, so InBoth destinations turn into
+--      a combination of InReg and InMem.
+
+--      The InBoth handling is a little tricky here. If the destination is
+--      InBoth, then we must ensure that the value ends up in both locations.
+--      An InBoth destination must conflict with an InReg or InMem source, so
+--      we expand an InBoth destination as necessary.
+--
+--      An InBoth source is slightly different: we only care about the register
+--      that the source value is in, so that we can move it to the destinations.
+--
+expandNode
+        :: a
+        -> Loc                  -- ^ source of move
+        -> Loc                  -- ^ destination of move
+        -> [Node Loc a ]
+
+expandNode vreg loc@(InReg src) (InBoth dst mem)
+        | src == dst = [DigraphNode vreg loc [InMem mem]]
+        | otherwise  = [DigraphNode vreg loc [InReg dst, InMem mem]]
+
+expandNode vreg loc@(InMem src) (InBoth dst mem)
+        | src == mem = [DigraphNode vreg loc [InReg dst]]
+        | otherwise  = [DigraphNode vreg loc [InReg dst, InMem mem]]
+
+expandNode _        (InBoth _ src) (InMem dst)
+        | src == dst = [] -- guaranteed to be true
+
+expandNode _        (InBoth src _) (InReg dst)
+        | src == dst = []
+
+expandNode vreg     (InBoth src _) dst
+        = expandNode vreg (InReg src) dst
+
+expandNode vreg src dst
+        | src == dst = []
+        | otherwise  = [DigraphNode vreg src [dst]]
+
+
+-- | Generate fixup code for a particular component in the move graph.
+--      This component tells us what values need to be moved to what
+--      destinations. We have eliminated any possibility of single-node
+--      cycles in expandNode above.
+--
+handleComponent
+        :: Instruction instr
+        => Int -> instr -> SCC (Node Loc Unique)
+        -> RegM freeRegs [instr]
+
+-- If the graph is acyclic then we won't get the swapping problem below.
+--      In this case we can just do the moves directly, and avoid having to
+--      go via a spill slot.
+--
+handleComponent delta _ (AcyclicSCC (DigraphNode vreg src dsts))
+        = mapM (makeMove delta vreg src) dsts
+
+
+-- Handle some cyclic moves.
+--      This can happen if we have two regs that need to be swapped.
+--      eg:
+--           vreg   source loc   dest loc
+--          (vreg1, InReg r1,    [InReg r2])
+--          (vreg2, InReg r2,    [InReg r1])
+--
+--      To avoid needing a temp register, we just spill all the source regs,
+--      then reload them into their destination regs.
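+--
+--      For the swap above, the generated fixup is roughly (slot name
+--      invented for illustration):
+--
+--              spill  r1 -> slot1      (free r1 without losing vreg1's value)
+--              move   r2 -> r1         (the remaining moves, now acyclic)
+--              reload slot1 -> r2      (fill the register we freed)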
+--
+--      Note that we cannot have cycles that involve memory locations as
+--      sources or destinations, because memory locations (stack slots)
+--      are allocated exclusively for a virtual register and therefore
+--      cannot require a fixup.
+--
+handleComponent delta instr
+        (CyclicSCC ((DigraphNode vreg (InReg sreg) ((InReg dreg: _))) : rest))
+        -- dest list may have more than one element, if the reg is also InMem.
+ = do
+        -- spill the source into its slot
+        (instrSpill, slot)
+                        <- spillR (RegReal sreg) vreg
+
+        -- reload into destination reg
+        instrLoad       <- loadR (RegReal dreg) slot
+
+        remainingFixUps <- mapM (handleComponent delta instr)
+                                (stronglyConnCompFromEdgedVerticesOrdR rest)
+
+        -- make sure to do all the reloads after all the spills,
+        --      so we don't end up clobbering the source values.
+        return ([instrSpill] ++ concat remainingFixUps ++ [instrLoad])
+
+handleComponent _ _ (CyclicSCC _)
+ = panic "Register Allocator: handleComponent cyclic"
+
+
+-- | Move a vreg between these two locations.
+--
+makeMove
+        :: Instruction instr
+        => Int          -- ^ current C stack delta.
+        -> Unique       -- ^ unique of the vreg that we're moving.
+        -> Loc          -- ^ source location.
+        -> Loc          -- ^ destination location.
+        -> RegM freeRegs instr  -- ^ move instruction.
+
+makeMove delta vreg src dst
+ = do dflags <- getDynFlags
+      let platform = targetPlatform dflags
+
+      case (src, dst) of
+          (InReg s, InReg d) ->
+              do recordSpill (SpillJoinRR vreg)
+                 return $ mkRegRegMoveInstr platform (RegReal s) (RegReal d)
+          (InMem s, InReg d) ->
+              do recordSpill (SpillJoinRM vreg)
+                 return $ mkLoadInstr dflags (RegReal d) delta s
+          (InReg s, InMem d) ->
+              do recordSpill (SpillJoinRM vreg)
+                 return $ mkSpillInstr dflags (RegReal s) delta d
+          _ ->
+              -- we don't handle memory to memory moves.
+              --    they shouldn't happen because we don't share
+              --    stack slots between vregs.
+              panic ("makeMove " ++ show vreg ++ " (" ++ show src ++ ") ("
+                  ++ show dst ++ ")"
+                  ++ " we don't handle mem->mem moves.")
+
diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/PPC.hs b/compiler/GHC/CmmToAsm/Reg/Linear/PPC.hs
new file mode 100644
index 0000000000..ce0a187647
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/Reg/Linear/PPC.hs
@@ -0,0 +1,60 @@
+-- | Free regs map for PowerPC
+module GHC.CmmToAsm.Reg.Linear.PPC where
+
+import GhcPrelude
+
+import GHC.CmmToAsm.PPC.Regs
+import GHC.Platform.Reg.Class
+import GHC.Platform.Reg
+
+import Outputable
+import GHC.Platform
+
+import Data.Word
+import Data.Bits
+
+-- The PowerPC has 32 integer and 32 floating point registers.
+-- This is 32-bit PowerPC, so Word64 is inefficient - two Word32s are much
+-- better.
+-- Note that when getFreeRegs scans for free registers, it starts at register
+-- 31 and counts down. This is a hack for the PowerPC - the higher-numbered
+-- registers are callee-saves, while the lower regs are caller-saves, so it
+-- makes sense to start at the high end.
+-- Apart from that, the code does nothing PowerPC-specific, so feel free to
+-- add your favourite platform to the #if (if you have 64 registers but only
+-- 32-bit words).
+
+data FreeRegs = FreeRegs !Word32 !Word32
+              deriving( Show )  -- The Show is used in an ASSERT
+
+noFreeRegs :: FreeRegs
+noFreeRegs = FreeRegs 0 0
+
+releaseReg :: RealReg -> FreeRegs -> FreeRegs
+releaseReg (RealRegSingle r) (FreeRegs g f)
+    | r > 31    = FreeRegs g (f .|. (1 `shiftL` (r - 32)))
+    | otherwise = FreeRegs (g .|. 
(1 `shiftL` r)) f + +releaseReg _ _ + = panic "RegAlloc.Linear.PPC.releaseReg: bad reg" + +initFreeRegs :: Platform -> FreeRegs +initFreeRegs platform = foldl' (flip releaseReg) noFreeRegs (allocatableRegs platform) + +getFreeRegs :: RegClass -> FreeRegs -> [RealReg] -- lazily +getFreeRegs cls (FreeRegs g f) + | RcDouble <- cls = go f (0x80000000) 63 + | RcInteger <- cls = go g (0x80000000) 31 + | otherwise = pprPanic "RegAllocLinear.getFreeRegs: Bad register class" (ppr cls) + where + go _ 0 _ = [] + go x m i | x .&. m /= 0 = RealRegSingle i : (go x (m `shiftR` 1) $! i-1) + | otherwise = go x (m `shiftR` 1) $! i-1 + +allocateReg :: RealReg -> FreeRegs -> FreeRegs +allocateReg (RealRegSingle r) (FreeRegs g f) + | r > 31 = FreeRegs g (f .&. complement (1 `shiftL` (r - 32))) + | otherwise = FreeRegs (g .&. complement (1 `shiftL` r)) f + +allocateReg _ _ + = panic "RegAlloc.Linear.PPC.allocateReg: bad reg" diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/SPARC.hs b/compiler/GHC/CmmToAsm/Reg/Linear/SPARC.hs new file mode 100644 index 0000000000..7fa85f0913 --- /dev/null +++ b/compiler/GHC/CmmToAsm/Reg/Linear/SPARC.hs @@ -0,0 +1,188 @@ +{-# LANGUAGE CPP #-} + +-- | Free regs map for SPARC +module GHC.CmmToAsm.Reg.Linear.SPARC where + +import GhcPrelude + +import GHC.CmmToAsm.SPARC.Regs +import GHC.Platform.Reg.Class +import GHC.Platform.Reg + +import GHC.Platform.Regs +import Outputable +import GHC.Platform + +import Data.Word +import Data.Bits + + +-------------------------------------------------------------------------------- +-- SPARC is like PPC, except for twinning of floating point regs. +-- When we allocate a double reg we must take an even numbered +-- float reg, as well as the one after it. + + +-- Holds bitmaps showing what registers are currently allocated. +-- The float and double reg bitmaps overlap, but we only alloc +-- float regs into the float map, and double regs into the double map. +-- +-- Free regs have a bit set in the corresponding bitmap. +-- +data FreeRegs + = FreeRegs + !Word32 -- int reg bitmap regs 0..31 + !Word32 -- float reg bitmap regs 32..63 + !Word32 -- double reg bitmap regs 32..63 + +instance Show FreeRegs where + show = showFreeRegs + +-- | A reg map where no regs are free to be allocated. +noFreeRegs :: FreeRegs +noFreeRegs = FreeRegs 0 0 0 + + +-- | The initial set of free regs. +initFreeRegs :: Platform -> FreeRegs +initFreeRegs platform + = foldl' (flip $ releaseReg platform) noFreeRegs allocatableRegs + + +-- | Get all the free registers of this class. +getFreeRegs :: RegClass -> FreeRegs -> [RealReg] -- lazily +getFreeRegs cls (FreeRegs g f d) + | RcInteger <- cls = map RealRegSingle $ go 1 g 1 0 + | RcFloat <- cls = map RealRegSingle $ go 1 f 1 32 + | RcDouble <- cls = map (\i -> RealRegPair i (i+1)) $ go 2 d 1 32 +#if __GLASGOW_HASKELL__ <= 810 + | otherwise = pprPanic "RegAllocLinear.getFreeRegs: Bad register class " (ppr cls) +#endif + where + go _ _ 0 _ + = [] + + go step bitmap mask ix + | bitmap .&. mask /= 0 + = ix : (go step bitmap (mask `shiftL` step) $! ix + step) + + | otherwise + = go step bitmap (mask `shiftL` step) $! ix + step + + +-- | Grab a register. +allocateReg :: Platform -> RealReg -> FreeRegs -> FreeRegs +allocateReg platform + reg@(RealRegSingle r) + (FreeRegs g f d) + + -- can't allocate free regs + | not $ freeReg platform r + = pprPanic "SPARC.FreeRegs.allocateReg: not allocating pinned reg" (ppr reg) + + -- a general purpose reg + | r <= 31 + = let mask = complement (bitMask r) + in FreeRegs + (g .&. 
mask)
+                f
+                d
+
+        -- a float reg
+        | r >= 32, r <= 63
+        = let   mask    = complement (bitMask (r - 32))
+
+                -- the mask of the double this FP reg aliases
+                maskLow = if r `mod` 2 == 0
+                                then complement (bitMask (r - 32))
+                                else complement (bitMask (r - 32 - 1))
+          in    FreeRegs
+                g
+                (f .&. mask)
+                (d .&. maskLow)
+
+        | otherwise
+        = pprPanic "SPARC.FreeRegs.allocateReg: not allocating bad reg" (ppr reg)
+
+allocateReg _
+        reg@(RealRegPair r1 r2)
+        (FreeRegs g f d)
+
+        | r1 >= 32, r1 <= 63, r1 `mod` 2 == 0
+        , r2 >= 32, r2 <= 63
+        = let   mask1   = complement (bitMask (r1 - 32))
+                mask2   = complement (bitMask (r2 - 32))
+          in
+                FreeRegs
+                        g
+                        ((f .&. mask1) .&. mask2)
+                        (d .&. mask1)
+
+        | otherwise
+        = pprPanic "SPARC.FreeRegs.allocateReg: not allocating bad reg" (ppr reg)
+
+
+
+-- | Release a register from allocation.
+--      The register liveness information says that most regs die after a C call,
+--      but we still don't want to allocate to some of them.
+--
+releaseReg :: Platform -> RealReg -> FreeRegs -> FreeRegs
+releaseReg platform
+        reg@(RealRegSingle r)
+        regs@(FreeRegs g f d)
+
+        -- don't release pinned reg
+        | not $ freeReg platform r
+        = regs
+
+        -- a general purpose reg
+        | r <= 31
+        = let   mask    = bitMask r
+          in    FreeRegs (g .|. mask) f d
+
+        -- a float reg
+        | r >= 32, r <= 63
+        = let   mask    = bitMask (r - 32)
+
+                -- the mask of the double this FP reg aliases
+                maskLow = if r `mod` 2 == 0
+                                then bitMask (r - 32)
+                                else bitMask (r - 32 - 1)
+          in    FreeRegs
+                g
+                (f .|. mask)
+                (d .|. maskLow)
+
+        | otherwise
+        = pprPanic "SPARC.FreeRegs.releaseReg: not releasing bad reg" (ppr reg)
+
+releaseReg _
+        reg@(RealRegPair r1 r2)
+        (FreeRegs g f d)
+
+        | r1 >= 32, r1 <= 63, r1 `mod` 2 == 0
+        , r2 >= 32, r2 <= 63
+        = let   mask1   = bitMask (r1 - 32)
+                mask2   = bitMask (r2 - 32)
+          in
+                FreeRegs
+                        g
+                        ((f .|. mask1) .|. mask2)
+                        (d .|. mask1)
+
+        | otherwise
+        = pprPanic "SPARC.FreeRegs.releaseReg: not releasing bad reg" (ppr reg)
+
+
+
+bitMask :: Int -> Word32
+bitMask n = 1 `shiftL` n
+
+
+showFreeRegs :: FreeRegs -> String
+showFreeRegs regs
+        =  "FreeRegs\n"
+        ++ "    integer: " ++ (show $ getFreeRegs RcInteger regs)       ++ "\n"
+        ++ "      float: " ++ (show $ getFreeRegs RcFloat   regs)       ++ "\n"
+        ++ "     double: " ++ (show $ getFreeRegs RcDouble  regs)       ++ "\n"
diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/StackMap.hs b/compiler/GHC/CmmToAsm/Reg/Linear/StackMap.hs
new file mode 100644
index 0000000000..630b101fc7
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/Reg/Linear/StackMap.hs
@@ -0,0 +1,61 @@
+
+-- | The assignment of virtual registers to stack slots
+
+--      We have lots of stack slots. Memory-to-memory moves are a pain on most
+--      architectures. Therefore, we avoid having to generate memory-to-memory moves
+--      by simply giving every virtual register its own stack slot.
+
+--      The stack map keeps track of virtual register - stack slot
+--      associations and of which stack slots are still free. Once it has been
+--      associated, a stack slot is never "freed" or removed from the StackMap
+--      again; it remains associated until we are done with the current CmmProc.
+--
+module GHC.CmmToAsm.Reg.Linear.StackMap (
+        StackSlot,
+        StackMap(..),
+        emptyStackMap,
+        getStackSlotFor,
+        getStackUse
+)
+
+where
+
+import GhcPrelude
+
+import GHC.Driver.Session
+import UniqFM
+import Unique
+
+
+-- | Identifier for a stack slot.
+type StackSlot = Int
+
+data StackMap
+        = StackMap
+        { -- | The next slot that is available to be allocated.
+          stackMapNextFreeSlot  :: !Int
+
+          -- | Assignment of vregs to stack slots.
+ , stackMapAssignment :: UniqFM StackSlot } + + +-- | An empty stack map, with all slots available. +emptyStackMap :: DynFlags -> StackMap +emptyStackMap _ = StackMap 0 emptyUFM + + +-- | If this vreg unique already has a stack assignment then return the slot number, +-- otherwise allocate a new slot, and update the map. +-- +getStackSlotFor :: StackMap -> Unique -> (StackMap, Int) + +getStackSlotFor fs@(StackMap _ reserved) reg + | Just slot <- lookupUFM reserved reg = (fs, slot) + +getStackSlotFor (StackMap freeSlot reserved) reg = + (StackMap (freeSlot+1) (addToUFM reserved reg freeSlot), freeSlot) + +-- | Return the number of stack slots that were allocated +getStackUse :: StackMap -> Int +getStackUse (StackMap freeSlot _) = freeSlot + diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/State.hs b/compiler/GHC/CmmToAsm/Reg/Linear/State.hs new file mode 100644 index 0000000000..a167cc7e00 --- /dev/null +++ b/compiler/GHC/CmmToAsm/Reg/Linear/State.hs @@ -0,0 +1,184 @@ +{-# LANGUAGE CPP, PatternSynonyms, DeriveFunctor #-} + +#if !defined(GHC_LOADED_INTO_GHCI) +{-# LANGUAGE UnboxedTuples #-} +#endif + +-- | State monad for the linear register allocator. + +-- Here we keep all the state that the register allocator keeps track +-- of as it walks the instructions in a basic block. + +module GHC.CmmToAsm.Reg.Linear.State ( + RA_State(..), + RegM, + runR, + + spillR, + loadR, + + getFreeRegsR, + setFreeRegsR, + + getAssigR, + setAssigR, + + getBlockAssigR, + setBlockAssigR, + + setDeltaR, + getDeltaR, + + getUniqueR, + + recordSpill, + recordFixupBlock +) +where + +import GhcPrelude + +import GHC.CmmToAsm.Reg.Linear.Stats +import GHC.CmmToAsm.Reg.Linear.StackMap +import GHC.CmmToAsm.Reg.Linear.Base +import GHC.CmmToAsm.Reg.Liveness +import GHC.CmmToAsm.Instr +import GHC.Platform.Reg +import GHC.Cmm.BlockId + +import GHC.Driver.Session +import Unique +import UniqSupply + +import Control.Monad (ap) + +-- Avoids using unboxed tuples when loading into GHCi +#if !defined(GHC_LOADED_INTO_GHCI) + +type RA_Result freeRegs a = (# RA_State freeRegs, a #) + +pattern RA_Result :: a -> b -> (# a, b #) +pattern RA_Result a b = (# a, b #) +{-# COMPLETE RA_Result #-} +#else + +data RA_Result freeRegs a = RA_Result {-# UNPACK #-} !(RA_State freeRegs) !a + deriving (Functor) + +#endif + +-- | The register allocator monad type. +newtype RegM freeRegs a + = RegM { unReg :: RA_State freeRegs -> RA_Result freeRegs a } + deriving (Functor) + +instance Applicative (RegM freeRegs) where + pure a = RegM $ \s -> RA_Result s a + (<*>) = ap + +instance Monad (RegM freeRegs) where + m >>= k = RegM $ \s -> case unReg m s of { RA_Result s a -> unReg (k a) s } + +instance HasDynFlags (RegM a) where + getDynFlags = RegM $ \s -> RA_Result s (ra_DynFlags s) + + +-- | Run a computation in the RegM register allocator monad. +runR :: DynFlags + -> BlockAssignment freeRegs + -> freeRegs + -> RegMap Loc + -> StackMap + -> UniqSupply + -> RegM freeRegs a + -> (BlockAssignment freeRegs, StackMap, RegAllocStats, a) + +runR dflags block_assig freeregs assig stack us thing = + case unReg thing + (RA_State + { ra_blockassig = block_assig + , ra_freeregs = freeregs + , ra_assig = assig + , ra_delta = 0{-???-} + , ra_stack = stack + , ra_us = us + , ra_spills = [] + , ra_DynFlags = dflags + , ra_fixups = [] }) + of + RA_Result state returned_thing + -> (ra_blockassig state, ra_stack state, makeRAStats state, returned_thing) + + +-- | Make register allocator stats from its final state. 
+makeRAStats :: RA_State freeRegs -> RegAllocStats +makeRAStats state + = RegAllocStats + { ra_spillInstrs = binSpillReasons (ra_spills state) + , ra_fixupList = ra_fixups state } + + +spillR :: Instruction instr + => Reg -> Unique -> RegM freeRegs (instr, Int) + +spillR reg temp = RegM $ \ s@RA_State{ra_delta=delta, ra_stack=stack0} -> + let dflags = ra_DynFlags s + (stack1,slot) = getStackSlotFor stack0 temp + instr = mkSpillInstr dflags reg delta slot + in + RA_Result s{ra_stack=stack1} (instr,slot) + + +loadR :: Instruction instr + => Reg -> Int -> RegM freeRegs instr + +loadR reg slot = RegM $ \ s@RA_State{ra_delta=delta} -> + let dflags = ra_DynFlags s + in RA_Result s (mkLoadInstr dflags reg delta slot) + +getFreeRegsR :: RegM freeRegs freeRegs +getFreeRegsR = RegM $ \ s@RA_State{ra_freeregs = freeregs} -> + RA_Result s freeregs + +setFreeRegsR :: freeRegs -> RegM freeRegs () +setFreeRegsR regs = RegM $ \ s -> + RA_Result s{ra_freeregs = regs} () + +getAssigR :: RegM freeRegs (RegMap Loc) +getAssigR = RegM $ \ s@RA_State{ra_assig = assig} -> + RA_Result s assig + +setAssigR :: RegMap Loc -> RegM freeRegs () +setAssigR assig = RegM $ \ s -> + RA_Result s{ra_assig=assig} () + +getBlockAssigR :: RegM freeRegs (BlockAssignment freeRegs) +getBlockAssigR = RegM $ \ s@RA_State{ra_blockassig = assig} -> + RA_Result s assig + +setBlockAssigR :: BlockAssignment freeRegs -> RegM freeRegs () +setBlockAssigR assig = RegM $ \ s -> + RA_Result s{ra_blockassig = assig} () + +setDeltaR :: Int -> RegM freeRegs () +setDeltaR n = RegM $ \ s -> + RA_Result s{ra_delta = n} () + +getDeltaR :: RegM freeRegs Int +getDeltaR = RegM $ \s -> RA_Result s (ra_delta s) + +getUniqueR :: RegM freeRegs Unique +getUniqueR = RegM $ \s -> + case takeUniqFromSupply (ra_us s) of + (uniq, us) -> RA_Result s{ra_us = us} uniq + + +-- | Record that a spill instruction was inserted, for profiling. +recordSpill :: SpillReason -> RegM freeRegs () +recordSpill spill + = RegM $ \s -> RA_Result (s { ra_spills = spill : ra_spills s }) () + +-- | Record a created fixup block +recordFixupBlock :: BlockId -> BlockId -> BlockId -> RegM freeRegs () +recordFixupBlock from between to + = RegM $ \s -> RA_Result (s { ra_fixups = (from,between,to) : ra_fixups s }) () diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/Stats.hs b/compiler/GHC/CmmToAsm/Reg/Linear/Stats.hs new file mode 100644 index 0000000000..1176b220a3 --- /dev/null +++ b/compiler/GHC/CmmToAsm/Reg/Linear/Stats.hs @@ -0,0 +1,87 @@ +module GHC.CmmToAsm.Reg.Linear.Stats ( + binSpillReasons, + countRegRegMovesNat, + pprStats +) + +where + +import GhcPrelude + +import GHC.CmmToAsm.Reg.Linear.Base +import GHC.CmmToAsm.Reg.Liveness +import GHC.CmmToAsm.Instr + +import UniqFM +import Outputable + +import State + +-- | Build a map of how many times each reg was alloced, clobbered, loaded etc. +binSpillReasons + :: [SpillReason] -> UniqFM [Int] + +binSpillReasons reasons + = addListToUFM_C + (zipWith (+)) + emptyUFM + (map (\reason -> case reason of + SpillAlloc r -> (r, [1, 0, 0, 0, 0]) + SpillClobber r -> (r, [0, 1, 0, 0, 0]) + SpillLoad r -> (r, [0, 0, 1, 0, 0]) + SpillJoinRR r -> (r, [0, 0, 0, 1, 0]) + SpillJoinRM r -> (r, [0, 0, 0, 0, 1])) reasons) + + +-- | Count reg-reg moves remaining in this code. 
+countRegRegMovesNat + :: Instruction instr + => NatCmmDecl statics instr -> Int + +countRegRegMovesNat cmm + = execState (mapGenBlockTopM countBlock cmm) 0 + where + countBlock b@(BasicBlock _ instrs) + = do mapM_ countInstr instrs + return b + + countInstr instr + | Just _ <- takeRegRegMoveInstr instr + = do modify (+ 1) + return instr + + | otherwise + = return instr + + +-- | Pretty print some RegAllocStats +pprStats + :: Instruction instr + => [NatCmmDecl statics instr] -> [RegAllocStats] -> SDoc + +pprStats code statss + = let -- sum up all the instrs inserted by the spiller + spills = foldl' (plusUFM_C (zipWith (+))) + emptyUFM + $ map ra_spillInstrs statss + + spillTotals = foldl' (zipWith (+)) + [0, 0, 0, 0, 0] + $ nonDetEltsUFM spills + -- See Note [Unique Determinism and code generation] + + -- count how many reg-reg-moves remain in the code + moves = sum $ map countRegRegMovesNat code + + pprSpill (reg, spills) + = parens $ (hcat $ punctuate (text ", ") (doubleQuotes (ppr reg) : map ppr spills)) + + in ( text "-- spills-added-total" + $$ text "-- (allocs, clobbers, loads, joinRR, joinRM, reg_reg_moves_remaining)" + $$ (parens $ (hcat $ punctuate (text ", ") (map ppr spillTotals ++ [ppr moves]))) + $$ text "" + $$ text "-- spills-added" + $$ text "-- (reg_name, allocs, clobbers, loads, joinRR, joinRM)" + $$ (pprUFMWithKeys spills (vcat . map pprSpill)) + $$ text "") + diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/X86.hs b/compiler/GHC/CmmToAsm/Reg/Linear/X86.hs new file mode 100644 index 0000000000..ce103bd6b2 --- /dev/null +++ b/compiler/GHC/CmmToAsm/Reg/Linear/X86.hs @@ -0,0 +1,52 @@ + +-- | Free regs map for i386 +module GHC.CmmToAsm.Reg.Linear.X86 where + +import GhcPrelude + +import GHC.CmmToAsm.X86.Regs +import GHC.Platform.Reg.Class +import GHC.Platform.Reg +import Panic +import GHC.Platform + +import Data.Word +import Data.Bits + +newtype FreeRegs = FreeRegs Word32 + deriving Show + +noFreeRegs :: FreeRegs +noFreeRegs = FreeRegs 0 + +releaseReg :: RealReg -> FreeRegs -> FreeRegs +releaseReg (RealRegSingle n) (FreeRegs f) + = FreeRegs (f .|. (1 `shiftL` n)) + +releaseReg _ _ + = panic "RegAlloc.Linear.X86.FreeRegs.releaseReg: no reg" + +initFreeRegs :: Platform -> FreeRegs +initFreeRegs platform + = foldl' (flip releaseReg) noFreeRegs (allocatableRegs platform) + +getFreeRegs :: Platform -> RegClass -> FreeRegs -> [RealReg] -- lazily +getFreeRegs platform cls (FreeRegs f) = go f 0 + + where go 0 _ = [] + go n m + | n .&. 1 /= 0 && classOfRealReg platform (RealRegSingle m) == cls + = RealRegSingle m : (go (n `shiftR` 1) $! (m+1)) + + | otherwise + = go (n `shiftR` 1) $! (m+1) + -- ToDo: there's no point looking through all the integer registers + -- in order to find a floating-point one. + +allocateReg :: RealReg -> FreeRegs -> FreeRegs +allocateReg (RealRegSingle r) (FreeRegs f) + = FreeRegs (f .&. 
complement (1 `shiftL` r)) + +allocateReg _ _ + = panic "RegAlloc.Linear.X86.FreeRegs.allocateReg: no reg" + diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/X86_64.hs b/compiler/GHC/CmmToAsm/Reg/Linear/X86_64.hs new file mode 100644 index 0000000000..322ddd6bdd --- /dev/null +++ b/compiler/GHC/CmmToAsm/Reg/Linear/X86_64.hs @@ -0,0 +1,53 @@ + +-- | Free regs map for x86_64 +module GHC.CmmToAsm.Reg.Linear.X86_64 where + +import GhcPrelude + +import GHC.CmmToAsm.X86.Regs +import GHC.Platform.Reg.Class +import GHC.Platform.Reg +import Panic +import GHC.Platform + +import Data.Word +import Data.Bits + +newtype FreeRegs = FreeRegs Word64 + deriving Show + +noFreeRegs :: FreeRegs +noFreeRegs = FreeRegs 0 + +releaseReg :: RealReg -> FreeRegs -> FreeRegs +releaseReg (RealRegSingle n) (FreeRegs f) + = FreeRegs (f .|. (1 `shiftL` n)) + +releaseReg _ _ + = panic "RegAlloc.Linear.X86_64.FreeRegs.releaseReg: no reg" + +initFreeRegs :: Platform -> FreeRegs +initFreeRegs platform + = foldl' (flip releaseReg) noFreeRegs (allocatableRegs platform) + +getFreeRegs :: Platform -> RegClass -> FreeRegs -> [RealReg] -- lazily +getFreeRegs platform cls (FreeRegs f) = go f 0 + + where go 0 _ = [] + go n m + | n .&. 1 /= 0 && classOfRealReg platform (RealRegSingle m) == cls + = RealRegSingle m : (go (n `shiftR` 1) $! (m+1)) + + | otherwise + = go (n `shiftR` 1) $! (m+1) + -- ToDo: there's no point looking through all the integer registers + -- in order to find a floating-point one. + +allocateReg :: RealReg -> FreeRegs -> FreeRegs +allocateReg (RealRegSingle r) (FreeRegs f) + = FreeRegs (f .&. complement (1 `shiftL` r)) + +allocateReg _ _ + = panic "RegAlloc.Linear.X86_64.FreeRegs.allocateReg: no reg" + + diff --git a/compiler/GHC/CmmToAsm/Reg/Liveness.hs b/compiler/GHC/CmmToAsm/Reg/Liveness.hs new file mode 100644 index 0000000000..03b8123f93 --- /dev/null +++ b/compiler/GHC/CmmToAsm/Reg/Liveness.hs @@ -0,0 +1,1025 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeFamilies #-} + +{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-} + +----------------------------------------------------------------------------- +-- +-- The register liveness determinator +-- +-- (c) The University of Glasgow 2004-2013 +-- +----------------------------------------------------------------------------- + +module GHC.CmmToAsm.Reg.Liveness ( + RegSet, + RegMap, emptyRegMap, + BlockMap, mapEmpty, + LiveCmmDecl, + InstrSR (..), + LiveInstr (..), + Liveness (..), + LiveInfo (..), + LiveBasicBlock, + + mapBlockTop, mapBlockTopM, mapSCCM, + mapGenBlockTop, mapGenBlockTopM, + stripLive, + stripLiveBlock, + slurpConflicts, + slurpReloadCoalesce, + eraseDeltasLive, + patchEraseLive, + patchRegsLiveInstr, + reverseBlocksInTops, + regLiveness, + cmmTopLiveness + ) where +import GhcPrelude + +import GHC.Platform.Reg +import GHC.CmmToAsm.Instr + +import GHC.Cmm.BlockId +import GHC.CmmToAsm.CFG +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Dataflow.Label +import GHC.Cmm hiding (RegSet, emptyRegSet) + +import Digraph +import GHC.Driver.Session +import MonadUtils +import Outputable +import GHC.Platform +import UniqSet +import UniqFM +import UniqSupply +import Bag +import State + +import Data.List +import Data.Maybe +import Data.IntSet (IntSet) + +----------------------------------------------------------------------------- +type RegSet = UniqSet Reg + +type RegMap a = UniqFM a + +emptyRegMap :: UniqFM a +emptyRegMap = emptyUFM + +emptyRegSet :: RegSet +emptyRegSet = 
emptyUniqSet + +type BlockMap a = LabelMap a + + +-- | A top level thing which carries liveness information. +type LiveCmmDecl statics instr + = GenCmmDecl + statics + LiveInfo + [SCC (LiveBasicBlock instr)] + + +-- | The register allocator also wants to use SPILL/RELOAD meta instructions, +-- so we'll keep those here. +data InstrSR instr + -- | A real machine instruction + = Instr instr + + -- | spill this reg to a stack slot + | SPILL Reg Int + + -- | reload this reg from a stack slot + | RELOAD Int Reg + +instance Instruction instr => Instruction (InstrSR instr) where + regUsageOfInstr platform i + = case i of + Instr instr -> regUsageOfInstr platform instr + SPILL reg _ -> RU [reg] [] + RELOAD _ reg -> RU [] [reg] + + patchRegsOfInstr i f + = case i of + Instr instr -> Instr (patchRegsOfInstr instr f) + SPILL reg slot -> SPILL (f reg) slot + RELOAD slot reg -> RELOAD slot (f reg) + + isJumpishInstr i + = case i of + Instr instr -> isJumpishInstr instr + _ -> False + + jumpDestsOfInstr i + = case i of + Instr instr -> jumpDestsOfInstr instr + _ -> [] + + patchJumpInstr i f + = case i of + Instr instr -> Instr (patchJumpInstr instr f) + _ -> i + + mkSpillInstr = error "mkSpillInstr[InstrSR]: Not making SPILL meta-instr" + mkLoadInstr = error "mkLoadInstr[InstrSR]: Not making LOAD meta-instr" + + takeDeltaInstr i + = case i of + Instr instr -> takeDeltaInstr instr + _ -> Nothing + + isMetaInstr i + = case i of + Instr instr -> isMetaInstr instr + _ -> False + + mkRegRegMoveInstr platform r1 r2 + = Instr (mkRegRegMoveInstr platform r1 r2) + + takeRegRegMoveInstr i + = case i of + Instr instr -> takeRegRegMoveInstr instr + _ -> Nothing + + mkJumpInstr target = map Instr (mkJumpInstr target) + + mkStackAllocInstr platform amount = + Instr <$> mkStackAllocInstr platform amount + + mkStackDeallocInstr platform amount = + Instr <$> mkStackDeallocInstr platform amount + + +-- | An instruction with liveness information. +data LiveInstr instr + = LiveInstr (InstrSR instr) (Maybe Liveness) + +-- | Liveness information. +-- The regs which die are ones which are no longer live in the *next* instruction +-- in this sequence. +-- (NB. if the instruction is a jump, these registers might still be live +-- at the jump target(s) - you have to check the liveness at the destination +-- block to find out). + +data Liveness + = Liveness + { liveBorn :: RegSet -- ^ registers born in this instruction (written to for first time). + , liveDieRead :: RegSet -- ^ registers that died because they were read for the last time. + , liveDieWrite :: RegSet } -- ^ registers that died because they were clobbered by something. + + +-- | Stash regs live on entry to each basic block in the info part of the cmm code. +data LiveInfo + = LiveInfo + (LabelMap RawCmmStatics) -- cmm info table static stuff + [BlockId] -- entry points (first one is the + -- entry point for the proc). + (BlockMap RegSet) -- argument locals live on entry to this block + (BlockMap IntSet) -- stack slots live on entry to this block + + +-- | A basic block with liveness information. 
+type LiveBasicBlock instr + = GenBasicBlock (LiveInstr instr) + + +instance Outputable instr + => Outputable (InstrSR instr) where + + ppr (Instr realInstr) + = ppr realInstr + + ppr (SPILL reg slot) + = hcat [ + text "\tSPILL", + char ' ', + ppr reg, + comma, + text "SLOT" <> parens (int slot)] + + ppr (RELOAD slot reg) + = hcat [ + text "\tRELOAD", + char ' ', + text "SLOT" <> parens (int slot), + comma, + ppr reg] + +instance Outputable instr + => Outputable (LiveInstr instr) where + + ppr (LiveInstr instr Nothing) + = ppr instr + + ppr (LiveInstr instr (Just live)) + = ppr instr + $$ (nest 8 + $ vcat + [ pprRegs (text "# born: ") (liveBorn live) + , pprRegs (text "# r_dying: ") (liveDieRead live) + , pprRegs (text "# w_dying: ") (liveDieWrite live) ] + $+$ space) + + where pprRegs :: SDoc -> RegSet -> SDoc + pprRegs name regs + | isEmptyUniqSet regs = empty + | otherwise = name <> + (pprUFM (getUniqSet regs) (hcat . punctuate space . map ppr)) + +instance Outputable LiveInfo where + ppr (LiveInfo mb_static entryIds liveVRegsOnEntry liveSlotsOnEntry) + = (ppr mb_static) + $$ text "# entryIds = " <> ppr entryIds + $$ text "# liveVRegsOnEntry = " <> ppr liveVRegsOnEntry + $$ text "# liveSlotsOnEntry = " <> text (show liveSlotsOnEntry) + + + +-- | map a function across all the basic blocks in this code +-- +mapBlockTop + :: (LiveBasicBlock instr -> LiveBasicBlock instr) + -> LiveCmmDecl statics instr -> LiveCmmDecl statics instr + +mapBlockTop f cmm + = evalState (mapBlockTopM (\x -> return $ f x) cmm) () + + +-- | map a function across all the basic blocks in this code (monadic version) +-- +mapBlockTopM + :: Monad m + => (LiveBasicBlock instr -> m (LiveBasicBlock instr)) + -> LiveCmmDecl statics instr -> m (LiveCmmDecl statics instr) + +mapBlockTopM _ cmm@(CmmData{}) + = return cmm + +mapBlockTopM f (CmmProc header label live sccs) + = do sccs' <- mapM (mapSCCM f) sccs + return $ CmmProc header label live sccs' + +mapSCCM :: Monad m => (a -> m b) -> SCC a -> m (SCC b) +mapSCCM f (AcyclicSCC x) + = do x' <- f x + return $ AcyclicSCC x' + +mapSCCM f (CyclicSCC xs) + = do xs' <- mapM f xs + return $ CyclicSCC xs' + + +-- map a function across all the basic blocks in this code +mapGenBlockTop + :: (GenBasicBlock i -> GenBasicBlock i) + -> (GenCmmDecl d h (ListGraph i) -> GenCmmDecl d h (ListGraph i)) + +mapGenBlockTop f cmm + = evalState (mapGenBlockTopM (\x -> return $ f x) cmm) () + + +-- | map a function across all the basic blocks in this code (monadic version) +mapGenBlockTopM + :: Monad m + => (GenBasicBlock i -> m (GenBasicBlock i)) + -> (GenCmmDecl d h (ListGraph i) -> m (GenCmmDecl d h (ListGraph i))) + +mapGenBlockTopM _ cmm@(CmmData{}) + = return cmm + +mapGenBlockTopM f (CmmProc header label live (ListGraph blocks)) + = do blocks' <- mapM f blocks + return $ CmmProc header label live (ListGraph blocks') + + +-- | Slurp out the list of register conflicts and reg-reg moves from this top level thing. +-- Slurping of conflicts and moves is wrapped up together so we don't have +-- to make two passes over the same code when we want to build the graph. 
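+--
+-- As a sketch (vreg names invented): from a block where v1, v2 and v3
+-- are live at the same point we get a conflict set {v1, v2, v3},
+-- meaning those vregs must end up in distinct hregs, and from an
+-- instruction "mov v1 -> v2" we get the move edge (v1, v2), which the
+-- coalescer prefers to place in the same hreg.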
+-- +slurpConflicts + :: Instruction instr + => LiveCmmDecl statics instr + -> (Bag (UniqSet Reg), Bag (Reg, Reg)) + +slurpConflicts live + = slurpCmm (emptyBag, emptyBag) live + + where slurpCmm rs CmmData{} = rs + slurpCmm rs (CmmProc info _ _ sccs) + = foldl' (slurpSCC info) rs sccs + + slurpSCC info rs (AcyclicSCC b) + = slurpBlock info rs b + + slurpSCC info rs (CyclicSCC bs) + = foldl' (slurpBlock info) rs bs + + slurpBlock info rs (BasicBlock blockId instrs) + | LiveInfo _ _ blockLive _ <- info + , Just rsLiveEntry <- mapLookup blockId blockLive + , (conflicts, moves) <- slurpLIs rsLiveEntry rs instrs + = (consBag rsLiveEntry conflicts, moves) + + | otherwise + = panic "Liveness.slurpConflicts: bad block" + + slurpLIs rsLive (conflicts, moves) [] + = (consBag rsLive conflicts, moves) + + slurpLIs rsLive rs (LiveInstr _ Nothing : lis) + = slurpLIs rsLive rs lis + + slurpLIs rsLiveEntry (conflicts, moves) (LiveInstr instr (Just live) : lis) + = let + -- regs that die because they are read for the last time at the start of an instruction + -- are not live across it. + rsLiveAcross = rsLiveEntry `minusUniqSet` (liveDieRead live) + + -- regs live on entry to the next instruction. + -- be careful of orphans, make sure to delete dying regs _after_ unioning + -- in the ones that are born here. + rsLiveNext = (rsLiveAcross `unionUniqSets` (liveBorn live)) + `minusUniqSet` (liveDieWrite live) + + -- orphan vregs are the ones that die in the same instruction they are born in. + -- these are likely to be results that are never used, but we still + -- need to assign a hreg to them.. + rsOrphans = intersectUniqSets + (liveBorn live) + (unionUniqSets (liveDieWrite live) (liveDieRead live)) + + -- + rsConflicts = unionUniqSets rsLiveNext rsOrphans + + in case takeRegRegMoveInstr instr of + Just rr -> slurpLIs rsLiveNext + ( consBag rsConflicts conflicts + , consBag rr moves) lis + + Nothing -> slurpLIs rsLiveNext + ( consBag rsConflicts conflicts + , moves) lis + + +-- | For spill\/reloads +-- +-- SPILL v1, slot1 +-- ... +-- RELOAD slot1, v2 +-- +-- If we can arrange that v1 and v2 are allocated to the same hreg it's more likely +-- the spill\/reload instrs can be cleaned and replaced by a nop reg-reg move. +-- +-- +slurpReloadCoalesce + :: forall statics instr. Instruction instr + => LiveCmmDecl statics instr + -> Bag (Reg, Reg) + +slurpReloadCoalesce live + = slurpCmm emptyBag live + + where + slurpCmm :: Bag (Reg, Reg) + -> GenCmmDecl t t1 [SCC (LiveBasicBlock instr)] + -> Bag (Reg, Reg) + slurpCmm cs CmmData{} = cs + slurpCmm cs (CmmProc _ _ _ sccs) + = slurpComp cs (flattenSCCs sccs) + + slurpComp :: Bag (Reg, Reg) + -> [LiveBasicBlock instr] + -> Bag (Reg, Reg) + slurpComp cs blocks + = let (moveBags, _) = runState (slurpCompM blocks) emptyUFM + in unionManyBags (cs : moveBags) + + slurpCompM :: [LiveBasicBlock instr] + -> State (UniqFM [UniqFM Reg]) [Bag (Reg, Reg)] + slurpCompM blocks + = do -- run the analysis once to record the mapping across jumps. + mapM_ (slurpBlock False) blocks + + -- run it a second time while using the information from the last pass. + -- We /could/ run this many more times to deal with graphical control + -- flow and propagating info across multiple jumps, but it's probably + -- not worth the trouble. 
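+                -- As a sketch (names invented): if a block ends with
+                -- SPILL v1 slot1 and jumps to block B, the first pass
+                -- records the map [slot1 -> v1] against B; on the
+                -- second pass a RELOAD slot1 v2 inside B then yields
+                -- the coalescing edge (v2, v1).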
+                mapM (slurpBlock True) blocks
+
+        slurpBlock :: Bool -> LiveBasicBlock instr
+                   -> State (UniqFM [UniqFM Reg]) (Bag (Reg, Reg))
+        slurpBlock propagate (BasicBlock blockId instrs)
+         = do   -- grab the slot map for entry to this block
+                slotMap         <- if propagate
+                                        then getSlotMap blockId
+                                        else return emptyUFM
+
+                (_, mMoves)     <- mapAccumLM slurpLI slotMap instrs
+                return $ listToBag $ catMaybes mMoves
+
+        slurpLI :: UniqFM Reg                           -- current slotMap
+                -> LiveInstr instr
+                -> State (UniqFM [UniqFM Reg])          -- blockId -> [slot -> reg]
+                                                        --      for tracking slotMaps across jumps
+
+                         ( UniqFM Reg                   -- new slotMap
+                         , Maybe (Reg, Reg))            -- maybe a new coalesce edge
+
+        slurpLI slotMap li
+
+                -- remember what reg was stored into the slot
+                | LiveInstr (SPILL reg slot) _  <- li
+                , slotMap'                      <- addToUFM slotMap slot reg
+                = return (slotMap', Nothing)
+
+                -- add an edge between this reg and the last one stored into the slot
+                | LiveInstr (RELOAD slot reg) _ <- li
+                = case lookupUFM slotMap slot of
+                        Just reg2
+                         | reg /= reg2  -> return (slotMap, Just (reg, reg2))
+                         | otherwise    -> return (slotMap, Nothing)
+
+                        Nothing         -> return (slotMap, Nothing)
+
+                -- if we hit a jump, remember the current slotMap
+                | LiveInstr (Instr instr) _     <- li
+                , targets                       <- jumpDestsOfInstr instr
+                , not $ null targets
+                = do    mapM_ (accSlotMap slotMap) targets
+                        return (slotMap, Nothing)
+
+                | otherwise
+                = return (slotMap, Nothing)
+
+        -- record a slotmap for an in edge to this block
+        accSlotMap slotMap blockId
+                = modify (\s -> addToUFM_C (++) s blockId [slotMap])
+
+        -- work out the slot map on entry to this block
+        --      if we have slot maps for multiple in-edges then we need to merge them.
+        getSlotMap blockId
+         = do   map             <- get
+                let slotMaps    = fromMaybe [] (lookupUFM map blockId)
+                return          $ foldr mergeSlotMaps emptyUFM slotMaps
+
+        mergeSlotMaps :: UniqFM Reg -> UniqFM Reg -> UniqFM Reg
+        mergeSlotMaps map1 map2
+                = listToUFM
+                $ [ (k, r1)
+                  | (k, r1) <- nonDetUFMToList map1
+                  -- This is non-deterministic but we do not
+                  -- currently support deterministic code-generation.
+                  -- See Note [Unique Determinism and code generation]
+                  , case lookupUFM map2 k of
+                          Nothing -> False
+                          Just r2 -> r1 == r2 ]
+
+
+-- | Strip away liveness information, yielding NatCmmDecl
+stripLive
+        :: (Outputable statics, Outputable instr, Instruction instr)
+        => DynFlags
+        -> LiveCmmDecl statics instr
+        -> NatCmmDecl statics instr
+
+stripLive dflags live
+        = stripCmm live
+
+ where  stripCmm :: (Outputable statics, Outputable instr, Instruction instr)
+                 => LiveCmmDecl statics instr -> NatCmmDecl statics instr
+        stripCmm (CmmData sec ds)       = CmmData sec ds
+        stripCmm (CmmProc (LiveInfo info (first_id:_) _ _) label live sccs)
+         = let  final_blocks    = flattenSCCs sccs
+
+                -- make sure the block that was first in the input list
+                --      stays at the front of the output. This is the entry point
+                --      of the proc, and it needs to come first.
+                ((first':_), rest')
+                                = partition ((== first_id) . blockId) final_blocks
+
+           in   CmmProc info label live
+                          (ListGraph $ map (stripLiveBlock dflags) $ first' : rest')
+
+        -- If the proc has blocks but we don't know what the first one was, then we're dead.
+        stripCmm proc
+                 = pprPanic "RegAlloc.Liveness.stripLive: no first_id on proc" (ppr proc)
+
+-- | Strip away liveness information from a basic block,
+--      and make real spill instructions out of SPILL, RELOAD pseudos along the way.
+ +stripLiveBlock + :: Instruction instr + => DynFlags + -> LiveBasicBlock instr + -> NatBasicBlock instr + +stripLiveBlock dflags (BasicBlock i lis) + = BasicBlock i instrs' + + where (instrs', _) + = runState (spillNat [] lis) 0 + + spillNat acc [] + = return (reverse acc) + + spillNat acc (LiveInstr (SPILL reg slot) _ : instrs) + = do delta <- get + spillNat (mkSpillInstr dflags reg delta slot : acc) instrs + + spillNat acc (LiveInstr (RELOAD slot reg) _ : instrs) + = do delta <- get + spillNat (mkLoadInstr dflags reg delta slot : acc) instrs + + spillNat acc (LiveInstr (Instr instr) _ : instrs) + | Just i <- takeDeltaInstr instr + = do put i + spillNat acc instrs + + spillNat acc (LiveInstr (Instr instr) _ : instrs) + = spillNat (instr : acc) instrs + + +-- | Erase Delta instructions. + +eraseDeltasLive + :: Instruction instr + => LiveCmmDecl statics instr + -> LiveCmmDecl statics instr + +eraseDeltasLive cmm + = mapBlockTop eraseBlock cmm + where + eraseBlock (BasicBlock id lis) + = BasicBlock id + $ filter (\(LiveInstr i _) -> not $ isJust $ takeDeltaInstr i) + $ lis + + +-- | Patch the registers in this code according to this register mapping. +-- also erase reg -> reg moves when the reg is the same. +-- also erase reg -> reg moves when the destination dies in this instr. +patchEraseLive + :: Instruction instr + => (Reg -> Reg) + -> LiveCmmDecl statics instr -> LiveCmmDecl statics instr + +patchEraseLive patchF cmm + = patchCmm cmm + where + patchCmm cmm@CmmData{} = cmm + + patchCmm (CmmProc info label live sccs) + | LiveInfo static id blockMap mLiveSlots <- info + = let + patchRegSet set = mkUniqSet $ map patchF $ nonDetEltsUFM set + -- See Note [Unique Determinism and code generation] + blockMap' = mapMap (patchRegSet . getUniqSet) blockMap + + info' = LiveInfo static id blockMap' mLiveSlots + in CmmProc info' label live $ map patchSCC sccs + + patchSCC (AcyclicSCC b) = AcyclicSCC (patchBlock b) + patchSCC (CyclicSCC bs) = CyclicSCC (map patchBlock bs) + + patchBlock (BasicBlock id lis) + = BasicBlock id $ patchInstrs lis + + patchInstrs [] = [] + patchInstrs (li : lis) + + | LiveInstr i (Just live) <- li' + , Just (r1, r2) <- takeRegRegMoveInstr i + , eatMe r1 r2 live + = patchInstrs lis + + | otherwise + = li' : patchInstrs lis + + where li' = patchRegsLiveInstr patchF li + + eatMe r1 r2 live + -- source and destination regs are the same + | r1 == r2 = True + + -- destination reg is never used + | elementOfUniqSet r2 (liveBorn live) + , elementOfUniqSet r2 (liveDieRead live) || elementOfUniqSet r2 (liveDieWrite live) + = True + + | otherwise = False + + +-- | Patch registers in this LiveInstr, including the liveness information. 
+-- +patchRegsLiveInstr + :: Instruction instr + => (Reg -> Reg) + -> LiveInstr instr -> LiveInstr instr + +patchRegsLiveInstr patchF li + = case li of + LiveInstr instr Nothing + -> LiveInstr (patchRegsOfInstr instr patchF) Nothing + + LiveInstr instr (Just live) + -> LiveInstr + (patchRegsOfInstr instr patchF) + (Just live + { -- WARNING: have to go via lists here because patchF changes the uniq in the Reg + liveBorn = mapUniqSet patchF $ liveBorn live + , liveDieRead = mapUniqSet patchF $ liveDieRead live + , liveDieWrite = mapUniqSet patchF $ liveDieWrite live }) + -- See Note [Unique Determinism and code generation] + + +-------------------------------------------------------------------------------- +-- | Convert a NatCmmDecl to a LiveCmmDecl, with liveness information + +cmmTopLiveness + :: (Outputable instr, Instruction instr) + => Maybe CFG -> Platform + -> NatCmmDecl statics instr + -> UniqSM (LiveCmmDecl statics instr) +cmmTopLiveness cfg platform cmm + = regLiveness platform $ natCmmTopToLive cfg cmm + +natCmmTopToLive + :: (Instruction instr, Outputable instr) + => Maybe CFG -> NatCmmDecl statics instr + -> LiveCmmDecl statics instr + +natCmmTopToLive _ (CmmData i d) + = CmmData i d + +natCmmTopToLive _ (CmmProc info lbl live (ListGraph [])) + = CmmProc (LiveInfo info [] mapEmpty mapEmpty) lbl live [] + +natCmmTopToLive mCfg proc@(CmmProc info lbl live (ListGraph blocks@(first : _))) + = CmmProc (LiveInfo info' (first_id : entry_ids) mapEmpty mapEmpty) + lbl live sccsLive + where + first_id = blockId first + all_entry_ids = entryBlocks proc + sccs = sccBlocks blocks all_entry_ids mCfg + sccsLive = map (fmap (\(BasicBlock l instrs) -> + BasicBlock l (map (\i -> LiveInstr (Instr i) Nothing) instrs))) + $ sccs + + entry_ids = filter (reachable_node) . + filter (/= first_id) $ all_entry_ids + info' = mapFilterWithKey (\node _ -> reachable_node node) info + reachable_node + | Just cfg <- mCfg + = hasNode cfg + | otherwise + = const True + +-- +-- Compute the liveness graph of the set of basic blocks. Important: +-- we also discard any unreachable code here, starting from the entry +-- points (the first block in the list, and any blocks with info +-- tables). Unreachable code arises when code blocks are orphaned in +-- earlier optimisation passes, and may confuse the register allocator +-- by referring to registers that are not initialised. It's easy to +-- discard the unreachable code as part of the SCC pass, so that's +-- exactly what we do. (#7574) +-- +sccBlocks + :: forall instr . Instruction instr + => [NatBasicBlock instr] + -> [BlockId] + -> Maybe CFG + -> [SCC (NatBasicBlock instr)] + +sccBlocks blocks entries mcfg = map (fmap node_payload) sccs + where + nodes :: [ Node BlockId (NatBasicBlock instr) ] + nodes = [ DigraphNode block id (getOutEdges instrs) + | block@(BasicBlock id instrs) <- blocks ] + + g1 = graphFromEdgedVerticesUniq nodes + + reachable :: LabelSet + reachable + | Just cfg <- mcfg + -- Our CFG only contains reachable nodes by construction at this point. + = setFromList $ getCfgNodes cfg + | otherwise + = setFromList $ [ node_key node | node <- reachablesG g1 roots ] + + g2 = graphFromEdgedVerticesUniq [ node | node <- nodes + , node_key node + `setMember` reachable ] + + sccs = stronglyConnCompG g2 + + getOutEdges :: Instruction instr => [instr] -> [BlockId] + getOutEdges instrs = concatMap jumpDestsOfInstr instrs + + -- This is truly ugly, but I don't see a good alternative. + -- Digraph just has the wrong API. 
We want to identify nodes
+       -- by their keys (BlockId), but Digraph requires the whole
+       -- node: (NatBasicBlock, BlockId, [BlockId]). This takes
+       -- advantage of the fact that Digraph only looks at the key,
+       -- even though it asks for the whole triple.
+       roots = [DigraphNode (panic "sccBlocks") b (panic "sccBlocks")
+               | b <- entries ]
+
+--------------------------------------------------------------------------------
+-- Annotate code with register liveness information
+--
+
+regLiveness
+       :: (Outputable instr, Instruction instr)
+       => Platform
+       -> LiveCmmDecl statics instr
+       -> UniqSM (LiveCmmDecl statics instr)
+
+regLiveness _ (CmmData i d)
+       = return $ CmmData i d
+
+regLiveness _ (CmmProc info lbl live [])
+       | LiveInfo static mFirst _ _ <- info
+       = return $ CmmProc
+                       (LiveInfo static mFirst mapEmpty mapEmpty)
+                       lbl live []
+
+regLiveness platform (CmmProc info lbl live sccs)
+       | LiveInfo static mFirst _ liveSlotsOnEntry <- info
+       = let   (ann_sccs, block_live)  = computeLiveness platform sccs
+
+         in    return $ CmmProc (LiveInfo static mFirst block_live liveSlotsOnEntry)
+                          lbl live ann_sccs
+
+
+-- -----------------------------------------------------------------------------
+-- | Check ordering of Blocks
+-- The computeLiveness function requires SCCs to be in reverse
+-- dependent order. If they're not, the liveness information will be
+-- wrong, and we'll get a bad allocation. Better to check for this
+-- precondition explicitly, or some other poor sucker will waste a
+-- day staring at bad assembly code.
+--
+checkIsReverseDependent
+       :: Instruction instr
+       => [SCC (LiveBasicBlock instr)]  -- ^ SCCs of blocks that we're about to run the liveness determinator on.
+       -> Maybe BlockId                 -- ^ The first BlockId that fails the test, if any.
+
+checkIsReverseDependent sccs'
+ = go emptyUniqSet sccs'
+
+ where go _ []
+        = Nothing
+
+       go blocksSeen (AcyclicSCC block : sccs)
+        = let  dests           = slurpJumpDestsOfBlock block
+               blocksSeen'     = unionUniqSets blocksSeen $ mkUniqSet [blockId block]
+               badDests        = dests `minusUniqSet` blocksSeen'
+          in   case nonDetEltsUniqSet badDests of
+                -- See Note [Unique Determinism and code generation]
+                []              -> go blocksSeen' sccs
+                bad : _         -> Just bad
+
+       go blocksSeen (CyclicSCC blocks : sccs)
+        = let  dests           = unionManyUniqSets $ map slurpJumpDestsOfBlock blocks
+               blocksSeen'     = unionUniqSets blocksSeen $ mkUniqSet $ map blockId blocks
+               badDests        = dests `minusUniqSet` blocksSeen'
+          in   case nonDetEltsUniqSet badDests of
+                -- See Note [Unique Determinism and code generation]
+                []              -> go blocksSeen' sccs
+                bad : _         -> Just bad
+
+       slurpJumpDestsOfBlock (BasicBlock _ instrs)
+        = unionManyUniqSets
+        $ map (mkUniqSet . jumpDestsOfInstr)
+               [ i | LiveInstr i _ <- instrs]
+
+
+-- | If we've computed liveness info for this code already, we have to reverse
+--   the SCCs in each top to get them back into the right order so we can do it again.
+reverseBlocksInTops :: LiveCmmDecl statics instr -> LiveCmmDecl statics instr
+reverseBlocksInTops top
+ = case top of
+       CmmData{}                       -> top
+       CmmProc info lbl live sccs      -> CmmProc info lbl live (reverse sccs)
+
+
+-- | Computing liveness
+--
+-- On entry, the SCCs must be in "reverse" order: later blocks may transfer
+-- control to earlier ones only; otherwise we `panic`.
+--
+-- The SCCs returned are in the *opposite* order, which is exactly what we
+-- want for the next pass.
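As a standalone illustration of that entry precondition, the reverse-dependence check amounts to a single forward walk in which every jump destination must already have been seen; a block may jump to itself or to earlier blocks only. In this sketch Ints stand in for BlockIds and Data.Set for UniqSet (both illustrative simplifications):

import qualified Data.Set as S

-- Returns the first offending destination, mirroring the Maybe BlockId
-- result of checkIsReverseDependent.
firstBadDest :: [(Int, [Int])] -> Maybe Int
firstBadDest = go S.empty
  where
    go _ [] = Nothing
    go seen ((bid, dests) : rest) =
        let seen' = S.insert bid seen              -- self-jumps are fine
        in case filter (`S.notMember` seen') dests of
             []      -> go seen' rest
             bad : _ -> Just bad

-- For example: firstBadDest [(1,[1]), (2,[1]), (3,[5])] == Just 5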
+-- +computeLiveness + :: (Outputable instr, Instruction instr) + => Platform + -> [SCC (LiveBasicBlock instr)] + -> ([SCC (LiveBasicBlock instr)], -- instructions annotated with list of registers + -- which are "dead after this instruction". + BlockMap RegSet) -- blocks annotated with set of live registers + -- on entry to the block. + +computeLiveness platform sccs + = case checkIsReverseDependent sccs of + Nothing -> livenessSCCs platform mapEmpty [] sccs + Just bad -> pprPanic "RegAlloc.Liveness.computeLiveness" + (vcat [ text "SCCs aren't in reverse dependent order" + , text "bad blockId" <+> ppr bad + , ppr sccs]) + +livenessSCCs + :: Instruction instr + => Platform + -> BlockMap RegSet + -> [SCC (LiveBasicBlock instr)] -- accum + -> [SCC (LiveBasicBlock instr)] + -> ( [SCC (LiveBasicBlock instr)] + , BlockMap RegSet) + +livenessSCCs _ blockmap done [] + = (done, blockmap) + +livenessSCCs platform blockmap done (AcyclicSCC block : sccs) + = let (blockmap', block') = livenessBlock platform blockmap block + in livenessSCCs platform blockmap' (AcyclicSCC block' : done) sccs + +livenessSCCs platform blockmap done + (CyclicSCC blocks : sccs) = + livenessSCCs platform blockmap' (CyclicSCC blocks':done) sccs + where (blockmap', blocks') + = iterateUntilUnchanged linearLiveness equalBlockMaps + blockmap blocks + + iterateUntilUnchanged + :: (a -> b -> (a,c)) -> (a -> a -> Bool) + -> a -> b + -> (a,c) + + iterateUntilUnchanged f eq a b + = head $ + concatMap tail $ + groupBy (\(a1, _) (a2, _) -> eq a1 a2) $ + iterate (\(a, _) -> f a b) $ + (a, panic "RegLiveness.livenessSCCs") + + + linearLiveness + :: Instruction instr + => BlockMap RegSet -> [LiveBasicBlock instr] + -> (BlockMap RegSet, [LiveBasicBlock instr]) + + linearLiveness = mapAccumL (livenessBlock platform) + + -- probably the least efficient way to compare two + -- BlockMaps for equality. + equalBlockMaps a b + = a' == b' + where a' = map f $ mapToList a + b' = map f $ mapToList b + f (key,elt) = (key, nonDetEltsUniqSet elt) + -- See Note [Unique Determinism and code generation] + + + +-- | Annotate a basic block with register liveness information. +-- +livenessBlock + :: Instruction instr + => Platform + -> BlockMap RegSet + -> LiveBasicBlock instr + -> (BlockMap RegSet, LiveBasicBlock instr) + +livenessBlock platform blockmap (BasicBlock block_id instrs) + = let + (regsLiveOnEntry, instrs1) + = livenessBack platform emptyUniqSet blockmap [] (reverse instrs) + blockmap' = mapInsert block_id regsLiveOnEntry blockmap + + instrs2 = livenessForward platform regsLiveOnEntry instrs1 + + output = BasicBlock block_id instrs2 + + in ( blockmap', output) + +-- | Calculate liveness going forwards, +-- filling in when regs are born + +livenessForward + :: Instruction instr + => Platform + -> RegSet -- regs live on this instr + -> [LiveInstr instr] -> [LiveInstr instr] + +livenessForward _ _ [] = [] +livenessForward platform rsLiveEntry (li@(LiveInstr instr mLive) : lis) + | Just live <- mLive + = let + RU _ written = regUsageOfInstr platform instr + -- Regs that are written to but weren't live on entry to this instruction + -- are recorded as being born here. 
+               rsBorn          = mkUniqSet
+                               $ filter (\r -> not $ elementOfUniqSet r rsLiveEntry) written
+
+               rsLiveNext      = (rsLiveEntry `unionUniqSets` rsBorn)
+                                       `minusUniqSet` (liveDieRead live)
+                                       `minusUniqSet` (liveDieWrite live)
+
+         in LiveInstr instr (Just live { liveBorn = rsBorn })
+               : livenessForward platform rsLiveNext lis
+
+ | otherwise
+ = li : livenessForward platform rsLiveEntry lis
+
+
+-- | Calculate liveness going backwards,
+--   filling in when regs die, and what regs are live across each instruction
+
+livenessBack
+       :: Instruction instr
+       => Platform
+       -> RegSet                       -- regs live on this instr
+       -> BlockMap RegSet              -- regs live on entry to other BBs
+       -> [LiveInstr instr]            -- instructions (accum)
+       -> [LiveInstr instr]            -- instructions
+       -> (RegSet, [LiveInstr instr])
+
+livenessBack _ liveregs _ done []  = (liveregs, done)
+
+livenessBack platform liveregs blockmap acc (instr : instrs)
+ = let (liveregs', instr')  = liveness1 platform liveregs blockmap instr
+   in  livenessBack platform liveregs' blockmap (instr' : acc) instrs
+
+
+-- don't bother tagging comments or deltas with liveness
+liveness1
+       :: Instruction instr
+       => Platform
+       -> RegSet
+       -> BlockMap RegSet
+       -> LiveInstr instr
+       -> (RegSet, LiveInstr instr)
+
+liveness1 _ liveregs _ (LiveInstr instr _)
+       | isMetaInstr instr
+       = (liveregs, LiveInstr instr Nothing)
+
+liveness1 platform liveregs blockmap (LiveInstr instr _)
+
+       | not_a_branch
+       = (liveregs1, LiveInstr instr
+                       (Just $ Liveness
+                       { liveBorn      = emptyUniqSet
+                       , liveDieRead   = mkUniqSet r_dying
+                       , liveDieWrite  = mkUniqSet w_dying }))
+
+       | otherwise
+       = (liveregs_br, LiveInstr instr
+                       (Just $ Liveness
+                       { liveBorn      = emptyUniqSet
+                       , liveDieRead   = mkUniqSet r_dying_br
+                       , liveDieWrite  = mkUniqSet w_dying }))
+
+       where
+           !(RU read written) = regUsageOfInstr platform instr
+
+           -- registers that were written here are dead going backwards.
+           -- registers that were read here are live going backwards.
+           liveregs1   = (liveregs `delListFromUniqSet` written)
+                                   `addListToUniqSet` read
+
+           -- registers that are not live beyond this point are recorded
+           -- as dying here.
+           r_dying     = [ reg | reg <- read, reg `notElem` written,
+                                 not (elementOfUniqSet reg liveregs) ]
+
+           w_dying     = [ reg | reg <- written,
+                                 not (elementOfUniqSet reg liveregs) ]
+
+           -- union in the live regs from all the jump destinations of this
+           -- instruction.
+           targets      = jumpDestsOfInstr instr -- where we go from here
+           not_a_branch = null targets
+
+           targetLiveRegs target
+                  = case mapLookup target blockmap of
+                                Just ra -> ra
+                                Nothing -> emptyRegSet
+
+           live_from_branch = unionManyUniqSets (map targetLiveRegs targets)
+
+           liveregs_br = liveregs1 `unionUniqSets` live_from_branch
+
+           -- registers that are live only in the branch targets should
+           -- be listed as dying here.
+           live_branch_only = live_from_branch `minusUniqSet` liveregs
+           r_dying_br  = nonDetEltsUniqSet (mkUniqSet r_dying `unionUniqSets`
+                                            live_branch_only)
+                         -- See Note [Unique Determinism and code generation]
diff --git a/compiler/GHC/CmmToAsm/Reg/Target.hs b/compiler/GHC/CmmToAsm/Reg/Target.hs
new file mode 100644
index 0000000000..a45d70c826
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/Reg/Target.hs
@@ -0,0 +1,135 @@
+{-# LANGUAGE CPP #-}
+-- | Hard wired things related to registers.
+-- This module prevents the native code generator from being able to
+-- emit code for non-host architectures.
+--
+-- TODO: Do a better job of the overloading, and eliminate this module.
+-- We'd probably do better with a Register type class, and hook this to +-- Instruction somehow. +-- +-- TODO: We should also make arch specific versions of RegAlloc.Graph.TrivColorable +module GHC.CmmToAsm.Reg.Target ( + targetVirtualRegSqueeze, + targetRealRegSqueeze, + targetClassOfRealReg, + targetMkVirtualReg, + targetRegDotColor, + targetClassOfReg +) + +where + +#include "HsVersions.h" + +import GhcPrelude + +import GHC.Platform.Reg +import GHC.Platform.Reg.Class +import GHC.CmmToAsm.Format + +import Outputable +import Unique +import GHC.Platform + +import qualified GHC.CmmToAsm.X86.Regs as X86 +import qualified GHC.CmmToAsm.X86.RegInfo as X86 +import qualified GHC.CmmToAsm.PPC.Regs as PPC +import qualified GHC.CmmToAsm.SPARC.Regs as SPARC + +targetVirtualRegSqueeze :: Platform -> RegClass -> VirtualReg -> Int +targetVirtualRegSqueeze platform + = case platformArch platform of + ArchX86 -> X86.virtualRegSqueeze + ArchX86_64 -> X86.virtualRegSqueeze + ArchPPC -> PPC.virtualRegSqueeze + ArchS390X -> panic "targetVirtualRegSqueeze ArchS390X" + ArchSPARC -> SPARC.virtualRegSqueeze + ArchSPARC64 -> panic "targetVirtualRegSqueeze ArchSPARC64" + ArchPPC_64 _ -> PPC.virtualRegSqueeze + ArchARM _ _ _ -> panic "targetVirtualRegSqueeze ArchARM" + ArchARM64 -> panic "targetVirtualRegSqueeze ArchARM64" + ArchAlpha -> panic "targetVirtualRegSqueeze ArchAlpha" + ArchMipseb -> panic "targetVirtualRegSqueeze ArchMipseb" + ArchMipsel -> panic "targetVirtualRegSqueeze ArchMipsel" + ArchJavaScript-> panic "targetVirtualRegSqueeze ArchJavaScript" + ArchUnknown -> panic "targetVirtualRegSqueeze ArchUnknown" + + +targetRealRegSqueeze :: Platform -> RegClass -> RealReg -> Int +targetRealRegSqueeze platform + = case platformArch platform of + ArchX86 -> X86.realRegSqueeze + ArchX86_64 -> X86.realRegSqueeze + ArchPPC -> PPC.realRegSqueeze + ArchS390X -> panic "targetRealRegSqueeze ArchS390X" + ArchSPARC -> SPARC.realRegSqueeze + ArchSPARC64 -> panic "targetRealRegSqueeze ArchSPARC64" + ArchPPC_64 _ -> PPC.realRegSqueeze + ArchARM _ _ _ -> panic "targetRealRegSqueeze ArchARM" + ArchARM64 -> panic "targetRealRegSqueeze ArchARM64" + ArchAlpha -> panic "targetRealRegSqueeze ArchAlpha" + ArchMipseb -> panic "targetRealRegSqueeze ArchMipseb" + ArchMipsel -> panic "targetRealRegSqueeze ArchMipsel" + ArchJavaScript-> panic "targetRealRegSqueeze ArchJavaScript" + ArchUnknown -> panic "targetRealRegSqueeze ArchUnknown" + +targetClassOfRealReg :: Platform -> RealReg -> RegClass +targetClassOfRealReg platform + = case platformArch platform of + ArchX86 -> X86.classOfRealReg platform + ArchX86_64 -> X86.classOfRealReg platform + ArchPPC -> PPC.classOfRealReg + ArchS390X -> panic "targetClassOfRealReg ArchS390X" + ArchSPARC -> SPARC.classOfRealReg + ArchSPARC64 -> panic "targetClassOfRealReg ArchSPARC64" + ArchPPC_64 _ -> PPC.classOfRealReg + ArchARM _ _ _ -> panic "targetClassOfRealReg ArchARM" + ArchARM64 -> panic "targetClassOfRealReg ArchARM64" + ArchAlpha -> panic "targetClassOfRealReg ArchAlpha" + ArchMipseb -> panic "targetClassOfRealReg ArchMipseb" + ArchMipsel -> panic "targetClassOfRealReg ArchMipsel" + ArchJavaScript-> panic "targetClassOfRealReg ArchJavaScript" + ArchUnknown -> panic "targetClassOfRealReg ArchUnknown" + +targetMkVirtualReg :: Platform -> Unique -> Format -> VirtualReg +targetMkVirtualReg platform + = case platformArch platform of + ArchX86 -> X86.mkVirtualReg + ArchX86_64 -> X86.mkVirtualReg + ArchPPC -> PPC.mkVirtualReg + ArchS390X -> panic "targetMkVirtualReg ArchS390X" + ArchSPARC -> 
SPARC.mkVirtualReg
+        ArchSPARC64   -> panic "targetMkVirtualReg ArchSPARC64"
+        ArchPPC_64 _  -> PPC.mkVirtualReg
+        ArchARM _ _ _ -> panic "targetMkVirtualReg ArchARM"
+        ArchARM64     -> panic "targetMkVirtualReg ArchARM64"
+        ArchAlpha     -> panic "targetMkVirtualReg ArchAlpha"
+        ArchMipseb    -> panic "targetMkVirtualReg ArchMipseb"
+        ArchMipsel    -> panic "targetMkVirtualReg ArchMipsel"
+        ArchJavaScript-> panic "targetMkVirtualReg ArchJavaScript"
+        ArchUnknown   -> panic "targetMkVirtualReg ArchUnknown"
+
+targetRegDotColor :: Platform -> RealReg -> SDoc
+targetRegDotColor platform
+ = case platformArch platform of
+        ArchX86       -> X86.regDotColor platform
+        ArchX86_64    -> X86.regDotColor platform
+        ArchPPC       -> PPC.regDotColor
+        ArchS390X     -> panic "targetRegDotColor ArchS390X"
+        ArchSPARC     -> SPARC.regDotColor
+        ArchSPARC64   -> panic "targetRegDotColor ArchSPARC64"
+        ArchPPC_64 _  -> PPC.regDotColor
+        ArchARM _ _ _ -> panic "targetRegDotColor ArchARM"
+        ArchARM64     -> panic "targetRegDotColor ArchARM64"
+        ArchAlpha     -> panic "targetRegDotColor ArchAlpha"
+        ArchMipseb    -> panic "targetRegDotColor ArchMipseb"
+        ArchMipsel    -> panic "targetRegDotColor ArchMipsel"
+        ArchJavaScript-> panic "targetRegDotColor ArchJavaScript"
+        ArchUnknown   -> panic "targetRegDotColor ArchUnknown"
+
+
+targetClassOfReg :: Platform -> Reg -> RegClass
+targetClassOfReg platform reg
+ = case reg of
+        RegVirtual vr -> classOfVirtualReg vr
+        RegReal rr    -> targetClassOfRealReg platform rr
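Every export of this module follows the same shape: scrutinise platformArch once and hand back an arch-specific implementation, panicking for targets the native code generator does not support. A standalone sketch of that dispatch pattern (the Arch type and the squeeze functions here are toy stand-ins, not the GHC definitions):

data Arch = ArchX86 | ArchX86_64 | ArchPPC | ArchSPARC | ArchARM
        deriving Show

-- Pick the per-architecture implementation up front; callers then apply
-- the returned function without consulting the target again.
squeezeFor :: Arch -> (Int -> Int)
squeezeFor arch = case arch of
    ArchX86    -> x86Squeeze
    ArchX86_64 -> x86Squeeze        -- x86-64 shares the x86 implementation
    ArchPPC    -> ppcSqueeze
    ArchSPARC  -> sparcSqueeze
    other      -> error ("squeezeFor: unsupported arch " ++ show other)
  where
    -- toy stand-ins for the per-arch squeeze functions
    x86Squeeze   n = min n 3
    ppcSqueeze   n = min n 8
    sparcSqueeze n = min n 6

A Register type class, as the module's TODO suggests, would replace this runtime scrutiny with instance resolution, at the cost of carrying the architecture in the types.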