diff options
Diffstat (limited to 'compiler/GHC/CmmToAsm')
-rw-r--r-- | compiler/GHC/CmmToAsm/BlockLayout.hs | 18 | ||||
-rw-r--r-- | compiler/GHC/CmmToAsm/CFG.hs | 39 | ||||
-rw-r--r-- | compiler/GHC/CmmToAsm/Instr.hs | 5 | ||||
-rw-r--r-- | compiler/GHC/CmmToAsm/Reg/Linear.hs | 79 | ||||
-rw-r--r-- | compiler/GHC/CmmToAsm/Reg/Linear/Base.hs | 5 | ||||
-rw-r--r-- | compiler/GHC/CmmToAsm/Reg/Linear/PPC.hs | 5 | ||||
-rw-r--r-- | compiler/GHC/CmmToAsm/Reg/Linear/SPARC.hs | 4 | ||||
-rw-r--r-- | compiler/GHC/CmmToAsm/Reg/Linear/State.hs | 1 | ||||
-rw-r--r-- | compiler/GHC/CmmToAsm/Reg/Linear/X86.hs | 4 | ||||
-rw-r--r-- | compiler/GHC/CmmToAsm/Reg/Linear/X86_64.hs | 4 |
10 files changed, 121 insertions, 43 deletions
diff --git a/compiler/GHC/CmmToAsm/BlockLayout.hs b/compiler/GHC/CmmToAsm/BlockLayout.hs index 07faa91473..67eff764e1 100644 --- a/compiler/GHC/CmmToAsm/BlockLayout.hs +++ b/compiler/GHC/CmmToAsm/BlockLayout.hs @@ -636,22 +636,8 @@ sequenceChain :: forall a i. (Instruction i, Outputable i) -> [GenBasicBlock i] -- ^ Blocks placed in sequence. sequenceChain _info _weights [] = [] sequenceChain _info _weights [x] = [x] -sequenceChain info weights' blocks@((BasicBlock entry _):_) = - let weights :: CFG - weights = --pprTrace "cfg'" (pprEdgeWeights cfg') - cfg' - where - (_, globalEdgeWeights) = {-# SCC mkGlobalWeights #-} mkGlobalWeights entry weights' - cfg' = {-# SCC rewriteEdges #-} - mapFoldlWithKey - (\cfg from m -> - mapFoldlWithKey - (\cfg to w -> setEdgeWeight cfg (EdgeWeight w) from to ) - cfg m ) - weights' - globalEdgeWeights - - directEdges :: [CfgEdge] +sequenceChain info weights blocks@((BasicBlock entry _):_) = + let directEdges :: [CfgEdge] directEdges = sortBy (flip compare) $ catMaybes . map relevantWeight $ (infoEdgeList weights) where relevantWeight :: CfgEdge -> Maybe CfgEdge diff --git a/compiler/GHC/CmmToAsm/CFG.hs b/compiler/GHC/CmmToAsm/CFG.hs index ad3a3cdae7..5c68e77fd1 100644 --- a/compiler/GHC/CmmToAsm/CFG.hs +++ b/compiler/GHC/CmmToAsm/CFG.hs @@ -670,11 +670,21 @@ findBackEdges root cfg = typedEdges = classifyEdges root getSuccs edges :: [((BlockId,BlockId),EdgeType)] - -optimizeCFG :: D.CfgWeights -> RawCmmDecl -> CFG -> CFG -optimizeCFG _ (CmmData {}) cfg = cfg -optimizeCFG weights (CmmProc info _lab _live graph) cfg = - {-# SCC optimizeCFG #-} +optimizeCFG :: Bool -> D.CfgWeights -> RawCmmDecl -> CFG -> CFG +optimizeCFG _ _ (CmmData {}) cfg = cfg +optimizeCFG doStaticPred weights proc@(CmmProc _info _lab _live graph) cfg = + (if doStaticPred then staticPredCfg (g_entry graph) else id) $ + optHsPatterns weights proc $ cfg + +-- | Modify branch weights based on educated guess on +-- patterns GHC tends to produce and how they affect +-- performance. +-- +-- Most importantly we penalize jumps across info tables. +optHsPatterns :: D.CfgWeights -> RawCmmDecl -> CFG -> CFG +optHsPatterns _ (CmmData {}) cfg = cfg +optHsPatterns weights (CmmProc info _lab _live graph) cfg = + {-# SCC optHsPatterns #-} -- pprTrace "Initial:" (pprEdgeWeights cfg) $ -- pprTrace "Initial:" (ppr $ mkGlobalWeights (g_entry graph) cfg) $ @@ -749,6 +759,21 @@ optimizeCFG weights (CmmProc info _lab _live graph) cfg = | CmmSource { trans_cmmNode = CmmCondBranch {} } <- source = True | otherwise = False +-- | Convert block-local branch weights to global weights. +staticPredCfg :: BlockId -> CFG -> CFG +staticPredCfg entry cfg = cfg' + where + (_, globalEdgeWeights) = {-# SCC mkGlobalWeights #-} + mkGlobalWeights entry cfg + cfg' = {-# SCC rewriteEdges #-} + mapFoldlWithKey + (\cfg from m -> + mapFoldlWithKey + (\cfg to w -> setEdgeWeight cfg (EdgeWeight w) from to ) + cfg m ) + cfg + globalEdgeWeights + -- | Determine loop membership of blocks based on SCC analysis -- This is faster but only gives yes/no answers. loopMembers :: HasDebugCallStack => CFG -> LabelMap Bool @@ -922,6 +947,10 @@ revPostorderFrom cfg root = -- reverse post order. Which is required for diamond control flow to work probably. -- -- We also apply a few prediction heuristics (based on the same paper) +-- +-- The returned result represents frequences. +-- For blocks it's the expected number of executions and +-- for edges is the number of traversals. {-# NOINLINE mkGlobalWeights #-} {-# SCC mkGlobalWeights #-} diff --git a/compiler/GHC/CmmToAsm/Instr.hs b/compiler/GHC/CmmToAsm/Instr.hs index 833a72a74a..869c5eb238 100644 --- a/compiler/GHC/CmmToAsm/Instr.hs +++ b/compiler/GHC/CmmToAsm/Instr.hs @@ -37,7 +37,10 @@ import GHC.CmmToAsm.Config -- (for allocation purposes, anyway). -- data RegUsage - = RU [Reg] [Reg] + = RU { + reads :: [Reg], + writes :: [Reg] + } -- | No regs read or written to. noUsage :: RegUsage diff --git a/compiler/GHC/CmmToAsm/Reg/Linear.hs b/compiler/GHC/CmmToAsm/Reg/Linear.hs index 00b4915d7b..55cb73af1a 100644 --- a/compiler/GHC/CmmToAsm/Reg/Linear.hs +++ b/compiler/GHC/CmmToAsm/Reg/Linear.hs @@ -1,4 +1,5 @@ {-# LANGUAGE BangPatterns, CPP, ScopedTypeVariables #-} +{-# LANGUAGE ConstraintKinds #-} {-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-} @@ -137,6 +138,7 @@ import GHC.Platform import Data.Maybe import Data.List import Control.Monad +import Control.Applicative -- ----------------------------------------------------------------------------- -- Top level of the register allocator @@ -229,8 +231,13 @@ linearRegAlloc config entry_ids block_live sccs go f = linearRegAlloc' config f entry_ids block_live sccs platform = ncgPlatform config +-- | Constraints on the instruction instances used by the +-- linear allocator. +type OutputableRegConstraint freeRegs instr = + (FR freeRegs, Outputable freeRegs, Outputable instr, Instruction instr) + linearRegAlloc' - :: (FR freeRegs, Outputable instr, Instruction instr) + :: OutputableRegConstraint freeRegs instr => NCGConfig -> freeRegs -> [BlockId] -- ^ entry points @@ -246,7 +253,7 @@ linearRegAlloc' config initFreeRegs entry_ids block_live sccs return (blocks, stats, getStackUse stack) -linearRA_SCCs :: (FR freeRegs, Instruction instr, Outputable instr) +linearRA_SCCs :: OutputableRegConstraint freeRegs instr => [BlockId] -> BlockMap RegSet -> [NatBasicBlock instr] @@ -281,7 +288,7 @@ linearRA_SCCs entry_ids block_live blocksAcc (CyclicSCC blocks : sccs) more sanity checking to guard against this eventuality. -} -process :: (FR freeRegs, Instruction instr, Outputable instr) +process :: OutputableRegConstraint freeRegs instr => [BlockId] -> BlockMap RegSet -> [GenBasicBlock (LiveInstr instr)] @@ -325,15 +332,18 @@ process entry_ids block_live (b@(BasicBlock id _) : blocks) -- | Do register allocation on this basic block -- processBlock - :: (FR freeRegs, Outputable instr, Instruction instr) + :: OutputableRegConstraint freeRegs instr => BlockMap RegSet -- ^ live regs on entry to each basic block -> LiveBasicBlock instr -- ^ block to do register allocation on -> RegM freeRegs [NatBasicBlock instr] -- ^ block with registers allocated processBlock block_live (BasicBlock id instrs) - = do initBlock id block_live + = do -- pprTraceM "processBlock" $ text "" $$ ppr (BasicBlock id instrs) + initBlock id block_live + (instrs', fixups) <- linearRA block_live [] [] id instrs + -- pprTraceM "blockResult" $ ppr (instrs', fixups) return $ BasicBlock id instrs' : fixups @@ -369,7 +379,7 @@ initBlock id block_live -- | Do allocation for a sequence of instructions. linearRA - :: (FR freeRegs, Outputable instr, Instruction instr) + :: OutputableRegConstraint freeRegs instr => BlockMap RegSet -- ^ map of what vregs are live on entry to each block. -> [instr] -- ^ accumulator for instructions already processed. -> [NatBasicBlock instr] -- ^ accumulator for blocks of fixup code. @@ -396,7 +406,7 @@ linearRA block_live accInstr accFixups id (instr:instrs) -- | Do allocation for a single instruction. raInsn - :: (FR freeRegs, Outputable instr, Instruction instr) + :: OutputableRegConstraint freeRegs instr => BlockMap RegSet -- ^ map of what vregs are love on entry to each block. -> [instr] -- ^ accumulator for instructions already processed. -> BlockId -- ^ the id of the current block, for debugging @@ -476,7 +486,7 @@ isInReg src assig | Just (InReg _) <- lookupUFM assig src = True | otherwise = False -genRaInsn :: (FR freeRegs, Instruction instr, Outputable instr) +genRaInsn :: OutputableRegConstraint freeRegs instr => BlockMap RegSet -> [instr] -> BlockId @@ -486,6 +496,7 @@ genRaInsn :: (FR freeRegs, Instruction instr, Outputable instr) -> RegM freeRegs ([instr], [NatBasicBlock instr]) genRaInsn block_live new_instrs block_id instr r_dying w_dying = do +-- pprTraceM "genRaInsn" $ ppr (block_id, instr) platform <- getPlatform case regUsageOfInstr platform instr of { RU read written -> do @@ -525,6 +536,8 @@ genRaInsn block_live new_instrs block_id instr r_dying w_dying = do (fixup_blocks, adjusted_instr) <- joinToTargets block_live block_id instr +-- when (not $ null fixup_blocks) $ pprTraceM "genRA:FixBlocks" $ ppr fixup_blocks + -- Debugging - show places where the reg alloc inserted -- assignment fixup blocks. -- when (not $ null fixup_blocks) $ @@ -737,7 +750,7 @@ data SpillLoc = ReadMem StackSlot -- reading from register only in memory -- the list of free registers and free stack slots. allocateRegsAndSpill - :: (FR freeRegs, Outputable instr, Instruction instr) + :: forall freeRegs instr. (FR freeRegs, Outputable instr, Instruction instr) => Bool -- True <=> reading (load up spilled regs) -> [VirtualReg] -- don't push these out -> [instr] -- spill insns @@ -749,7 +762,8 @@ allocateRegsAndSpill _ _ spills alloc [] = return (spills, reverse alloc) allocateRegsAndSpill reading keep spills alloc (r:rs) - = do assig <- getAssigR + = do assig <- getAssigR :: RegM freeRegs (RegMap Loc) + -- pprTraceM "allocateRegsAndSpill:assig" (ppr (r:rs) $$ ppr assig) let doSpill = allocRegsAndSpill_spill reading keep spills alloc r rs assig case lookupUFM assig r of -- case (1a): already in a register @@ -779,6 +793,26 @@ allocateRegsAndSpill reading keep spills alloc (r:rs) | otherwise -> doSpill WriteNew +-- | Given a virtual reg find a preferred real register. +-- The preferred register is simply the first one the variable +-- was assigned to (if any). This way when we allocate for a loop +-- variables are likely to end up in the same registers at the +-- end and start of the loop, avoiding redundant reg-reg moves. +-- Note: I tried returning a list of past assignments, but that +-- turned out to barely matter but added a few tenths of +-- a percent to compile time. +findPrefRealReg :: forall freeRegs u. Uniquable u + => u -> RegM freeRegs (Maybe RealReg) +findPrefRealReg vreg = do + bassig <- getBlockAssigR :: RegM freeRegs (BlockMap (freeRegs,RegMap Loc)) + return $ foldr (findVirtRegAssig) Nothing bassig + where + findVirtRegAssig :: (freeRegs,RegMap Loc) -> Maybe RealReg -> Maybe RealReg + findVirtRegAssig assig z = + z <|> case lookupUFM (snd assig) vreg of + Just (InReg real_reg) -> Just real_reg + Just (InBoth real_reg _) -> Just real_reg + _ -> z -- reading is redundant with reason, but we keep it around because it's -- convenient and it maintains the recursive structure of the allocator. -- EZY @@ -795,18 +829,26 @@ allocRegsAndSpill_spill :: (FR freeRegs, Instruction instr, Outputable instr) allocRegsAndSpill_spill reading keep spills alloc r rs assig spill_loc = do platform <- getPlatform freeRegs <- getFreeRegsR - let freeRegs_thisClass = frGetFreeRegs platform (classOfVirtualReg r) freeRegs + let freeRegs_thisClass = frGetFreeRegs platform (classOfVirtualReg r) freeRegs :: [RealReg] - case freeRegs_thisClass of + -- Can we put the variable into a register it already was? + pref_reg <- findPrefRealReg r + case freeRegs_thisClass of -- case (2): we have a free register - (my_reg : _) -> - do spills' <- loadTemp r spill_loc my_reg spills + (first_free : _) -> + do let final_reg + | Just reg <- pref_reg + , reg `elem` freeRegs_thisClass + = reg + | otherwise + = first_free + spills' <- loadTemp r spill_loc final_reg spills - setAssigR (addToUFM assig r $! newLocation spill_loc my_reg) - setFreeRegsR $ frAllocateReg platform my_reg freeRegs + setAssigR (addToUFM assig r $! newLocation spill_loc final_reg) + setFreeRegsR $ frAllocateReg platform final_reg freeRegs - allocateRegsAndSpill reading keep spills' (my_reg : alloc) rs + allocateRegsAndSpill reading keep spills' (final_reg : alloc) rs -- case (3): we need to push something out to free up a register @@ -814,7 +856,8 @@ allocRegsAndSpill_spill reading keep spills alloc r rs assig spill_loc do let inRegOrBoth (InReg _) = True inRegOrBoth (InBoth _ _) = True inRegOrBoth _ = False - let candidates' = + let candidates' :: UniqFM Loc + candidates' = flip delListFromUFM keep $ filterUFM inRegOrBoth $ assig diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/Base.hs b/compiler/GHC/CmmToAsm/Reg/Linear/Base.hs index 5784660e3f..6a110f0a48 100644 --- a/compiler/GHC/CmmToAsm/Reg/Linear/Base.hs +++ b/compiler/GHC/CmmToAsm/Reg/Linear/Base.hs @@ -30,6 +30,7 @@ import GHC.Types.Unique.FM import GHC.Types.Unique.Supply import GHC.Cmm.BlockId +data ReadingOrWriting = Reading | Writing deriving (Eq,Ord) -- | Used to store the register assignment on entry to a basic block. -- We use this to handle join points, where multiple branch instructions @@ -138,6 +139,8 @@ data RA_State freeRegs , ra_config :: !NCGConfig -- | (from,fixup,to) : We inserted fixup code between from and to - , ra_fixups :: [(BlockId,BlockId,BlockId)] } + , ra_fixups :: [(BlockId,BlockId,BlockId)] + + } diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/PPC.hs b/compiler/GHC/CmmToAsm/Reg/Linear/PPC.hs index fe19164357..fd0719c656 100644 --- a/compiler/GHC/CmmToAsm/Reg/Linear/PPC.hs +++ b/compiler/GHC/CmmToAsm/Reg/Linear/PPC.hs @@ -1,3 +1,5 @@ +{-# LANGUAGE GeneralizedNewtypeDeriving #-} + -- | Free regs map for PowerPC module GHC.CmmToAsm.Reg.Linear.PPC where @@ -27,6 +29,9 @@ import Data.Bits data FreeRegs = FreeRegs !Word32 !Word32 deriving( Show ) -- The Show is used in an ASSERT +instance Outputable FreeRegs where + ppr = text . show + noFreeRegs :: FreeRegs noFreeRegs = FreeRegs 0 0 diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/SPARC.hs b/compiler/GHC/CmmToAsm/Reg/Linear/SPARC.hs index ac7dc85366..063a8836b3 100644 --- a/compiler/GHC/CmmToAsm/Reg/Linear/SPARC.hs +++ b/compiler/GHC/CmmToAsm/Reg/Linear/SPARC.hs @@ -1,4 +1,5 @@ {-# LANGUAGE CPP #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} -- | Free regs map for SPARC module GHC.CmmToAsm.Reg.Linear.SPARC where @@ -38,6 +39,9 @@ data FreeRegs instance Show FreeRegs where show = showFreeRegs +instance Outputable FreeRegs where + ppr = text . showFreeRegs + -- | A reg map where no regs are free to be allocated. noFreeRegs :: FreeRegs noFreeRegs = FreeRegs 0 0 0 diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/State.hs b/compiler/GHC/CmmToAsm/Reg/Linear/State.hs index f96cc71239..ab05ab632a 100644 --- a/compiler/GHC/CmmToAsm/Reg/Linear/State.hs +++ b/compiler/GHC/CmmToAsm/Reg/Linear/State.hs @@ -1,4 +1,5 @@ {-# LANGUAGE CPP, PatternSynonyms, DeriveFunctor #-} +{-# LANGUAGE ScopedTypeVariables #-} #if !defined(GHC_LOADED_INTO_GHCI) {-# LANGUAGE UnboxedTuples #-} diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/X86.hs b/compiler/GHC/CmmToAsm/Reg/Linear/X86.hs index ae37b0f9d1..42f63b5752 100644 --- a/compiler/GHC/CmmToAsm/Reg/Linear/X86.hs +++ b/compiler/GHC/CmmToAsm/Reg/Linear/X86.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE GeneralizedNewtypeDeriving #-} -- | Free regs map for i386 module GHC.CmmToAsm.Reg.Linear.X86 where @@ -9,12 +10,13 @@ import GHC.Platform.Reg.Class import GHC.Platform.Reg import GHC.Utils.Panic import GHC.Platform +import GHC.Utils.Outputable import Data.Word import Data.Bits newtype FreeRegs = FreeRegs Word32 - deriving Show + deriving (Show,Outputable) noFreeRegs :: FreeRegs noFreeRegs = FreeRegs 0 diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/X86_64.hs b/compiler/GHC/CmmToAsm/Reg/Linear/X86_64.hs index 325e033e85..44eea342a4 100644 --- a/compiler/GHC/CmmToAsm/Reg/Linear/X86_64.hs +++ b/compiler/GHC/CmmToAsm/Reg/Linear/X86_64.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE GeneralizedNewtypeDeriving #-} -- | Free regs map for x86_64 module GHC.CmmToAsm.Reg.Linear.X86_64 where @@ -9,12 +10,13 @@ import GHC.Platform.Reg.Class import GHC.Platform.Reg import GHC.Utils.Panic import GHC.Platform +import GHC.Utils.Outputable import Data.Word import Data.Bits newtype FreeRegs = FreeRegs Word64 - deriving Show + deriving (Show,Outputable) noFreeRegs :: FreeRegs noFreeRegs = FreeRegs 0 |