Diffstat (limited to 'compiler/nativeGen')
-rw-r--r--   compiler/nativeGen/AsmCodeGen.lhs               |  85
-rw-r--r--   compiler/nativeGen/Instruction.hs               |  13
-rw-r--r--   compiler/nativeGen/PPC/Instr.hs                 |   2
-rw-r--r--   compiler/nativeGen/RegAlloc/Graph/Stats.hs      | 400
-rw-r--r--   compiler/nativeGen/RegAlloc/Linear/Main.hs      |  25
-rw-r--r--   compiler/nativeGen/RegAlloc/Linear/StackMap.hs  |  39
-rw-r--r--   compiler/nativeGen/RegAlloc/Liveness.hs         |   7
-rw-r--r--   compiler/nativeGen/SPARC/Instr.hs               |   2
-rw-r--r--   compiler/nativeGen/X86/CodeGen.hs               |  30
-rw-r--r--   compiler/nativeGen/X86/Cond.hs                  |  69
-rw-r--r--   compiler/nativeGen/X86/Instr.hs                 | 117
-rw-r--r--   compiler/nativeGen/X86/Regs.hs                  |   6
12 files changed, 458 insertions, 337 deletions
diff --git a/compiler/nativeGen/AsmCodeGen.lhs b/compiler/nativeGen/AsmCodeGen.lhs index 8c608f1bf1..47fd96c426 100644 --- a/compiler/nativeGen/AsmCodeGen.lhs +++ b/compiler/nativeGen/AsmCodeGen.lhs @@ -51,7 +51,7 @@ import NCGMonad import BlockId import CgUtils ( fixStgRegisters ) import OldCmm -import CmmOpt ( cmmEliminateDeadBlocks, cmmMiniInline, cmmMachOpFold ) +import CmmOpt ( cmmMachOpFold ) import OldPprCmm import CLabel @@ -133,16 +133,17 @@ The machine-dependent bits break down as follows: data NcgImpl statics instr jumpDest = NcgImpl { cmmTopCodeGen :: RawCmmDecl -> NatM [NatCmmDecl statics instr], - generateJumpTableForInstr :: DynFlags -> instr -> Maybe (NatCmmDecl statics instr), + generateJumpTableForInstr :: instr -> Maybe (NatCmmDecl statics instr), getJumpDestBlockId :: jumpDest -> Maybe BlockId, canShortcut :: instr -> Maybe jumpDest, shortcutStatics :: (BlockId -> Maybe jumpDest) -> statics -> statics, shortcutJump :: (BlockId -> Maybe jumpDest) -> instr -> instr, pprNatCmmDecl :: NatCmmDecl statics instr -> SDoc, - maxSpillSlots :: DynFlags -> Int, - allocatableRegs :: Platform -> [RealReg], + maxSpillSlots :: Int, + allocatableRegs :: [RealReg], ncg_x86fp_kludge :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr], ncgExpandTop :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr], + ncgAllocMoreStack :: Int -> NatCmmDecl statics instr -> NatCmmDecl statics instr, ncgMakeFarBranches :: [NatBasicBlock instr] -> [NatBasicBlock instr] } @@ -154,15 +155,16 @@ nativeCodeGen dflags h us cmms nCG' ncgImpl = nativeCodeGen' dflags ncgImpl h us cmms x86NcgImpl = NcgImpl { cmmTopCodeGen = X86.CodeGen.cmmTopCodeGen - ,generateJumpTableForInstr = X86.CodeGen.generateJumpTableForInstr + ,generateJumpTableForInstr = X86.CodeGen.generateJumpTableForInstr dflags ,getJumpDestBlockId = X86.Instr.getJumpDestBlockId ,canShortcut = X86.Instr.canShortcut ,shortcutStatics = X86.Instr.shortcutStatics ,shortcutJump = X86.Instr.shortcutJump ,pprNatCmmDecl = X86.Ppr.pprNatCmmDecl - ,maxSpillSlots = X86.Instr.maxSpillSlots - ,allocatableRegs = X86.Regs.allocatableRegs + ,maxSpillSlots = X86.Instr.maxSpillSlots dflags + ,allocatableRegs = X86.Regs.allocatableRegs platform ,ncg_x86fp_kludge = id + ,ncgAllocMoreStack = X86.Instr.allocMoreStack platform ,ncgExpandTop = id ,ncgMakeFarBranches = id } @@ -172,30 +174,32 @@ nativeCodeGen dflags h us cmms ArchPPC -> nCG' $ NcgImpl { cmmTopCodeGen = PPC.CodeGen.cmmTopCodeGen - ,generateJumpTableForInstr = PPC.CodeGen.generateJumpTableForInstr + ,generateJumpTableForInstr = PPC.CodeGen.generateJumpTableForInstr dflags ,getJumpDestBlockId = PPC.RegInfo.getJumpDestBlockId ,canShortcut = PPC.RegInfo.canShortcut ,shortcutStatics = PPC.RegInfo.shortcutStatics ,shortcutJump = PPC.RegInfo.shortcutJump ,pprNatCmmDecl = PPC.Ppr.pprNatCmmDecl - ,maxSpillSlots = PPC.Instr.maxSpillSlots - ,allocatableRegs = PPC.Regs.allocatableRegs + ,maxSpillSlots = PPC.Instr.maxSpillSlots dflags + ,allocatableRegs = PPC.Regs.allocatableRegs platform ,ncg_x86fp_kludge = id + ,ncgAllocMoreStack = noAllocMoreStack ,ncgExpandTop = id ,ncgMakeFarBranches = makeFarBranches } ArchSPARC -> nCG' $ NcgImpl { cmmTopCodeGen = SPARC.CodeGen.cmmTopCodeGen - ,generateJumpTableForInstr = SPARC.CodeGen.generateJumpTableForInstr + ,generateJumpTableForInstr = SPARC.CodeGen.generateJumpTableForInstr dflags ,getJumpDestBlockId = SPARC.ShortcutJump.getJumpDestBlockId ,canShortcut = SPARC.ShortcutJump.canShortcut ,shortcutStatics = SPARC.ShortcutJump.shortcutStatics ,shortcutJump = 
SPARC.ShortcutJump.shortcutJump ,pprNatCmmDecl = SPARC.Ppr.pprNatCmmDecl - ,maxSpillSlots = SPARC.Instr.maxSpillSlots - ,allocatableRegs = \_ -> SPARC.Regs.allocatableRegs + ,maxSpillSlots = SPARC.Instr.maxSpillSlots dflags + ,allocatableRegs = SPARC.Regs.allocatableRegs ,ncg_x86fp_kludge = id + ,ncgAllocMoreStack = noAllocMoreStack ,ncgExpandTop = map SPARC.CodeGen.Expand.expandTop ,ncgMakeFarBranches = id } @@ -206,6 +210,23 @@ nativeCodeGen dflags h us cmms ArchUnknown -> panic "nativeCodeGen: No NCG for unknown arch" + +-- +-- Allocating more stack space for spilling is currently only +-- supported for the linear register allocator on x86/x86_64, the rest +-- default to the panic below. To support allocating extra stack on +-- more platforms provide a definition of ncgAllocMoreStack. +-- +noAllocMoreStack :: Int -> NatCmmDecl statics instr -> NatCmmDecl statics instr +noAllocMoreStack amount _ + = panic $ "Register allocator: out of stack slots (need " ++ show amount ++ ")\n" + ++ " If you are trying to compile SHA1.hs from the crypto library then this\n" + ++ " is a known limitation in the linear allocator.\n" + ++ "\n" + ++ " Try enabling the graph colouring allocator with -fregs-graph instead." + ++ " You can still file a bug report if you like.\n" + + nativeCodeGen' :: (Outputable statics, Outputable instr, Instruction instr) => DynFlags -> NcgImpl statics instr jumpDest @@ -419,7 +440,7 @@ cmmNativeGen dflags ncgImpl us cmm count = foldr (\r -> plusUFM_C unionUniqSets $ unitUFM (targetClassOfRealReg platform r) (unitUniqSet r)) emptyUFM - $ allocatableRegs ncgImpl platform + $ allocatableRegs ncgImpl -- do the graph coloring register allocation let ((alloced, regAllocStats), usAlloc) @@ -428,7 +449,7 @@ cmmNativeGen dflags ncgImpl us cmm count $ Color.regAlloc dflags alloc_regs - (mkUniqSet [0 .. maxSpillSlots ncgImpl dflags]) + (mkUniqSet [0 .. maxSpillSlots ncgImpl]) withLiveness -- dump out what happened during register allocation @@ -457,11 +478,20 @@ cmmNativeGen dflags ncgImpl us cmm count else do -- do linear register allocation + let reg_alloc proc = do + (alloced, maybe_more_stack, ra_stats) <- + Linear.regAlloc dflags proc + case maybe_more_stack of + Nothing -> return ( alloced, ra_stats ) + Just amount -> + return ( ncgAllocMoreStack ncgImpl amount alloced + , ra_stats ) + let ((alloced, regAllocStats), usAlloc) = {-# SCC "RegAlloc" #-} initUs usLive $ liftM unzip - $ mapM (Linear.regAlloc dflags) withLiveness + $ mapM reg_alloc withLiveness dumpIfSet_dyn dflags Opt_D_dump_asm_regalloc "Registers allocated" @@ -490,7 +520,7 @@ cmmNativeGen dflags ncgImpl us cmm count ---- generate jump tables let tabled = {-# SCC "generateJumpTables" #-} - generateJumpTables dflags ncgImpl kludged + generateJumpTables ncgImpl kludged ---- shortcut branches let shorted = @@ -711,12 +741,12 @@ makeFarBranches blocks -- Analyzes all native code and generates data sections for all jump -- table instructions. 
generateJumpTables - :: DynFlags -> NcgImpl statics instr jumpDest - -> [NatCmmDecl statics instr] -> [NatCmmDecl statics instr] -generateJumpTables dflags ncgImpl xs = concatMap f xs + :: NcgImpl statics instr jumpDest + -> [NatCmmDecl statics instr] -> [NatCmmDecl statics instr] +generateJumpTables ncgImpl xs = concatMap f xs where f p@(CmmProc _ _ (ListGraph xs)) = p : concatMap g xs f p = [p] - g (BasicBlock _ xs) = catMaybes (map (generateJumpTableForInstr ncgImpl dflags) xs) + g (BasicBlock _ xs) = catMaybes (map (generateJumpTableForInstr ncgImpl) xs) -- ----------------------------------------------------------------------------- -- Shortcut branches @@ -828,15 +858,13 @@ genMachCode dflags cmmTopCodeGen cmm_top Here we do: (a) Constant folding - (b) Simple inlining: a temporary which is assigned to and then - used, once, can be shorted. (c) Position independent code and dynamic linking (i) introduce the appropriate indirections and position independent refs (ii) compile a list of imported symbols (d) Some arch-specific optimizations -(a) and (b) will be moving to the new Hoopl pipeline, however, (c) and +(a) will be moving to the new Hoopl pipeline, however, (c) and (d) are only needed by the native backend and will continue to live here. @@ -851,14 +879,7 @@ Ideas for other things we could do (put these in Hoopl please!): cmmToCmm :: DynFlags -> RawCmmDecl -> (RawCmmDecl, [CLabel]) cmmToCmm _ top@(CmmData _ _) = (top, []) cmmToCmm dflags (CmmProc info lbl (ListGraph blocks)) = runCmmOpt dflags $ do - let reachable_blocks | dopt Opt_TryNewCodeGen dflags = blocks - | otherwise = cmmEliminateDeadBlocks blocks - -- The new codegen path has already eliminated unreachable blocks by now - - inlined_blocks | dopt Opt_TryNewCodeGen dflags = reachable_blocks - | otherwise = cmmMiniInline dflags reachable_blocks - - blocks' <- mapM cmmBlockConFold inlined_blocks + blocks' <- mapM cmmBlockConFold blocks return $ CmmProc info lbl (ListGraph blocks') newtype CmmOptM a = CmmOptM (([CLabel], DynFlags) -> (# a, [CLabel] #)) diff --git a/compiler/nativeGen/Instruction.hs b/compiler/nativeGen/Instruction.hs index 64ba32c6dc..86f5ae435d 100644 --- a/compiler/nativeGen/Instruction.hs +++ b/compiler/nativeGen/Instruction.hs @@ -163,3 +163,16 @@ class Instruction instr where -> [instr] + -- Subtract an amount from the C stack pointer + mkStackAllocInstr + :: Platform -- TODO: remove (needed by x86/x86_64 + -- because they share an Instr type) + -> Int + -> instr + + -- Add an amount to the C stack pointer + mkStackDeallocInstr + :: Platform -- TODO: remove (needed by x86/x86_64 + -- because they share an Instr type) + -> Int + -> instr diff --git a/compiler/nativeGen/PPC/Instr.hs b/compiler/nativeGen/PPC/Instr.hs index 464a88a08b..1f5e809abb 100644 --- a/compiler/nativeGen/PPC/Instr.hs +++ b/compiler/nativeGen/PPC/Instr.hs @@ -64,6 +64,8 @@ instance Instruction Instr where mkRegRegMoveInstr _ = ppc_mkRegRegMoveInstr takeRegRegMoveInstr = ppc_takeRegRegMoveInstr mkJumpInstr = ppc_mkJumpInstr + mkStackAllocInstr = panic "no ppc_mkStackAllocInstr" + mkStackDeallocInstr = panic "no ppc_mkStackDeallocInstr" -- ----------------------------------------------------------------------------- diff --git a/compiler/nativeGen/RegAlloc/Graph/Stats.hs b/compiler/nativeGen/RegAlloc/Graph/Stats.hs index 32970336ad..f85cdb7eff 100644 --- a/compiler/nativeGen/RegAlloc/Graph/Stats.hs +++ b/compiler/nativeGen/RegAlloc/Graph/Stats.hs @@ -1,23 +1,16 @@ -{-# OPTIONS -fno-warn-missing-signatures #-} --- | Carries interesting 
info for debugging / profiling of the --- graph coloring register allocator. -{-# OPTIONS -fno-warn-tabs #-} --- The above warning supression flag is a temporary kludge. --- While working on this module you are encouraged to remove it and --- detab the module (please do the detabbing in a separate patch). See --- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#TabsvsSpaces --- for details +-- | Carries interesting info for debugging / profiling of the +-- graph coloring register allocator. module RegAlloc.Graph.Stats ( - RegAllocStats (..), + RegAllocStats (..), - pprStats, - pprStatsSpills, - pprStatsLifetimes, - pprStatsConflict, - pprStatsLifeConflict, + pprStats, + pprStatsSpills, + pprStatsLifetimes, + pprStatsConflict, + pprStatsLifeConflict, - countSRMs, addSRM + countSRMs, addSRM ) where @@ -45,251 +38,260 @@ import Data.List data RegAllocStats statics instr - -- initial graph - = RegAllocStatsStart - { raLiveCmm :: [LiveCmmDecl statics instr] -- ^ initial code, with liveness - , raGraph :: Color.Graph VirtualReg RegClass RealReg -- ^ the initial, uncolored graph - , raSpillCosts :: SpillCostInfo } -- ^ information to help choose which regs to spill - - -- a spill stage - | RegAllocStatsSpill - { raCode :: [LiveCmmDecl statics instr] -- ^ the code we tried to allocate registers for - , raGraph :: Color.Graph VirtualReg RegClass RealReg -- ^ the partially colored graph - , raCoalesced :: UniqFM VirtualReg -- ^ the regs that were coaleced - , raSpillStats :: SpillStats -- ^ spiller stats - , raSpillCosts :: SpillCostInfo -- ^ number of instrs each reg lives for - , raSpilled :: [LiveCmmDecl statics instr] } -- ^ code with spill instructions added - - -- a successful coloring - | RegAllocStatsColored - { raCode :: [LiveCmmDecl statics instr] -- ^ the code we tried to allocate registers for - , raGraph :: Color.Graph VirtualReg RegClass RealReg -- ^ the uncolored graph - , raGraphColored :: Color.Graph VirtualReg RegClass RealReg -- ^ the coalesced and colored graph - , raCoalesced :: UniqFM VirtualReg -- ^ the regs that were coaleced - , raCodeCoalesced :: [LiveCmmDecl statics instr] -- ^ code with coalescings applied - , raPatched :: [LiveCmmDecl statics instr] -- ^ code with vregs replaced by hregs - , raSpillClean :: [LiveCmmDecl statics instr] -- ^ code with unneeded spill\/reloads cleaned out - , raFinal :: [NatCmmDecl statics instr] -- ^ final code - , raSRMs :: (Int, Int, Int) } -- ^ spill\/reload\/reg-reg moves present in this code + -- initial graph + = RegAllocStatsStart + { raLiveCmm :: [LiveCmmDecl statics instr] -- ^ initial code, with liveness + , raGraph :: Color.Graph VirtualReg RegClass RealReg -- ^ the initial, uncolored graph + , raSpillCosts :: SpillCostInfo } -- ^ information to help choose which regs to spill + + -- a spill stage + | RegAllocStatsSpill + { raCode :: [LiveCmmDecl statics instr] -- ^ the code we tried to allocate registers for + , raGraph :: Color.Graph VirtualReg RegClass RealReg -- ^ the partially colored graph + , raCoalesced :: UniqFM VirtualReg -- ^ the regs that were coaleced + , raSpillStats :: SpillStats -- ^ spiller stats + , raSpillCosts :: SpillCostInfo -- ^ number of instrs each reg lives for + , raSpilled :: [LiveCmmDecl statics instr] } -- ^ code with spill instructions added + + -- a successful coloring + | RegAllocStatsColored + { raCode :: [LiveCmmDecl statics instr] -- ^ the code we tried to allocate registers for + , raGraph :: Color.Graph VirtualReg RegClass RealReg -- ^ the uncolored graph + , raGraphColored :: 
Color.Graph VirtualReg RegClass RealReg -- ^ the coalesced and colored graph + , raCoalesced :: UniqFM VirtualReg -- ^ the regs that were coaleced + , raCodeCoalesced :: [LiveCmmDecl statics instr] -- ^ code with coalescings applied + , raPatched :: [LiveCmmDecl statics instr] -- ^ code with vregs replaced by hregs + , raSpillClean :: [LiveCmmDecl statics instr] -- ^ code with unneeded spill\/reloads cleaned out + , raFinal :: [NatCmmDecl statics instr] -- ^ final code + , raSRMs :: (Int, Int, Int) } -- ^ spill\/reload\/reg-reg moves present in this code instance (Outputable statics, Outputable instr) => Outputable (RegAllocStats statics instr) where ppr (s@RegAllocStatsStart{}) = sdocWithPlatform $ \platform -> - text "# Start" - $$ text "# Native code with liveness information." - $$ ppr (raLiveCmm s) - $$ text "" - $$ text "# Initial register conflict graph." - $$ Color.dotGraph - (targetRegDotColor platform) - (trivColorable platform - (targetVirtualRegSqueeze platform) - (targetRealRegSqueeze platform)) - (raGraph s) + text "# Start" + $$ text "# Native code with liveness information." + $$ ppr (raLiveCmm s) + $$ text "" + $$ text "# Initial register conflict graph." + $$ Color.dotGraph + (targetRegDotColor platform) + (trivColorable platform + (targetVirtualRegSqueeze platform) + (targetRealRegSqueeze platform)) + (raGraph s) ppr (s@RegAllocStatsSpill{}) = - text "# Spill" + text "# Spill" - $$ text "# Code with liveness information." - $$ ppr (raCode s) - $$ text "" + $$ text "# Code with liveness information." + $$ ppr (raCode s) + $$ text "" - $$ (if (not $ isNullUFM $ raCoalesced s) - then text "# Registers coalesced." - $$ (vcat $ map ppr $ ufmToList $ raCoalesced s) - $$ text "" - else empty) + $$ (if (not $ isNullUFM $ raCoalesced s) + then text "# Registers coalesced." + $$ (vcat $ map ppr $ ufmToList $ raCoalesced s) + $$ text "" + else empty) - $$ text "# Spills inserted." - $$ ppr (raSpillStats s) - $$ text "" + $$ text "# Spills inserted." + $$ ppr (raSpillStats s) + $$ text "" - $$ text "# Code with spills inserted." - $$ ppr (raSpilled s) + $$ text "# Code with spills inserted." + $$ ppr (raSpilled s) ppr (s@RegAllocStatsColored { raSRMs = (spills, reloads, moves) }) = sdocWithPlatform $ \platform -> - text "# Colored" - - $$ text "# Code with liveness information." - $$ ppr (raCode s) - $$ text "" - - $$ text "# Register conflict graph (colored)." - $$ Color.dotGraph - (targetRegDotColor platform) - (trivColorable platform - (targetVirtualRegSqueeze platform) - (targetRealRegSqueeze platform)) - (raGraphColored s) - $$ text "" - - $$ (if (not $ isNullUFM $ raCoalesced s) - then text "# Registers coalesced." - $$ (vcat $ map ppr $ ufmToList $ raCoalesced s) - $$ text "" - else empty) - - $$ text "# Native code after coalescings applied." - $$ ppr (raCodeCoalesced s) - $$ text "" - - $$ text "# Native code after register allocation." - $$ ppr (raPatched s) - $$ text "" - - $$ text "# Clean out unneeded spill/reloads." - $$ ppr (raSpillClean s) - $$ text "" - - $$ text "# Final code, after rewriting spill/rewrite pseudo instrs." - $$ ppr (raFinal s) - $$ text "" - $$ text "# Score:" - $$ (text "# spills inserted: " <> int spills) - $$ (text "# reloads inserted: " <> int reloads) - $$ (text "# reg-reg moves remaining: " <> int moves) - $$ text "" + text "# Colored" + + $$ text "# Code with liveness information." + $$ ppr (raCode s) + $$ text "" + + $$ text "# Register conflict graph (colored)." 
+ $$ Color.dotGraph + (targetRegDotColor platform) + (trivColorable platform + (targetVirtualRegSqueeze platform) + (targetRealRegSqueeze platform)) + (raGraphColored s) + $$ text "" + + $$ (if (not $ isNullUFM $ raCoalesced s) + then text "# Registers coalesced." + $$ (vcat $ map ppr $ ufmToList $ raCoalesced s) + $$ text "" + else empty) + + $$ text "# Native code after coalescings applied." + $$ ppr (raCodeCoalesced s) + $$ text "" + + $$ text "# Native code after register allocation." + $$ ppr (raPatched s) + $$ text "" + + $$ text "# Clean out unneeded spill/reloads." + $$ ppr (raSpillClean s) + $$ text "" + + $$ text "# Final code, after rewriting spill/rewrite pseudo instrs." + $$ ppr (raFinal s) + $$ text "" + $$ text "# Score:" + $$ (text "# spills inserted: " <> int spills) + $$ (text "# reloads inserted: " <> int reloads) + $$ (text "# reg-reg moves remaining: " <> int moves) + $$ text "" -- | Do all the different analysis on this list of RegAllocStats -pprStats - :: [RegAllocStats statics instr] - -> Color.Graph VirtualReg RegClass RealReg - -> SDoc - +pprStats + :: [RegAllocStats statics instr] + -> Color.Graph VirtualReg RegClass RealReg + -> SDoc + pprStats stats graph - = let outSpills = pprStatsSpills stats - outLife = pprStatsLifetimes stats - outConflict = pprStatsConflict stats - outScatter = pprStatsLifeConflict stats graph + = let outSpills = pprStatsSpills stats + outLife = pprStatsLifetimes stats + outConflict = pprStatsConflict stats + outScatter = pprStatsLifeConflict stats graph - in vcat [outSpills, outLife, outConflict, outScatter] + in vcat [outSpills, outLife, outConflict, outScatter] -- | Dump a table of how many spill loads \/ stores were inserted for each vreg. pprStatsSpills - :: [RegAllocStats statics instr] -> SDoc + :: [RegAllocStats statics instr] -> SDoc pprStatsSpills stats = let - finals = [ s | s@RegAllocStatsColored{} <- stats] + finals = [ s | s@RegAllocStatsColored{} <- stats] - -- sum up how many stores\/loads\/reg-reg-moves were left in the code - total = foldl' addSRM (0, 0, 0) - $ map raSRMs finals + -- sum up how many stores\/loads\/reg-reg-moves were left in the code + total = foldl' addSRM (0, 0, 0) + $ map raSRMs finals - in ( text "-- spills-added-total" - $$ text "-- (stores, loads, reg_reg_moves_remaining)" - $$ ppr total - $$ text "") + in ( text "-- spills-added-total" + $$ text "-- (stores, loads, reg_reg_moves_remaining)" + $$ ppr total + $$ text "") -- | Dump a table of how long vregs tend to live for in the initial code. 
pprStatsLifetimes - :: [RegAllocStats statics instr] -> SDoc + :: [RegAllocStats statics instr] -> SDoc pprStatsLifetimes stats - = let info = foldl' plusSpillCostInfo zeroSpillCostInfo - [ raSpillCosts s - | s@RegAllocStatsStart{} <- stats ] + = let info = foldl' plusSpillCostInfo zeroSpillCostInfo + [ raSpillCosts s + | s@RegAllocStatsStart{} <- stats ] - lifeBins = binLifetimeCount $ lifeMapFromSpillCostInfo info + lifeBins = binLifetimeCount $ lifeMapFromSpillCostInfo info - in ( text "-- vreg-population-lifetimes" - $$ text "-- (instruction_count, number_of_vregs_that_lived_that_long)" - $$ (vcat $ map ppr $ eltsUFM lifeBins) - $$ text "\n") + in ( text "-- vreg-population-lifetimes" + $$ text "-- (instruction_count, number_of_vregs_that_lived_that_long)" + $$ (vcat $ map ppr $ eltsUFM lifeBins) + $$ text "\n") binLifetimeCount :: UniqFM (VirtualReg, Int) -> UniqFM (Int, Int) binLifetimeCount fm - = let lifes = map (\l -> (l, (l, 1))) - $ map snd - $ eltsUFM fm + = let lifes = map (\l -> (l, (l, 1))) + $ map snd + $ eltsUFM fm - in addListToUFM_C - (\(l1, c1) (_, c2) -> (l1, c1 + c2)) - emptyUFM - lifes + in addListToUFM_C + (\(l1, c1) (_, c2) -> (l1, c1 + c2)) + emptyUFM + lifes -- | Dump a table of how many conflicts vregs tend to have in the initial code. pprStatsConflict - :: [RegAllocStats statics instr] -> SDoc + :: [RegAllocStats statics instr] -> SDoc pprStatsConflict stats - = let confMap = foldl' (plusUFM_C (\(c1, n1) (_, n2) -> (c1, n1 + n2))) - emptyUFM - $ map Color.slurpNodeConflictCount - [ raGraph s | s@RegAllocStatsStart{} <- stats ] + = let confMap = foldl' (plusUFM_C (\(c1, n1) (_, n2) -> (c1, n1 + n2))) + emptyUFM + $ map Color.slurpNodeConflictCount + [ raGraph s | s@RegAllocStatsStart{} <- stats ] - in ( text "-- vreg-conflicts" - $$ text "-- (conflict_count, number_of_vregs_that_had_that_many_conflicts)" - $$ (vcat $ map ppr $ eltsUFM confMap) - $$ text "\n") + in ( text "-- vreg-conflicts" + $$ text "-- (conflict_count, number_of_vregs_that_had_that_many_conflicts)" + $$ (vcat $ map ppr $ eltsUFM confMap) + $$ text "\n") -- | For every vreg, dump it's how many conflicts it has and its lifetime --- good for making a scatter plot. +-- good for making a scatter plot. 
pprStatsLifeConflict - :: [RegAllocStats statics instr] - -> Color.Graph VirtualReg RegClass RealReg -- ^ global register conflict graph - -> SDoc + :: [RegAllocStats statics instr] + -> Color.Graph VirtualReg RegClass RealReg -- ^ global register conflict graph + -> SDoc pprStatsLifeConflict stats graph - = let lifeMap = lifeMapFromSpillCostInfo - $ foldl' plusSpillCostInfo zeroSpillCostInfo - $ [ raSpillCosts s | s@RegAllocStatsStart{} <- stats ] - - scatter = map (\r -> let lifetime = case lookupUFM lifeMap r of - Just (_, l) -> l - Nothing -> 0 - Just node = Color.lookupNode graph r - in parens $ hcat $ punctuate (text ", ") - [ doubleQuotes $ ppr $ Color.nodeId node - , ppr $ sizeUniqSet (Color.nodeConflicts node) - , ppr $ lifetime ]) - $ map Color.nodeId - $ eltsUFM - $ Color.graphMap graph - - in ( text "-- vreg-conflict-lifetime" - $$ text "-- (vreg, vreg_conflicts, vreg_lifetime)" - $$ (vcat scatter) - $$ text "\n") + = let lifeMap = lifeMapFromSpillCostInfo + $ foldl' plusSpillCostInfo zeroSpillCostInfo + $ [ raSpillCosts s | s@RegAllocStatsStart{} <- stats ] + + scatter = map (\r -> let lifetime = case lookupUFM lifeMap r of + Just (_, l) -> l + Nothing -> 0 + Just node = Color.lookupNode graph r + in parens $ hcat $ punctuate (text ", ") + [ doubleQuotes $ ppr $ Color.nodeId node + , ppr $ sizeUniqSet (Color.nodeConflicts node) + , ppr $ lifetime ]) + $ map Color.nodeId + $ eltsUFM + $ Color.graphMap graph + + in ( text "-- vreg-conflict-lifetime" + $$ text "-- (vreg, vreg_conflicts, vreg_lifetime)" + $$ (vcat scatter) + $$ text "\n") -- | Count spill/reload/reg-reg moves. --- Lets us see how well the register allocator has done. -countSRMs - :: Instruction instr - => LiveCmmDecl statics instr -> (Int, Int, Int) +-- Lets us see how well the register allocator has done. +countSRMs + :: Instruction instr + => LiveCmmDecl statics instr -> (Int, Int, Int) countSRMs cmm - = execState (mapBlockTopM countSRM_block cmm) (0, 0, 0) + = execState (mapBlockTopM countSRM_block cmm) (0, 0, 0) +countSRM_block :: Instruction instr + => GenBasicBlock (LiveInstr instr) + -> State (Int, Int, Int) (GenBasicBlock (LiveInstr instr)) countSRM_block (BasicBlock i instrs) - = do instrs' <- mapM countSRM_instr instrs - return $ BasicBlock i instrs' + = do instrs' <- mapM countSRM_instr instrs + return $ BasicBlock i instrs' +countSRM_instr :: Instruction instr + => LiveInstr instr -> State (Int, Int, Int) (LiveInstr instr) countSRM_instr li - | LiveInstr SPILL{} _ <- li - = do modify $ \(s, r, m) -> (s + 1, r, m) - return li - - | LiveInstr RELOAD{} _ <- li - = do modify $ \(s, r, m) -> (s, r + 1, m) - return li - - | LiveInstr instr _ <- li - , Just _ <- takeRegRegMoveInstr instr - = do modify $ \(s, r, m) -> (s, r, m + 1) - return li - - | otherwise - = return li + | LiveInstr SPILL{} _ <- li + = do modify $ \(s, r, m) -> (s + 1, r, m) + return li + + | LiveInstr RELOAD{} _ <- li + = do modify $ \(s, r, m) -> (s, r + 1, m) + return li + + | LiveInstr instr _ <- li + , Just _ <- takeRegRegMoveInstr instr + = do modify $ \(s, r, m) -> (s, r, m + 1) + return li + + | otherwise + = return li -- sigh.. 
+addSRM :: (Int, Int, Int) -> (Int, Int, Int) -> (Int, Int, Int) addSRM (s1, r1, m1) (s2, r2, m2) - = (s1+s2, r1+r2, m1+m2) + = let !s = s1 + s2 + !r = r1 + r2 + !m = m1 + m2 + in (s, r, m) diff --git a/compiler/nativeGen/RegAlloc/Linear/Main.hs b/compiler/nativeGen/RegAlloc/Linear/Main.hs index 3f92ed975b..a15bca07e7 100644 --- a/compiler/nativeGen/RegAlloc/Linear/Main.hs +++ b/compiler/nativeGen/RegAlloc/Linear/Main.hs @@ -139,22 +139,27 @@ regAlloc :: (Outputable instr, Instruction instr) => DynFlags -> LiveCmmDecl statics instr - -> UniqSM (NatCmmDecl statics instr, Maybe RegAllocStats) + -> UniqSM ( NatCmmDecl statics instr + , Maybe Int -- number of extra stack slots required, + -- beyond maxSpillSlots + , Maybe RegAllocStats) regAlloc _ (CmmData sec d) = return ( CmmData sec d + , Nothing , Nothing ) regAlloc _ (CmmProc (LiveInfo info _ _ _) lbl []) = return ( CmmProc info lbl (ListGraph []) + , Nothing , Nothing ) regAlloc dflags (CmmProc static lbl sccs) | LiveInfo info (Just first_id) (Just block_live) _ <- static = do -- do register allocation on each component. - (final_blocks, stats) + (final_blocks, stats, stack_use) <- linearRegAlloc dflags first_id block_live sccs -- make sure the block that was first in the input list @@ -162,7 +167,15 @@ regAlloc dflags (CmmProc static lbl sccs) let ((first':_), rest') = partition ((== first_id) . blockId) final_blocks + let max_spill_slots = maxSpillSlots dflags + extra_stack + | stack_use > max_spill_slots + = Just (stack_use - max_spill_slots) + | otherwise + = Nothing + return ( CmmProc info lbl (ListGraph (first' : rest')) + , extra_stack , Just stats) -- bogus. to make non-exhaustive match warning go away. @@ -184,7 +197,7 @@ linearRegAlloc -> BlockId -- ^ the first block -> BlockMap RegSet -- ^ live regs on entry to each basic block -> [SCC (LiveBasicBlock instr)] -- ^ instructions annotated with "deaths" - -> UniqSM ([NatBasicBlock instr], RegAllocStats) + -> UniqSM ([NatBasicBlock instr], RegAllocStats, Int) linearRegAlloc dflags first_id block_live sccs = let platform = targetPlatform dflags @@ -204,14 +217,14 @@ linearRegAlloc' -> BlockId -- ^ the first block -> BlockMap RegSet -- ^ live regs on entry to each basic block -> [SCC (LiveBasicBlock instr)] -- ^ instructions annotated with "deaths" - -> UniqSM ([NatBasicBlock instr], RegAllocStats) + -> UniqSM ([NatBasicBlock instr], RegAllocStats, Int) linearRegAlloc' dflags initFreeRegs first_id block_live sccs = do us <- getUs - let (_, _, stats, blocks) = + let (_, stack, stats, blocks) = runR dflags emptyBlockMap initFreeRegs emptyRegMap (emptyStackMap dflags) us $ linearRA_SCCs first_id block_live [] sccs - return (blocks, stats) + return (blocks, stats, getStackUse stack) linearRA_SCCs :: (FR freeRegs, Instruction instr, Outputable instr) diff --git a/compiler/nativeGen/RegAlloc/Linear/StackMap.hs b/compiler/nativeGen/RegAlloc/Linear/StackMap.hs index b1fc3c169e..69cf411751 100644 --- a/compiler/nativeGen/RegAlloc/Linear/StackMap.hs +++ b/compiler/nativeGen/RegAlloc/Linear/StackMap.hs @@ -21,15 +21,13 @@ module RegAlloc.Linear.StackMap ( StackSlot, StackMap(..), emptyStackMap, - getStackSlotFor + getStackSlotFor, + getStackUse ) where -import RegAlloc.Linear.FreeRegs - import DynFlags -import Outputable import UniqFM import Unique @@ -40,7 +38,7 @@ type StackSlot = Int data StackMap = StackMap { -- | The slots that are still available to be allocated. - stackMapFreeSlots :: [StackSlot] + stackMapNextFreeSlot :: !Int -- | Assignment of vregs to stack slots. 
, stackMapAssignment :: UniqFM StackSlot } @@ -48,7 +46,7 @@ data StackMap -- | An empty stack map, with all slots available. emptyStackMap :: DynFlags -> StackMap -emptyStackMap dflags = StackMap [0 .. maxSpillSlots dflags] emptyUFM +emptyStackMap _ = StackMap 0 emptyUFM -- | If this vreg unique already has a stack assignment then return the slot number, @@ -56,24 +54,13 @@ emptyStackMap dflags = StackMap [0 .. maxSpillSlots dflags] emptyUFM -- getStackSlotFor :: StackMap -> Unique -> (StackMap, Int) -getStackSlotFor (StackMap [] _) _ - - -- This happens all the time when trying to compile darcs' SHA1.hs, see Track #1993 - -- SHA1.lhs has also been added to the Crypto library on Hackage, - -- so we see this all the time. - -- - -- It would be better to automatically invoke the graph allocator, or do something - -- else besides panicing, but that's a job for a different day. -- BL 2009/02 - -- - = panic $ "RegAllocLinear.getStackSlotFor: out of stack slots\n" - ++ " If you are trying to compile SHA1.hs from the crypto library then this\n" - ++ " is a known limitation in the linear allocator.\n" - ++ "\n" - ++ " Try enabling the graph colouring allocator with -fregs-graph instead." - ++ " You can still file a bug report if you like.\n" - -getStackSlotFor fs@(StackMap (freeSlot:stack') reserved) reg = - case lookupUFM reserved reg of - Just slot -> (fs, slot) - Nothing -> (StackMap stack' (addToUFM reserved reg freeSlot), freeSlot) +getStackSlotFor fs@(StackMap _ reserved) reg + | Just slot <- lookupUFM reserved reg = (fs, slot) + +getStackSlotFor (StackMap freeSlot reserved) reg = + (StackMap (freeSlot+1) (addToUFM reserved reg freeSlot), freeSlot) + +-- | Return the number of stack slots that were allocated +getStackUse :: StackMap -> Int +getStackUse (StackMap freeSlot _) = freeSlot diff --git a/compiler/nativeGen/RegAlloc/Liveness.hs b/compiler/nativeGen/RegAlloc/Liveness.hs index ac58944f1c..608f0a423b 100644 --- a/compiler/nativeGen/RegAlloc/Liveness.hs +++ b/compiler/nativeGen/RegAlloc/Liveness.hs @@ -5,8 +5,6 @@ -- (c) The University of Glasgow 2004 -- ----------------------------------------------------------------------------- -{-# OPTIONS -Wall -fno-warn-name-shadowing #-} - module RegAlloc.Liveness ( RegSet, RegMap, emptyRegMap, @@ -138,6 +136,11 @@ instance Instruction instr => Instruction (InstrSR instr) where mkJumpInstr target = map Instr (mkJumpInstr target) + mkStackAllocInstr platform amount = + Instr (mkStackAllocInstr platform amount) + + mkStackDeallocInstr platform amount = + Instr (mkStackDeallocInstr platform amount) -- | An instruction with liveness information. diff --git a/compiler/nativeGen/SPARC/Instr.hs b/compiler/nativeGen/SPARC/Instr.hs index 9404badea6..f55c660118 100644 --- a/compiler/nativeGen/SPARC/Instr.hs +++ b/compiler/nativeGen/SPARC/Instr.hs @@ -108,6 +108,8 @@ instance Instruction Instr where mkRegRegMoveInstr = sparc_mkRegRegMoveInstr takeRegRegMoveInstr = sparc_takeRegRegMoveInstr mkJumpInstr = sparc_mkJumpInstr + mkStackAllocInstr = panic "no sparc_mkStackAllocInstr" + mkStackDeallocInstr = panic "no sparc_mkStackDeallocInstr" -- | SPARC instruction set. 
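The StackMap change above replaces the explicit free-slot list with a bump counter, so the linear allocator never runs out of slots; it just records how many it handed out, and regAlloc reports any excess over maxSpillSlots to its caller. Below is a minimal, self-contained sketch of that discipline, not the real GHC code: it assumes Data.Map with plain Int keys in place of UniqFM and Unique, and the name extraStackNeeded is invented for illustration.

import qualified Data.Map.Strict as M

-- Simplified stand-in for RegAlloc.Linear.StackMap: slots come from a
-- bump counter instead of a pre-built free list.
data StackMap = StackMap
  { nextFreeSlot :: !Int          -- next slot to hand out
  , assignment   :: M.Map Int Int -- vreg key -> slot already given to it
  }

emptyStackMap :: StackMap
emptyStackMap = StackMap 0 M.empty

-- Reuse an existing slot for a vreg, or allocate the next one.
getStackSlotFor :: StackMap -> Int -> (StackMap, Int)
getStackSlotFor sm@(StackMap next assig) reg =
  case M.lookup reg assig of
    Just slot -> (sm, slot)
    Nothing   -> (StackMap (next + 1) (M.insert reg next assig), next)

-- How many slots were used in total; the caller compares this against
-- maxSpillSlots to decide whether extra C stack is needed.
getStackUse :: StackMap -> Int
getStackUse = nextFreeSlot

-- The decision made in RegAlloc.Linear.Main.regAlloc, in simplified form.
extraStackNeeded :: Int -> StackMap -> Maybe Int
extraStackNeeded maxSlots sm
  | used > maxSlots = Just (used - maxSlots)
  | otherwise       = Nothing
  where used = getStackUse sm

With slot allocation made total, the out-of-slots panic that used to live in getStackSlotFor moves to noAllocMoreStack in AsmCodeGen.lhs, where it only fires on backends that do not supply ncgAllocMoreStack.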
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs index b83ede89aa..fbbc37e6c9 100644 --- a/compiler/nativeGen/X86/CodeGen.hs +++ b/compiler/nativeGen/X86/CodeGen.hs @@ -1214,22 +1214,22 @@ getCondCode (CmmMachOp mop [x, y]) MO_F_Lt W64 -> condFltCode LTT x y MO_F_Le W64 -> condFltCode LE x y - MO_Eq _ -> condIntCode EQQ x y - MO_Ne _ -> condIntCode NE x y + MO_Eq _ -> condIntCode EQQ x y + MO_Ne _ -> condIntCode NE x y - MO_S_Gt _ -> condIntCode GTT x y - MO_S_Ge _ -> condIntCode GE x y - MO_S_Lt _ -> condIntCode LTT x y - MO_S_Le _ -> condIntCode LE x y + MO_S_Gt _ -> condIntCode GTT x y + MO_S_Ge _ -> condIntCode GE x y + MO_S_Lt _ -> condIntCode LTT x y + MO_S_Le _ -> condIntCode LE x y MO_U_Gt _ -> condIntCode GU x y MO_U_Ge _ -> condIntCode GEU x y MO_U_Lt _ -> condIntCode LU x y MO_U_Le _ -> condIntCode LEU x y - _other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y])) + _other -> pprPanic "getCondCode(x86,x86_64)" (ppr (CmmMachOp mop [x,y])) -getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other) +getCondCode other = pprPanic "getCondCode(2)(x86,x86_64)" (ppr other) @@ -1276,7 +1276,8 @@ condIntCode' _ cond x (CmmLit (CmmInt 0 pk)) = do return (CondCode False cond code) -- anything vs operand -condIntCode' is32Bit cond x y | isOperand is32Bit y = do +condIntCode' is32Bit cond x y + | isOperand is32Bit y = do dflags <- getDynFlags (x_reg, x_code) <- getNonClobberedReg x (y_op, y_code) <- getOperand y @@ -1284,6 +1285,17 @@ condIntCode' is32Bit cond x y | isOperand is32Bit y = do code = x_code `appOL` y_code `snocOL` CMP (cmmTypeSize (cmmExprType dflags x)) y_op (OpReg x_reg) return (CondCode False cond code) +-- operand vs. anything: invert the comparison so that we can use a +-- single comparison instruction. + | isOperand is32Bit x + , Just revcond <- maybeFlipCond cond = do + dflags <- getDynFlags + (y_reg, y_code) <- getNonClobberedReg y + (x_op, x_code) <- getOperand x + let + code = y_code `appOL` x_code `snocOL` + CMP (cmmTypeSize (cmmExprType dflags x)) x_op (OpReg y_reg) + return (CondCode False revcond code) -- anything vs anything condIntCode' _ cond x y = do diff --git a/compiler/nativeGen/X86/Cond.hs b/compiler/nativeGen/X86/Cond.hs index ce97095222..586dabd8f4 100644 --- a/compiler/nativeGen/X86/Cond.hs +++ b/compiler/nativeGen/X86/Cond.hs @@ -1,39 +1,32 @@ - -{-# OPTIONS -fno-warn-tabs #-} --- The above warning supression flag is a temporary kludge. --- While working on this module you are encouraged to remove it and --- detab the module (please do the detabbing in a separate patch). See --- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#TabsvsSpaces --- for details - module X86.Cond ( - Cond(..), - condUnsigned, - condToSigned, - condToUnsigned + Cond(..), + condUnsigned, + condToSigned, + condToUnsigned, + maybeFlipCond ) where data Cond - = ALWAYS -- What's really used? ToDo - | EQQ - | GE - | GEU - | GTT - | GU - | LE - | LEU - | LTT - | LU - | NE - | NEG - | POS - | CARRY - | OFLO - | PARITY - | NOTPARITY - deriving Eq + = ALWAYS -- What's really used? 
ToDo + | EQQ + | GE + | GEU + | GTT + | GU + | LE + | LEU + | LTT + | LU + | NE + | NEG + | POS + | CARRY + | OFLO + | PARITY + | NOTPARITY + deriving Eq condUnsigned :: Cond -> Bool condUnsigned GU = True @@ -57,3 +50,19 @@ condToUnsigned LTT = LU condToUnsigned GE = GEU condToUnsigned LE = LEU condToUnsigned x = x + +-- | @maybeFlipCond c@ returns @Just c'@ if it is possible to flip the +-- arguments to the conditional @c@, and the new condition should be @c'@. +maybeFlipCond :: Cond -> Maybe Cond +maybeFlipCond cond = case cond of + EQQ -> Just EQQ + NE -> Just NE + LU -> Just GU + GU -> Just LU + LEU -> Just GEU + GEU -> Just LEU + LTT -> Just GTT + GTT -> Just LTT + LE -> Just GE + GE -> Just LE + _other -> Nothing diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs index 7f0e48e769..7bd9b0cc9e 100644 --- a/compiler/nativeGen/X86/Instr.hs +++ b/compiler/nativeGen/X86/Instr.hs @@ -11,7 +11,7 @@ module X86.Instr (Instr(..), Operand(..), getJumpDestBlockId, canShortcut, shortcutStatics, - shortcutJump, i386_insert_ffrees, + shortcutJump, i386_insert_ffrees, allocMoreStack, maxSpillSlots, archWordSize) where @@ -58,6 +58,8 @@ instance Instruction Instr where mkRegRegMoveInstr = x86_mkRegRegMoveInstr takeRegRegMoveInstr = x86_takeRegRegMoveInstr mkJumpInstr = x86_mkJumpInstr + mkStackAllocInstr = x86_mkStackAllocInstr + mkStackDeallocInstr = x86_mkStackDeallocInstr -- ----------------------------------------------------------------------------- @@ -620,14 +622,13 @@ x86_mkSpillInstr -> Instr x86_mkSpillInstr dflags reg delta slot - = let off = spillSlotToOffset dflags slot + = let off = spillSlotToOffset dflags slot - delta in - let off_w = (off - delta) `div` (if is32Bit then 4 else 8) - in case targetClassOfReg platform reg of + case targetClassOfReg platform reg of RcInteger -> MOV (archWordSize is32Bit) - (OpReg reg) (OpAddr (spRel dflags off_w)) - RcDouble -> GST FF80 reg (spRel dflags off_w) {- RcFloat/RcDouble -} - RcDoubleSSE -> MOV FF64 (OpReg reg) (OpAddr (spRel dflags off_w)) + (OpReg reg) (OpAddr (spRel dflags off)) + RcDouble -> GST FF80 reg (spRel dflags off) {- RcFloat/RcDouble -} + RcDoubleSSE -> MOV FF64 (OpReg reg) (OpAddr (spRel dflags off)) _ -> panic "X86.mkSpillInstr: no match" where platform = targetPlatform dflags is32Bit = target32Bit platform @@ -641,14 +642,13 @@ x86_mkLoadInstr -> Instr x86_mkLoadInstr dflags reg delta slot - = let off = spillSlotToOffset dflags slot + = let off = spillSlotToOffset dflags slot - delta in - let off_w = (off-delta) `div` (if is32Bit then 4 else 8) - in case targetClassOfReg platform reg of + case targetClassOfReg platform reg of RcInteger -> MOV (archWordSize is32Bit) - (OpAddr (spRel dflags off_w)) (OpReg reg) - RcDouble -> GLD FF80 (spRel dflags off_w) reg {- RcFloat/RcDouble -} - RcDoubleSSE -> MOV FF64 (OpAddr (spRel dflags off_w)) (OpReg reg) + (OpAddr (spRel dflags off)) (OpReg reg) + RcDouble -> GLD FF80 (spRel dflags off) reg {- RcFloat/RcDouble -} + RcDoubleSSE -> MOV FF64 (OpAddr (spRel dflags off)) (OpReg reg) _ -> panic "X86.x86_mkLoadInstr" where platform = targetPlatform dflags is32Bit = target32Bit platform @@ -666,12 +666,7 @@ maxSpillSlots dflags -- the C stack pointer. 
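The new maybeFlipCond and the extra "operand vs. anything" case in condIntCode' rest on one fact: comparing the swapped operands under the flipped condition answers the same question, so a single CMP suffices even when only the left argument is already an operand. Here is a small self-contained model of that property; it assumes a cut-down Cond with only the signed comparisons, and the helper names holds and flipIsSound are invented for the example, not part of the compiler.

-- Cut-down model of X86.Cond. The real type also has unsigned and flag
-- conditions (LU/GU, CARRY, OFLO, ...), where maybeFlipCond either swaps
-- the unsigned pair or returns Nothing.
data Cond = EQQ | NE | LTT | GTT | LE | GE
  deriving (Eq, Show)

maybeFlipCond :: Cond -> Maybe Cond
maybeFlipCond c = case c of
  EQQ -> Just EQQ
  NE  -> Just NE
  LTT -> Just GTT
  GTT -> Just LTT
  LE  -> Just GE
  GE  -> Just LE

-- What a signed comparison effectively tests, modelled on Ints.
holds :: Cond -> Int -> Int -> Bool
holds EQQ = (==)
holds NE  = (/=)
holds LTT = (<)
holds GTT = (>)
holds LE  = (<=)
holds GE  = (>=)

-- The property the new case of condIntCode' relies on: swapping the
-- operands and flipping the condition gives the same answer, so one CMP
-- instruction covers both argument orders.
flipIsSound :: Cond -> Int -> Int -> Bool
flipIsSound c x y = case maybeFlipCond c of
  Just c' -> holds c x y == holds c' y x
  Nothing -> True   -- conditions that cannot be flipped keep the old code path

In the diff this is what lets condIntCode' handle a comparison whose left argument is already an operand with a single CMP, instead of first forcing that operand into a register.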
spillSlotToOffset :: DynFlags -> Int -> Int spillSlotToOffset dflags slot - | slot >= 0 && slot < maxSpillSlots dflags = 64 + spillSlotSize dflags * slot - | otherwise - = pprPanic "spillSlotToOffset:" - ( text "invalid spill location: " <> int slot - $$ text "maxSpillSlots: " <> int (maxSpillSlots dflags)) -------------------------------------------------------------------------------- @@ -744,8 +739,25 @@ x86_mkJumpInstr id = [JXX ALWAYS id] - - +x86_mkStackAllocInstr + :: Platform + -> Int + -> Instr +x86_mkStackAllocInstr platform amount + = case platformArch platform of + ArchX86 -> SUB II32 (OpImm (ImmInt amount)) (OpReg esp) + ArchX86_64 -> SUB II64 (OpImm (ImmInt amount)) (OpReg rsp) + _ -> panic "x86_mkStackAllocInstr" + +x86_mkStackDeallocInstr + :: Platform + -> Int + -> Instr +x86_mkStackDeallocInstr platform amount + = case platformArch platform of + ArchX86 -> ADD II32 (OpImm (ImmInt amount)) (OpReg esp) + ArchX86_64 -> ADD II64 (OpImm (ImmInt amount)) (OpReg rsp) + _ -> panic "x86_mkStackDeallocInstr" i386_insert_ffrees :: [GenBasicBlock Instr] @@ -753,18 +765,12 @@ i386_insert_ffrees i386_insert_ffrees blocks | or (map (any is_G_instr) [ instrs | BasicBlock _ instrs <- blocks ]) - = map ffree_before_nonlocal_transfers blocks - + = map insertGFREEs blocks | otherwise = blocks - where - ffree_before_nonlocal_transfers (BasicBlock id insns) - = BasicBlock id (foldr p [] insns) - where p insn r = case insn of - CALL _ _ -> GFREE : insn : r - JMP _ _ -> GFREE : insn : r - JXX_GBL _ _ -> panic "i386_insert_ffrees: cannot handle JXX_GBL" - _ -> insn : r + where + insertGFREEs (BasicBlock id insns) + = BasicBlock id (insertBeforeNonlocalTransfers GFREE insns) -- if you ever add a new FP insn to the fake x86 FP insn set, -- you must update this too @@ -796,6 +802,57 @@ is_G_instr instr _ -> False +-- +-- Note [extra spill slots] +-- +-- If the register allocator used more spill slots than we have +-- pre-allocated (rESERVED_C_STACK_BYTES), then we must allocate more +-- C stack space on entry and exit from this proc. Therefore we +-- insert a "sub $N, %rsp" at every entry point, and an "add $N, %rsp" +-- before every non-local jump. +-- +-- This became necessary when the new codegen started bundling entire +-- functions together into one proc, because the register allocator +-- assigns a different stack slot to each virtual reg within a proc. +-- To avoid using so many slots we could also: +-- +-- - split up the proc into connected components before code generator +-- +-- - rename the virtual regs, so that we re-use vreg names and hence +-- stack slots for non-overlapping vregs. 
+-- +allocMoreStack + :: Platform + -> Int + -> NatCmmDecl statics X86.Instr.Instr + -> NatCmmDecl statics X86.Instr.Instr + +allocMoreStack _ _ top@(CmmData _ _) = top +allocMoreStack platform amount (CmmProc info lbl (ListGraph code)) = + CmmProc info lbl (ListGraph (map insert_stack_insns code)) + where + alloc = mkStackAllocInstr platform amount + dealloc = mkStackDeallocInstr platform amount + + is_entry_point id = id `mapMember` info + + insert_stack_insns (BasicBlock id insns) + | is_entry_point id = BasicBlock id (alloc : block') + | otherwise = BasicBlock id block' + where + block' = insertBeforeNonlocalTransfers dealloc insns + + +insertBeforeNonlocalTransfers :: Instr -> [Instr] -> [Instr] +insertBeforeNonlocalTransfers insert insns + = foldr p [] insns + where p insn r = case insn of + CALL _ _ -> insert : insn : r + JMP _ _ -> insert : insn : r + JXX_GBL _ _ -> panic "insertBeforeNonlocalTransfers: cannot handle JXX_GBL" + _ -> insn : r + + data JumpDest = DestBlockId BlockId | DestImm Imm getJumpDestBlockId :: JumpDest -> Maybe BlockId diff --git a/compiler/nativeGen/X86/Regs.hs b/compiler/nativeGen/X86/Regs.hs index 4eec96f5e1..6b2fe16855 100644 --- a/compiler/nativeGen/X86/Regs.hs +++ b/compiler/nativeGen/X86/Regs.hs @@ -196,13 +196,13 @@ addrModeRegs _ = [] spRel :: DynFlags - -> Int -- ^ desired stack offset in words, positive or negative + -> Int -- ^ desired stack offset in bytes, positive or negative -> AddrMode spRel dflags n | target32Bit (targetPlatform dflags) - = AddrBaseIndex (EABaseReg esp) EAIndexNone (ImmInt (n * wORD_SIZE dflags)) + = AddrBaseIndex (EABaseReg esp) EAIndexNone (ImmInt n) | otherwise - = AddrBaseIndex (EABaseReg rsp) EAIndexNone (ImmInt (n * wORD_SIZE dflags)) + = AddrBaseIndex (EABaseReg rsp) EAIndexNone (ImmInt n) -- The register numbers must fit into 32 bits on x86, so that we can -- use a Word32 to represent the set of free registers in the register |
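As Note [extra spill slots] describes, when the allocator needs more spill slots than the pre-allocated rESERVED_C_STACK_BYTES provide, the x86/x86_64 backend rewrites the proc: a stack-allocation instruction is prepended to every entry-point block, and a matching deallocation is placed immediately before every non-local transfer. The toy sketch below only mirrors the shape of that rewrite; the Instr and Block types, the entry-point set, and the ghci session are invented for the example (the real code keys entry points off the proc's info-table map and panics on JXX_GBL).

import qualified Data.Set as S

data Instr
  = SubSp Int          -- stand-in for SUB $N, %rsp (allocate)
  | AddSp Int          -- stand-in for ADD $N, %rsp (deallocate)
  | Call String
  | Jmp String         -- non-local jump (tail call)
  | Jxx Int            -- local jump to a block in the same proc
  | Other String
  deriving Show

data Block = Block { blockId :: Int, blockInstrs :: [Instr] } deriving Show

-- Place the given instruction directly before every CALL and JMP.
insertBeforeNonlocalTransfers :: Instr -> [Instr] -> [Instr]
insertBeforeNonlocalTransfers ins = foldr step []
  where
    step i rest = case i of
      Call _ -> ins : i : rest
      Jmp _  -> ins : i : rest
      _      -> i : rest

-- Grow the C stack on entry, shrink it again before leaving the proc.
allocMoreStack :: Int -> S.Set Int -> [Block] -> [Block]
allocMoreStack amount entryPoints = map fixBlock
  where
    fixBlock (Block bid is)
      | bid `S.member` entryPoints = Block bid (SubSp amount : is')
      | otherwise                  = Block bid is'
      where is' = insertBeforeNonlocalTransfers (AddSp amount) is

-- ghci> allocMoreStack 64 (S.singleton 0)
--         [Block 0 [Other "spill-heavy code", Jxx 1], Block 1 [Jmp "f"]]
-- [Block {blockId = 0, blockInstrs = [SubSp 64, Other "spill-heavy code", Jxx 1]},
--  Block {blockId = 1, blockInstrs = [AddSp 64, Jmp "f"]}]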
