Diffstat (limited to 'compiler/nativeGen')
-rw-r--r--  compiler/nativeGen/AsmCodeGen.lhs              |  85
-rw-r--r--  compiler/nativeGen/Instruction.hs              |  13
-rw-r--r--  compiler/nativeGen/PPC/Instr.hs                |   2
-rw-r--r--  compiler/nativeGen/RegAlloc/Graph/Stats.hs     | 400
-rw-r--r--  compiler/nativeGen/RegAlloc/Linear/Main.hs     |  25
-rw-r--r--  compiler/nativeGen/RegAlloc/Linear/StackMap.hs |  39
-rw-r--r--  compiler/nativeGen/RegAlloc/Liveness.hs        |   7
-rw-r--r--  compiler/nativeGen/SPARC/Instr.hs              |   2
-rw-r--r--  compiler/nativeGen/X86/CodeGen.hs              |  30
-rw-r--r--  compiler/nativeGen/X86/Cond.hs                 |  69
-rw-r--r--  compiler/nativeGen/X86/Instr.hs                | 117
-rw-r--r--  compiler/nativeGen/X86/Regs.hs                 |   6
12 files changed, 458 insertions(+), 337 deletions(-)
diff --git a/compiler/nativeGen/AsmCodeGen.lhs b/compiler/nativeGen/AsmCodeGen.lhs
index 8c608f1bf1..47fd96c426 100644
--- a/compiler/nativeGen/AsmCodeGen.lhs
+++ b/compiler/nativeGen/AsmCodeGen.lhs
@@ -51,7 +51,7 @@ import NCGMonad
import BlockId
import CgUtils ( fixStgRegisters )
import OldCmm
-import CmmOpt ( cmmEliminateDeadBlocks, cmmMiniInline, cmmMachOpFold )
+import CmmOpt ( cmmMachOpFold )
import OldPprCmm
import CLabel
@@ -133,16 +133,17 @@ The machine-dependent bits break down as follows:
data NcgImpl statics instr jumpDest = NcgImpl {
cmmTopCodeGen :: RawCmmDecl -> NatM [NatCmmDecl statics instr],
- generateJumpTableForInstr :: DynFlags -> instr -> Maybe (NatCmmDecl statics instr),
+ generateJumpTableForInstr :: instr -> Maybe (NatCmmDecl statics instr),
getJumpDestBlockId :: jumpDest -> Maybe BlockId,
canShortcut :: instr -> Maybe jumpDest,
shortcutStatics :: (BlockId -> Maybe jumpDest) -> statics -> statics,
shortcutJump :: (BlockId -> Maybe jumpDest) -> instr -> instr,
pprNatCmmDecl :: NatCmmDecl statics instr -> SDoc,
- maxSpillSlots :: DynFlags -> Int,
- allocatableRegs :: Platform -> [RealReg],
+ maxSpillSlots :: Int,
+ allocatableRegs :: [RealReg],
ncg_x86fp_kludge :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr],
ncgExpandTop :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr],
+ ncgAllocMoreStack :: Int -> NatCmmDecl statics instr -> NatCmmDecl statics instr,
ncgMakeFarBranches :: [NatBasicBlock instr] -> [NatBasicBlock instr]
}
@@ -154,15 +155,16 @@ nativeCodeGen dflags h us cmms
nCG' ncgImpl = nativeCodeGen' dflags ncgImpl h us cmms
x86NcgImpl = NcgImpl {
cmmTopCodeGen = X86.CodeGen.cmmTopCodeGen
- ,generateJumpTableForInstr = X86.CodeGen.generateJumpTableForInstr
+ ,generateJumpTableForInstr = X86.CodeGen.generateJumpTableForInstr dflags
,getJumpDestBlockId = X86.Instr.getJumpDestBlockId
,canShortcut = X86.Instr.canShortcut
,shortcutStatics = X86.Instr.shortcutStatics
,shortcutJump = X86.Instr.shortcutJump
,pprNatCmmDecl = X86.Ppr.pprNatCmmDecl
- ,maxSpillSlots = X86.Instr.maxSpillSlots
- ,allocatableRegs = X86.Regs.allocatableRegs
+ ,maxSpillSlots = X86.Instr.maxSpillSlots dflags
+ ,allocatableRegs = X86.Regs.allocatableRegs platform
,ncg_x86fp_kludge = id
+ ,ncgAllocMoreStack = X86.Instr.allocMoreStack platform
,ncgExpandTop = id
,ncgMakeFarBranches = id
}
@@ -172,30 +174,32 @@ nativeCodeGen dflags h us cmms
ArchPPC ->
nCG' $ NcgImpl {
cmmTopCodeGen = PPC.CodeGen.cmmTopCodeGen
- ,generateJumpTableForInstr = PPC.CodeGen.generateJumpTableForInstr
+ ,generateJumpTableForInstr = PPC.CodeGen.generateJumpTableForInstr dflags
,getJumpDestBlockId = PPC.RegInfo.getJumpDestBlockId
,canShortcut = PPC.RegInfo.canShortcut
,shortcutStatics = PPC.RegInfo.shortcutStatics
,shortcutJump = PPC.RegInfo.shortcutJump
,pprNatCmmDecl = PPC.Ppr.pprNatCmmDecl
- ,maxSpillSlots = PPC.Instr.maxSpillSlots
- ,allocatableRegs = PPC.Regs.allocatableRegs
+ ,maxSpillSlots = PPC.Instr.maxSpillSlots dflags
+ ,allocatableRegs = PPC.Regs.allocatableRegs platform
,ncg_x86fp_kludge = id
+ ,ncgAllocMoreStack = noAllocMoreStack
,ncgExpandTop = id
,ncgMakeFarBranches = makeFarBranches
}
ArchSPARC ->
nCG' $ NcgImpl {
cmmTopCodeGen = SPARC.CodeGen.cmmTopCodeGen
- ,generateJumpTableForInstr = SPARC.CodeGen.generateJumpTableForInstr
+ ,generateJumpTableForInstr = SPARC.CodeGen.generateJumpTableForInstr dflags
,getJumpDestBlockId = SPARC.ShortcutJump.getJumpDestBlockId
,canShortcut = SPARC.ShortcutJump.canShortcut
,shortcutStatics = SPARC.ShortcutJump.shortcutStatics
,shortcutJump = SPARC.ShortcutJump.shortcutJump
,pprNatCmmDecl = SPARC.Ppr.pprNatCmmDecl
- ,maxSpillSlots = SPARC.Instr.maxSpillSlots
- ,allocatableRegs = \_ -> SPARC.Regs.allocatableRegs
+ ,maxSpillSlots = SPARC.Instr.maxSpillSlots dflags
+ ,allocatableRegs = SPARC.Regs.allocatableRegs
,ncg_x86fp_kludge = id
+ ,ncgAllocMoreStack = noAllocMoreStack
,ncgExpandTop = map SPARC.CodeGen.Expand.expandTop
,ncgMakeFarBranches = id
}
@@ -206,6 +210,23 @@ nativeCodeGen dflags h us cmms
ArchUnknown ->
panic "nativeCodeGen: No NCG for unknown arch"
+
+--
+-- Allocating more stack space for spilling is currently only
+-- supported for the linear register allocator on x86/x86_64, the rest
+-- default to the panic below. To support allocating extra stack on
+-- more platforms provide a definition of ncgAllocMoreStack.
+--
+noAllocMoreStack :: Int -> NatCmmDecl statics instr -> NatCmmDecl statics instr
+noAllocMoreStack amount _
+ = panic $ "Register allocator: out of stack slots (need " ++ show amount ++ ")\n"
+ ++ " If you are trying to compile SHA1.hs from the crypto library then this\n"
+ ++ " is a known limitation in the linear allocator.\n"
+ ++ "\n"
+ ++ " Try enabling the graph colouring allocator with -fregs-graph instead."
+ ++ " You can still file a bug report if you like.\n"
+
+
nativeCodeGen' :: (Outputable statics, Outputable instr, Instruction instr)
=> DynFlags
-> NcgImpl statics instr jumpDest
@@ -419,7 +440,7 @@ cmmNativeGen dflags ncgImpl us cmm count
= foldr (\r -> plusUFM_C unionUniqSets
$ unitUFM (targetClassOfRealReg platform r) (unitUniqSet r))
emptyUFM
- $ allocatableRegs ncgImpl platform
+ $ allocatableRegs ncgImpl
-- do the graph coloring register allocation
let ((alloced, regAllocStats), usAlloc)
@@ -428,7 +449,7 @@ cmmNativeGen dflags ncgImpl us cmm count
$ Color.regAlloc
dflags
alloc_regs
- (mkUniqSet [0 .. maxSpillSlots ncgImpl dflags])
+ (mkUniqSet [0 .. maxSpillSlots ncgImpl])
withLiveness
-- dump out what happened during register allocation
@@ -457,11 +478,20 @@ cmmNativeGen dflags ncgImpl us cmm count
else do
-- do linear register allocation
+ let reg_alloc proc = do
+ (alloced, maybe_more_stack, ra_stats) <-
+ Linear.regAlloc dflags proc
+ case maybe_more_stack of
+ Nothing -> return ( alloced, ra_stats )
+ Just amount ->
+ return ( ncgAllocMoreStack ncgImpl amount alloced
+ , ra_stats )
+
let ((alloced, regAllocStats), usAlloc)
= {-# SCC "RegAlloc" #-}
initUs usLive
$ liftM unzip
- $ mapM (Linear.regAlloc dflags) withLiveness
+ $ mapM reg_alloc withLiveness
dumpIfSet_dyn dflags
Opt_D_dump_asm_regalloc "Registers allocated"
@@ -490,7 +520,7 @@ cmmNativeGen dflags ncgImpl us cmm count
---- generate jump tables
let tabled =
{-# SCC "generateJumpTables" #-}
- generateJumpTables dflags ncgImpl kludged
+ generateJumpTables ncgImpl kludged
---- shortcut branches
let shorted =
@@ -711,12 +741,12 @@ makeFarBranches blocks
-- Analyzes all native code and generates data sections for all jump
-- table instructions.
generateJumpTables
- :: DynFlags -> NcgImpl statics instr jumpDest
- -> [NatCmmDecl statics instr] -> [NatCmmDecl statics instr]
-generateJumpTables dflags ncgImpl xs = concatMap f xs
+ :: NcgImpl statics instr jumpDest
+ -> [NatCmmDecl statics instr] -> [NatCmmDecl statics instr]
+generateJumpTables ncgImpl xs = concatMap f xs
where f p@(CmmProc _ _ (ListGraph xs)) = p : concatMap g xs
f p = [p]
- g (BasicBlock _ xs) = catMaybes (map (generateJumpTableForInstr ncgImpl dflags) xs)
+ g (BasicBlock _ xs) = catMaybes (map (generateJumpTableForInstr ncgImpl) xs)
-- -----------------------------------------------------------------------------
-- Shortcut branches
@@ -828,15 +858,13 @@ genMachCode dflags cmmTopCodeGen cmm_top
Here we do:
(a) Constant folding
- (b) Simple inlining: a temporary which is assigned to and then
- used, once, can be shorted.
(c) Position independent code and dynamic linking
(i) introduce the appropriate indirections
and position independent refs
(ii) compile a list of imported symbols
(d) Some arch-specific optimizations
-(a) and (b) will be moving to the new Hoopl pipeline, however, (c) and
+(a) will be moving to the new Hoopl pipeline, however, (c) and
(d) are only needed by the native backend and will continue to live
here.
@@ -851,14 +879,7 @@ Ideas for other things we could do (put these in Hoopl please!):
cmmToCmm :: DynFlags -> RawCmmDecl -> (RawCmmDecl, [CLabel])
cmmToCmm _ top@(CmmData _ _) = (top, [])
cmmToCmm dflags (CmmProc info lbl (ListGraph blocks)) = runCmmOpt dflags $ do
- let reachable_blocks | dopt Opt_TryNewCodeGen dflags = blocks
- | otherwise = cmmEliminateDeadBlocks blocks
- -- The new codegen path has already eliminated unreachable blocks by now
-
- inlined_blocks | dopt Opt_TryNewCodeGen dflags = reachable_blocks
- | otherwise = cmmMiniInline dflags reachable_blocks
-
- blocks' <- mapM cmmBlockConFold inlined_blocks
+ blocks' <- mapM cmmBlockConFold blocks
return $ CmmProc info lbl (ListGraph blocks')
newtype CmmOptM a = CmmOptM (([CLabel], DynFlags) -> (# a, [CLabel] #))
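Besides threading the new ncgAllocMoreStack hook through, this file reshapes NcgImpl so that fields such as maxSpillSlots and allocatableRegs no longer take DynFlags or Platform: those are applied once, where the record is built. A minimal, self-contained sketch of that pattern (toy types standing in for GHC's, not the real definitions):

    data Config = Config { cfgSpillSlots :: Int, cfgRegNames :: [String] }

    -- Before: every field took the configuration as an argument.
    -- After:  the configuration is baked in when the record is constructed.
    data Ops = Ops
      { maxSlots  :: Int        -- was: Config -> Int
      , allocRegs :: [String]   -- was: Config -> [String]
      }

    mkOps :: Config -> Ops
    mkOps cfg = Ops
      { maxSlots  = cfgSpillSlots cfg
      , allocRegs = cfgRegNames cfg
      }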
diff --git a/compiler/nativeGen/Instruction.hs b/compiler/nativeGen/Instruction.hs
index 64ba32c6dc..86f5ae435d 100644
--- a/compiler/nativeGen/Instruction.hs
+++ b/compiler/nativeGen/Instruction.hs
@@ -163,3 +163,16 @@ class Instruction instr where
-> [instr]
+ -- Subtract an amount from the C stack pointer
+ mkStackAllocInstr
+ :: Platform -- TODO: remove (needed by x86/x86_64
+ -- because they share an Instr type)
+ -> Int
+ -> instr
+
+ -- Add an amount to the C stack pointer
+ mkStackDeallocInstr
+ :: Platform -- TODO: remove (needed by x86/x86_64
+ -- because they share an Instr type)
+ -> Int
+ -> instr
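The two new methods only build single instructions. A hedged sketch (hypothetical helper, not part of this patch; assumes the Instruction class and Platform are in scope) of how a consumer can bracket a block with them, which is roughly what X86.Instr.allocMoreStack does further down for entry blocks and non-local control transfers:

    -- Allocate `amount` bytes of C stack around `body`.
    -- NB: allocMoreStack is more careful than this: it deallocates
    -- immediately before each non-local jump, not after the whole block.
    wrapWithStack :: Instruction instr => Platform -> Int -> [instr] -> [instr]
    wrapWithStack platform amount body
      =  mkStackAllocInstr platform amount
      :  body ++ [mkStackDeallocInstr platform amount]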
diff --git a/compiler/nativeGen/PPC/Instr.hs b/compiler/nativeGen/PPC/Instr.hs
index 464a88a08b..1f5e809abb 100644
--- a/compiler/nativeGen/PPC/Instr.hs
+++ b/compiler/nativeGen/PPC/Instr.hs
@@ -64,6 +64,8 @@ instance Instruction Instr where
mkRegRegMoveInstr _ = ppc_mkRegRegMoveInstr
takeRegRegMoveInstr = ppc_takeRegRegMoveInstr
mkJumpInstr = ppc_mkJumpInstr
+ mkStackAllocInstr = panic "no ppc_mkStackAllocInstr"
+ mkStackDeallocInstr = panic "no ppc_mkStackDeallocInstr"
-- -----------------------------------------------------------------------------
diff --git a/compiler/nativeGen/RegAlloc/Graph/Stats.hs b/compiler/nativeGen/RegAlloc/Graph/Stats.hs
index 32970336ad..f85cdb7eff 100644
--- a/compiler/nativeGen/RegAlloc/Graph/Stats.hs
+++ b/compiler/nativeGen/RegAlloc/Graph/Stats.hs
@@ -1,23 +1,16 @@
-{-# OPTIONS -fno-warn-missing-signatures #-}
--- | Carries interesting info for debugging / profiling of the
--- graph coloring register allocator.
-{-# OPTIONS -fno-warn-tabs #-}
--- The above warning supression flag is a temporary kludge.
--- While working on this module you are encouraged to remove it and
--- detab the module (please do the detabbing in a separate patch). See
--- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#TabsvsSpaces
--- for details
+-- | Carries interesting info for debugging / profiling of the
+-- graph coloring register allocator.
module RegAlloc.Graph.Stats (
- RegAllocStats (..),
+ RegAllocStats (..),
- pprStats,
- pprStatsSpills,
- pprStatsLifetimes,
- pprStatsConflict,
- pprStatsLifeConflict,
+ pprStats,
+ pprStatsSpills,
+ pprStatsLifetimes,
+ pprStatsConflict,
+ pprStatsLifeConflict,
- countSRMs, addSRM
+ countSRMs, addSRM
)
where
@@ -45,251 +38,260 @@ import Data.List
data RegAllocStats statics instr
- -- initial graph
- = RegAllocStatsStart
- { raLiveCmm :: [LiveCmmDecl statics instr] -- ^ initial code, with liveness
- , raGraph :: Color.Graph VirtualReg RegClass RealReg -- ^ the initial, uncolored graph
- , raSpillCosts :: SpillCostInfo } -- ^ information to help choose which regs to spill
-
- -- a spill stage
- | RegAllocStatsSpill
- { raCode :: [LiveCmmDecl statics instr] -- ^ the code we tried to allocate registers for
- , raGraph :: Color.Graph VirtualReg RegClass RealReg -- ^ the partially colored graph
- , raCoalesced :: UniqFM VirtualReg -- ^ the regs that were coaleced
- , raSpillStats :: SpillStats -- ^ spiller stats
- , raSpillCosts :: SpillCostInfo -- ^ number of instrs each reg lives for
- , raSpilled :: [LiveCmmDecl statics instr] } -- ^ code with spill instructions added
-
- -- a successful coloring
- | RegAllocStatsColored
- { raCode :: [LiveCmmDecl statics instr] -- ^ the code we tried to allocate registers for
- , raGraph :: Color.Graph VirtualReg RegClass RealReg -- ^ the uncolored graph
- , raGraphColored :: Color.Graph VirtualReg RegClass RealReg -- ^ the coalesced and colored graph
- , raCoalesced :: UniqFM VirtualReg -- ^ the regs that were coaleced
- , raCodeCoalesced :: [LiveCmmDecl statics instr] -- ^ code with coalescings applied
- , raPatched :: [LiveCmmDecl statics instr] -- ^ code with vregs replaced by hregs
- , raSpillClean :: [LiveCmmDecl statics instr] -- ^ code with unneeded spill\/reloads cleaned out
- , raFinal :: [NatCmmDecl statics instr] -- ^ final code
- , raSRMs :: (Int, Int, Int) } -- ^ spill\/reload\/reg-reg moves present in this code
+ -- initial graph
+ = RegAllocStatsStart
+ { raLiveCmm :: [LiveCmmDecl statics instr] -- ^ initial code, with liveness
+ , raGraph :: Color.Graph VirtualReg RegClass RealReg -- ^ the initial, uncolored graph
+ , raSpillCosts :: SpillCostInfo } -- ^ information to help choose which regs to spill
+
+ -- a spill stage
+ | RegAllocStatsSpill
+ { raCode :: [LiveCmmDecl statics instr] -- ^ the code we tried to allocate registers for
+ , raGraph :: Color.Graph VirtualReg RegClass RealReg -- ^ the partially colored graph
+ , raCoalesced :: UniqFM VirtualReg -- ^ the regs that were coaleced
+ , raSpillStats :: SpillStats -- ^ spiller stats
+ , raSpillCosts :: SpillCostInfo -- ^ number of instrs each reg lives for
+ , raSpilled :: [LiveCmmDecl statics instr] } -- ^ code with spill instructions added
+
+ -- a successful coloring
+ | RegAllocStatsColored
+ { raCode :: [LiveCmmDecl statics instr] -- ^ the code we tried to allocate registers for
+ , raGraph :: Color.Graph VirtualReg RegClass RealReg -- ^ the uncolored graph
+ , raGraphColored :: Color.Graph VirtualReg RegClass RealReg -- ^ the coalesced and colored graph
+ , raCoalesced :: UniqFM VirtualReg -- ^ the regs that were coaleced
+ , raCodeCoalesced :: [LiveCmmDecl statics instr] -- ^ code with coalescings applied
+ , raPatched :: [LiveCmmDecl statics instr] -- ^ code with vregs replaced by hregs
+ , raSpillClean :: [LiveCmmDecl statics instr] -- ^ code with unneeded spill\/reloads cleaned out
+ , raFinal :: [NatCmmDecl statics instr] -- ^ final code
+ , raSRMs :: (Int, Int, Int) } -- ^ spill\/reload\/reg-reg moves present in this code
instance (Outputable statics, Outputable instr) => Outputable (RegAllocStats statics instr) where
ppr (s@RegAllocStatsStart{}) = sdocWithPlatform $ \platform ->
- text "# Start"
- $$ text "# Native code with liveness information."
- $$ ppr (raLiveCmm s)
- $$ text ""
- $$ text "# Initial register conflict graph."
- $$ Color.dotGraph
- (targetRegDotColor platform)
- (trivColorable platform
- (targetVirtualRegSqueeze platform)
- (targetRealRegSqueeze platform))
- (raGraph s)
+ text "# Start"
+ $$ text "# Native code with liveness information."
+ $$ ppr (raLiveCmm s)
+ $$ text ""
+ $$ text "# Initial register conflict graph."
+ $$ Color.dotGraph
+ (targetRegDotColor platform)
+ (trivColorable platform
+ (targetVirtualRegSqueeze platform)
+ (targetRealRegSqueeze platform))
+ (raGraph s)
ppr (s@RegAllocStatsSpill{}) =
- text "# Spill"
+ text "# Spill"
- $$ text "# Code with liveness information."
- $$ ppr (raCode s)
- $$ text ""
+ $$ text "# Code with liveness information."
+ $$ ppr (raCode s)
+ $$ text ""
- $$ (if (not $ isNullUFM $ raCoalesced s)
- then text "# Registers coalesced."
- $$ (vcat $ map ppr $ ufmToList $ raCoalesced s)
- $$ text ""
- else empty)
+ $$ (if (not $ isNullUFM $ raCoalesced s)
+ then text "# Registers coalesced."
+ $$ (vcat $ map ppr $ ufmToList $ raCoalesced s)
+ $$ text ""
+ else empty)
- $$ text "# Spills inserted."
- $$ ppr (raSpillStats s)
- $$ text ""
+ $$ text "# Spills inserted."
+ $$ ppr (raSpillStats s)
+ $$ text ""
- $$ text "# Code with spills inserted."
- $$ ppr (raSpilled s)
+ $$ text "# Code with spills inserted."
+ $$ ppr (raSpilled s)
ppr (s@RegAllocStatsColored { raSRMs = (spills, reloads, moves) }) = sdocWithPlatform $ \platform ->
- text "# Colored"
-
- $$ text "# Code with liveness information."
- $$ ppr (raCode s)
- $$ text ""
-
- $$ text "# Register conflict graph (colored)."
- $$ Color.dotGraph
- (targetRegDotColor platform)
- (trivColorable platform
- (targetVirtualRegSqueeze platform)
- (targetRealRegSqueeze platform))
- (raGraphColored s)
- $$ text ""
-
- $$ (if (not $ isNullUFM $ raCoalesced s)
- then text "# Registers coalesced."
- $$ (vcat $ map ppr $ ufmToList $ raCoalesced s)
- $$ text ""
- else empty)
-
- $$ text "# Native code after coalescings applied."
- $$ ppr (raCodeCoalesced s)
- $$ text ""
-
- $$ text "# Native code after register allocation."
- $$ ppr (raPatched s)
- $$ text ""
-
- $$ text "# Clean out unneeded spill/reloads."
- $$ ppr (raSpillClean s)
- $$ text ""
-
- $$ text "# Final code, after rewriting spill/rewrite pseudo instrs."
- $$ ppr (raFinal s)
- $$ text ""
- $$ text "# Score:"
- $$ (text "# spills inserted: " <> int spills)
- $$ (text "# reloads inserted: " <> int reloads)
- $$ (text "# reg-reg moves remaining: " <> int moves)
- $$ text ""
+ text "# Colored"
+
+ $$ text "# Code with liveness information."
+ $$ ppr (raCode s)
+ $$ text ""
+
+ $$ text "# Register conflict graph (colored)."
+ $$ Color.dotGraph
+ (targetRegDotColor platform)
+ (trivColorable platform
+ (targetVirtualRegSqueeze platform)
+ (targetRealRegSqueeze platform))
+ (raGraphColored s)
+ $$ text ""
+
+ $$ (if (not $ isNullUFM $ raCoalesced s)
+ then text "# Registers coalesced."
+ $$ (vcat $ map ppr $ ufmToList $ raCoalesced s)
+ $$ text ""
+ else empty)
+
+ $$ text "# Native code after coalescings applied."
+ $$ ppr (raCodeCoalesced s)
+ $$ text ""
+
+ $$ text "# Native code after register allocation."
+ $$ ppr (raPatched s)
+ $$ text ""
+
+ $$ text "# Clean out unneeded spill/reloads."
+ $$ ppr (raSpillClean s)
+ $$ text ""
+
+ $$ text "# Final code, after rewriting spill/rewrite pseudo instrs."
+ $$ ppr (raFinal s)
+ $$ text ""
+ $$ text "# Score:"
+ $$ (text "# spills inserted: " <> int spills)
+ $$ (text "# reloads inserted: " <> int reloads)
+ $$ (text "# reg-reg moves remaining: " <> int moves)
+ $$ text ""
-- | Do all the different analysis on this list of RegAllocStats
-pprStats
- :: [RegAllocStats statics instr]
- -> Color.Graph VirtualReg RegClass RealReg
- -> SDoc
-
+pprStats
+ :: [RegAllocStats statics instr]
+ -> Color.Graph VirtualReg RegClass RealReg
+ -> SDoc
+
pprStats stats graph
- = let outSpills = pprStatsSpills stats
- outLife = pprStatsLifetimes stats
- outConflict = pprStatsConflict stats
- outScatter = pprStatsLifeConflict stats graph
+ = let outSpills = pprStatsSpills stats
+ outLife = pprStatsLifetimes stats
+ outConflict = pprStatsConflict stats
+ outScatter = pprStatsLifeConflict stats graph
- in vcat [outSpills, outLife, outConflict, outScatter]
+ in vcat [outSpills, outLife, outConflict, outScatter]
-- | Dump a table of how many spill loads \/ stores were inserted for each vreg.
pprStatsSpills
- :: [RegAllocStats statics instr] -> SDoc
+ :: [RegAllocStats statics instr] -> SDoc
pprStatsSpills stats
= let
- finals = [ s | s@RegAllocStatsColored{} <- stats]
+ finals = [ s | s@RegAllocStatsColored{} <- stats]
- -- sum up how many stores\/loads\/reg-reg-moves were left in the code
- total = foldl' addSRM (0, 0, 0)
- $ map raSRMs finals
+ -- sum up how many stores\/loads\/reg-reg-moves were left in the code
+ total = foldl' addSRM (0, 0, 0)
+ $ map raSRMs finals
- in ( text "-- spills-added-total"
- $$ text "-- (stores, loads, reg_reg_moves_remaining)"
- $$ ppr total
- $$ text "")
+ in ( text "-- spills-added-total"
+ $$ text "-- (stores, loads, reg_reg_moves_remaining)"
+ $$ ppr total
+ $$ text "")
-- | Dump a table of how long vregs tend to live for in the initial code.
pprStatsLifetimes
- :: [RegAllocStats statics instr] -> SDoc
+ :: [RegAllocStats statics instr] -> SDoc
pprStatsLifetimes stats
- = let info = foldl' plusSpillCostInfo zeroSpillCostInfo
- [ raSpillCosts s
- | s@RegAllocStatsStart{} <- stats ]
+ = let info = foldl' plusSpillCostInfo zeroSpillCostInfo
+ [ raSpillCosts s
+ | s@RegAllocStatsStart{} <- stats ]
- lifeBins = binLifetimeCount $ lifeMapFromSpillCostInfo info
+ lifeBins = binLifetimeCount $ lifeMapFromSpillCostInfo info
- in ( text "-- vreg-population-lifetimes"
- $$ text "-- (instruction_count, number_of_vregs_that_lived_that_long)"
- $$ (vcat $ map ppr $ eltsUFM lifeBins)
- $$ text "\n")
+ in ( text "-- vreg-population-lifetimes"
+ $$ text "-- (instruction_count, number_of_vregs_that_lived_that_long)"
+ $$ (vcat $ map ppr $ eltsUFM lifeBins)
+ $$ text "\n")
binLifetimeCount :: UniqFM (VirtualReg, Int) -> UniqFM (Int, Int)
binLifetimeCount fm
- = let lifes = map (\l -> (l, (l, 1)))
- $ map snd
- $ eltsUFM fm
+ = let lifes = map (\l -> (l, (l, 1)))
+ $ map snd
+ $ eltsUFM fm
- in addListToUFM_C
- (\(l1, c1) (_, c2) -> (l1, c1 + c2))
- emptyUFM
- lifes
+ in addListToUFM_C
+ (\(l1, c1) (_, c2) -> (l1, c1 + c2))
+ emptyUFM
+ lifes
-- | Dump a table of how many conflicts vregs tend to have in the initial code.
pprStatsConflict
- :: [RegAllocStats statics instr] -> SDoc
+ :: [RegAllocStats statics instr] -> SDoc
pprStatsConflict stats
- = let confMap = foldl' (plusUFM_C (\(c1, n1) (_, n2) -> (c1, n1 + n2)))
- emptyUFM
- $ map Color.slurpNodeConflictCount
- [ raGraph s | s@RegAllocStatsStart{} <- stats ]
+ = let confMap = foldl' (plusUFM_C (\(c1, n1) (_, n2) -> (c1, n1 + n2)))
+ emptyUFM
+ $ map Color.slurpNodeConflictCount
+ [ raGraph s | s@RegAllocStatsStart{} <- stats ]
- in ( text "-- vreg-conflicts"
- $$ text "-- (conflict_count, number_of_vregs_that_had_that_many_conflicts)"
- $$ (vcat $ map ppr $ eltsUFM confMap)
- $$ text "\n")
+ in ( text "-- vreg-conflicts"
+ $$ text "-- (conflict_count, number_of_vregs_that_had_that_many_conflicts)"
+ $$ (vcat $ map ppr $ eltsUFM confMap)
+ $$ text "\n")
-- | For every vreg, dump it's how many conflicts it has and its lifetime
--- good for making a scatter plot.
+-- good for making a scatter plot.
pprStatsLifeConflict
- :: [RegAllocStats statics instr]
- -> Color.Graph VirtualReg RegClass RealReg -- ^ global register conflict graph
- -> SDoc
+ :: [RegAllocStats statics instr]
+ -> Color.Graph VirtualReg RegClass RealReg -- ^ global register conflict graph
+ -> SDoc
pprStatsLifeConflict stats graph
- = let lifeMap = lifeMapFromSpillCostInfo
- $ foldl' plusSpillCostInfo zeroSpillCostInfo
- $ [ raSpillCosts s | s@RegAllocStatsStart{} <- stats ]
-
- scatter = map (\r -> let lifetime = case lookupUFM lifeMap r of
- Just (_, l) -> l
- Nothing -> 0
- Just node = Color.lookupNode graph r
- in parens $ hcat $ punctuate (text ", ")
- [ doubleQuotes $ ppr $ Color.nodeId node
- , ppr $ sizeUniqSet (Color.nodeConflicts node)
- , ppr $ lifetime ])
- $ map Color.nodeId
- $ eltsUFM
- $ Color.graphMap graph
-
- in ( text "-- vreg-conflict-lifetime"
- $$ text "-- (vreg, vreg_conflicts, vreg_lifetime)"
- $$ (vcat scatter)
- $$ text "\n")
+ = let lifeMap = lifeMapFromSpillCostInfo
+ $ foldl' plusSpillCostInfo zeroSpillCostInfo
+ $ [ raSpillCosts s | s@RegAllocStatsStart{} <- stats ]
+
+ scatter = map (\r -> let lifetime = case lookupUFM lifeMap r of
+ Just (_, l) -> l
+ Nothing -> 0
+ Just node = Color.lookupNode graph r
+ in parens $ hcat $ punctuate (text ", ")
+ [ doubleQuotes $ ppr $ Color.nodeId node
+ , ppr $ sizeUniqSet (Color.nodeConflicts node)
+ , ppr $ lifetime ])
+ $ map Color.nodeId
+ $ eltsUFM
+ $ Color.graphMap graph
+
+ in ( text "-- vreg-conflict-lifetime"
+ $$ text "-- (vreg, vreg_conflicts, vreg_lifetime)"
+ $$ (vcat scatter)
+ $$ text "\n")
-- | Count spill/reload/reg-reg moves.
--- Lets us see how well the register allocator has done.
-countSRMs
- :: Instruction instr
- => LiveCmmDecl statics instr -> (Int, Int, Int)
+-- Lets us see how well the register allocator has done.
+countSRMs
+ :: Instruction instr
+ => LiveCmmDecl statics instr -> (Int, Int, Int)
countSRMs cmm
- = execState (mapBlockTopM countSRM_block cmm) (0, 0, 0)
+ = execState (mapBlockTopM countSRM_block cmm) (0, 0, 0)
+countSRM_block :: Instruction instr
+ => GenBasicBlock (LiveInstr instr)
+ -> State (Int, Int, Int) (GenBasicBlock (LiveInstr instr))
countSRM_block (BasicBlock i instrs)
- = do instrs' <- mapM countSRM_instr instrs
- return $ BasicBlock i instrs'
+ = do instrs' <- mapM countSRM_instr instrs
+ return $ BasicBlock i instrs'
+countSRM_instr :: Instruction instr
+ => LiveInstr instr -> State (Int, Int, Int) (LiveInstr instr)
countSRM_instr li
- | LiveInstr SPILL{} _ <- li
- = do modify $ \(s, r, m) -> (s + 1, r, m)
- return li
-
- | LiveInstr RELOAD{} _ <- li
- = do modify $ \(s, r, m) -> (s, r + 1, m)
- return li
-
- | LiveInstr instr _ <- li
- , Just _ <- takeRegRegMoveInstr instr
- = do modify $ \(s, r, m) -> (s, r, m + 1)
- return li
-
- | otherwise
- = return li
+ | LiveInstr SPILL{} _ <- li
+ = do modify $ \(s, r, m) -> (s + 1, r, m)
+ return li
+
+ | LiveInstr RELOAD{} _ <- li
+ = do modify $ \(s, r, m) -> (s, r + 1, m)
+ return li
+
+ | LiveInstr instr _ <- li
+ , Just _ <- takeRegRegMoveInstr instr
+ = do modify $ \(s, r, m) -> (s, r, m + 1)
+ return li
+
+ | otherwise
+ = return li
-- sigh..
+addSRM :: (Int, Int, Int) -> (Int, Int, Int) -> (Int, Int, Int)
addSRM (s1, r1, m1) (s2, r2, m2)
- = (s1+s2, r1+r2, m1+m2)
+ = let !s = s1 + s2
+ !r = r1 + r2
+ !m = m1 + m2
+ in (s, r, m)
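The bang patterns added to addSRM matter because the totals are accumulated with foldl' (see pprStatsSpills above), and foldl' only forces the accumulating triple to weak head normal form; without the bangs the three Ints inside it would build up as thunk chains. A minimal sketch of the difference, assuming only base and BangPatterns:

    {-# LANGUAGE BangPatterns #-}
    import Data.List (foldl')

    lazyAdd, strictAdd :: (Int, Int, Int) -> (Int, Int, Int) -> (Int, Int, Int)
    lazyAdd   (s1, r1, m1) (s2, r2, m2) = (s1 + s2, r1 + r2, m1 + m2)
    strictAdd (s1, r1, m1) (s2, r2, m2) =
      let !s = s1 + s2; !r = r1 + r2; !m = m1 + m2 in (s, r, m)

    -- foldl' lazyAdd   (0,0,0) xs  -- leaves (0+a)+b+... thunks inside the triple
    -- foldl' strictAdd (0,0,0) xs  -- keeps each component fully evaluated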
diff --git a/compiler/nativeGen/RegAlloc/Linear/Main.hs b/compiler/nativeGen/RegAlloc/Linear/Main.hs
index 3f92ed975b..a15bca07e7 100644
--- a/compiler/nativeGen/RegAlloc/Linear/Main.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/Main.hs
@@ -139,22 +139,27 @@ regAlloc
:: (Outputable instr, Instruction instr)
=> DynFlags
-> LiveCmmDecl statics instr
- -> UniqSM (NatCmmDecl statics instr, Maybe RegAllocStats)
+ -> UniqSM ( NatCmmDecl statics instr
+ , Maybe Int -- number of extra stack slots required,
+ -- beyond maxSpillSlots
+ , Maybe RegAllocStats)
regAlloc _ (CmmData sec d)
= return
( CmmData sec d
+ , Nothing
, Nothing )
regAlloc _ (CmmProc (LiveInfo info _ _ _) lbl [])
= return ( CmmProc info lbl (ListGraph [])
+ , Nothing
, Nothing )
regAlloc dflags (CmmProc static lbl sccs)
| LiveInfo info (Just first_id) (Just block_live) _ <- static
= do
-- do register allocation on each component.
- (final_blocks, stats)
+ (final_blocks, stats, stack_use)
<- linearRegAlloc dflags first_id block_live sccs
-- make sure the block that was first in the input list
@@ -162,7 +167,15 @@ regAlloc dflags (CmmProc static lbl sccs)
let ((first':_), rest')
= partition ((== first_id) . blockId) final_blocks
+ let max_spill_slots = maxSpillSlots dflags
+ extra_stack
+ | stack_use > max_spill_slots
+ = Just (stack_use - max_spill_slots)
+ | otherwise
+ = Nothing
+
return ( CmmProc info lbl (ListGraph (first' : rest'))
+ , extra_stack
, Just stats)
-- bogus. to make non-exhaustive match warning go away.
@@ -184,7 +197,7 @@ linearRegAlloc
-> BlockId -- ^ the first block
-> BlockMap RegSet -- ^ live regs on entry to each basic block
-> [SCC (LiveBasicBlock instr)] -- ^ instructions annotated with "deaths"
- -> UniqSM ([NatBasicBlock instr], RegAllocStats)
+ -> UniqSM ([NatBasicBlock instr], RegAllocStats, Int)
linearRegAlloc dflags first_id block_live sccs
= let platform = targetPlatform dflags
@@ -204,14 +217,14 @@ linearRegAlloc'
-> BlockId -- ^ the first block
-> BlockMap RegSet -- ^ live regs on entry to each basic block
-> [SCC (LiveBasicBlock instr)] -- ^ instructions annotated with "deaths"
- -> UniqSM ([NatBasicBlock instr], RegAllocStats)
+ -> UniqSM ([NatBasicBlock instr], RegAllocStats, Int)
linearRegAlloc' dflags initFreeRegs first_id block_live sccs
= do us <- getUs
- let (_, _, stats, blocks) =
+ let (_, stack, stats, blocks) =
runR dflags emptyBlockMap initFreeRegs emptyRegMap (emptyStackMap dflags) us
$ linearRA_SCCs first_id block_live [] sccs
- return (blocks, stats)
+ return (blocks, stats, getStackUse stack)
linearRA_SCCs :: (FR freeRegs, Instruction instr, Outputable instr)
diff --git a/compiler/nativeGen/RegAlloc/Linear/StackMap.hs b/compiler/nativeGen/RegAlloc/Linear/StackMap.hs
index b1fc3c169e..69cf411751 100644
--- a/compiler/nativeGen/RegAlloc/Linear/StackMap.hs
+++ b/compiler/nativeGen/RegAlloc/Linear/StackMap.hs
@@ -21,15 +21,13 @@ module RegAlloc.Linear.StackMap (
StackSlot,
StackMap(..),
emptyStackMap,
- getStackSlotFor
+ getStackSlotFor,
+ getStackUse
)
where
-import RegAlloc.Linear.FreeRegs
-
import DynFlags
-import Outputable
import UniqFM
import Unique
@@ -40,7 +38,7 @@ type StackSlot = Int
data StackMap
= StackMap
{ -- | The slots that are still available to be allocated.
- stackMapFreeSlots :: [StackSlot]
+ stackMapNextFreeSlot :: !Int
-- | Assignment of vregs to stack slots.
, stackMapAssignment :: UniqFM StackSlot }
@@ -48,7 +46,7 @@ data StackMap
-- | An empty stack map, with all slots available.
emptyStackMap :: DynFlags -> StackMap
-emptyStackMap dflags = StackMap [0 .. maxSpillSlots dflags] emptyUFM
+emptyStackMap _ = StackMap 0 emptyUFM
-- | If this vreg unique already has a stack assignment then return the slot number,
@@ -56,24 +54,13 @@ emptyStackMap dflags = StackMap [0 .. maxSpillSlots dflags] emptyUFM
--
getStackSlotFor :: StackMap -> Unique -> (StackMap, Int)
-getStackSlotFor (StackMap [] _) _
-
- -- This happens all the time when trying to compile darcs' SHA1.hs, see Track #1993
- -- SHA1.lhs has also been added to the Crypto library on Hackage,
- -- so we see this all the time.
- --
- -- It would be better to automatically invoke the graph allocator, or do something
- -- else besides panicing, but that's a job for a different day. -- BL 2009/02
- --
- = panic $ "RegAllocLinear.getStackSlotFor: out of stack slots\n"
- ++ " If you are trying to compile SHA1.hs from the crypto library then this\n"
- ++ " is a known limitation in the linear allocator.\n"
- ++ "\n"
- ++ " Try enabling the graph colouring allocator with -fregs-graph instead."
- ++ " You can still file a bug report if you like.\n"
-
-getStackSlotFor fs@(StackMap (freeSlot:stack') reserved) reg =
- case lookupUFM reserved reg of
- Just slot -> (fs, slot)
- Nothing -> (StackMap stack' (addToUFM reserved reg freeSlot), freeSlot)
+getStackSlotFor fs@(StackMap _ reserved) reg
+ | Just slot <- lookupUFM reserved reg = (fs, slot)
+
+getStackSlotFor (StackMap freeSlot reserved) reg =
+ (StackMap (freeSlot+1) (addToUFM reserved reg freeSlot), freeSlot)
+
+-- | Return the number of stack slots that were allocated
+getStackUse :: StackMap -> Int
+getStackUse (StackMap freeSlot _) = freeSlot
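A small worked example of the new behaviour (hypothetical DynFlags value and distinct Uniques u1, u2; the slot numbers follow directly from the definitions above): slots come from an ever-growing counter, a vreg that already has a slot gets the same one back, and getStackUse reports the high-water mark that RegAlloc.Linear.Main compares against maxSpillSlots:

    stackMapExample :: DynFlags -> Unique -> Unique -> (Int, Int, Int, Int)
    stackMapExample dflags u1 u2 =
      let sm0        = emptyStackMap dflags    -- StackMap 0 emptyUFM
          (sm1, s1)  = getStackSlotFor sm0 u1  -- s1  == 0
          (sm2, s2)  = getStackSlotFor sm1 u2  -- s2  == 1
          (sm3, s1') = getStackSlotFor sm2 u1  -- s1' == 0 (u1's existing slot)
      in  (s1, s2, s1', getStackUse sm3)       -- == (0, 1, 0, 2)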
diff --git a/compiler/nativeGen/RegAlloc/Liveness.hs b/compiler/nativeGen/RegAlloc/Liveness.hs
index ac58944f1c..608f0a423b 100644
--- a/compiler/nativeGen/RegAlloc/Liveness.hs
+++ b/compiler/nativeGen/RegAlloc/Liveness.hs
@@ -5,8 +5,6 @@
-- (c) The University of Glasgow 2004
--
-----------------------------------------------------------------------------
-{-# OPTIONS -Wall -fno-warn-name-shadowing #-}
-
module RegAlloc.Liveness (
RegSet,
RegMap, emptyRegMap,
@@ -138,6 +136,11 @@ instance Instruction instr => Instruction (InstrSR instr) where
mkJumpInstr target = map Instr (mkJumpInstr target)
+ mkStackAllocInstr platform amount =
+ Instr (mkStackAllocInstr platform amount)
+
+ mkStackDeallocInstr platform amount =
+ Instr (mkStackDeallocInstr platform amount)
-- | An instruction with liveness information.
diff --git a/compiler/nativeGen/SPARC/Instr.hs b/compiler/nativeGen/SPARC/Instr.hs
index 9404badea6..f55c660118 100644
--- a/compiler/nativeGen/SPARC/Instr.hs
+++ b/compiler/nativeGen/SPARC/Instr.hs
@@ -108,6 +108,8 @@ instance Instruction Instr where
mkRegRegMoveInstr = sparc_mkRegRegMoveInstr
takeRegRegMoveInstr = sparc_takeRegRegMoveInstr
mkJumpInstr = sparc_mkJumpInstr
+ mkStackAllocInstr = panic "no sparc_mkStackAllocInstr"
+ mkStackDeallocInstr = panic "no sparc_mkStackDeallocInstr"
-- | SPARC instruction set.
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
index b83ede89aa..fbbc37e6c9 100644
--- a/compiler/nativeGen/X86/CodeGen.hs
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -1214,22 +1214,22 @@ getCondCode (CmmMachOp mop [x, y])
MO_F_Lt W64 -> condFltCode LTT x y
MO_F_Le W64 -> condFltCode LE x y
- MO_Eq _ -> condIntCode EQQ x y
- MO_Ne _ -> condIntCode NE x y
+ MO_Eq _ -> condIntCode EQQ x y
+ MO_Ne _ -> condIntCode NE x y
- MO_S_Gt _ -> condIntCode GTT x y
- MO_S_Ge _ -> condIntCode GE x y
- MO_S_Lt _ -> condIntCode LTT x y
- MO_S_Le _ -> condIntCode LE x y
+ MO_S_Gt _ -> condIntCode GTT x y
+ MO_S_Ge _ -> condIntCode GE x y
+ MO_S_Lt _ -> condIntCode LTT x y
+ MO_S_Le _ -> condIntCode LE x y
MO_U_Gt _ -> condIntCode GU x y
MO_U_Ge _ -> condIntCode GEU x y
MO_U_Lt _ -> condIntCode LU x y
MO_U_Le _ -> condIntCode LEU x y
- _other -> pprPanic "getCondCode(x86,x86_64,sparc)" (ppr (CmmMachOp mop [x,y]))
+ _other -> pprPanic "getCondCode(x86,x86_64)" (ppr (CmmMachOp mop [x,y]))
-getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other)
+getCondCode other = pprPanic "getCondCode(2)(x86,x86_64)" (ppr other)
@@ -1276,7 +1276,8 @@ condIntCode' _ cond x (CmmLit (CmmInt 0 pk)) = do
return (CondCode False cond code)
-- anything vs operand
-condIntCode' is32Bit cond x y | isOperand is32Bit y = do
+condIntCode' is32Bit cond x y
+ | isOperand is32Bit y = do
dflags <- getDynFlags
(x_reg, x_code) <- getNonClobberedReg x
(y_op, y_code) <- getOperand y
@@ -1284,6 +1285,17 @@ condIntCode' is32Bit cond x y | isOperand is32Bit y = do
code = x_code `appOL` y_code `snocOL`
CMP (cmmTypeSize (cmmExprType dflags x)) y_op (OpReg x_reg)
return (CondCode False cond code)
+-- operand vs. anything: invert the comparison so that we can use a
+-- single comparison instruction.
+ | isOperand is32Bit x
+ , Just revcond <- maybeFlipCond cond = do
+ dflags <- getDynFlags
+ (y_reg, y_code) <- getNonClobberedReg y
+ (x_op, x_code) <- getOperand x
+ let
+ code = y_code `appOL` x_code `snocOL`
+ CMP (cmmTypeSize (cmmExprType dflags x)) x_op (OpReg y_reg)
+ return (CondCode False revcond code)
-- anything vs anything
condIntCode' _ cond x y = do
diff --git a/compiler/nativeGen/X86/Cond.hs b/compiler/nativeGen/X86/Cond.hs
index ce97095222..586dabd8f4 100644
--- a/compiler/nativeGen/X86/Cond.hs
+++ b/compiler/nativeGen/X86/Cond.hs
@@ -1,39 +1,32 @@
-
-{-# OPTIONS -fno-warn-tabs #-}
--- The above warning supression flag is a temporary kludge.
--- While working on this module you are encouraged to remove it and
--- detab the module (please do the detabbing in a separate patch). See
--- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#TabsvsSpaces
--- for details
-
module X86.Cond (
- Cond(..),
- condUnsigned,
- condToSigned,
- condToUnsigned
+ Cond(..),
+ condUnsigned,
+ condToSigned,
+ condToUnsigned,
+ maybeFlipCond
)
where
data Cond
- = ALWAYS -- What's really used? ToDo
- | EQQ
- | GE
- | GEU
- | GTT
- | GU
- | LE
- | LEU
- | LTT
- | LU
- | NE
- | NEG
- | POS
- | CARRY
- | OFLO
- | PARITY
- | NOTPARITY
- deriving Eq
+ = ALWAYS -- What's really used? ToDo
+ | EQQ
+ | GE
+ | GEU
+ | GTT
+ | GU
+ | LE
+ | LEU
+ | LTT
+ | LU
+ | NE
+ | NEG
+ | POS
+ | CARRY
+ | OFLO
+ | PARITY
+ | NOTPARITY
+ deriving Eq
condUnsigned :: Cond -> Bool
condUnsigned GU = True
@@ -57,3 +50,19 @@ condToUnsigned LTT = LU
condToUnsigned GE = GEU
condToUnsigned LE = LEU
condToUnsigned x = x
+
+-- | @maybeFlipCond c@ returns @Just c'@ if it is possible to flip the
+-- arguments to the conditional @c@, and the new condition should be @c'@.
+maybeFlipCond :: Cond -> Maybe Cond
+maybeFlipCond cond = case cond of
+ EQQ -> Just EQQ
+ NE -> Just NE
+ LU -> Just GU
+ GU -> Just LU
+ LEU -> Just GEU
+ GEU -> Just LEU
+ LTT -> Just GTT
+ GTT -> Just LTT
+ LE -> Just GE
+ GE -> Just LE
+ _other -> Nothing
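Flipping here means swapping the comparison's operands, not negating the test: x < y holds exactly when y > x, so LTT maps to GTT while EQQ and NE map to themselves, and conditions like CARRY, OFLO and the parity tests have no operand-swapped equivalent, hence Nothing. That is what lets condIntCode' above handle an operand-vs-register comparison with a single CMP by swapping the arguments. A quick property sketch of the intended meaning:

    -- Every flippable condition flips back to itself after two flips.
    flipRoundTrips :: Bool
    flipRoundTrips =
      and [ maybeFlipCond c' == Just c
          | c <- [EQQ, NE, LU, GU, LEU, GEU, LTT, GTT, LE, GE]
          , Just c' <- [maybeFlipCond c] ]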
diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs
index 7f0e48e769..7bd9b0cc9e 100644
--- a/compiler/nativeGen/X86/Instr.hs
+++ b/compiler/nativeGen/X86/Instr.hs
@@ -11,7 +11,7 @@
module X86.Instr (Instr(..), Operand(..),
getJumpDestBlockId, canShortcut, shortcutStatics,
- shortcutJump, i386_insert_ffrees,
+ shortcutJump, i386_insert_ffrees, allocMoreStack,
maxSpillSlots, archWordSize)
where
@@ -58,6 +58,8 @@ instance Instruction Instr where
mkRegRegMoveInstr = x86_mkRegRegMoveInstr
takeRegRegMoveInstr = x86_takeRegRegMoveInstr
mkJumpInstr = x86_mkJumpInstr
+ mkStackAllocInstr = x86_mkStackAllocInstr
+ mkStackDeallocInstr = x86_mkStackDeallocInstr
-- -----------------------------------------------------------------------------
@@ -620,14 +622,13 @@ x86_mkSpillInstr
-> Instr
x86_mkSpillInstr dflags reg delta slot
- = let off = spillSlotToOffset dflags slot
+ = let off = spillSlotToOffset dflags slot - delta
in
- let off_w = (off - delta) `div` (if is32Bit then 4 else 8)
- in case targetClassOfReg platform reg of
+ case targetClassOfReg platform reg of
RcInteger -> MOV (archWordSize is32Bit)
- (OpReg reg) (OpAddr (spRel dflags off_w))
- RcDouble -> GST FF80 reg (spRel dflags off_w) {- RcFloat/RcDouble -}
- RcDoubleSSE -> MOV FF64 (OpReg reg) (OpAddr (spRel dflags off_w))
+ (OpReg reg) (OpAddr (spRel dflags off))
+ RcDouble -> GST FF80 reg (spRel dflags off) {- RcFloat/RcDouble -}
+ RcDoubleSSE -> MOV FF64 (OpReg reg) (OpAddr (spRel dflags off))
_ -> panic "X86.mkSpillInstr: no match"
where platform = targetPlatform dflags
is32Bit = target32Bit platform
@@ -641,14 +642,13 @@ x86_mkLoadInstr
-> Instr
x86_mkLoadInstr dflags reg delta slot
- = let off = spillSlotToOffset dflags slot
+ = let off = spillSlotToOffset dflags slot - delta
in
- let off_w = (off-delta) `div` (if is32Bit then 4 else 8)
- in case targetClassOfReg platform reg of
+ case targetClassOfReg platform reg of
RcInteger -> MOV (archWordSize is32Bit)
- (OpAddr (spRel dflags off_w)) (OpReg reg)
- RcDouble -> GLD FF80 (spRel dflags off_w) reg {- RcFloat/RcDouble -}
- RcDoubleSSE -> MOV FF64 (OpAddr (spRel dflags off_w)) (OpReg reg)
+ (OpAddr (spRel dflags off)) (OpReg reg)
+ RcDouble -> GLD FF80 (spRel dflags off) reg {- RcFloat/RcDouble -}
+ RcDoubleSSE -> MOV FF64 (OpAddr (spRel dflags off)) (OpReg reg)
_ -> panic "X86.x86_mkLoadInstr"
where platform = targetPlatform dflags
is32Bit = target32Bit platform
@@ -666,12 +666,7 @@ maxSpillSlots dflags
-- the C stack pointer.
spillSlotToOffset :: DynFlags -> Int -> Int
spillSlotToOffset dflags slot
- | slot >= 0 && slot < maxSpillSlots dflags
= 64 + spillSlotSize dflags * slot
- | otherwise
- = pprPanic "spillSlotToOffset:"
- ( text "invalid spill location: " <> int slot
- $$ text "maxSpillSlots: " <> int (maxSpillSlots dflags))
--------------------------------------------------------------------------------
@@ -744,8 +739,25 @@ x86_mkJumpInstr id
= [JXX ALWAYS id]
-
-
+x86_mkStackAllocInstr
+ :: Platform
+ -> Int
+ -> Instr
+x86_mkStackAllocInstr platform amount
+ = case platformArch platform of
+ ArchX86 -> SUB II32 (OpImm (ImmInt amount)) (OpReg esp)
+ ArchX86_64 -> SUB II64 (OpImm (ImmInt amount)) (OpReg rsp)
+ _ -> panic "x86_mkStackAllocInstr"
+
+x86_mkStackDeallocInstr
+ :: Platform
+ -> Int
+ -> Instr
+x86_mkStackDeallocInstr platform amount
+ = case platformArch platform of
+ ArchX86 -> ADD II32 (OpImm (ImmInt amount)) (OpReg esp)
+ ArchX86_64 -> ADD II64 (OpImm (ImmInt amount)) (OpReg rsp)
+ _ -> panic "x86_mkStackDeallocInstr"
i386_insert_ffrees
:: [GenBasicBlock Instr]
@@ -753,18 +765,12 @@ i386_insert_ffrees
i386_insert_ffrees blocks
| or (map (any is_G_instr) [ instrs | BasicBlock _ instrs <- blocks ])
- = map ffree_before_nonlocal_transfers blocks
-
+ = map insertGFREEs blocks
| otherwise
= blocks
- where
- ffree_before_nonlocal_transfers (BasicBlock id insns)
- = BasicBlock id (foldr p [] insns)
- where p insn r = case insn of
- CALL _ _ -> GFREE : insn : r
- JMP _ _ -> GFREE : insn : r
- JXX_GBL _ _ -> panic "i386_insert_ffrees: cannot handle JXX_GBL"
- _ -> insn : r
+ where
+ insertGFREEs (BasicBlock id insns)
+ = BasicBlock id (insertBeforeNonlocalTransfers GFREE insns)
-- if you ever add a new FP insn to the fake x86 FP insn set,
-- you must update this too
@@ -796,6 +802,57 @@ is_G_instr instr
_ -> False
+--
+-- Note [extra spill slots]
+--
+-- If the register allocator used more spill slots than we have
+-- pre-allocated (rESERVED_C_STACK_BYTES), then we must allocate more
+-- C stack space on entry and exit from this proc. Therefore we
+-- insert a "sub $N, %rsp" at every entry point, and an "add $N, %rsp"
+-- before every non-local jump.
+--
+-- This became necessary when the new codegen started bundling entire
+-- functions together into one proc, because the register allocator
+-- assigns a different stack slot to each virtual reg within a proc.
+-- To avoid using so many slots we could also:
+--
+-- - split up the proc into connected components before code generator
+--
+-- - rename the virtual regs, so that we re-use vreg names and hence
+-- stack slots for non-overlapping vregs.
+--
+allocMoreStack
+ :: Platform
+ -> Int
+ -> NatCmmDecl statics X86.Instr.Instr
+ -> NatCmmDecl statics X86.Instr.Instr
+
+allocMoreStack _ _ top@(CmmData _ _) = top
+allocMoreStack platform amount (CmmProc info lbl (ListGraph code)) =
+ CmmProc info lbl (ListGraph (map insert_stack_insns code))
+ where
+ alloc = mkStackAllocInstr platform amount
+ dealloc = mkStackDeallocInstr platform amount
+
+ is_entry_point id = id `mapMember` info
+
+ insert_stack_insns (BasicBlock id insns)
+ | is_entry_point id = BasicBlock id (alloc : block')
+ | otherwise = BasicBlock id block'
+ where
+ block' = insertBeforeNonlocalTransfers dealloc insns
+
+
+insertBeforeNonlocalTransfers :: Instr -> [Instr] -> [Instr]
+insertBeforeNonlocalTransfers insert insns
+ = foldr p [] insns
+ where p insn r = case insn of
+ CALL _ _ -> insert : insn : r
+ JMP _ _ -> insert : insn : r
+ JXX_GBL _ _ -> panic "insertBeforeNonlocalTransfers: cannot handle JXX_GBL"
+ _ -> insn : r
+
+
data JumpDest = DestBlockId BlockId | DestImm Imm
getJumpDestBlockId :: JumpDest -> Maybe BlockId
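Reading the two new builders at the assembly level (48 bytes chosen purely for illustration), on x86_64:

    x86_mkStackAllocInstr   platform 48  ==  SUB II64 (OpImm (ImmInt 48)) (OpReg rsp)   -- subq $48, %rsp
    x86_mkStackDeallocInstr platform 48  ==  ADD II64 (OpImm (ImmInt 48)) (OpReg rsp)   -- addq $48, %rsp

so allocMoreStack grows the C stack once per entry point and shrinks it again before every CALL or JMP, exactly as Note [extra spill slots] describes.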
diff --git a/compiler/nativeGen/X86/Regs.hs b/compiler/nativeGen/X86/Regs.hs
index 4eec96f5e1..6b2fe16855 100644
--- a/compiler/nativeGen/X86/Regs.hs
+++ b/compiler/nativeGen/X86/Regs.hs
@@ -196,13 +196,13 @@ addrModeRegs _ = []
spRel :: DynFlags
- -> Int -- ^ desired stack offset in words, positive or negative
+ -> Int -- ^ desired stack offset in bytes, positive or negative
-> AddrMode
spRel dflags n
| target32Bit (targetPlatform dflags)
- = AddrBaseIndex (EABaseReg esp) EAIndexNone (ImmInt (n * wORD_SIZE dflags))
+ = AddrBaseIndex (EABaseReg esp) EAIndexNone (ImmInt n)
| otherwise
- = AddrBaseIndex (EABaseReg rsp) EAIndexNone (ImmInt (n * wORD_SIZE dflags))
+ = AddrBaseIndex (EABaseReg rsp) EAIndexNone (ImmInt n)
-- The register numbers must fit into 32 bits on x86, so that we can
-- use a Word32 to represent the set of free registers in the register
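A quick sanity check of the spRel change on x86_64, where wORD_SIZE dflags is 8:

    -- before this patch:  spRel dflags 9   ==>  72(%rsp)   (9 words * 8 bytes)
    -- after this patch:   spRel dflags 72  ==>  72(%rsp)   (the offset is already in bytes)

which matches the spill and reload code in X86.Instr above, which now subtracts delta and passes byte offsets straight through instead of dividing by the word size first.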