diff options
author | Simon Marlow <marlowsd@gmail.com> | 2013-01-07 12:26:29 +0000 |
---|---|---|
committer | Simon Marlow <marlowsd@gmail.com> | 2013-01-07 15:52:22 +0000 |
commit | 03d360f289a1c7e93fedf8cfa274cbe5929cd32c (patch) | |
tree | 85cb00f070ddb852f16400b726ed2450dff34ad4 | |
parent | 7d1216ab9b7e80a860de0498854883761d6e7d33 (diff) | |
download | haskell-03d360f289a1c7e93fedf8cfa274cbe5929cd32c.tar.gz |
Fix bugs in allocMoreStack (#7498, #7510)
There were four bugs here. Clearly I didn't test this enough to
expose the bugs - it appeared to work on x86/Linux, but completely by
accident it seems.
1. the delta was wrong by a factor of the slot size (as noted on #7498)
2. we weren't correctly aligning the stack pointer (sp needs to be
16-byte aligned on x86/x86_64)
3. we were doing the adjustment multiple times in the case of a block
that was both a return point and a local branch target. To fix this I
had to add new shim blocks to adjust the stack pointer, and retarget
the original branches. See comment for details.
4. we were doing the adjustment for CALL instructions, which is
unnecessary and wrong; only JMPs should be preceded by a stack
adjustment.
(Someone with a PPC box will need to update the PPC version of
allocMoreStack to fix the above bugs, using the x86 version as a
guide.)
-rw-r--r-- | compiler/nativeGen/AsmCodeGen.lhs | 10 | ||||
-rw-r--r-- | compiler/nativeGen/PPC/Instr.hs | 7 | ||||
-rw-r--r-- | compiler/nativeGen/X86/Instr.hs | 124 |
3 files changed, 99 insertions, 42 deletions
diff --git a/compiler/nativeGen/AsmCodeGen.lhs b/compiler/nativeGen/AsmCodeGen.lhs index ee83a66169..58c3c75794 100644 --- a/compiler/nativeGen/AsmCodeGen.lhs +++ b/compiler/nativeGen/AsmCodeGen.lhs @@ -147,7 +147,7 @@ data NcgImpl statics instr jumpDest = NcgImpl { allocatableRegs :: [RealReg], ncg_x86fp_kludge :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr], ncgExpandTop :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr], - ncgAllocMoreStack :: Int -> NatCmmDecl statics instr -> NatCmmDecl statics instr, + ncgAllocMoreStack :: Int -> NatCmmDecl statics instr -> UniqSM (NatCmmDecl statics instr), ncgMakeFarBranches :: [NatBasicBlock instr] -> [NatBasicBlock instr] } @@ -238,7 +238,7 @@ sparcNcgImpl dflags -- default to the panic below. To support allocating extra stack on -- more platforms provide a definition of ncgAllocMoreStack. -- -noAllocMoreStack :: Int -> NatCmmDecl statics instr -> NatCmmDecl statics instr +noAllocMoreStack :: Int -> NatCmmDecl statics instr -> UniqSM (NatCmmDecl statics instr) noAllocMoreStack amount _ = panic $ "Register allocator: out of stack slots (need " ++ show amount ++ ")\n" ++ " If you are trying to compile SHA1.hs from the crypto library then this\n" @@ -518,9 +518,9 @@ cmmNativeGen dflags ncgImpl us cmm count Linear.regAlloc dflags proc case maybe_more_stack of Nothing -> return ( alloced, ra_stats ) - Just amount -> - return ( ncgAllocMoreStack ncgImpl amount alloced - , ra_stats ) + Just amount -> do + alloced' <- ncgAllocMoreStack ncgImpl amount alloced + return (alloced', ra_stats ) let ((alloced, regAllocStats), usAlloc) = {-# SCC "RegAlloc" #-} diff --git a/compiler/nativeGen/PPC/Instr.hs b/compiler/nativeGen/PPC/Instr.hs index 89536b1b2a..80b7556320 100644 --- a/compiler/nativeGen/PPC/Instr.hs +++ b/compiler/nativeGen/PPC/Instr.hs @@ -36,6 +36,7 @@ import CLabel import Outputable import Platform import FastBool +import UniqSupply -------------------------------------------------------------------------------- -- Size of a PPC memory address, in bytes. @@ -80,11 +81,11 @@ allocMoreStack :: Platform -> Int -> NatCmmDecl statics PPC.Instr.Instr - -> NatCmmDecl statics PPC.Instr.Instr + -> UniqSM (NatCmmDecl statics PPC.Instr.Instr) -allocMoreStack _ _ top@(CmmData _ _) = top +allocMoreStack _ _ top@(CmmData _ _) = return top allocMoreStack platform amount (CmmProc info lbl live (ListGraph code)) = - CmmProc info lbl live (ListGraph (map insert_stack_insns code)) + return (CmmProc info lbl live (ListGraph (map insert_stack_insns code))) where alloc = mkStackAllocInstr platform amount dealloc = mkStackDeallocInstr platform amount diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs index b3ed833695..504d953303 100644 --- a/compiler/nativeGen/X86/Instr.hs +++ b/compiler/nativeGen/X86/Instr.hs @@ -36,6 +36,9 @@ import CLabel import DynFlags import UniqSet import Unique +import UniqSupply + +import Control.Monad -- Size of an x86/x86_64 memory address, in bytes. -- @@ -622,7 +625,7 @@ x86_mkSpillInstr -> Instr x86_mkSpillInstr dflags reg delta slot - = let off = spillSlotToOffset dflags slot - delta + = let off = spillSlotToOffset platform slot - delta in case targetClassOfReg platform reg of RcInteger -> MOV (archWordSize is32Bit) @@ -642,7 +645,7 @@ x86_mkLoadInstr -> Instr x86_mkLoadInstr dflags reg delta slot - = let off = spillSlotToOffset dflags slot - delta + = let off = spillSlotToOffset platform slot - delta in case targetClassOfReg platform reg of RcInteger -> MOV (archWordSize is32Bit) @@ -653,20 +656,25 @@ x86_mkLoadInstr dflags reg delta slot where platform = targetPlatform dflags is32Bit = target32Bit platform -spillSlotSize :: DynFlags -> Int +spillSlotSize :: Platform -> Int spillSlotSize dflags = if is32Bit then 12 else 8 - where is32Bit = target32Bit (targetPlatform dflags) + where is32Bit = target32Bit dflags maxSpillSlots :: DynFlags -> Int maxSpillSlots dflags - = ((rESERVED_C_STACK_BYTES dflags - 64) `div` spillSlotSize dflags) - 1 + = ((rESERVED_C_STACK_BYTES dflags - 64) `div` spillSlotSize (targetPlatform dflags)) - 1 +-- = 0 -- useful for testing allocMoreStack + +-- number of bytes that the stack pointer should be aligned to +stackAlign :: Int +stackAlign = 16 -- convert a spill slot number to a *byte* offset, with no sign: -- decide on a per arch basis whether you are spilling above or below -- the C stack pointer. -spillSlotToOffset :: DynFlags -> Int -> Int -spillSlotToOffset dflags slot - = 64 + spillSlotSize dflags * slot +spillSlotToOffset :: Platform -> Int -> Int +spillSlotToOffset platform slot + = 64 + spillSlotSize platform * slot -------------------------------------------------------------------------------- @@ -772,6 +780,16 @@ i386_insert_ffrees blocks insertGFREEs (BasicBlock id insns) = BasicBlock id (insertBeforeNonlocalTransfers GFREE insns) +insertBeforeNonlocalTransfers :: Instr -> [Instr] -> [Instr] +insertBeforeNonlocalTransfers insert insns + = foldr p [] insns + where p insn r = case insn of + CALL _ _ -> insert : insn : r + JMP _ _ -> insert : insn : r + JXX_GBL _ _ -> panic "insertBeforeNonlocalTransfers: cannot handle JXX_GBL" + _ -> insn : r + + -- if you ever add a new FP insn to the fake x86 FP insn set, -- you must update this too is_G_instr :: Instr -> Bool @@ -821,36 +839,74 @@ is_G_instr instr -- - rename the virtual regs, so that we re-use vreg names and hence -- stack slots for non-overlapping vregs. -- +-- Note that when a block is both a non-local entry point (with an +-- info table) and a local branch target, we have to split it into +-- two, like so: +-- +-- <info table> +-- L: +-- <code> +-- +-- becomes +-- +-- <info table> +-- L: +-- subl $rsp, N +-- jmp Lnew +-- Lnew: +-- <code> +-- +-- and all branches pointing to L are retargetted to point to Lnew. +-- Otherwise, we would repeat the $rsp adjustment for each branch to +-- L. +-- allocMoreStack :: Platform -> Int -> NatCmmDecl statics X86.Instr.Instr - -> NatCmmDecl statics X86.Instr.Instr - -allocMoreStack _ _ top@(CmmData _ _) = top -allocMoreStack platform amount (CmmProc info lbl live (ListGraph code)) = - CmmProc info lbl live (ListGraph (map insert_stack_insns code)) - where - alloc = mkStackAllocInstr platform amount - dealloc = mkStackDeallocInstr platform amount - - is_entry_point id = id `mapMember` info - - insert_stack_insns (BasicBlock id insns) - | is_entry_point id = BasicBlock id (alloc : block') - | otherwise = BasicBlock id block' - where - block' = insertBeforeNonlocalTransfers dealloc insns - - -insertBeforeNonlocalTransfers :: Instr -> [Instr] -> [Instr] -insertBeforeNonlocalTransfers insert insns - = foldr p [] insns - where p insn r = case insn of - CALL _ _ -> insert : insn : r - JMP _ _ -> insert : insn : r - JXX_GBL _ _ -> panic "insertBeforeNonlocalTransfers: cannot handle JXX_GBL" - _ -> insn : r + -> UniqSM (NatCmmDecl statics X86.Instr.Instr) + +allocMoreStack _ _ top@(CmmData _ _) = return top +allocMoreStack platform slots (CmmProc info lbl live (ListGraph code)) = do + let + infos = mapKeys info + entries = case code of + [] -> infos + BasicBlock entry _ : _ -- first block is the entry point + | entry `elem` infos -> infos + | otherwise -> entry : infos + + uniqs <- replicateM (length entries) getUniqueUs + + let + delta = ((x + stackAlign - 1) `quot` stackAlign) * stackAlign -- round up + where x = slots * spillSlotSize platform -- sp delta + + alloc = mkStackAllocInstr platform delta + dealloc = mkStackDeallocInstr platform delta + + new_blockmap :: BlockEnv BlockId + new_blockmap = mapFromList (zip entries (map mkBlockId uniqs)) + + insert_stack_insns (BasicBlock id insns) + | Just new_blockid <- mapLookup id new_blockmap + = [ BasicBlock id [alloc, JXX ALWAYS new_blockid] + , BasicBlock new_blockid block' ] + | otherwise + = [ BasicBlock id block' ] + where + block' = foldr insert_dealloc [] insns + + insert_dealloc insn r = case insn of + JMP _ _ -> dealloc : insn : r + JXX_GBL _ _ -> panic "insert_dealloc: cannot handle JXX_GBL" + JXX cond b | Just new_dest <- mapLookup b new_blockmap + -> JXX cond new_dest : r + _ -> insn : r + + new_code = concatMap insert_stack_insns code + -- in + return (CmmProc info lbl live (ListGraph new_code)) data JumpDest = DestBlockId BlockId | DestImm Imm |