diff options
author | Simon Marlow <marlowsd@gmail.com> | 2012-07-09 16:08:21 +0100 |
---|---|---|
committer | Simon Marlow <marlowsd@gmail.com> | 2012-07-09 16:23:45 +0100 |
commit | c6a61235aa0baf6f9e8a41c5a771ccc7e32c23a5 (patch) | |
tree | a288174c5f6ab513319bd4d1dae6ef79044f5ae9 | |
parent | c9cb46bec47ada686d18437578fbc95281c9c6d4 (diff) | |
download | haskell-c6a61235aa0baf6f9e8a41c5a771ccc7e32c23a5.tar.gz |
Track liveness of GlobalRegs in the new code generator
This gives the register allocator access to R1.., F1.., D1.. etc. for
the new code generator, and is a cheap way to eliminate all the extra
"x = R1" assignments that we get from copyIn.
-rw-r--r-- | compiler/cmm/CmmCommonBlockElim.hs | 6 | ||||
-rw-r--r-- | compiler/cmm/CmmContFlowOpt.hs | 2 | ||||
-rw-r--r-- | compiler/cmm/CmmCvt.hs | 2 | ||||
-rw-r--r-- | compiler/cmm/CmmLayoutStack.hs | 13 | ||||
-rw-r--r-- | compiler/cmm/CmmNode.hs | 22 | ||||
-rw-r--r-- | compiler/cmm/CmmProcPoint.hs | 6 | ||||
-rw-r--r-- | compiler/cmm/CmmRewriteAssignments.hs | 2 | ||||
-rw-r--r-- | compiler/cmm/MkGraph.hs | 29 | ||||
-rw-r--r-- | compiler/cmm/PprCmm.hs | 4 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmExpr.hs | 16 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmLayout.hs | 29 |
11 files changed, 71 insertions, 60 deletions
diff --git a/compiler/cmm/CmmCommonBlockElim.hs b/compiler/cmm/CmmCommonBlockElim.hs index eafa2a00f3..614edf23a2 100644 --- a/compiler/cmm/CmmCommonBlockElim.hs +++ b/compiler/cmm/CmmCommonBlockElim.hs @@ -100,7 +100,7 @@ hash_block block = hash_node (CmmUnsafeForeignCall t _ as) = hash_tgt t + hash_list hash_e as hash_node (CmmBranch _) = 23 -- NB. ignore the label hash_node (CmmCondBranch p _ _) = hash_e p - hash_node (CmmCall e _ _ _ _) = hash_e e + hash_node (CmmCall e _ _ _ _ _) = hash_e e hash_node (CmmForeignCall t _ _ _ _ _) = hash_tgt t hash_node (CmmSwitch e _) = hash_e e @@ -193,8 +193,8 @@ eqLastWith :: (BlockId -> BlockId -> Bool) -> CmmNode O C -> CmmNode O C -> Bool eqLastWith eqBid (CmmBranch bid1) (CmmBranch bid2) = eqBid bid1 bid2 eqLastWith eqBid (CmmCondBranch c1 t1 f1) (CmmCondBranch c2 t2 f2) = c1 == c2 && eqBid t1 t2 && eqBid f1 f2 -eqLastWith eqBid (CmmCall t1 c1 a1 r1 u1) (CmmCall t2 c2 a2 r2 u2) = - t1 == t2 && eqMaybeWith eqBid c1 c2 && a1 == a2 && r1 == r2 && u1 == u2 +eqLastWith eqBid (CmmCall t1 c1 g1 a1 r1 u1) (CmmCall t2 c2 g2 a2 r2 u2) = + t1 == t2 && eqMaybeWith eqBid c1 c2 && a1 == a2 && r1 == r2 && u1 == u2 && g1 == g2 eqLastWith eqBid (CmmSwitch e1 bs1) (CmmSwitch e2 bs2) = e1 == e2 && eqListWith (eqMaybeWith eqBid) bs1 bs2 eqLastWith _ _ _ = False diff --git a/compiler/cmm/CmmContFlowOpt.hs b/compiler/cmm/CmmContFlowOpt.hs index 3fabf33f97..6b6ecc81b3 100644 --- a/compiler/cmm/CmmContFlowOpt.hs +++ b/compiler/cmm/CmmContFlowOpt.hs @@ -177,7 +177,7 @@ replaceLabels env g txnode (CmmBranch bid) = CmmBranch (lookup bid) txnode (CmmCondBranch p t f) = mkCmmCondBranch (exp p) (lookup t) (lookup f) txnode (CmmSwitch e arms) = CmmSwitch (exp e) (map (liftM lookup) arms) - txnode (CmmCall t k a res r) = CmmCall (exp t) (liftM lookup k) a res r + txnode (CmmCall t k rg a res r) = CmmCall (exp t) (liftM lookup k) rg a res r txnode fc@CmmForeignCall{} = fc{ args = map exp (args fc) , succ = lookup (succ fc) } txnode other = mapExpDeep exp other diff --git a/compiler/cmm/CmmCvt.hs b/compiler/cmm/CmmCvt.hs index e72eee041c..204f26e24b 100644 --- a/compiler/cmm/CmmCvt.hs +++ b/compiler/cmm/CmmCvt.hs @@ -102,7 +102,7 @@ ofZgraph g = Old.ListGraph $ mapMaybe convert_block $ postorderDfs g | otherwise -> [Old.CmmCondBranch expr tid, Old.CmmBranch fid] CmmSwitch arg ids -> [Old.CmmSwitch arg ids] -- ToDo: STG Live - CmmCall e _ _ _ _ -> [Old.CmmJump e Nothing] + CmmCall e _ r _ _ _ -> [Old.CmmJump e (Just r)] CmmForeignCall {} -> panic "ofZgraph: CmmForeignCall" tail_of bid = case foldBlockNodesB3 (first, middle, last) block () of Old.BasicBlock _ stmts -> stmts diff --git a/compiler/cmm/CmmLayoutStack.hs b/compiler/cmm/CmmLayoutStack.hs index f0dce4a6a1..3ee06215bc 100644 --- a/compiler/cmm/CmmLayoutStack.hs +++ b/compiler/cmm/CmmLayoutStack.hs @@ -894,15 +894,16 @@ lowerSafeForeignCall block -- so we use a jump, not a branch. succLbl = CmmLit (CmmLabel (infoTblLbl succ)) - (ret_args, copyout) = copyOutOflow NativeReturn Jump (Young succ) + (ret_args, regs, copyout) = copyOutOflow NativeReturn Jump (Young succ) (map (CmmReg . CmmLocal) res) updfr (0, []) - jump = CmmCall { cml_target = succLbl - , cml_cont = Just succ - , cml_args = widthInBytes wordWidth - , cml_ret_args = ret_args - , cml_ret_off = updfr } + jump = CmmCall { cml_target = succLbl + , cml_cont = Just succ + , cml_args_regs = regs + , cml_args = widthInBytes wordWidth + , cml_ret_args = ret_args + , cml_ret_off = updfr } graph' <- lgraphOfAGraph $ suspend <*> midCall <*> diff --git a/compiler/cmm/CmmNode.hs b/compiler/cmm/CmmNode.hs index b91546e9e6..0a5f5170f0 100644 --- a/compiler/cmm/CmmNode.hs +++ b/compiler/cmm/CmmNode.hs @@ -87,14 +87,14 @@ data CmmNode e x where -- occur in CmmExprs, namely as (CmmLit (CmmBlock b)) or -- (CmmStackSlot (Young b) _). --- ToDO: add this: --- cml_args_regs :: [GlobalReg], --- It says which GlobalRegs are live for the parameters at the --- moment of the call. Later stages can use this to give liveness --- everywhere, which in turn guides register allocation. --- It is the companion of cml_args; cml_args says which stack words --- hold parameters, while cml_arg_regs says which global regs hold parameters. --- But do note [Register parameter passing] + cml_args_regs :: [GlobalReg], + -- The argument GlobalRegs (Rx, Fx, Dx, Lx) that are passed + -- to the call. This is essential information for the + -- native code generator's register allocator; without + -- knowing which GlobalRegs are live it has to assume that + -- they are all live. This list should only include + -- GlobalRegs that are mapped to real machine registers on + -- the target platform. cml_args :: ByteOff, -- Byte offset, from the *old* end of the Area associated with @@ -189,7 +189,7 @@ instance Eq (CmmNode e x) where (CmmBranch a) == (CmmBranch a') = a==a' (CmmCondBranch a b c) == (CmmCondBranch a' b' c') = a==a' && b==b' && c==c' (CmmSwitch a b) == (CmmSwitch a' b') = a==a' && b==b' - (CmmCall a b c d e) == (CmmCall a' b' c' d' e') = a==a' && b==b' && c==c' && d==d' && e==e' + (CmmCall a b c d e f) == (CmmCall a' b' c' d' e' f') = a==a' && b==b' && c==c' && d==d' && e==e' && f==f' (CmmForeignCall a b c d e f) == (CmmForeignCall a' b' c' d' e' f') = a==a' && b==b' && c==c' && d==d' && e==e' && f==f' _ == _ = False @@ -301,7 +301,7 @@ mapExp f (CmmUnsafeForeignCall tgt fs as) = CmmUnsafeForeignCall (mapFore mapExp _ l@(CmmBranch _) = l mapExp f (CmmCondBranch e ti fi) = CmmCondBranch (f e) ti fi mapExp f (CmmSwitch e tbl) = CmmSwitch (f e) tbl -mapExp f (CmmCall tgt mb_id o i s) = CmmCall (f tgt) mb_id o i s +mapExp f n@CmmCall {cml_target=tgt} = n{cml_target = f tgt} mapExp f (CmmForeignCall tgt fs as succ updfr intrbl) = CmmForeignCall (mapForeignTarget f tgt) fs (map f as) succ updfr intrbl mapExpDeep :: (CmmExpr -> CmmExpr) -> CmmNode e x -> CmmNode e x @@ -327,7 +327,7 @@ mapExpM f (CmmStore addr e) = (\[addr', e'] -> CmmStore addr' e') `fmap` mapExpM _ (CmmBranch _) = Nothing mapExpM f (CmmCondBranch e ti fi) = (\x -> CmmCondBranch x ti fi) `fmap` f e mapExpM f (CmmSwitch e tbl) = (\x -> CmmSwitch x tbl) `fmap` f e -mapExpM f (CmmCall tgt mb_id o i s) = (\x -> CmmCall x mb_id o i s) `fmap` f tgt +mapExpM f (CmmCall tgt mb_id r o i s) = (\x -> CmmCall x mb_id r o i s) `fmap` f tgt mapExpM f (CmmUnsafeForeignCall tgt fs as) = case mapForeignTargetM f tgt of Just tgt' -> Just (CmmUnsafeForeignCall tgt' fs (mapListJ f as)) diff --git a/compiler/cmm/CmmProcPoint.hs b/compiler/cmm/CmmProcPoint.hs index 6eb92666af..ebe40d9c9e 100644 --- a/compiler/cmm/CmmProcPoint.hs +++ b/compiler/cmm/CmmProcPoint.hs @@ -245,7 +245,8 @@ splitAtProcPoints entry_label callPPs procPoints procMap let add_jump_block (env, bs) (pp, l) = do bid <- liftM mkBlockId getUniqueM let b = blockJoin (CmmEntry bid) emptyBlock jump - jump = CmmCall (CmmLit (CmmLabel l)) Nothing 0 0 0 + jump = CmmCall (CmmLit (CmmLabel l)) Nothing [{-XXX-}] 0 0 0 + -- XXX: No regs are live at the call return (mapInsert pp bid env, b : bs) add_jumps newGraphEnv (ppId, blockEnv) = @@ -286,7 +287,8 @@ splitAtProcPoints entry_label callPPs procPoints procMap -> CmmProc (TopInfo {info_tbl=CmmNonInfoTable, stack_info=stack_info}) lbl (replacePPIds g) where - stack_info = panic "No StackInfo" + stack_info = StackInfo 0 Nothing -- panic "No StackInfo" + -- cannot use panic, this is printed by -ddump-cmmz -- References to procpoint IDs can now be replaced with the -- infotable's label diff --git a/compiler/cmm/CmmRewriteAssignments.hs b/compiler/cmm/CmmRewriteAssignments.hs index cf349a0334..2f13997771 100644 --- a/compiler/cmm/CmmRewriteAssignments.hs +++ b/compiler/cmm/CmmRewriteAssignments.hs @@ -438,7 +438,7 @@ overlaps (_, o, w) (_, o', w') = in (s' < o) && (s < o) -- Not LTE, because [ I32 ][ I32 ] is OK lastAssignment :: WithRegUsage CmmNode O C -> AssignmentMap -> [(Label, AssignmentMap)] -lastAssignment (Plain (CmmCall _ (Just k) _ _ _)) assign = [(k, invalidateVolatile k assign)] +lastAssignment (Plain (CmmCall _ (Just k) _ _ _ _)) assign = [(k, invalidateVolatile k assign)] lastAssignment (Plain (CmmForeignCall {succ=k})) assign = [(k, invalidateVolatile k assign)] lastAssignment l assign = map (\id -> (id, deleteSinks l assign)) $ successors l diff --git a/compiler/cmm/MkGraph.hs b/compiler/cmm/MkGraph.hs index ecd4d4f985..443fa3a441 100644 --- a/compiler/cmm/MkGraph.hs +++ b/compiler/cmm/MkGraph.hs @@ -294,7 +294,7 @@ data Transfer = Call | Jump | Ret deriving Eq copyOutOflow :: Convention -> Transfer -> Area -> [CmmActual] -> UpdFrameOffset -> (ByteOff, [(CmmExpr,ByteOff)]) -- extra stack stuff - -> (Int, CmmAGraph) + -> (Int, [GlobalReg], CmmAGraph) -- Generate code to move the actual parameters into the locations -- required by the calling convention. This includes a store for the @@ -307,10 +307,12 @@ copyOutOflow :: Convention -> Transfer -> Area -> [CmmActual] -- of the other parameters. copyOutOflow conv transfer area actuals updfr_off (extra_stack_off, extra_stack_stuff) - = foldr co (init_offset, mkNop) (args' ++ stack_params) + = foldr co (init_offset, [], mkNop) (args' ++ stack_params) where - co (v, RegisterParam r) (n, ms) = (n, mkAssign (CmmGlobal r) v <*> ms) - co (v, StackParam off) (n, ms) = (max n off, mkStore (CmmStackSlot area off) v <*> ms) + co (v, RegisterParam r) (n, rs, ms) + = (n, r:rs, mkAssign (CmmGlobal r) v <*> ms) + co (v, StackParam off) (n, rs, ms) + = (max n off, rs, mkStore (CmmStackSlot area off) v <*> ms) stack_params = [ (e, StackParam (off + init_offset)) | (e,off) <- extra_stack_stuff ] @@ -341,7 +343,7 @@ mkCallEntry conv formals = copyInOflow conv Old formals lastWithArgs :: Transfer -> Area -> Convention -> [CmmActual] -> UpdFrameOffset - -> (ByteOff -> CmmAGraph) + -> (ByteOff -> [GlobalReg] -> CmmAGraph) -> CmmAGraph lastWithArgs transfer area conv actuals updfr_off last = lastWithArgsAndExtraStack transfer area conv actuals @@ -349,18 +351,21 @@ lastWithArgs transfer area conv actuals updfr_off last = lastWithArgsAndExtraStack :: Transfer -> Area -> Convention -> [CmmActual] -> UpdFrameOffset -> (ByteOff, [(CmmExpr,ByteOff)]) - -> (ByteOff -> CmmAGraph) + -> (ByteOff -> [GlobalReg] -> CmmAGraph) -> CmmAGraph lastWithArgsAndExtraStack transfer area conv actuals updfr_off extra_stack last = - let (outArgs, copies) = copyOutOflow conv transfer area actuals - updfr_off extra_stack in - copies <*> last outArgs + copies <*> last outArgs regs + where + (outArgs, regs, copies) = copyOutOflow conv transfer area actuals + updfr_off extra_stack + noExtraStack :: (ByteOff, [(CmmExpr,ByteOff)]) noExtraStack = (0,[]) -toCall :: CmmExpr -> Maybe BlockId -> UpdFrameOffset -> ByteOff -> ByteOff +toCall :: CmmExpr -> Maybe BlockId -> UpdFrameOffset -> ByteOff + -> ByteOff -> [GlobalReg] -> CmmAGraph -toCall e cont updfr_off res_space arg_space = - mkLast $ CmmCall e cont arg_space res_space updfr_off +toCall e cont updfr_off res_space arg_space regs = + mkLast $ CmmCall e cont regs arg_space res_space updfr_off diff --git a/compiler/cmm/PprCmm.hs b/compiler/cmm/PprCmm.hs index dee6ee881e..9717eea179 100644 --- a/compiler/cmm/PprCmm.hs +++ b/compiler/cmm/PprCmm.hs @@ -227,9 +227,9 @@ pprNode node = pp_node <+> pp_debug , ptext (sLit ": goto") , ppr (head [ id | Just id <- ids]) <> semi ] - CmmCall tgt k out res updfr_off -> + CmmCall tgt k regs out res updfr_off -> hcat [ ptext (sLit "call"), space - , pprFun tgt, ptext (sLit "(...)"), space + , pprFun tgt, parens (interpp'SP regs), space , ptext (sLit "returns to") <+> ppr k <+> parens (ppr out) <+> parens (ppr res) , ptext (sLit " with update frame") <+> ppr updfr_off diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs index 68bfb6d9fe..dd1abc23be 100644 --- a/compiler/codeGen/StgCmmExpr.hs +++ b/compiler/codeGen/StgCmmExpr.hs @@ -632,17 +632,15 @@ cgTailCall fun_id fun_info args = do -- A direct function call (possibly with some left-over arguments) DirectEntry lbl arity -> do { tickyDirectCall arity args - ; if node_points then - do emitComment $ mkFastString "directEntry" - emitAssign nodeReg fun - directCall lbl arity args - else do emitComment $ mkFastString "directEntry else" - directCall lbl arity args } + ; if node_points + then directCall NativeNodeCall lbl arity (fun_arg:args) + else directCall NativeDirectCall lbl arity args } JumpToIt {} -> panic "cgTailCall" -- ??? where - fun_name = idName fun_id + fun_arg = StgVarArg fun_id + fun_name = idName fun_id fun = idInfoToAmode fun_info lf_info = cgIdInfoLF fun_info node_points = nodeMustPointToIt lf_info @@ -693,13 +691,13 @@ emitEnter fun = do ; lcall <- newLabelC ; let area = Young lret ; let (off, copyin) = copyInOflow NativeReturn area res_regs - (outArgs, copyout) = copyOutOflow NativeNodeCall Call area + (outArgs, regs, copyout) = copyOutOflow NativeNodeCall Call area [fun] updfr_off (0,[]) -- refer to fun via nodeReg after the copyout, to avoid having -- both live simultaneously; this sometimes enables fun to be -- inlined in the RHS of the R1 assignment. ; let entry = entryCode (closureInfoPtr (CmmReg nodeReg)) - the_call = toCall entry (Just lret) updfr_off off outArgs + the_call = toCall entry (Just lret) updfr_off off outArgs regs ; emit $ copyout <*> mkCbranch (cmmIsTagged (CmmReg nodeReg)) lret lcall <*> diff --git a/compiler/codeGen/StgCmmLayout.hs b/compiler/codeGen/StgCmmLayout.hs index 9593af1f50..9c17716b1b 100644 --- a/compiler/codeGen/StgCmmLayout.hs +++ b/compiler/codeGen/StgCmmLayout.hs @@ -165,14 +165,14 @@ adjustHpBackwards -- call f() return to Nothing updfr_off: 32 -directCall :: CLabel -> RepArity -> [StgArg] -> FCode () +directCall :: Convention -> CLabel -> RepArity -> [StgArg] -> FCode () -- (directCall f n args) -- calls f(arg1, ..., argn), and applies the result to the remaining args -- The function f has arity n, and there are guaranteed at least n args -- Both arity and args include void args -directCall lbl arity stg_args +directCall conv lbl arity stg_args = do { argreps <- getArgRepsAmodes stg_args - ; direct_call "directCall" lbl arity argreps } + ; direct_call "directCall" conv lbl arity argreps } slowCall :: CmmExpr -> [StgArg] -> FCode () @@ -181,19 +181,21 @@ slowCall fun stg_args = do { dflags <- getDynFlags ; argsreps <- getArgRepsAmodes stg_args ; let (rts_fun, arity) = slowCallPattern (map fst argsreps) - ; call <- getCode $ direct_call "slow_call" - (mkRtsApFastLabel rts_fun) arity argsreps + ; direct_call "slow_call" NativeNodeCall + (mkRtsApFastLabel rts_fun) arity ((P,Just fun):argsreps) ; emitComment $ mkFastString ("slow_call for " ++ showSDoc dflags (ppr fun) ++ " with pat " ++ unpackFS rts_fun) - ; emit (mkAssign nodeReg fun <*> call) } -------------- -direct_call :: String -> CLabel -> RepArity -> [(ArgRep,Maybe CmmExpr)] -> FCode () -direct_call caller lbl arity args - | debugIsOn && arity > length args -- Too few args +direct_call :: String + -> Convention -- e.g. NativeNodeCall or NativeDirectCall + -> CLabel -> RepArity + -> [(ArgRep,Maybe CmmExpr)] -> FCode () +direct_call caller call_conv lbl arity args + | debugIsOn && real_arity > length args -- Too few args = do -- Caller should ensure that there enough args! pprPanic "direct_call" $ text caller <+> ppr arity <+> @@ -201,15 +203,18 @@ direct_call caller lbl arity args ppr (map snd args) <+> ppr (map fst args) | null rest_args -- Precisely the right number of arguments - = emitCall (NativeDirectCall, NativeReturn) target (nonVArgs args) + = emitCall (call_conv, NativeReturn) target (nonVArgs args) | otherwise -- Note [over-saturated calls] - = emitCallWithExtraStack (NativeDirectCall, NativeReturn) + = emitCallWithExtraStack (call_conv, NativeReturn) target (nonVArgs fast_args) (mkStkOffsets stack_args) where target = CmmLit (CmmLabel lbl) - (fast_args, rest_args) = splitAt arity args + (fast_args, rest_args) = splitAt real_arity args stack_args = slowArgs rest_args + real_arity = case call_conv of + NativeNodeCall -> arity+1 + _ -> arity -- When constructing calls, it is easier to keep the ArgReps and the |