Diffstat (limited to 'compiler/codeGen/StgCmmLayout.hs')
 compiler/codeGen/StgCmmLayout.hs | 238
 1 file changed, 181 insertions(+), 57 deletions(-)
diff --git a/compiler/codeGen/StgCmmLayout.hs b/compiler/codeGen/StgCmmLayout.hs
index 86986efdfa..9593af1f50 100644
--- a/compiler/codeGen/StgCmmLayout.hs
+++ b/compiler/codeGen/StgCmmLayout.hs
@@ -15,7 +15,7 @@ module StgCmmLayout (
         mkArgDescr,
-        emitCall, emitReturn,
+        emitCall, emitReturn, adjustHpBackwards,
 
         emitClosureProcAndInfoTable,
         emitClosureAndInfoTable,
@@ -41,10 +41,12 @@ import StgCmmEnv
 import StgCmmTicky
 import StgCmmMonad
 import StgCmmUtils
+import StgCmmProf
 
 import MkGraph
 import SMRep
 import Cmm
+import CmmUtils
 import CLabel
 import StgSyn
 import Id
@@ -52,6 +54,7 @@ import Name
 import TyCon ( PrimRep(..) )
 import BasicTypes ( RepArity )
 import StaticFlags
+import Module
 
 import Constants
 import Util
@@ -63,38 +66,60 @@ import FastString
 --        Call and return sequences
 ------------------------------------------------------------------------
 
-emitReturn :: [CmmExpr] -> FCode ()
--- Return multiple values to the sequel
+-- | Return multiple values to the sequel
+--
+-- If the sequel is @Return@
+--
+-- >     return (x,y)
+--
+-- If the sequel is @AssignTo [p,q]@
+--
+-- >     p=x; q=y;
 --
---      If the sequel is Return
---              return (x,y)
---      If the sequel is AssignTo [p,q]
---              p=x; q=y;
+emitReturn :: [CmmExpr] -> FCode ()
 emitReturn results
   = do  { sequel    <- getSequel;
         ; updfr_off <- getUpdFrameOff
-        ; emit $ mkComment $ mkFastString ("emitReturn: " ++ show sequel)
+        ; emitComment $ mkFastString ("emitReturn: " ++ show sequel)
         ; case sequel of
             Return _ ->
               do { adjustHpBackwards
                  ; emit (mkReturnSimple results updfr_off) }
            AssignTo regs adjust ->
              do { if adjust then adjustHpBackwards else return ()
-                ; emit (mkMultiAssign regs results) }
+                ; emitMultiAssign regs results }
    }
+
+-- | @emitCall conv fun args@ makes a call to the entry-code of @fun@,
+-- using the call/return convention @conv@, passing @args@, and
+-- returning the results to the current sequel.
+--
 emitCall :: (Convention, Convention) -> CmmExpr -> [CmmExpr] -> FCode ()
--- (cgCall fun args) makes a call to the entry-code of 'fun',
--- passing 'args', and returning the results to the current sequel
-emitCall convs@(callConv, _) fun args
+emitCall convs fun args
+  = emitCallWithExtraStack convs fun args noExtraStack
+
+
+-- | @emitCallWithExtraStack conv fun args stack@ makes a call to the
+-- entry-code of @fun@, using the call/return convention @conv@,
+-- passing @args@, pushing some extra stack frames described by
+-- @stack@, and returning the results to the current sequel.
+--
+emitCallWithExtraStack
+  :: (Convention, Convention) -> CmmExpr -> [CmmExpr]
+  -> (ByteOff, [(CmmExpr,ByteOff)]) -> FCode ()
+emitCallWithExtraStack convs@(callConv, _) fun args extra_stack
   = do  { adjustHpBackwards
        ; sequel <- getSequel
        ; updfr_off <- getUpdFrameOff
-       ; emit $ mkComment $ mkFastString ("emitCall: " ++ show sequel)
-       ; case sequel of
-           Return _            -> emit (mkForeignJump callConv fun args updfr_off)
-           AssignTo res_regs _ -> emit (mkCall fun convs res_regs args updfr_off)
-       }
+       ; emitComment $ mkFastString ("emitCallWithExtraStack: " ++ show sequel)
+       ; case sequel of
+           Return _ ->
+             emit $ mkForeignJumpExtra callConv fun args updfr_off extra_stack
+           AssignTo res_regs _ -> do
+             emit =<< mkCall fun convs res_regs args updfr_off extra_stack
+       }
+
 
 adjustHpBackwards :: FCode ()
 -- This function adjusts and heap pointers just before a tail call or
@@ -127,59 +152,137 @@ adjustHpBackwards
 --        Making calls: directCall and slowCall
 -------------------------------------------------------------------------
 
+-- General plan is:
+--   - we'll make *one* fast call, either to the function itself
+--     (directCall) or to stg_ap_<pat>_fast (slowCall)
+--     Any left-over arguments will be pushed on the stack,
+--
+--       e.g. Sp[old+8]  = arg1
+--            Sp[old+16] = arg2
+--            Sp[old+32] = stg_ap_pp_info
+--            R2 = arg3
+--            R3 = arg4
+--            call f() return to Nothing updfr_off: 32
+
+
 directCall :: CLabel -> RepArity -> [StgArg] -> FCode ()
 -- (directCall f n args)
 -- calls f(arg1, ..., argn), and applies the result to the remaining args
 -- The function f has arity n, and there are guaranteed at least n args
 -- Both arity and args include void args
 directCall lbl arity stg_args
-  = do  { cmm_args <- getNonVoidArgAmodes stg_args
-        ; direct_call "directCall" lbl arity cmm_args (argsReps stg_args) }
+  = do  { argreps <- getArgRepsAmodes stg_args
+        ; direct_call "directCall" lbl arity argreps }
+
 
 slowCall :: CmmExpr -> [StgArg] -> FCode ()
 -- (slowCall fun args) applies fun to args, returning the results to Sequel
 slowCall fun stg_args
-  = do  { cmm_args <- getNonVoidArgAmodes stg_args
-        ; slow_call fun cmm_args (argsReps stg_args) }
+  = do  { dflags <- getDynFlags
+        ; argsreps <- getArgRepsAmodes stg_args
+        ; let (rts_fun, arity) = slowCallPattern (map fst argsreps)
+        ; call <- getCode $ direct_call "slow_call"
+                              (mkRtsApFastLabel rts_fun) arity argsreps
+        ; emitComment $ mkFastString ("slow_call for " ++
+                                      showSDoc dflags (ppr fun) ++
+                                      " with pat " ++ unpackFS rts_fun)
+        ; emit (mkAssign nodeReg fun <*> call)
+        }
+
 
 --------------
-direct_call :: String -> CLabel -> RepArity -> [CmmExpr] -> [ArgRep] -> FCode ()
--- NB1: (length args) may be less than (length reps), because
---     the args exclude the void ones
--- NB2: 'arity' refers to the *reps*
-direct_call caller lbl arity args reps
-  | debugIsOn && arity > length reps    -- Too few args
+direct_call :: String -> CLabel -> RepArity -> [(ArgRep,Maybe CmmExpr)] -> FCode ()
+direct_call caller lbl arity args
+  | debugIsOn && arity > length args    -- Too few args
   = do -- Caller should ensure that there enough args!
-       pprPanic "direct_call" (text caller <+> ppr arity
-                               <+> ppr lbl <+> ppr (length reps)
-                               <+> ppr args <+> ppr reps )
-
-  | null rest_reps  -- Precisely the right number of arguments
-  = emitCall (NativeDirectCall, NativeReturn) target args
-
-  | otherwise       -- Over-saturated call
-  = ASSERT( arity == length initial_reps )
-    do  { pap_id <- newTemp gcWord
-        ; withSequel (AssignTo [pap_id] True)
-                     (emitCall (NativeDirectCall, NativeReturn) target fast_args)
-        ; slow_call (CmmReg (CmmLocal pap_id))
-                    rest_args rest_reps }
+       pprPanic "direct_call" $
+            text caller <+> ppr arity <+>
+            ppr lbl <+> ppr (length args) <+>
+            ppr (map snd args) <+> ppr (map fst args)
+
+  | null rest_args  -- Precisely the right number of arguments
+  = emitCall (NativeDirectCall, NativeReturn) target (nonVArgs args)
+
+  | otherwise       -- Note [over-saturated calls]
+  = emitCallWithExtraStack (NativeDirectCall, NativeReturn)
+      target (nonVArgs fast_args) (mkStkOffsets stack_args)
   where
     target = CmmLit (CmmLabel lbl)
-    (initial_reps, rest_reps) = splitAt arity reps
-    arg_arity = count isNonV initial_reps
-    (fast_args, rest_args) = splitAt arg_arity args
+    (fast_args, rest_args) = splitAt arity args
+    stack_args = slowArgs rest_args
 
---------------
-slow_call :: CmmExpr -> [CmmExpr] -> [ArgRep] -> FCode ()
-slow_call fun args reps
-  = do dflags <- getDynFlags
-       call <- getCode $ direct_call "slow_call" (mkRtsApFastLabel rts_fun) arity args reps
-       emit $ mkComment $ mkFastString ("slow_call for " ++ showSDoc dflags (ppr fun) ++
-                                        " with pat " ++ unpackFS rts_fun)
-       emit (mkAssign nodeReg fun <*> call)
+
+-- When constructing calls, it is easier to keep the ArgReps and the
+-- CmmExprs zipped together.  However, a void argument has no
+-- representation, so we need to use Maybe CmmExpr (the alternative of
+-- using zeroCLit or even undefined would work, but would be ugly).
+--
+getArgRepsAmodes :: [StgArg] -> FCode [(ArgRep, Maybe CmmExpr)]
+getArgRepsAmodes = mapM getArgRepAmode
+  where getArgRepAmode arg
+          | V <- rep  = return (V, Nothing)
+          | otherwise = do expr <- getArgAmode (NonVoid arg)
+                           return (rep, Just expr)
+          where rep = toArgRep (argPrimRep arg)
+
+nonVArgs :: [(ArgRep, Maybe CmmExpr)] -> [CmmExpr]
+nonVArgs [] = []
+nonVArgs ((_,Nothing)  : args) = nonVArgs args
+nonVArgs ((_,Just arg) : args) = arg : nonVArgs args
+
+{-
+Note [over-saturated calls]
+
+The natural thing to do for an over-saturated call would be to call
+the function with the correct number of arguments, and then apply the
+remaining arguments to the value returned, e.g.
+
+  f a b c d   (where f has arity 2)
+  -->
+  r = call f(a,b)
+  call r(c,d)
+
+but this entails
+  - saving c and d on the stack
+  - making a continuation info table
+  - at the continuation, loading c and d off the stack into regs
+  - finally, call r
+
+Note that since there are a fixed number of different r's
+(e.g. stg_ap_pp_fast), we can also pre-compile continuations
+that correspond to each of them, rather than generating a fresh
+one for each over-saturated call.
+
+Not only does this generate much less code, it is faster too.  We will
+generate something like:
+
+Sp[old+16] = c
+Sp[old+24] = d
+Sp[old+32] = stg_ap_pp_info
+call f(a,b) -- usual calling convention
+
+For the purposes of the CmmCall node, we count this extra stack as
+just more arguments that we are passing on the stack (cml_args).
+-}
+
+-- | 'slowArgs' takes a list of function arguments and prepares them for
+-- pushing on the stack for "extra" arguments to a function which requires
+-- fewer arguments than we currently have.
+slowArgs :: [(ArgRep, Maybe CmmExpr)] -> [(ArgRep, Maybe CmmExpr)]
+slowArgs [] = []
+slowArgs args -- careful: reps contains voids (V), but args does not
+  | opt_SccProfilingOn = save_cccs ++ this_pat ++ slowArgs rest_args
+  | otherwise          = this_pat ++ slowArgs rest_args
   where
-    (rts_fun, arity) = slowCallPattern reps
+    (arg_pat, n)           = slowCallPattern (map fst args)
+    (call_args, rest_args) = splitAt n args
+
+    stg_ap_pat = mkCmmRetInfoLabel rtsPackageId arg_pat
+    this_pat   = (N, Just (mkLblExpr stg_ap_pat)) : call_args
+    save_cccs  = [(N, Just (mkLblExpr save_cccs_lbl)), (N, Just curCCS)]
+    save_cccs_lbl = mkCmmRetInfoLabel rtsPackageId (fsLit "stg_restore_cccs")
+
+
 
 -- These cases were found to cover about 99% of all slow calls:
 slowCallPattern :: [ArgRep] -> (FastString, RepArity)
@@ -202,6 +305,30 @@ slowCallPattern [] = (fsLit "stg_ap_0", 0)
 
 
 -------------------------------------------------------------------------
+--      Fix the byte-offsets of a bunch of things to push on the stack
+
+-- This is used for pushing slow-call continuations.
+-- See Note [over-saturated calls].
+
+mkStkOffsets
+  :: [(ArgRep, Maybe CmmExpr)]    -- things to make offsets for
+  -> ( ByteOff                    -- OUTPUTS: Topmost allocated word
+     , [(CmmExpr, ByteOff)] )     -- things with offsets (voids filtered out)
+mkStkOffsets things
+    = loop 0 [] (reverse things)
+  where
+    loop offset offs [] = (offset,offs)
+    loop offset offs ((_,Nothing):things) = loop offset offs things
+        -- ignore Void arguments
+    loop offset offs ((rep,Just thing):things)
+        = loop thing_off ((thing, thing_off):offs) things
+      where
+        thing_off = offset + argRepSizeW rep * wORD_SIZE
+        -- offset of thing is offset+size, because we're
+        -- growing the stack *downwards* as the offsets increase.
+
+
+-------------------------------------------------------------------------
 --      Classifying arguments: ArgRep
 -------------------------------------------------------------------------
@@ -237,10 +364,7 @@ isNonV :: ArgRep -> Bool
 isNonV V = False
 isNonV _ = True
 
-argsReps :: [StgArg] -> [ArgRep]
-argsReps = map (toArgRep . argPrimRep)
-
-argRepSizeW :: ArgRep -> WordOff -- Size in words
+argRepSizeW :: ArgRep -> WordOff -- Size in words
 argRepSizeW N = 1
 argRepSizeW P = 1
 argRepSizeW F = 1
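
To make the over-saturated call plan above easier to follow, here is a small, self-contained Haskell sketch of the offset arithmetic that mkStkOffsets performs. This is not GHC code: ArgKind, wordSize, sizeW and layoutExtraStack are invented stand-ins for ArgRep, wORD_SIZE, argRepSizeW and mkStkOffsets, and a 64-bit word size is assumed.

-- Simplified model (illustration only, not the GHC API) of how the
-- extra-stack arguments of an over-saturated call get their byte offsets.
module ExtraStackSketch where

-- Hypothetical stand-in for ArgRep: pointer, word-sized non-pointer, void.
data ArgKind = P' | N' | V'
  deriving Show

-- Assuming a 64-bit target: one word is 8 bytes.
wordSize :: Int
wordSize = 8

sizeW :: ArgKind -> Int
sizeW V' = 0
sizeW _  = 1

-- Mirror of mkStkOffsets: walk the things to push in reverse, skip the
-- voids, and give each remaining thing the offset just past its slot,
-- because the stack grows downwards as the offsets increase.
layoutExtraStack :: [(ArgKind, Maybe String)] -> (Int, [(String, Int)])
layoutExtraStack things = go 0 [] (reverse things)
  where
    go off acc []                       = (off, acc)
    go off acc ((_, Nothing) : rest)    = go off acc rest
    go off acc ((k, Just thing) : rest) = go off' ((thing, off') : acc) rest
      where off' = off + sizeW k * wordSize

-- The over-saturated call from Note [over-saturated calls]:
--   f a b c d   (f has arity 2)
-- Ignoring the profiling CCS frame, slowArgs pushes the stg_ap_pp return
-- info pointer followed by the leftover arguments c and d.
example :: (Int, [(String, Int)])
example = layoutExtraStack
            [ (N', Just "stg_ap_pp_info")
            , (P', Just "c")
            , (P', Just "d") ]
-- ==> (24, [("stg_ap_pp_info",24), ("c",16), ("d",8)])

The first component is the topmost allocated byte offset; the second gives each pushed thing's offset relative to the old stack pointer, with voids filtered out. The real code then hands these offsets, together with the register arguments, to emitCallWithExtraStack.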