path: root/compiler/codeGen/StgCmmLayout.hs
author    Simon Marlow <marlowsd@gmail.com>  2012-02-03 15:03:06 +0000
committer Simon Marlow <marlowsd@gmail.com>  2012-02-08 15:50:43 +0000
commit    76999b605423f530ec17562d772eda1c1672db53 (patch)
tree      3ca1208d02ca753ffe07e13fe1bdbc1388d7e9cf /compiler/codeGen/StgCmmLayout.hs
parent    cd38928495f9186646273432151259f3d654b7e2 (diff)
New stack layout algorithm
Also:

- improvements to code generation: push slow-call continuations on the
  stack instead of generating explicit continuations
- remove unused CmmInfo wrapper type (replace with CmmInfoTable)
- squash Area and AreaId together, remove now-unused RegSlot
- comment out old unused stack-allocation code that no longer compiles
  after removal of RegSlot
Diffstat (limited to 'compiler/codeGen/StgCmmLayout.hs')
-rw-r--r--  compiler/codeGen/StgCmmLayout.hs | 164
1 file changed, 140 insertions(+), 24 deletions(-)
diff --git a/compiler/codeGen/StgCmmLayout.hs b/compiler/codeGen/StgCmmLayout.hs
index 0299bc0f96..9ee9192794 100644
--- a/compiler/codeGen/StgCmmLayout.hs
+++ b/compiler/codeGen/StgCmmLayout.hs
@@ -15,7 +15,7 @@
module StgCmmLayout (
        mkArgDescr,
-       emitCall, emitReturn,
+       emitCall, emitReturn, adjustHpBackwards,
        emitClosureProcAndInfoTable,
        emitClosureAndInfoTable,
@@ -41,10 +41,12 @@ import StgCmmEnv
import StgCmmTicky
import StgCmmMonad
import StgCmmUtils
+import StgCmmProf
import MkGraph
import SMRep
import Cmm
+import CmmUtils
import CLabel
import StgSyn
import Id
@@ -53,6 +55,7 @@ import TyCon ( PrimRep(..) )
import BasicTypes ( Arity )
import DynFlags
import StaticFlags
+import Module
import Constants
import Util
@@ -93,10 +96,31 @@ emitCall convs@(callConv, _) fun args
       ; updfr_off <- getUpdFrameOff
       ; emitComment $ mkFastString ("emitCall: " ++ show sequel)
       ; case sequel of
-          Return _ -> emit (mkForeignJump callConv fun args updfr_off)
-          AssignTo res_regs _ -> emit =<< mkCall fun convs res_regs args updfr_off
+          Return _ ->
+            emit $ mkForeignJump callConv fun args updfr_off
+          AssignTo res_regs _ ->
+            emit =<< mkCall fun convs res_regs args updfr_off (0,[])
       }
+emitCallWithExtraStack
+  :: (Convention, Convention) -> CmmExpr -> [CmmExpr]
+  -> (ByteOff, [(CmmExpr, ByteOff)]) -> FCode ()
+-- (emitCallWithExtraStack convs fun args stk) makes a call to the
+-- entry-code of 'fun', passing 'args' and pushing the extra stack
+-- material 'stk', and returns the results to the current sequel
+emitCallWithExtraStack convs@(callConv, _) fun args extra_stack
+  = do { adjustHpBackwards
+       ; sequel <- getSequel
+       ; updfr_off <- getUpdFrameOff
+       ; emitComment $ mkFastString ("emitCallWithExtraStack: " ++ show sequel)
+       ; case sequel of
+           Return _ ->
+             emit $ mkForeignJumpExtra callConv fun args updfr_off extra_stack
+           AssignTo res_regs _ ->
+             emit =<< mkCall fun convs res_regs args updfr_off extra_stack
+       }
+
+
+
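
Both emitters dispatch on the current sequel: a Return sequel turns into a tail jump with no continuation, while AssignTo turns into a call whose results are assigned to the given registers; emitCall is simply the case with no extra stack, passing (0,[]). A minimal standalone sketch of that dispatch, with stand-in types (Sequel, Stmt and String-named registers here are illustrative; the real code works over CmmExpr and LocalReg inside FCode):

    type ExtraStk = (Int, [(String, Int)])   -- (ByteOff, [(expr, ByteOff)])

    data Sequel
      = Return               -- tail position: jump, no continuation pushed
      | AssignTo [String]    -- call, assigning results to these registers
      deriving Show

    data Stmt
      = Jump String [String] ExtraStk            -- cf. mkForeignJumpExtra
      | Call [String] String [String] ExtraStk   -- cf. mkCall
      deriving Show

    emitCallModel :: Sequel -> String -> [String] -> ExtraStk -> Stmt
    emitCallModel Return          fun args stk = Jump fun args stk
    emitCallModel (AssignTo regs) fun args stk = Call regs fun args stk

    -- emitCall above is then the special case with no extra stack:
    --   emitCallModel sequel fun args (0, [])
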
adjustHpBackwards :: FCode ()
-- This function adjusts the heap pointer just before a tail call or
-- return. At a call or return, the virtual heap pointer may be less
@@ -128,6 +152,19 @@ adjustHpBackwards
-- Making calls: directCall and slowCall
-------------------------------------------------------------------------
+-- General plan is:
+--   - we'll make *one* fast call, either to the function itself
+--     (directCall) or to stg_ap_<pat>_fast (slowCall)
+--   - any left-over arguments will be pushed on the stack,
+--
+--     e.g. Sp[old+8]  = arg1
+--          Sp[old+16] = arg2
+--          Sp[old+32] = stg_ap_pp_info
+--          R2 = arg3
+--          R3 = arg4
+--          call f() return to Nothing updfr_off: 32
+
+
directCall :: CLabel -> Arity -> [StgArg] -> FCode ()
-- (directCall f n args)
-- calls f(arg1, ..., argn), and applies the result to the remaining args
@@ -140,8 +177,19 @@ directCall lbl arity stg_args
slowCall :: CmmExpr -> [StgArg] -> FCode ()
-- (slowCall fun args) applies fun to args, returning the results to Sequel
slowCall fun stg_args
-  = do { cmm_args <- getNonVoidArgAmodes stg_args
-       ; slow_call fun cmm_args (argsReps stg_args) }
+  = do { dflags <- getDynFlags
+       ; cmm_args <- getNonVoidArgAmodes stg_args
+       ; let platform = targetPlatform dflags
+       ; call <- getCode $ direct_call "slow_call"
+                     (mkRtsApFastLabel rts_fun) arity cmm_args reps
+       ; emitComment $ mkFastString ("slow_call for " ++
+             showSDoc (pprPlatform platform fun) ++
+             " with pat " ++ showSDoc (ftext rts_fun))
+       ; emit (mkAssign nodeReg fun <*> call)
+       }
+  where
+    reps = argsReps stg_args
+    (rts_fun, arity) = slowCallPattern reps
--------------
direct_call :: String -> CLabel -> Arity -> [CmmExpr] -> [ArgRep] -> FCode ()
@@ -149,7 +197,7 @@ direct_call :: String -> CLabel -> Arity -> [CmmExpr] -> [ArgRep] -> FCode ()
-- the args exclude the void ones
-- NB2: 'arity' refers to the *reps*
direct_call caller lbl arity args reps
-  | debugIsOn && arity > length reps -- Too few args
+  | debugIsOn && arity > length reps    -- Too few args
    = do -- Caller should ensure that there are enough args!
         dflags <- getDynFlags
         let platform = targetPlatform dflags
@@ -157,33 +205,77 @@ direct_call caller lbl arity args reps
             <+> pprPlatform platform lbl <+> ppr (length reps)
             <+> pprPlatform platform args <+> ppr reps )
-  | null rest_reps -- Precisely the right number of arguments
+
+  | null rest_reps      -- Precisely the right number of arguments
    = emitCall (NativeDirectCall, NativeReturn) target args
-  | otherwise -- Over-saturated call
+  | otherwise           -- Note [over-saturated calls]
    = ASSERT( arity == length initial_reps )
-      do { pap_id <- newTemp gcWord
-         ; withSequel (AssignTo [pap_id] True)
-                      (emitCall (NativeDirectCall, NativeReturn) target fast_args)
-         ; slow_call (CmmReg (CmmLocal pap_id))
-                     rest_args rest_reps }
+      emitCallWithExtraStack (NativeDirectCall, NativeReturn)
+          target fast_args (mkStkOffsets stack_args)
  where
    target = CmmLit (CmmLabel lbl)
    (initial_reps, rest_reps) = splitAt arity reps
    arg_arity = count isNonV initial_reps
    (fast_args, rest_args) = splitAt arg_arity args
-
---------------
-slow_call :: CmmExpr -> [CmmExpr] -> [ArgRep] -> FCode ()
-slow_call fun args reps
-  = do dflags <- getDynFlags
-       let platform = targetPlatform dflags
-       call <- getCode $ direct_call "slow_call" (mkRtsApFastLabel rts_fun) arity args reps
-       emitComment $ mkFastString ("slow_call for " ++ showSDoc (pprPlatform platform fun) ++
-                                   " with pat " ++ showSDoc (ftext rts_fun))
-       emit (mkAssign nodeReg fun <*> call)
+    stack_args = slowArgs (zip rest_reps rest_args)
+
+
+{-
+Note [over-saturated calls]
+
+The natural thing to do for an over-saturated call would be to call
+the function with the correct number of arguments, and then apply the
+remaining arguments to the value returned, e.g.
+
+ f a b c d (where f has arity 2)
+ -->
+ r = call f(a,b)
+ call r(c,d)
+
+but this entails
+ - saving c and d on the stack
+ - making a continuation info table
+ - at the continuation, loading c and d off the stack into regs
+ - finally, call r
+
+Note that since there are a fixed number of different r's
+(e.g. stg_ap_pp_fast), we can also pre-compile continuations
+that correspond to each of them, rather than generating a fresh
+one for each over-saturated call.
+
+Not only does this generate much less code, it is faster too. We will
+generate something like:
+
+Sp[old+16] = c
+Sp[old+24] = d
+Sp[old+32] = stg_ap_pp_info
+call f(a,b) -- usual calling convention
+
+For the purposes of the CmmCall node, we count this extra stack as
+just more arguments that we are passing on the stack (cml_args).
+-}
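
A standalone sketch of the split that direct_call performs for such a call may help; Rep and the helper names below are illustrative stand-ins for GHC's ArgRep machinery, not the real types:

    data Rep = Ptr | NonPtr | Void deriving (Eq, Show)

    -- Split an over-saturated application: reps are split at the function's
    -- arity; void reps carry no payload, so the argument list is split at
    -- the number of non-void reps in the initial group.
    splitCall :: Int -> [Rep] -> [a] -> (([a], [a]), [Rep])
    splitCall arity reps args = ((fastArgs, restArgs), restReps)
      where
        (initialReps, restReps) = splitAt arity reps
        argArity                = length (filter (/= Void) initialReps)
        (fastArgs, restArgs)    = splitAt argArity args

    -- e.g. splitCall 2 [Ptr,Ptr,Ptr,Ptr] ["a","b","c","d"]
    --        == ((["a","b"], ["c","d"]), [Ptr,Ptr])
    -- call f(a,b); c and d, plus stg_ap_pp_info, become the extra stack
    -- material laid out by mkStkOffsets below.
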
+
+-- | 'slowArgs' takes a list of function arguments and prepares them for
+-- pushing on the stack as the "extra" arguments of a call to a function
+-- that requires fewer arguments than we currently have.
+slowArgs :: [(ArgRep, CmmExpr)] -> [(ArgRep, CmmExpr)]
+slowArgs [] = []
+slowArgs amodes
+  | opt_SccProfilingOn = save_cccs ++ this_pat ++ slowArgs rest
+  | otherwise          = this_pat ++ slowArgs rest
  where
-    (rts_fun, arity) = slowCallPattern reps
+    (arg_pat, args, rest) = matchSlowPattern amodes
+    stg_ap_pat    = mkCmmRetInfoLabel rtsPackageId arg_pat
+    this_pat      = (N, mkLblExpr stg_ap_pat) : args
+    save_cccs     = [(N, mkLblExpr save_cccs_lbl), (N, curCCS)]
+    save_cccs_lbl = mkCmmRetInfoLabel rtsPackageId (fsLit "stg_restore_cccs")
+
+matchSlowPattern :: [(ArgRep, CmmExpr)]
+                 -> (FastString, [(ArgRep, CmmExpr)], [(ArgRep, CmmExpr)])
+matchSlowPattern amodes = (arg_pat, these, rest)
+  where (arg_pat, n)  = slowCallPattern (map fst amodes)
+        (these, rest) = splitAt n amodes
-- These cases were found to cover about 99% of all slow calls:
slowCallPattern :: [ArgRep] -> (FastString, Arity)
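
How slowArgs, matchSlowPattern and slowCallPattern fit together can be seen in a small standalone model. The pattern table here is only an illustrative fragment (stg_ap_pp and stg_ap_0 appear elsewhere in this patch; the remaining entries are assumptions), not the real ~99%-coverage table:

    data Rep = Ptr | NonPtr | Void deriving (Eq, Show)
    type Arg = (Rep, String)

    -- illustrative fragment of the pattern table
    slowCallPatternModel :: [Rep] -> (String, Int)
    slowCallPatternModel (Ptr:Ptr:_) = ("stg_ap_pp", 2)
    slowCallPatternModel (Ptr:_)     = ("stg_ap_p",  1)
    slowCallPatternModel (NonPtr:_)  = ("stg_ap_n",  1)
    slowCallPatternModel (Void:_)    = ("stg_ap_v",  1)
    slowCallPatternModel []          = ("stg_ap_0",  0)

    -- prefix each matched group with its stg_ap_<pat> info-table pointer
    -- (profiling off; with -prof, stg_restore_cccs frames are interleaved)
    slowArgsModel :: [Arg] -> [Arg]
    slowArgsModel [] = []
    slowArgsModel amodes = (NonPtr, pat ++ "_info") : these ++ slowArgsModel rest
      where
        (pat, n)      = slowCallPatternModel (map fst amodes)
        (these, rest) = splitAt n amodes

    -- e.g. slowArgsModel [(Ptr,"c"), (Ptr,"d")]
    --        == [(NonPtr,"stg_ap_pp_info"), (Ptr,"c"), (Ptr,"d")]
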
@@ -206,6 +298,30 @@ slowCallPattern [] = (fsLit "stg_ap_0", 0)
-------------------------------------------------------------------------
+-- Fix the byte-offsets of a bunch of things to push on the stack
+
+-- This is used for pushing slow-call continuations.
+-- See Note [over-saturated calls].
+
+mkStkOffsets
+  :: [(ArgRep, CmmExpr)]        -- things to make offsets for
+  -> ( ByteOff                  -- OUTPUTS: Topmost allocated word
+     , [(CmmExpr, ByteOff)] )   -- things with offsets (voids filtered out)
+mkStkOffsets things
+  = loop 0 [] (reverse things)
+  where
+    loop offset offs [] = (offset, offs)
+    loop offset offs ((V, _) : things) = loop offset offs things
+        -- ignore Void arguments
+    loop offset offs ((rep, thing) : things)
+      = loop thing_off ((thing, thing_off) : offs) things
+      where
+        thing_off = offset + argRepSizeW rep * wORD_SIZE
+        -- offset of thing is offset+size, because we're
+        -- growing the stack *downwards* as the offsets increase.
+
+
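
Tracing the loop above on the arguments from Note [over-saturated calls] makes the downward growth concrete. A standalone transcription with stand-in types, assuming an 8-byte word and one-word non-void reps (not GHC's ArgRep/wORD_SIZE):

    data Rep = Ptr | NonPtr | Void deriving (Eq, Show)

    wordSize :: Int
    wordSize = 8          -- assumption: 64-bit target (wORD_SIZE)

    repSizeW :: Rep -> Int
    repSizeW Void = 0     -- cf. argRepSizeW: voids take no space
    repSizeW _    = 1     -- one word for the reps modelled here

    mkStkOffsetsModel :: [(Rep, a)] -> (Int, [(a, Int)])
    mkStkOffsetsModel things = go 0 [] (reverse things)
      where
        go off acc []                    = (off, acc)
        go off acc ((Void, _) : rest)    = go off acc rest    -- drop voids
        go off acc ((rep, thing) : rest) =
            go off' ((thing, off') : acc) rest
          where off' = off + repSizeW rep * wordSize

    -- e.g. mkStkOffsetsModel [(NonPtr,"stg_ap_pp_info"), (Ptr,"c"), (Ptr,"d")]
    --        == (24, [("stg_ap_pp_info",24), ("c",16), ("d",8)])
    -- the info pointer lands in the deepest slot, cf. the picture in
    -- Note [over-saturated calls].
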
+-------------------------------------------------------------------------
-- Classifying arguments: ArgRep
-------------------------------------------------------------------------