Diffstat (limited to 'compiler/codeGen')
-rw-r--r--  compiler/codeGen/StgCmmEnv.hs     |   3
-rw-r--r--  compiler/codeGen/StgCmmForeign.hs | 111
-rw-r--r--  compiler/codeGen/StgCmmLayout.hs  | 142
-rw-r--r--  compiler/codeGen/StgCmmMonad.hs   |  15
-rw-r--r--  compiler/codeGen/StgCmmPrim.hs    |  11
5 files changed, 189 insertions(+), 93 deletions(-)
diff --git a/compiler/codeGen/StgCmmEnv.hs b/compiler/codeGen/StgCmmEnv.hs
index d8a7061eec..3b56e2feb6 100644
--- a/compiler/codeGen/StgCmmEnv.hs
+++ b/compiler/codeGen/StgCmmEnv.hs
@@ -27,7 +27,7 @@ module StgCmmEnv (
         bindArgsToRegs, bindToReg, rebindToReg,
         bindArgToReg, idToReg,
-        getArgAmode, getNonVoidArgAmodes,
+        getArgAmode, getNonVoidArgAmodes, getCgIdInfo,
         maybeLetNoEscape,
     ) where
 
@@ -213,7 +213,6 @@ getNonVoidArgAmodes (arg:args)
        ; amodes <- getNonVoidArgAmodes args
        ; return ( amode : amodes ) }
 
-
 ------------------------------------------------------------------------
 --	Interface functions for binding and re-binding names
 ------------------------------------------------------------------------
diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs
index f1a522b37d..d5c9600b38 100644
--- a/compiler/codeGen/StgCmmForeign.hs
+++ b/compiler/codeGen/StgCmmForeign.hs
@@ -22,6 +22,7 @@ import StgCmmEnv
 import StgCmmMonad
 import StgCmmUtils
 import StgCmmClosure
+import StgCmmLayout
 
 import BlockId
 import Cmm
@@ -45,15 +46,16 @@ import Control.Monad
 -- Code generation for Foreign Calls
 -----------------------------------------------------------------------------
 
-cgForeignCall :: [LocalReg]      -- r1,r2  where to put the results
-              -> [ForeignHint]
-              -> ForeignCall     -- the op
+-- | emit code for a foreign call, and return the results to the sequel.
+--
+cgForeignCall :: ForeignCall     -- the op
               -> [StgArg]        -- x,y arguments
+              -> Type            -- result type
               -> FCode ()
--- Emits code for an unsafe foreign call:      r1, r2 = foo( x, y, z )
 
-cgForeignCall results result_hints (CCall (CCallSpec target cconv safety)) stg_args
+cgForeignCall (CCall (CCallSpec target cconv safety)) stg_args res_ty
   = do  { cmm_args <- getFCallArgs stg_args
+        ; (res_regs, res_hints) <- newUnboxedTupleRegs res_ty
         ; let ((call_args, arg_hints), cmm_target)
                 = case target of
                    StaticTarget lbl mPkgId
@@ -61,7 +63,7 @@ cgForeignCall results result_hints (CCall (CCallSpec target cconv safety)) stg_a
                         = case mPkgId of
                                 Nothing -> ForeignLabelInThisPackage
                                 Just pkgId -> ForeignLabelInPackage pkgId
-                       size = call_size cmm_args
+                       size        = call_size cmm_args
                     in ( unzip cmm_args
                        , CmmLit (CmmLabel
                                    (mkForeignLabel lbl size labelSource IsFunction)))
@@ -69,10 +71,31 @@ cgForeignCall results result_hints (CCall (CCallSpec target cconv safety)) stg_a
                    DynamicTarget    ->  case cmm_args of
                                            (fn,_):rest -> (unzip rest, fn)
                                            [] -> panic "cgForeignCall []"
-              fc = ForeignConvention cconv arg_hints result_hints
+              fc = ForeignConvention cconv arg_hints res_hints
               call_target = ForeignTarget cmm_target fc
 
-        ; emitForeignCall safety results call_target call_args CmmMayReturn }
+        -- we want to emit code for the call, and then emitReturn.
+        -- However, if the sequel is AssignTo, we shortcut a little
+        -- and generate a foreign call that assigns the results
+        -- directly.  Otherwise we end up generating a bunch of
+        -- useless "r = r" assignments, which are not merely annoying:
+        -- they prevent the common block elimination from working correctly
+        -- in the case of a safe foreign call.
+        -- See Note [safe foreign call convention]
+        --
+        ; sequel <- getSequel
+        ; case sequel of
+            AssignTo assign_to_these _ ->
+                do { emitForeignCall safety assign_to_these call_target
+                                     call_args CmmMayReturn
+                   }
+
+            _something_else ->
+                do { emitForeignCall safety res_regs call_target
+                                     call_args CmmMayReturn
+                   ; emitReturn (map (CmmReg . CmmLocal) res_regs)
+                   }
+        }
   where
       -- in the stdcall calling convention, the symbol needs @size appended
      -- to it, where size is the total number of bytes of arguments.  We
@@ -83,7 +106,76 @@ cgForeignCall results result_hints (CCall (CCallSpec target cconv safety)) stg_a
      | otherwise = Nothing
 
      -- ToDo: this might not be correct for 64-bit API
-     arg_size (arg, _) = max (widthInBytes $ typeWidth $ cmmExprType arg) wORD_SIZE
+     arg_size (arg, _) = max (widthInBytes $ typeWidth $ cmmExprType arg)
+                             wORD_SIZE
+
+{- Note [safe foreign call convention]
+
+The simple thing to do for a safe foreign call would be the same as an
+unsafe one: just
+
+  emitForeignCall ...
+  emitReturn ...
+
+but consider what happens in this case
+
+   case foo x y z of
+     (# s, r #) -> ...
+
+The sequel is AssignTo [r].  The call to newUnboxedTupleRegs picks [r]
+as the result reg, and we generate
+
+  r = foo(x,y,z) returns to L1  -- emitForeignCall
+ L1:
+  r = r  -- emitReturn
+  goto L2
+L2:
+  ...
+
+Now L1 is a proc point (by definition, it is the continuation of the
+safe foreign call).  If L2 does a heap check, then L2 will also be a
+proc point.
+
+Furthermore, the stack layout algorithm has to arrange to save r
+somewhere between the call and the jump to L1, which is annoying: we
+would have to treat r differently from the other live variables, which
+have to be saved *before* the call.
+
+So we adopt a special convention for safe foreign calls: the results
+are copied out according to the NativeReturn convention by the call,
+and the continuation of the call should copyIn the results.  (The
+copyOut code is actually inserted when the safe foreign call is
+lowered later).  The result regs attached to the safe foreign call are
+only used temporarily to hold the results before they are copied out.
+
+We will now generate this:
+
+  r = foo(x,y,z) returns to L1
+ L1:
+  r = R1  -- copyIn, inserted by mkSafeCall
+  goto L2
+ L2:
+  ... r ...
+
+And when the safe foreign call is lowered later (see Note [lower safe
+foreign calls]) we get this:
+
+  suspendThread()
+  r = foo(x,y,z)
+  resumeThread()
+  R1 = r  -- copyOut, inserted by lowerSafeForeignCall
+  jump L1
+ L1:
+  r = R1  -- copyIn, inserted by mkSafeCall
+  goto L2
+ L2:
+  ... r ...
+
+Now consider what happens if L2 does a heap check: the Adams
+optimisation kicks in and commons up L1 with the heap-check
+continuation, resulting in just one proc point instead of two. Yay!
+-}
+
 emitCCall :: [(CmmFormal,ForeignHint)]
           -> CmmExpr
           -> [CmmActual]
           -> FCode ()
@@ -125,6 +217,7 @@ emitForeignCall safety results target args _ret
               (playInterruptible safety)
 
+
 {-
 --	THINK ABOUT THIS (used to happen)
 -- we might need to load arguments into temporaries before
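
To make the sequel shortcut in cgForeignCall concrete, here is a small, self-contained Haskell sketch. Every name in it (Sequel, Stmt, emitFCall, oldEmitFCall) is a simplified stand-in invented for illustration, not GHC's real types or code generator; it only models why calling straight into the AssignTo registers avoids the "r = r" copies that Note [safe foreign call convention] complains about.

import Data.List (intercalate)

type Reg = String

-- toy stand-ins for GHC's Sequel and for emitted statements
data Sequel = Return | AssignTo [Reg]

data Stmt = Call [Reg] String [String]   -- rs = f(args)
          | Move Reg Reg                 -- r = r'
          | Ret [Reg]                    -- return (rs)

instance Show Stmt where
  show (Call rs f as) = intercalate ", " rs ++ " = " ++ f
                     ++ "(" ++ intercalate ", " as ++ ")"
  show (Move d s)     = d ++ " = " ++ s
  show (Ret rs)       = "return (" ++ intercalate ", " rs ++ ")"

-- new scheme: with an AssignTo sequel, call straight into the
-- destination registers; otherwise call into fresh regs and return them
emitFCall :: Sequel -> [Reg] -> String -> [String] -> [Stmt]
emitFCall (AssignTo dests) _fresh f as = [Call dests f as]
emitFCall Return            fresh f as = [Call fresh f as, Ret fresh]

-- old scheme: always call into the regs picked by newUnboxedTupleRegs
-- (which may be the AssignTo regs themselves), then copy them across,
-- producing the "r = r" noise
oldEmitFCall :: Sequel -> [Reg] -> String -> [String] -> [Stmt]
oldEmitFCall (AssignTo dests) fresh f as =
    Call fresh f as : zipWith Move dests fresh
oldEmitFCall Return fresh f as = [Call fresh f as, Ret fresh]

main :: IO ()
main = do
  putStrLn "old:" >> mapM_ print (oldEmitFCall (AssignTo ["r"]) ["r"] "foo" ["x","y","z"])
  putStrLn "new:" >> mapM_ print (emitFCall    (AssignTo ["r"]) ["r"] "foo" ["x","y","z"])

The old branch prints the redundant "r = r" after the call; the new branch never emits it, which is what keeps the safe-call continuation mergeable by common block elimination.
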
diff --git a/compiler/codeGen/StgCmmLayout.hs b/compiler/codeGen/StgCmmLayout.hs
index 9ee9192794..16b33d1faf 100644
--- a/compiler/codeGen/StgCmmLayout.hs
+++ b/compiler/codeGen/StgCmmLayout.hs
@@ -67,13 +67,17 @@ import FastString	( mkFastString, FastString, fsLit )
 --	Call and return sequences
 ------------------------------------------------------------------------
 
-emitReturn :: [CmmExpr] -> FCode ()
--- Return multiple values to the sequel
+-- | Return multiple values to the sequel
+--
+-- If the sequel is @Return@
+--
+-- >     return (x,y)
 --
--- If the sequel is Return
---	return (x,y)
--- If the sequel is AssignTo [p,q]
---	p=x; q=y;
+-- If the sequel is @AssignTo [p,q]@
+--
+-- >    p=x; q=y;
+--
+emitReturn :: [CmmExpr] -> FCode ()
 emitReturn results
   = do  { sequel    <- getSequel;
         ; updfr_off <- getUpdFrameOff
@@ -87,26 +91,24 @@ emitReturn results
            ; emitMultiAssign regs results }
     }
 
+
+-- | @emitCall conv fun args@ makes a call to the entry-code of @fun@,
+-- using the call/return convention @conv@, passing @args@, and
+-- returning the results to the current sequel.
+--
 emitCall :: (Convention, Convention) -> CmmExpr -> [CmmExpr] -> FCode ()
--- (cgCall fun args) makes a call to the entry-code of 'fun',
--- passing 'args', and returning the results to the current sequel
-emitCall convs@(callConv, _) fun args
-  = do  { adjustHpBackwards
-        ; sequel <- getSequel
-        ; updfr_off <- getUpdFrameOff
-        ; emitComment $ mkFastString ("emitCall: " ++ show sequel)
-        ; case sequel of
-            Return _ ->
-              emit $ mkForeignJump callConv fun args updfr_off
-            AssignTo res_regs _ ->
-              emit =<< mkCall fun convs res_regs args updfr_off (0,[])
-        }
+emitCall convs fun args
+  = emitCallWithExtraStack convs fun args noExtraStack
+
+
+-- | @emitCallWithExtraStack conv fun args stack@ makes a call to the
+-- entry-code of @fun@, using the call/return convention @conv@,
+-- passing @args@, pushing some extra stack frames described by
+-- @stack@, and returning the results to the current sequel.
+--
 emitCallWithExtraStack
    :: (Convention, Convention) -> CmmExpr -> [CmmExpr]
    -> (ByteOff, [(CmmExpr,ByteOff)]) -> FCode ()
--- (cgCall fun args) makes a call to the entry-code of 'fun',
--- passing 'args', and returning the results to the current sequel
 emitCallWithExtraStack convs@(callConv, _) fun args extra_stack
   = do  { adjustHpBackwards
         ; sequel <- getSequel
@@ -120,7 +122,6 @@ emitCallWithExtraStack convs@(callConv, _) fun args extra_stack
        }
 
 
-
 adjustHpBackwards :: FCode ()
 -- This function adjusts and heap pointers just before a tail call or
 -- return.  At a call or return, the virtual heap pointer may be less
@@ -171,55 +172,67 @@ directCall :: CLabel -> Arity -> [StgArg] -> FCode ()
 -- The function f has arity n, and there are guaranteed at least n args
 -- Both arity and args include void args
 directCall lbl arity stg_args
-  = do  { cmm_args <- getNonVoidArgAmodes stg_args
-        ; direct_call "directCall" lbl arity cmm_args (argsReps stg_args) }
+  = do  { argreps <- getArgRepsAmodes stg_args
+        ; direct_call "directCall" lbl arity argreps }
+
 
 slowCall :: CmmExpr -> [StgArg] -> FCode ()
 -- (slowCall fun args) applies fun to args, returning the results to Sequel
 slowCall fun stg_args
   = do  { dflags <- getDynFlags
-        ; cmm_args <- getNonVoidArgAmodes stg_args
+        ; argsreps <- getArgRepsAmodes stg_args
+        ; let (rts_fun, arity) = slowCallPattern (map fst argsreps)
        ; let platform = targetPlatform dflags
        ; call <- getCode $ direct_call "slow_call"
-                       (mkRtsApFastLabel rts_fun) arity cmm_args reps
+                       (mkRtsApFastLabel rts_fun) arity argsreps
        ; emitComment $ mkFastString ("slow_call for " ++
                                      showSDoc (pprPlatform platform fun) ++
                                      " with pat " ++ showSDoc (ftext rts_fun))
        ; emit (mkAssign nodeReg fun <*> call) }
-  where
-    reps = argsReps stg_args
-    (rts_fun, arity) = slowCallPattern reps
+
 
 --------------
-direct_call :: String -> CLabel -> Arity -> [CmmExpr] -> [ArgRep] -> FCode ()
--- NB1: (length args) may be less than (length reps), because
---     the args exclude the void ones
--- NB2: 'arity' refers to the *reps*
-direct_call caller lbl arity args reps
-  | debugIsOn && arity > length reps  -- Too few args
+direct_call :: String -> CLabel -> Arity -> [(ArgRep,Maybe CmmExpr)] -> FCode ()
+direct_call caller lbl arity args
+  | debugIsOn && arity > length args  -- Too few args
  = do -- Caller should ensure that there enough args!
       dflags <- getDynFlags
       let platform = targetPlatform dflags
-       pprPanic "direct_call" (text caller <+> ppr arity
-                          <+> pprPlatform platform lbl <+> ppr (length reps)
-                          <+> pprPlatform platform args <+> ppr reps )
+       pprPanic "direct_call" $
+            text caller <+> ppr arity <+>
+            pprPlatform platform lbl <+> ppr (length args) <+>
+            pprPlatform platform (map snd args) <+> ppr (map fst args)
 
-
-  | null rest_reps     -- Precisely the right number of arguments
-  = emitCall (NativeDirectCall, NativeReturn) target args
+  | null rest_args  -- Precisely the right number of arguments
+  = emitCall (NativeDirectCall, NativeReturn) target (nonVArgs args)
 
   | otherwise		-- Note [over-saturated calls]
-  = ASSERT( arity == length initial_reps )
-    emitCallWithExtraStack (NativeDirectCall, NativeReturn)
-                           target fast_args (mkStkOffsets stack_args)
+  = emitCallWithExtraStack (NativeDirectCall, NativeReturn)
+                           target (nonVArgs fast_args) (mkStkOffsets stack_args)
  where
    target = CmmLit (CmmLabel lbl)
-    (initial_reps, rest_reps) = splitAt arity reps
-    arg_arity = count isNonV initial_reps
-    (fast_args, rest_args) = splitAt arg_arity args
-    stack_args = slowArgs (zip rest_reps rest_args)
+    (fast_args, rest_args) = splitAt arity args
+    stack_args = slowArgs rest_args
+
+
+-- When constructing calls, it is easier to keep the ArgReps and the
+-- CmmExprs zipped together.  However, a void argument has no
+-- representation, so we need to use Maybe CmmExpr (the alternative of
+-- using zeroCLit or even undefined would work, but would be ugly).
+--
+getArgRepsAmodes :: [StgArg] -> FCode [(ArgRep, Maybe CmmExpr)]
+getArgRepsAmodes = mapM getArgRepAmode
+  where getArgRepAmode arg
+           | V <- rep  = return (V, Nothing)
+           | otherwise = do expr <- getArgAmode (NonVoid arg)
+                            return (rep, Just expr)
+           where rep = toArgRep (argPrimRep arg)
+
+nonVArgs :: [(ArgRep, Maybe CmmExpr)] -> [CmmExpr]
+nonVArgs [] = []
+nonVArgs ((_,Nothing)  : args) = nonVArgs args
+nonVArgs ((_,Just arg) : args) = arg : nonVArgs args
 
 {- Note [over-saturated calls]
 
@@ -259,23 +272,21 @@ just more arguments that we are passing on the stack (cml_args).
 
 -- | 'slowArgs' takes a list of function arguments and prepares them for
 -- pushing on the stack for "extra" arguments to a function which requires
 -- fewer arguments than we currently have.
-slowArgs :: [(ArgRep,CmmExpr)] -> [(ArgRep,CmmExpr)]
+slowArgs :: [(ArgRep, Maybe CmmExpr)] -> [(ArgRep, Maybe CmmExpr)]
 slowArgs [] = []
-slowArgs amodes
-  | opt_SccProfilingOn = save_cccs ++ this_pat ++ slowArgs rest
-  | otherwise          = this_pat ++ slowArgs rest
+slowArgs args -- careful: reps contains voids (V), but args does not
+  | opt_SccProfilingOn = save_cccs ++ this_pat ++ slowArgs rest_args
+  | otherwise          = this_pat ++ slowArgs rest_args
  where
-    (arg_pat, args, rest) = matchSlowPattern amodes
+    (arg_pat, n)           = slowCallPattern (map fst args)
+    (call_args, rest_args) = splitAt n args
+
    stg_ap_pat = mkCmmRetInfoLabel rtsPackageId arg_pat
-    this_pat   = (N, mkLblExpr stg_ap_pat) : args
-    save_cccs  = [(N, mkLblExpr save_cccs_lbl), (N, curCCS)]
+    this_pat   = (N, Just (mkLblExpr stg_ap_pat)) : call_args
+    save_cccs  = [(N, Just (mkLblExpr save_cccs_lbl)), (N, Just curCCS)]
    save_cccs_lbl = mkCmmRetInfoLabel rtsPackageId (fsLit "stg_restore_cccs")
 
-matchSlowPattern :: [(ArgRep,CmmExpr)]
-                 -> (FastString, [(ArgRep,CmmExpr)], [(ArgRep,CmmExpr)])
-matchSlowPattern amodes = (arg_pat, these, rest)
-  where (arg_pat, n)  = slowCallPattern (map fst amodes)
-        (these, rest) = splitAt n amodes
+
 
 -- These cases were found to cover about 99% of all slow calls:
 slowCallPattern :: [ArgRep] -> (FastString, Arity)
@@ -304,16 +315,16 @@ slowCallPattern []            = (fsLit "stg_ap_0", 0)
 -- See Note [over-saturated calls].
 
 mkStkOffsets
-  :: [(ArgRep,CmmExpr)]         -- things to make offsets for
+  :: [(ArgRep, Maybe CmmExpr)]  -- things to make offsets for
  -> ( ByteOff                  -- OUTPUTS: Topmost allocated word
     , [(CmmExpr, ByteOff)] )   -- things with offsets (voids filtered out)
 mkStkOffsets things
    = loop 0 [] (reverse things)
  where
    loop offset offs [] = (offset,offs)
-    loop offset offs ((V,_):things) = loop offset offs things
+    loop offset offs ((_,Nothing):things) = loop offset offs things
        -- ignore Void arguments
-    loop offset offs ((rep,thing):things)
+    loop offset offs ((rep,Just thing):things)
       = loop thing_off ((thing, thing_off):offs) things
     where
       thing_off = offset + argRepSizeW rep * wORD_SIZE
@@ -357,10 +368,7 @@ isNonV :: ArgRep -> Bool
 isNonV V = False
 isNonV _ = True
 
-argsReps :: [StgArg] -> [ArgRep]
-argsReps = map (toArgRep . argPrimRep)
-
-argRepSizeW :: ArgRep -> WordOff        -- Size in words
+argRepSizeW :: ArgRep -> WordOff        -- Size in words
 argRepSizeW N = 1
 argRepSizeW P = 1
 argRepSizeW F = 1
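
Since the (ArgRep, Maybe CmmExpr) pairing is the heart of the StgCmmLayout rework above, a tiny runnable model may help. The definitions below are toy assumptions for illustration (ArgRep as a bare enum, expressions as strings, a fixed 8-byte word), not GHC's real ones; only the shapes of nonVArgs and the mkStkOffsets loop mirror the patch.

data ArgRep = P | N | F | D | L | V deriving Show

wORD_SIZE :: Int
wORD_SIZE = 8                  -- assumption: a 64-bit target

argRepSizeW :: ArgRep -> Int   -- size in words
argRepSizeW V = 0
argRepSizeW _ = 1              -- on 64-bit, D and L also take one word

type Exp = String              -- stand-in for CmmExpr

-- voids keep their rep but carry no expression, as in the patch
nonVArgs :: [(ArgRep, Maybe Exp)] -> [Exp]
nonVArgs []                     = []
nonVArgs ((_, Nothing)  : args) = nonVArgs args
nonVArgs ((_, Just arg) : args) = arg : nonVArgs args

-- mirror of mkStkOffsets' loop: void entries consume no stack space
mkStkOffsets :: [(ArgRep, Maybe Exp)] -> (Int, [(Exp, Int)])
mkStkOffsets things = loop 0 [] (reverse things)
  where
    loop offset offs []                        = (offset, offs)
    loop offset offs ((_,   Nothing)    : ts)  = loop offset offs ts
    loop offset offs ((rep, Just thing) : ts)  =
        loop thing_off ((thing, thing_off) : offs) ts
      where thing_off = offset + argRepSizeW rep * wORD_SIZE

main :: IO ()
main = do
  let args = [(P, Just "fun"), (V, Nothing), (N, Just "n"), (D, Just "d")]
  print (nonVArgs args)      -- ["fun","n","d"]
  print (mkStkOffsets args)  -- (24,[("fun",24),("n",16),("d",8)])

Note how the void entry contributes nothing to the offsets, yet no separate reps list is needed to skip it: the Nothing in the pair carries that information.
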
diff --git a/compiler/codeGen/StgCmmMonad.hs b/compiler/codeGen/StgCmmMonad.hs
index ccf0777906..240469c3f2 100644
--- a/compiler/codeGen/StgCmmMonad.hs
+++ b/compiler/codeGen/StgCmmMonad.hs
@@ -77,9 +77,6 @@ import Unique
 import UniqSupply
 import FastString
 import Outputable
-import Util
-
-import Compiler.Hoopl hiding (Unique, (<*>), mkLabel, mkBranch, mkLast)
 
 import Control.Monad
 import Data.List
@@ -614,7 +611,7 @@ emitComment :: FastString -> FCode ()
 #if 0 /* def DEBUG */
 emitComment s = emitCgStmt (CgStmt (CmmComment s))
 #else
-emitComment s = return ()
+emitComment _ = return ()
 #endif
 
 emitAssign :: CmmReg -> CmmExpr -> FCode ()
@@ -707,12 +704,16 @@ mkSafeCall :: ForeignTarget -> [CmmFormal] -> [CmmActual]
            -> FCode CmmAGraph
 mkSafeCall   t fs as upd i = do
   k <- newLabelC
+  let (_off, copyout) = copyInOflow NativeReturn (Young k) fs
+    -- see Note [safe foreign call convention]
   return
      (    mkStore (CmmStackSlot (Young k) (widthInBytes wordWidth))
                   (CmmLit (CmmBlock k))
-      <*> mkLast (CmmForeignCall {tgt=t, res=fs, args=as, succ=k, updfr=upd, intrbl=i})
-      <*> mkLabel k)
-
+      <*> mkLast (CmmForeignCall { tgt=t, res=fs, args=as, succ=k
+                                 , updfr=upd, intrbl=i })
+      <*> mkLabel k
+      <*> copyout
+     )
 
 -- ----------------------------------------------------------------------------
 -- CgStmts
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
index c95b1f02ff..9f87271fba 100644
--- a/compiler/codeGen/StgCmmPrim.hs
+++ b/compiler/codeGen/StgCmmPrim.hs
@@ -68,14 +68,9 @@ cgOpApp :: StgOp            -- The op
 
 -- Foreign calls
 cgOpApp (StgFCallOp fcall _) stg_args res_ty
-  = do  { (res_regs, res_hints) <- newUnboxedTupleRegs res_ty
-                -- Choose result regs r1, r2
-                -- Note [Foreign call results]
-        ; cgForeignCall res_regs res_hints fcall stg_args
-                -- r1, r2 = foo( x, y )
-        ; emitReturn (map (CmmReg . CmmLocal) res_regs) }
-                -- return (r1, r2)
-
+  = cgForeignCall fcall stg_args res_ty
+        -- Note [Foreign call results]
+
 -- tagToEnum# is special: we need to pull the constructor
 -- out of the table, and perform an appropriate return.
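
As a closing illustration of the mkSafeCall change: the graph it builds now ends with a copy-in of the results at the continuation label. GNode and mkSafeCallShape below are invented toy stand-ins, not GHC's Cmm types; the sketch just prints the block shape described in Note [safe foreign call convention].

-- invented toy types, not GHC's Cmm
data GNode
  = StoreRetAddr String                      -- mkStore of (CmmBlock k)
  | SafeForeignCall String [String] String   -- target, result regs, succ
  | Label String                             -- continuation block label
  | CopyIn [String]                          -- copyInOflow NativeReturn
  deriving Show

-- the block shape mkSafeCall now emits; the trailing CopyIn is the
-- part this patch adds
mkSafeCallShape :: String -> [String] -> String -> [GNode]
mkSafeCallShape tgt res k =
  [ StoreRetAddr k
  , SafeForeignCall tgt res k
  , Label k
  , CopyIn res
  ]

main :: IO ()
main = mapM_ print (mkSafeCallShape "foo" ["r"] "L1")

The final CopyIn is the new piece: because the call's results now arrive via the NativeReturn convention, the continuation behaves like that of any other returning call, and the Adams optimisation can common it up with other continuations.
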