author    Simon Marlow <marlowsd@gmail.com>  2012-03-06 12:24:40 +0000
committer Simon Marlow <marlowsd@gmail.com>  2012-03-06 13:28:32 +0000
commit    93e42a6895d2172f40d37fd13cb7405243dc4d0f (patch)
tree      61e620bdba7d5b0e37348d68ba6ab2a60959a21e /compiler/codeGen
parent    9a32e71d912985a6fd8e3491518ac357f2e8686b (diff)
download  haskell-93e42a6895d2172f40d37fd13cb7405243dc4d0f.tar.gz
Lower safe foreign calls in the new CmmLayoutStack
We also generate much better code for safe foreign calls (and maybe
also unsafe foreign calls) than previously. See the two new Notes:

  Note [lower safe foreign calls]
  Note [safe foreign call convention]
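As a source-level illustration (my example; the C function "foo" is
hypothetical), the kind of call this patch is concerned with is:

    {-# LANGUAGE ForeignFunctionInterface #-}
    import Foreign.C.Types (CInt)

    -- A 'safe' call: the RTS must be able to run other Haskell threads
    -- while the call is in C land, so the code generator emits a
    -- CmmForeignCall node that is lowered later by CmmLayoutStack.
    foreign import ccall safe "foo"
      c_foo :: CInt -> CInt -> CInt -> IO CInt

    bar :: CInt -> CInt -> CInt -> IO CInt
    bar x y z = do { r <- c_foo x y z  -- result bound immediately, so the
                   ; return (r + 1) }  -- sequel of the call is AssignTo [r]

After desugaring, the call takes the "case foo x y z of (# s, r #) -> ..."
shape discussed in Note [safe foreign call convention] below.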
Diffstat (limited to 'compiler/codeGen')
-rw-r--r--  compiler/codeGen/StgCmmEnv.hs     |   3
-rw-r--r--  compiler/codeGen/StgCmmForeign.hs | 111
-rw-r--r--  compiler/codeGen/StgCmmLayout.hs  | 142
-rw-r--r--  compiler/codeGen/StgCmmMonad.hs   |  15
-rw-r--r--  compiler/codeGen/StgCmmPrim.hs    |  11
5 files changed, 189 insertions, 93 deletions
diff --git a/compiler/codeGen/StgCmmEnv.hs b/compiler/codeGen/StgCmmEnv.hs
index d8a7061eec..3b56e2feb6 100644
--- a/compiler/codeGen/StgCmmEnv.hs
+++ b/compiler/codeGen/StgCmmEnv.hs
@@ -27,7 +27,7 @@ module StgCmmEnv (
bindArgsToRegs, bindToReg, rebindToReg,
bindArgToReg, idToReg,
- getArgAmode, getNonVoidArgAmodes,
+ getArgAmode, getNonVoidArgAmodes,
getCgIdInfo,
maybeLetNoEscape,
) where
@@ -213,7 +213,6 @@ getNonVoidArgAmodes (arg:args)
; amodes <- getNonVoidArgAmodes args
; return ( amode : amodes ) }
-
------------------------------------------------------------------------
-- Interface functions for binding and re-binding names
------------------------------------------------------------------------
diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs
index f1a522b37d..d5c9600b38 100644
--- a/compiler/codeGen/StgCmmForeign.hs
+++ b/compiler/codeGen/StgCmmForeign.hs
@@ -22,6 +22,7 @@ import StgCmmEnv
import StgCmmMonad
import StgCmmUtils
import StgCmmClosure
+import StgCmmLayout
import BlockId
import Cmm
@@ -45,15 +46,16 @@ import Control.Monad
-- Code generation for Foreign Calls
-----------------------------------------------------------------------------
-cgForeignCall :: [LocalReg] -- r1,r2 where to put the results
- -> [ForeignHint]
- -> ForeignCall -- the op
+-- | emit code for a foreign call, and return the results to the sequel.
+--
+cgForeignCall :: ForeignCall -- the op
-> [StgArg] -- x,y arguments
+ -> Type -- result type
-> FCode ()
--- Emits code for an unsafe foreign call: r1, r2 = foo( x, y, z )
-cgForeignCall results result_hints (CCall (CCallSpec target cconv safety)) stg_args
+cgForeignCall (CCall (CCallSpec target cconv safety)) stg_args res_ty
= do { cmm_args <- getFCallArgs stg_args
+ ; (res_regs, res_hints) <- newUnboxedTupleRegs res_ty
; let ((call_args, arg_hints), cmm_target)
= case target of
StaticTarget lbl mPkgId
@@ -61,7 +63,7 @@ cgForeignCall results result_hints (CCall (CCallSpec target cconv safety)) stg_a
= case mPkgId of
Nothing -> ForeignLabelInThisPackage
Just pkgId -> ForeignLabelInPackage pkgId
- size = call_size cmm_args
+ size = call_size cmm_args
in ( unzip cmm_args
, CmmLit (CmmLabel
(mkForeignLabel lbl size labelSource IsFunction)))
@@ -69,10 +71,31 @@ cgForeignCall results result_hints (CCall (CCallSpec target cconv safety)) stg_a
DynamicTarget -> case cmm_args of
(fn,_):rest -> (unzip rest, fn)
[] -> panic "cgForeignCall []"
- fc = ForeignConvention cconv arg_hints result_hints
+ fc = ForeignConvention cconv arg_hints res_hints
call_target = ForeignTarget cmm_target fc
- ; emitForeignCall safety results call_target call_args CmmMayReturn }
+ -- we want to emit code for the call, and then emitReturn.
+ -- However, if the sequel is AssignTo, we shortcut a little
+ -- and generate a foreign call that assigns the results
+ -- directly. Otherwise we end up generating a bunch of
+ -- useless "r = r" assignments, which are not merely annoying:
+ -- they prevent the common block elimination from working correctly
+ -- in the case of a safe foreign call.
+ -- See Note [safe foreign call convention]
+ --
+       ; sequel <- getSequel
+       ; case sequel of
+           AssignTo assign_to_these _ ->
+             do { emitForeignCall safety assign_to_these call_target
+                                  call_args CmmMayReturn
+                }
+
+           _something_else ->
+             do { emitForeignCall safety res_regs call_target
+                                  call_args CmmMayReturn
+                ; emitReturn (map (CmmReg . CmmLocal) res_regs)
+                }
+       }
where
-- in the stdcall calling convention, the symbol needs @size appended
-- to it, where size is the total number of bytes of arguments. We
@@ -83,7 +106,76 @@ cgForeignCall results result_hints (CCall (CCallSpec target cconv safety)) stg_a
| otherwise = Nothing
-- ToDo: this might not be correct for 64-bit API
- arg_size (arg, _) = max (widthInBytes $ typeWidth $ cmmExprType arg) wORD_SIZE
+ arg_size (arg, _) = max (widthInBytes $ typeWidth $ cmmExprType arg)
+ wORD_SIZE
+
+{- Note [safe foreign call convention]
+
+The simple thing to do for a safe foreign call would be the same as an
+unsafe one: just
+
+ emitForeignCall ...
+ emitReturn ...
+
+but consider what happens in this case
+
+ case foo x y z of
+ (# s, r #) -> ...
+
+The sequel is AssignTo [r]. The call to newUnboxedTupleRegs picks [r]
+as the result reg, and we generate
+
+   r = foo(x,y,z) returns to L1  -- emitForeignCall
+ L1:
+   r = r                         -- emitReturn
+   goto L2
+ L2:
+   ...
+
+Now L1 is a proc point (by definition, it is the continuation of the
+safe foreign call). If L2 does a heap check, then L2 will also be a
+proc point.
+
+Furthermore, the stack layout algorithm has to arrange to save r
+somewhere between the call and the jump to L1, which is annoying: we
+would have to treat r differently from the other live variables, which
+have to be saved *before* the call.
+
+So we adopt a special convention for safe foreign calls: the results
+are copied out according to the NativeReturn convention by the call,
+and the continuation of the call should copyIn the results. (The
+copyOut code is actually inserted when the safe foreign call is
+lowered later). The result regs attached to the safe foreign call are
+only used temporarily to hold the results before they are copied out.
+
+We will now generate this:
+
+   r = foo(x,y,z) returns to L1
+ L1:
+   r = R1   -- copyIn, inserted by mkSafeCall
+   goto L2
+ L2:
+   ... r ...
+
+And when the safe foreign call is lowered later (see Note [lower safe
+foreign calls]) we get this:
+
+   suspendThread()
+   r = foo(x,y,z)
+   resumeThread()
+   R1 = r   -- copyOut, inserted by lowerSafeForeignCall
+   jump L1
+ L1:
+   r = R1   -- copyIn, inserted by mkSafeCall
+   goto L2
+ L2:
+   ... r ...
+
+Now consider what happens if L2 does a heap check: the Adams
+optimisation kicks in and commons up L1 with the heap-check
+continuation, resulting in just one proc point instead of two. Yay!
+-}
+
emitCCall :: [(CmmFormal,ForeignHint)]
-> CmmExpr
@@ -125,6 +217,7 @@ emitForeignCall safety results target args _ret
(playInterruptible safety)
+
{-
-- THINK ABOUT THIS (used to happen)
-- we might need to load arguments into temporaries before
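As a worked example of the stdcall size suffix computed by call_size
above (my numbers; assuming a 32-bit target, so wORD_SIZE = 4):

    -- foreign import stdcall "foo" foo :: CInt -> CDouble -> IO ()
    --
    -- arg bytes = max 4 wORD_SIZE + max 8 wORD_SIZE = 4 + 8 = 12,
    -- so the label emitted for the call is "foo@12". For ccall,
    -- call_size is Nothing and the label is plain "foo".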
diff --git a/compiler/codeGen/StgCmmLayout.hs b/compiler/codeGen/StgCmmLayout.hs
index 9ee9192794..16b33d1faf 100644
--- a/compiler/codeGen/StgCmmLayout.hs
+++ b/compiler/codeGen/StgCmmLayout.hs
@@ -67,13 +67,17 @@ import FastString ( mkFastString, FastString, fsLit )
-- Call and return sequences
------------------------------------------------------------------------
-emitReturn :: [CmmExpr] -> FCode ()
--- Return multiple values to the sequel
+-- | Return multiple values to the sequel
+--
+-- If the sequel is @Return@
+--
+-- > return (x,y)
--
--- If the sequel is Return
--- return (x,y)
--- If the sequel is AssignTo [p,q]
--- p=x; q=y;
+-- If the sequel is @AssignTo [p,q]@
+--
+-- > p=x; q=y;
+--
+emitReturn :: [CmmExpr] -> FCode ()
emitReturn results
= do { sequel <- getSequel;
; updfr_off <- getUpdFrameOff
@@ -87,26 +91,24 @@ emitReturn results
; emitMultiAssign regs results }
}
+
+-- | @emitCall conv fun args@ makes a call to the entry-code of @fun@,
+-- using the call/return convention @conv@, passing @args@, and
+-- returning the results to the current sequel.
+--
emitCall :: (Convention, Convention) -> CmmExpr -> [CmmExpr] -> FCode ()
--- (cgCall fun args) makes a call to the entry-code of 'fun',
--- passing 'args', and returning the results to the current sequel
-emitCall convs@(callConv, _) fun args
- = do { adjustHpBackwards
- ; sequel <- getSequel
- ; updfr_off <- getUpdFrameOff
- ; emitComment $ mkFastString ("emitCall: " ++ show sequel)
- ; case sequel of
- Return _ ->
- emit $ mkForeignJump callConv fun args updfr_off
- AssignTo res_regs _ ->
- emit =<< mkCall fun convs res_regs args updfr_off (0,[])
- }
+emitCall convs fun args
+ = emitCallWithExtraStack convs fun args noExtraStack
+
+-- | @emitCallWithExtraStack conv fun args stack@ makes a call to the
+-- entry-code of @fun@, using the call/return convention @conv@,
+-- passing @args@, pushing some extra stack frames described by
+-- @stack@, and returning the results to the current sequel.
+--
emitCallWithExtraStack
:: (Convention, Convention) -> CmmExpr -> [CmmExpr]
-> (ByteOff, [(CmmExpr,ByteOff)]) -> FCode ()
--- (cgCall fun args) makes a call to the entry-code of 'fun',
--- passing 'args', and returning the results to the current sequel
emitCallWithExtraStack convs@(callConv, _) fun args extra_stack
= do { adjustHpBackwards
; sequel <- getSequel
@@ -120,7 +122,6 @@ emitCallWithExtraStack convs@(callConv, _) fun args extra_stack
}
-
adjustHpBackwards :: FCode ()
-- This function adjusts the heap pointer just before a tail call or
-- return. At a call or return, the virtual heap pointer may be less
@@ -171,55 +172,67 @@ directCall :: CLabel -> Arity -> [StgArg] -> FCode ()
-- The function f has arity n, and there are guaranteed to be at least n args
-- Both arity and args include void args
directCall lbl arity stg_args
- = do { cmm_args <- getNonVoidArgAmodes stg_args
- ; direct_call "directCall" lbl arity cmm_args (argsReps stg_args) }
+ = do { argreps <- getArgRepsAmodes stg_args
+ ; direct_call "directCall" lbl arity argreps }
+
slowCall :: CmmExpr -> [StgArg] -> FCode ()
-- (slowCall fun args) applies fun to args, returning the results to Sequel
slowCall fun stg_args
= do { dflags <- getDynFlags
- ; cmm_args <- getNonVoidArgAmodes stg_args
+ ; argsreps <- getArgRepsAmodes stg_args
+ ; let (rts_fun, arity) = slowCallPattern (map fst argsreps)
; let platform = targetPlatform dflags
; call <- getCode $ direct_call "slow_call"
- (mkRtsApFastLabel rts_fun) arity cmm_args reps
+ (mkRtsApFastLabel rts_fun) arity argsreps
; emitComment $ mkFastString ("slow_call for " ++
showSDoc (pprPlatform platform fun) ++
" with pat " ++ showSDoc (ftext rts_fun))
; emit (mkAssign nodeReg fun <*> call)
}
- where
- reps = argsReps stg_args
- (rts_fun, arity) = slowCallPattern reps
+
--------------
-direct_call :: String -> CLabel -> Arity -> [CmmExpr] -> [ArgRep] -> FCode ()
--- NB1: (length args) may be less than (length reps), because
--- the args exclude the void ones
--- NB2: 'arity' refers to the *reps*
-direct_call caller lbl arity args reps
- | debugIsOn && arity > length reps -- Too few args
+direct_call :: String -> CLabel -> Arity -> [(ArgRep,Maybe CmmExpr)] -> FCode ()
+direct_call caller lbl arity args
+ | debugIsOn && arity > length args -- Too few args
= do -- Caller should ensure that there are enough args!
dflags <- getDynFlags
let platform = targetPlatform dflags
- pprPanic "direct_call" (text caller <+> ppr arity
- <+> pprPlatform platform lbl <+> ppr (length reps)
- <+> pprPlatform platform args <+> ppr reps )
+ pprPanic "direct_call" $
+ text caller <+> ppr arity <+>
+ pprPlatform platform lbl <+> ppr (length args) <+>
+ pprPlatform platform (map snd args) <+> ppr (map fst args)
-
- | null rest_reps -- Precisely the right number of arguments
- = emitCall (NativeDirectCall, NativeReturn) target args
+ | null rest_args -- Precisely the right number of arguments
+ = emitCall (NativeDirectCall, NativeReturn) target (nonVArgs args)
| otherwise -- Note [over-saturated calls]
- = ASSERT( arity == length initial_reps )
- emitCallWithExtraStack (NativeDirectCall, NativeReturn)
- target fast_args (mkStkOffsets stack_args)
+ = emitCallWithExtraStack (NativeDirectCall, NativeReturn)
+ target (nonVArgs fast_args) (mkStkOffsets stack_args)
where
target = CmmLit (CmmLabel lbl)
- (initial_reps, rest_reps) = splitAt arity reps
- arg_arity = count isNonV initial_reps
- (fast_args, rest_args) = splitAt arg_arity args
- stack_args = slowArgs (zip rest_reps rest_args)
+ (fast_args, rest_args) = splitAt arity args
+ stack_args = slowArgs rest_args
+
+-- When constructing calls, it is easier to keep the ArgReps and the
+-- CmmExprs zipped together. However, a void argument has no
+-- representation, so we need to use Maybe CmmExpr (the alternative of
+-- using zeroCLit or even undefined would work, but would be ugly).
+--
+getArgRepsAmodes :: [StgArg] -> FCode [(ArgRep, Maybe CmmExpr)]
+getArgRepsAmodes = mapM getArgRepAmode
+ where getArgRepAmode arg
+ | V <- rep = return (V, Nothing)
+ | otherwise = do expr <- getArgAmode (NonVoid arg)
+ return (rep, Just expr)
+ where rep = toArgRep (argPrimRep arg)
+
+nonVArgs :: [(ArgRep, Maybe CmmExpr)] -> [CmmExpr]
+nonVArgs [] = []
+nonVArgs ((_,Nothing) : args) = nonVArgs args
+nonVArgs ((_,Just arg) : args) = arg : nonVArgs args
{-
Note [over-saturated calls]
@@ -259,23 +272,21 @@ just more arguments that we are passing on the stack (cml_args).
-- | 'slowArgs' takes a list of function arguments and prepares them for
-- pushing on the stack for "extra" arguments to a function which requires
-- fewer arguments than we currently have.
-slowArgs :: [(ArgRep,CmmExpr)] -> [(ArgRep,CmmExpr)]
+slowArgs :: [(ArgRep, Maybe CmmExpr)] -> [(ArgRep, Maybe CmmExpr)]
slowArgs [] = []
-slowArgs amodes
- | opt_SccProfilingOn = save_cccs ++ this_pat ++ slowArgs rest
- | otherwise = this_pat ++ slowArgs rest
+slowArgs args -- careful: reps contains voids (V), but args does not
+ | opt_SccProfilingOn = save_cccs ++ this_pat ++ slowArgs rest_args
+ | otherwise = this_pat ++ slowArgs rest_args
where
- (arg_pat, args, rest) = matchSlowPattern amodes
+ (arg_pat, n) = slowCallPattern (map fst args)
+ (call_args, rest_args) = splitAt n args
+
stg_ap_pat = mkCmmRetInfoLabel rtsPackageId arg_pat
- this_pat = (N, mkLblExpr stg_ap_pat) : args
- save_cccs = [(N, mkLblExpr save_cccs_lbl), (N, curCCS)]
+ this_pat = (N, Just (mkLblExpr stg_ap_pat)) : call_args
+ save_cccs = [(N, Just (mkLblExpr save_cccs_lbl)), (N, Just curCCS)]
save_cccs_lbl = mkCmmRetInfoLabel rtsPackageId (fsLit "stg_restore_cccs")
-matchSlowPattern :: [(ArgRep,CmmExpr)]
- -> (FastString, [(ArgRep,CmmExpr)], [(ArgRep,CmmExpr)])
-matchSlowPattern amodes = (arg_pat, these, rest)
- where (arg_pat, n) = slowCallPattern (map fst amodes)
- (these, rest) = splitAt n amodes
+
-- These cases were found to cover about 99% of all slow calls:
slowCallPattern :: [ArgRep] -> (FastString, Arity)
@@ -304,16 +315,16 @@ slowCallPattern [] = (fsLit "stg_ap_0", 0)
-- See Note [over-saturated calls].
mkStkOffsets
- :: [(ArgRep,CmmExpr)] -- things to make offsets for
+ :: [(ArgRep, Maybe CmmExpr)] -- things to make offsets for
-> ( ByteOff -- OUTPUTS: Topmost allocated word
, [(CmmExpr, ByteOff)] ) -- things with offsets (voids filtered out)
mkStkOffsets things
= loop 0 [] (reverse things)
where
loop offset offs [] = (offset,offs)
- loop offset offs ((V,_):things) = loop offset offs things
+ loop offset offs ((_,Nothing):things) = loop offset offs things
-- ignore Void arguments
- loop offset offs ((rep,thing):things)
+ loop offset offs ((rep,Just thing):things)
= loop thing_off ((thing, thing_off):offs) things
where
thing_off = offset + argRepSizeW rep * wORD_SIZE
@@ -357,10 +368,7 @@ isNonV :: ArgRep -> Bool
isNonV V = False
isNonV _ = True
-argsReps :: [StgArg] -> [ArgRep]
-argsReps = map (toArgRep . argPrimRep)
-
-argRepSizeW :: ArgRep -> WordOff -- Size in words
+argRepSizeW :: ArgRep -> WordOff -- Size in words
argRepSizeW N = 1
argRepSizeW P = 1
argRepSizeW F = 1
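To make the new argument plumbing concrete, some hedged worked examples
(x and y stand for arbitrary non-void CmmExprs; wORD_SIZE assumed 8):

    -- Void arguments keep their rep but have no amode:
    --   nonVArgs [(P, Just x), (V, Nothing), (N, Just y)]  ==  [x, y]
    --
    -- The slow-call pattern is chosen from the reps alone:
    --   slowCallPattern [P, P]  ==  (fsLit "stg_ap_pp", 2)
    --
    -- mkStkOffsets lays the extra arguments out bottom-up, skipping voids:
    --   mkStkOffsets [(P, Just x), (N, Just y)]
    --     ==  (16, [(x, 16), (y, 8)])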
diff --git a/compiler/codeGen/StgCmmMonad.hs b/compiler/codeGen/StgCmmMonad.hs
index ccf0777906..240469c3f2 100644
--- a/compiler/codeGen/StgCmmMonad.hs
+++ b/compiler/codeGen/StgCmmMonad.hs
@@ -77,9 +77,6 @@ import Unique
import UniqSupply
import FastString
import Outputable
-import Util
-
-import Compiler.Hoopl hiding (Unique, (<*>), mkLabel, mkBranch, mkLast)
import Control.Monad
import Data.List
@@ -614,7 +611,7 @@ emitComment :: FastString -> FCode ()
#if 0 /* def DEBUG */
emitComment s = emitCgStmt (CgStmt (CmmComment s))
#else
-emitComment s = return ()
+emitComment _ = return ()
#endif
emitAssign :: CmmReg -> CmmExpr -> FCode ()
@@ -707,12 +704,16 @@ mkSafeCall :: ForeignTarget -> [CmmFormal] -> [CmmActual]
-> FCode CmmAGraph
mkSafeCall t fs as upd i = do
k <- newLabelC
+ let (_off, copyout) = copyInOflow NativeReturn (Young k) fs
+ -- see Note [safe foreign call convention]
return
( mkStore (CmmStackSlot (Young k) (widthInBytes wordWidth))
(CmmLit (CmmBlock k))
- <*> mkLast (CmmForeignCall {tgt=t, res=fs, args=as, succ=k, updfr=upd, intrbl=i})
- <*> mkLabel k)
-
+ <*> mkLast (CmmForeignCall { tgt=t, res=fs, args=as, succ=k
+ , updfr=upd, intrbl=i })
+ <*> mkLabel k
+ <*> copyout
+ )
-- ----------------------------------------------------------------------------
-- CgStmts
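Schematically (my sketch, following Note [safe foreign call convention]),
the graph that mkSafeCall now builds for "r = foo(x,y,z)" is:

    -- Sp[young(k)] = k              -- the mkStore: save the return address
    -- r = foo(x,y,z) returns to k   -- the CmmForeignCall last node
    -- k:
    -- r = R1                        -- the copyInOflow code (bound to
    --                               -- 'copyout' above); see the Note
    -- <fall through to the code after the call>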
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
index c95b1f02ff..9f87271fba 100644
--- a/compiler/codeGen/StgCmmPrim.hs
+++ b/compiler/codeGen/StgCmmPrim.hs
@@ -68,14 +68,9 @@ cgOpApp :: StgOp -- The op
-- Foreign calls
cgOpApp (StgFCallOp fcall _) stg_args res_ty
- = do { (res_regs, res_hints) <- newUnboxedTupleRegs res_ty
- -- Choose result regs r1, r2
- -- Note [Foreign call results]
- ; cgForeignCall res_regs res_hints fcall stg_args
- -- r1, r2 = foo( x, y )
- ; emitReturn (map (CmmReg . CmmLocal) res_regs) }
- -- return (r1, r2)
-
+ = cgForeignCall fcall stg_args res_ty
+ -- Note [Foreign call results]
+
-- tagToEnum# is special: we need to pull the constructor
-- out of the table, and perform an appropriate return.
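The net effect here is that cgOpApp no longer unconditionally emits
"r1, r2 = foo(x, y); return (r1, r2)"; cgForeignCall now inspects the
sequel itself. Schematically (a sketch, names illustrative):

    -- sequel = AssignTo [r]:  r = foo(x,y,z)            -- no "r = r"
    -- any other sequel:       r = foo(x,y,z); return r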