-rw-r--r--  compiler/cmm/CmmBuildInfoTables.hs |  89
-rw-r--r--  compiler/cmm/CmmLayoutStack.hs     | 233
-rw-r--r--  compiler/cmm/MkGraph.hs            |  15
-rw-r--r--  compiler/codeGen/StgCmmEnv.hs      |   3
-rw-r--r--  compiler/codeGen/StgCmmForeign.hs  | 111
-rw-r--r--  compiler/codeGen/StgCmmLayout.hs   | 142
-rw-r--r--  compiler/codeGen/StgCmmMonad.hs    |  15
-rw-r--r--  compiler/codeGen/StgCmmPrim.hs     |  11
8 files changed, 390 insertions, 229 deletions
diff --git a/compiler/cmm/CmmBuildInfoTables.hs b/compiler/cmm/CmmBuildInfoTables.hs
index 43ff2b0758..7a396ee37b 100644
--- a/compiler/cmm/CmmBuildInfoTables.hs
+++ b/compiler/cmm/CmmBuildInfoTables.hs
@@ -17,7 +17,6 @@ module CmmBuildInfoTables
, setInfoTableSRT
, TopSRT, emptySRT, srtToData
, bundleCAFs
- , lowerSafeForeignCalls
, cafTransfers )
where
@@ -315,91 +314,3 @@ updInfoTbl toVars toSrt info_tbl@(CmmInfoTable {})
StackRep ls -> StackRep (toVars ls)
other -> other }
updInfoTbl _ _ t@CmmNonInfoTable = t
-
-----------------------------------------------------------------
--- Safe foreign calls: We need to insert the code that suspends and resumes
--- the thread before and after a safe foreign call.
--- Why do we do this so late in the pipeline?
--- Because we need this code to appear without interruption: you can't rely on the
--- value of the stack pointer between the call and resetting the thread state;
--- you need to have an infotable on the young end of the stack both when
--- suspending the thread and making the foreign call.
--- All of this is much easier if we insert the suspend and resume calls here.
-
--- At the same time, we prepare for the stages of the compiler that
--- build the proc points. We have to do this at the same time because
--- the safe foreign calls need special treatment with respect to infotables.
--- A safe foreign call needs an infotable even though it isn't
--- a procpoint. The following datatype captures the information
--- needed to generate the infotables along with the Cmm data and procedures.
-
--- JD: Why not do this while splitting procedures?
-lowerSafeForeignCalls :: AreaMap -> CmmDecl -> FuelUniqSM CmmDecl
-lowerSafeForeignCalls _ t@(CmmData _ _) = return t
-lowerSafeForeignCalls areaMap (CmmProc info l g@(CmmGraph {g_entry=entry})) = do
- let block b mblocks = mblocks >>= lowerSafeCallBlock entry areaMap b
- blocks <- foldGraphBlocks block (return mapEmpty) g
- return $ CmmProc info l (ofBlockMap entry blocks)
-
--- If the block ends with a safe foreign call, lower it to an unsafe
--- call (with appropriate saves and restores before and after).
-lowerSafeCallBlock :: BlockId -> AreaMap -> CmmBlock -> BlockEnv CmmBlock
- -> FuelUniqSM (BlockEnv CmmBlock)
-lowerSafeCallBlock entry areaMap b blocks =
- case blockToNodeList b of
- (JustC (CmmEntry id), m, JustC l@(CmmForeignCall {})) -> lowerSafeForeignCall entry areaMap blocks id m l
- _ -> return $ insertBlock b blocks
-
--- Late in the code generator, we want to insert the code necessary
--- to lower a safe foreign call to a sequence of unsafe calls.
-lowerSafeForeignCall :: BlockId -> AreaMap -> BlockEnv CmmBlock -> BlockId -> [CmmNode O O] -> CmmNode O C
- -> FuelUniqSM (BlockEnv CmmBlock)
-lowerSafeForeignCall entry areaMap blocks bid m
- (CmmForeignCall {tgt=tgt, res=rs, args=as, succ=succ, updfr = updfr_off, intrbl = intrbl}) =
- do let newTemp rep = getUniqueM >>= \u -> return (LocalReg u rep)
- -- Both 'id' and 'new_base' are KindNonPtr because they're
- -- RTS-only objects and are not subject to garbage collection
- id <- newTemp bWord
- new_base <- newTemp (cmmRegType (CmmGlobal BaseReg))
- let (caller_save, caller_load) = callerSaveVolatileRegs
- load_tso <- newTemp gcWord -- TODO FIXME NOW
- load_stack <- newTemp gcWord -- TODO FIXME NOW
- let (<**>) = (M.<*>)
- let suspendThread = foreignLbl "suspendThread"
- resumeThread = foreignLbl "resumeThread"
- foreignLbl name = CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit name)))
- suspend = saveThreadState <**>
- caller_save <**>
- mkUnsafeCall (ForeignTarget suspendThread
- (ForeignConvention CCallConv [AddrHint, NoHint] [AddrHint]))
- [id] [CmmReg (CmmGlobal BaseReg), CmmLit (mkIntCLit (fromEnum intrbl))]
- midCall = mkUnsafeCall tgt rs as
- resume = mkUnsafeCall (ForeignTarget resumeThread
- (ForeignConvention CCallConv [AddrHint] [AddrHint]))
- [new_base] [CmmReg (CmmLocal id)] <**>
- -- Assign the result to BaseReg: we
- -- might now have a different Capability!
- mkAssign (CmmGlobal BaseReg) (CmmReg (CmmLocal new_base)) <**>
- caller_load <**>
- loadThreadState load_tso load_stack
- -- We have to save the return value on the stack because its next use
- -- may appear in a different procedure due to procpoint splitting...
- saveRetVals = foldl (<**>) mkNop $ map (M.mkMiddle . spill) rs
- spill r = CmmStore (regSlot r) (CmmReg $ CmmLocal r)
- regSlot r@(LocalReg _ _) = CmmRegOff (CmmGlobal Sp) (sp_off - offset)
- where offset = w + expectJust "lowerForeign" Nothing -- XXX need to fix this: (Map.lookup (RegSlot r) areaMap)
- sp_off = wORD_SIZE + expectJust "lowerForeign" (Map.lookup area areaMap)
- area = if succ == entry then Old else Young succ
- w = widthInBytes $ typeWidth $ localRegType r
- -- Note: The successor must be a procpoint, and we have already split,
- -- so we use a jump, not a branch.
- succLbl = CmmLit (CmmLabel (infoTblLbl succ))
- jump = CmmCall { cml_target = succLbl, cml_cont = Nothing
- , cml_args = widthInBytes wordWidth ,cml_ret_args = 0
- , cml_ret_off = updfr_off}
- graph' <- liftUniq $ labelAGraph bid $ catAGraphs (map M.mkMiddle m) <**>
- suspend <**> midCall <**>
- resume <**> saveRetVals <**> M.mkLast jump
- return $ blocks `mapUnion` toBlockMap graph'
-lowerSafeForeignCall _ _ _ _ _ _ = panic "lowerSafeForeignCall was passed something else"
-
diff --git a/compiler/cmm/CmmLayoutStack.hs b/compiler/cmm/CmmLayoutStack.hs
index 9a382c0557..87f495aa72 100644
--- a/compiler/cmm/CmmLayoutStack.hs
+++ b/compiler/cmm/CmmLayoutStack.hs
@@ -3,13 +3,20 @@ module CmmLayoutStack (
cmmLayoutStack, setInfoTableStackMap
) where
+import StgCmmUtils ( callerSaveVolatileRegs ) -- XXX
+import StgCmmForeign ( saveThreadState, loadThreadState ) -- XXX
+
import Cmm
import BlockId
+import CLabel
import CmmUtils
+import MkGraph
+import Module
+import ForeignCall
import CmmLive
import CmmProcPoint
import SMRep
-import Hoopl
+import Hoopl hiding ((<*>), mkLast, mkMiddle)
import OptimizationFuel
import Constants
import UniqSupply
@@ -177,31 +184,49 @@ layout procpoints liveness entry entry_args final_stackmaps final_hwm blocks
-- a proc point, we must save the live variables, adjust Sp, and
-- construct the StackMaps for each of the successor blocks.
-- See handleLastNode for details.
- (saves, out, sp_off, last1, fixup_blocks)
+ (middle2, sp_off, middle3, last1, fixup_blocks, out)
<- handleLastNode procpoints liveness cont_info
acc_stackmaps stack1 last0
- let hwm' = maximum (acc_hwm : map sm_sp (mapElems out))
- middle2 = maybeAddSpAdj sp_off $ foldl blockSnoc middle1 saves
-
- area_off = getAreaOff final_stackmaps
-
- -- manifest Sp: turn all CmmStackSlots into actual loads
- adj_middle = mapExpDeep (areaToSp sp0 sp_high area_off)
- adj_last = optStackCheck .
- mapExpDeep (areaToSp (sp0 - sp_off) sp_high area_off)
-
- middle3 = blockFromList $
- map adj_middle $
- elimStackStores stack0 final_stackmaps area_off $
- blockToList middle2
-
- newblock = blockJoin entry0 middle3 (adj_last last1)
-
- fixup_blocks' = map (blockMapNodes3 (id, adj_middle, id)) fixup_blocks
+ -- our block:
+ -- middle1 -- the original middle nodes
+ -- middle2 -- live variable saves from handleLastNode
+ -- Sp = Sp + sp_off -- Sp adjustment goes here
+ -- middle3 -- some more middle nodes from handleLastNode
+ -- last1 -- the last node
+ --
+ -- The next step is to manifest Sp: turn all the CmmStackSlots
+ -- into CmmLoads from Sp. The adjustment for middle1/middle2
+ -- will be different from that for middle3/last1, because the
+ -- Sp adjustment intervenes.
+ --
+ let area_off = getAreaOff final_stackmaps
+
+ adj_pre_sp, adj_post_sp :: CmmNode e x -> CmmNode e x
+ adj_pre_sp = mapExpDeep (areaToSp sp0 sp_high area_off)
+ adj_post_sp = mapExpDeep (areaToSp (sp0 - sp_off) sp_high area_off)
+
+ middle_pre = maybeAddSpAdj sp_off $
+ blockFromList $
+ map adj_pre_sp $
+ elimStackStores stack0 final_stackmaps area_off $
+ blockToList $
+ foldl blockSnoc middle1 middle2
+
+ middle_post = map adj_post_sp middle3
+
+ final_middle = foldl blockSnoc middle_pre middle_post
+ final_last = optStackCheck (adj_post_sp last1)
+
+ newblock = blockJoin entry0 final_middle final_last
+
+ fixup_blocks' = map (blockMapNodes3 (id, adj_post_sp, id))
+ fixup_blocks
stackmaps' = mapUnion acc_stackmaps out
+ hwm' = maximum (acc_hwm : map sm_sp (mapElems out))
+
pprTrace "layout(out)" (ppr out) $ return ()
go bs stackmaps' hwm' (newblock : fixup_blocks' ++ acc_blocks)
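maybeAddSpAdj, used above to insert the Sp adjustment, sits outside this
hunk. A minimal sketch of what it presumably does, using only the Block
and CmmExpr utilities already in scope in this module:

    -- Append "Sp = Sp + sp_off" to a block of middle nodes, unless the
    -- adjustment is zero.
    maybeAddSpAdj :: ByteOff -> Block CmmNode O O -> Block CmmNode O O
    maybeAddSpAdj 0      block = block
    maybeAddSpAdj sp_off block =
        block `blockSnoc` CmmAssign spReg (cmmOffset (CmmReg spReg) sp_off)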
@@ -292,16 +317,33 @@ getStackLoc (Young l) n stackmaps =
-- -----------------------------------------------------------------------------
-- Handling stack allocation for a last node
+-- We take a single last node and turn it into:
+--
+-- C1 (some statements)
+-- Sp = Sp + N
+-- C2 (some more statements)
+-- call f() -- the actual last node
+--
+-- plus possibly some more blocks (we may have to add some fixup code
+-- between the last node and the continuation).
+--
+-- C1: is the code for saving the variables across this last node onto
+-- the stack, if the continuation is a call or jumps to a proc point.
+--
+-- C2: if the last node is a safe foreign call, we have to inject some
+-- extra code that goes *after* the Sp adjustment.
+
handleLastNode
:: ProcPointSet -> BlockEnv CmmLive -> BlockEnv ByteOff
-> BlockEnv StackMap -> StackMap
-> CmmNode O C
-> UniqSM
- ( [CmmNode O O] -- assignments to save live variables
- , BlockEnv StackMap -- stackmaps for the continuations
- , ByteOff -- amount to adjust Sp before the jump
+ ( [CmmNode O O] -- nodes to go *before* the Sp adjustment
+ , ByteOff -- amount to adjust Sp
+ , [CmmNode O O] -- nodes to go *after* the Sp adjustment
, CmmNode O C -- new last node
, [CmmBlock] -- new blocks
+ , BlockEnv StackMap -- stackmaps for the continuations
)
handleLastNode procpoints liveness cont_info stackmaps
@@ -312,39 +354,45 @@ handleLastNode procpoints liveness cont_info stackmaps
-- is cml_args, after popping any other junk from the stack.
CmmCall{ cml_cont = Nothing, .. } -> do
let sp_off = sp0 - cml_args
- return ([], mapEmpty, sp_off, last, [])
+ return ([], sp_off, [], last, [], mapEmpty)
-- At each CmmCall with a continuation:
CmmCall{ cml_cont = Just cont_lbl, .. } ->
- lastCall cont_lbl cml_args cml_ret_args cml_ret_off
+ lastCall cont_lbl [] cml_args cml_ret_args cml_ret_off
- CmmForeignCall{ succ = cont_lbl, .. } ->
- lastCall cont_lbl 0{-no args-} 0{-no results-} (sm_ret_off stack0)
+ CmmForeignCall{ succ = cont_lbl, .. } -> do
+ (mids, spoff, _, last', blocks, stackmap') <-
+ lastCall cont_lbl res wORD_SIZE wORD_SIZE (sm_ret_off stack0)
+ -- one word each for args and results: the return address
+ (extra_mids, last'') <- lowerSafeForeignCall last'
+ return (mids, spoff, extra_mids, last'', blocks, stackmap')
CmmBranch{..} -> handleProcPoints
CmmCondBranch{..} -> handleProcPoints
CmmSwitch{..} -> handleProcPoints
where
- lastCall :: BlockId -> ByteOff -> ByteOff -> ByteOff
+ lastCall :: BlockId -> [LocalReg] -> ByteOff -> ByteOff -> ByteOff
-> UniqSM
( [CmmNode O O]
- , BlockEnv StackMap
, ByteOff
+ , [CmmNode O O]
, CmmNode O C
, [CmmBlock]
+ , BlockEnv StackMap
)
- lastCall cont_lbl cml_args cml_ret_args cml_ret_off
+ lastCall cont_lbl res_regs cml_args cml_ret_args cml_ret_off
-- If we have already seen this continuation before, then
-- we just have to make the stack look the same:
| Just cont_stack <- mapLookup cont_lbl stackmaps
=
return ( fixupStack stack0 cont_stack
- , stackmaps
, sp0 - sm_sp cont_stack
+ , []
, last
- , [] )
+ , []
+ , stackmaps )
-- a continuation we haven't seen before:
-- allocate the stack frame for it.
@@ -353,6 +401,7 @@ handleLastNode procpoints liveness cont_info stackmaps
-- get the set of LocalRegs live in the continuation
let target_live = mapFindWithDefault Set.empty cont_lbl
liveness
+ `Set.difference` Set.fromList res_regs
-- the stack from the base to cml_ret_off is off-limits.
-- our new stack frame contains:
@@ -382,18 +431,19 @@ handleLastNode procpoints liveness cont_info stackmaps
-- emit an Sp adjustment, taking into account the call area
--
return ( assigs
- , mapSingleton cont_lbl cont_stack
, sp_off
+ , []
, last
, [] -- no new blocks
- )
+ , mapSingleton cont_lbl cont_stack )
handleProcPoints :: UniqSM ( [CmmNode O O]
- , BlockEnv StackMap
, ByteOff
+ , [CmmNode O O]
, CmmNode O C
- , [CmmBlock] )
+ , [CmmBlock]
+ , BlockEnv StackMap )
handleProcPoints = do
pps <- mapM handleProcPoint (successors last)
@@ -401,10 +451,11 @@ handleLastNode procpoints liveness cont_info stackmaps
lbl_map = mapFromList [ (l,tmp) | (l,tmp,_,_) <- pps ]
fix_lbl l = mapLookup l lbl_map `orElse` l
return ( []
- , mapFromList [ (l, sm) | (l,_,sm,_) <- pps ]
, 0
+ , []
, mapSuccessors fix_lbl last
- , concat [ blk | (_,_,_,blk) <- pps ] )
+ , concat [ blk | (_,_,_,blk) <- pps ]
+ , mapFromList [ (l, sm) | (l,_,sm,_) <- pps ] )
-- For each proc point that is a successor of this block
-- (a) if the proc point already has a stackmap, we need to
@@ -642,6 +693,112 @@ stackMapToLiveness StackMap{..} =
-- -----------------------------------------------------------------------------
+-- Lowering safe foreign calls
+
+{-
+Note [lower safe foreign calls]
+
+We start with
+
+ Sp[young(L1)] = L1
+ ,-----------------------
+ | r1 = foo(x,y,z) returns to L1
+ '-----------------------
+ L1:
+ R1 = r1 -- copyIn, inserted by mkSafeCall
+ ...
+
+the stack layout algorithm will arrange to save and reload everything
+live across the call. Our job now is to expand the call so we get
+
+ Sp[young(L1)] = L1
+ ,-----------------------
+ | SAVE_THREAD_STATE()
+ | token = suspendThread(BaseReg, interruptible)
+ | r = foo(x,y,z)
+ | BaseReg = resumeThread(token)
+ | LOAD_THREAD_STATE()
+ | R1 = r -- copyOut
+ | jump L1
+ '-----------------------
+ L1:
+ r = R1 -- copyIn, inserted by mkSafeCall
+ ...
+
+Note the copyOut, which saves the results in the places that L1 is
+expecting them (see Note [safe foreign call convention]).
+-}
+
+
+lowerSafeForeignCall :: CmmNode O C -> UniqSM ([CmmNode O O], CmmNode O C)
+lowerSafeForeignCall CmmForeignCall { .. } =
+ do let
+ -- Both 'id' and 'new_base' are KindNonPtr because they're
+ -- RTS-only objects and are not subject to garbage collection
+ id <- newTemp bWord
+ new_base <- newTemp (cmmRegType (CmmGlobal BaseReg))
+ let (caller_save, caller_load) = callerSaveVolatileRegs
+ load_tso <- newTemp gcWord
+ load_stack <- newTemp gcWord
+ let suspend = saveThreadState <*>
+ caller_save <*>
+ mkMiddle (callSuspendThread id intrbl)
+ midCall = mkUnsafeCall tgt res args
+ resume = mkMiddle (callResumeThread new_base id) <*>
+ -- Assign the result to BaseReg: we
+ -- might now have a different Capability!
+ mkAssign (CmmGlobal BaseReg) (CmmReg (CmmLocal new_base)) <*>
+ caller_load <*>
+ loadThreadState load_tso load_stack
+ -- Note: The successor must be a procpoint, and we have already split,
+ -- so we use a jump, not a branch.
+ succLbl = CmmLit (CmmLabel (infoTblLbl succ))
+
+ (ret_args, copyout) = copyOutOflow NativeReturn Jump (Young succ)
+ (map (CmmReg . CmmLocal) res)
+ updfr (0, [])
+
+ jump = CmmCall { cml_target = succLbl
+ , cml_cont = Just succ
+ , cml_args = widthInBytes wordWidth
+ , cml_ret_args = ret_args
+ , cml_ret_off = updfr }
+
+ graph' <- lgraphOfAGraph $ suspend <*>
+ midCall <*>
+ resume <*>
+ copyout <*>
+ mkLast jump
+
+ case toBlockList graph' of
+ [one] -> let (_, middle, last) = blockSplit one
+ in return (blockToList middle, last)
+ _ -> panic "lowerSafeForeignCall0"
+
+lowerSafeForeignCall _ = panic "lowerSafeForeignCall1"
+
+
+foreignLbl :: FastString -> CmmExpr
+foreignLbl name = CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId name))
+
+newTemp :: CmmType -> UniqSM LocalReg
+newTemp rep = getUniqueM >>= \u -> return (LocalReg u rep)
+
+callSuspendThread :: LocalReg -> Bool -> CmmNode O O
+callSuspendThread id intrbl =
+ CmmUnsafeForeignCall
+ (ForeignTarget (foreignLbl (fsLit "suspendThread"))
+ (ForeignConvention CCallConv [AddrHint, NoHint] [AddrHint]))
+ [id] [CmmReg (CmmGlobal BaseReg), CmmLit (mkIntCLit (fromEnum intrbl))]
+
+callResumeThread :: LocalReg -> LocalReg -> CmmNode O O
+callResumeThread new_base id =
+ CmmUnsafeForeignCall
+ (ForeignTarget (foreignLbl (fsLit "resumeThread"))
+ (ForeignConvention CCallConv [AddrHint] [AddrHint]))
+ [new_base] [CmmReg (CmmLocal id)]
+
+-- -----------------------------------------------------------------------------
plusW :: ByteOff -> WordOff -> ByteOff
plusW b w = b + w * wORD_SIZE
diff --git a/compiler/cmm/MkGraph.hs b/compiler/cmm/MkGraph.hs
index 922f31e45a..797b785de2 100644
--- a/compiler/cmm/MkGraph.hs
+++ b/compiler/cmm/MkGraph.hs
@@ -12,6 +12,7 @@ module MkGraph
, mkCbranch, mkSwitch
, mkReturn, mkReturnSimple, mkComment, mkCallEntry, mkBranch
, copyInOflow, copyOutOflow
+ , noExtraStack
, toCall, Transfer(..)
)
where
@@ -188,8 +189,7 @@ mkJumpGC e actuals updfr_off =
mkForeignJump :: Convention -> CmmExpr -> [CmmActual] -> UpdFrameOffset
-> CmmAGraph
mkForeignJump conv e actuals updfr_off =
- lastWithArgs Jump Old conv actuals updfr_off $
- toCall e Nothing updfr_off 0
+ mkForeignJumpExtra conv e actuals updfr_off noExtraStack
mkForeignJumpExtra :: Convention -> CmmExpr -> [CmmActual]
-> UpdFrameOffset -> (ByteOff, [(CmmExpr, ByteOff)])
@@ -208,13 +208,11 @@ mkReturn :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph
mkReturn e actuals updfr_off =
lastWithArgs Ret Old NativeReturn actuals updfr_off $
toCall e Nothing updfr_off 0
- -- where e = CmmLoad (CmmStackSlot Old updfr_off) gcWord
mkReturnSimple :: [CmmActual] -> UpdFrameOffset -> CmmAGraph
mkReturnSimple actuals updfr_off =
- lastWithArgs Ret Old NativeReturn actuals updfr_off $
- toCall e Nothing updfr_off 0
- where e = CmmLoad (CmmStackSlot Old updfr_off) gcWord
+ mkReturn e actuals updfr_off
+ where e = CmmLoad (CmmStackSlot Old updfr_off) gcWord
mkBranch :: BlockId -> CmmAGraph
mkBranch bid = mkLast (CmmBranch bid)
@@ -346,9 +344,8 @@ lastWithArgs :: Transfer -> Area -> Convention -> [CmmActual]
-> (ByteOff -> CmmAGraph)
-> CmmAGraph
lastWithArgs transfer area conv actuals updfr_off last =
- let (outArgs, copies) = copyOutOflow conv transfer area actuals
- updfr_off noExtraStack in
- copies <*> last outArgs
+ lastWithArgsAndExtraStack transfer area conv actuals
+ updfr_off noExtraStack last
lastWithArgsAndExtraStack :: Transfer -> Area -> Convention -> [CmmActual]
-> UpdFrameOffset -> (ByteOff, [(CmmExpr,ByteOff)])
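The body of lastWithArgsAndExtraStack is cut off by the hunk; judging
from the removed lastWithArgs body above, it presumably just threads the
extra-stack descriptor through to copyOutOflow, with noExtraStack (newly
exported at the top of this file) as the empty descriptor. A sketch:

    lastWithArgsAndExtraStack transfer area conv actuals
                              updfr_off extra_stack last =
      let (outArgs, copies) = copyOutOflow conv transfer area actuals
                                           updfr_off extra_stack
      in  copies <*> last outArgs

    noExtraStack :: (ByteOff, [(CmmExpr, ByteOff)])
    noExtraStack = (0, [])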
diff --git a/compiler/codeGen/StgCmmEnv.hs b/compiler/codeGen/StgCmmEnv.hs
index d8a7061eec..3b56e2feb6 100644
--- a/compiler/codeGen/StgCmmEnv.hs
+++ b/compiler/codeGen/StgCmmEnv.hs
@@ -27,7 +27,7 @@ module StgCmmEnv (
bindArgsToRegs, bindToReg, rebindToReg,
bindArgToReg, idToReg,
- getArgAmode, getNonVoidArgAmodes,
+ getArgAmode, getNonVoidArgAmodes,
getCgIdInfo,
maybeLetNoEscape,
) where
@@ -213,7 +213,6 @@ getNonVoidArgAmodes (arg:args)
; amodes <- getNonVoidArgAmodes args
; return ( amode : amodes ) }
-
------------------------------------------------------------------------
-- Interface functions for binding and re-binding names
------------------------------------------------------------------------
diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs
index f1a522b37d..d5c9600b38 100644
--- a/compiler/codeGen/StgCmmForeign.hs
+++ b/compiler/codeGen/StgCmmForeign.hs
@@ -22,6 +22,7 @@ import StgCmmEnv
import StgCmmMonad
import StgCmmUtils
import StgCmmClosure
+import StgCmmLayout
import BlockId
import Cmm
@@ -45,15 +46,16 @@ import Control.Monad
-- Code generation for Foreign Calls
-----------------------------------------------------------------------------
-cgForeignCall :: [LocalReg] -- r1,r2 where to put the results
- -> [ForeignHint]
- -> ForeignCall -- the op
+-- | emit code for a foreign call, and return the results to the sequel.
+--
+cgForeignCall :: ForeignCall -- the op
-> [StgArg] -- x,y arguments
+ -> Type -- result type
-> FCode ()
--- Emits code for an unsafe foreign call: r1, r2 = foo( x, y, z )
-cgForeignCall results result_hints (CCall (CCallSpec target cconv safety)) stg_args
+cgForeignCall (CCall (CCallSpec target cconv safety)) stg_args res_ty
= do { cmm_args <- getFCallArgs stg_args
+ ; (res_regs, res_hints) <- newUnboxedTupleRegs res_ty
; let ((call_args, arg_hints), cmm_target)
= case target of
StaticTarget lbl mPkgId
@@ -61,7 +63,7 @@ cgForeignCall results result_hints (CCall (CCallSpec target cconv safety)) stg_a
= case mPkgId of
Nothing -> ForeignLabelInThisPackage
Just pkgId -> ForeignLabelInPackage pkgId
- size = call_size cmm_args
+ size = call_size cmm_args
in ( unzip cmm_args
, CmmLit (CmmLabel
(mkForeignLabel lbl size labelSource IsFunction)))
@@ -69,10 +71,31 @@ cgForeignCall results result_hints (CCall (CCallSpec target cconv safety)) stg_a
DynamicTarget -> case cmm_args of
(fn,_):rest -> (unzip rest, fn)
[] -> panic "cgForeignCall []"
- fc = ForeignConvention cconv arg_hints result_hints
+ fc = ForeignConvention cconv arg_hints res_hints
call_target = ForeignTarget cmm_target fc
- ; emitForeignCall safety results call_target call_args CmmMayReturn }
+ -- we want to emit code for the call, and then emitReturn.
+ -- However, if the sequel is AssignTo, we shortcut a little
+ -- and generate a foreign call that assigns the results
+ -- directly. Otherwise we end up generating a bunch of
+ -- useless "r = r" assignments, which are not merely annoying:
+ -- they prevent the common block elimination from working correctly
+ -- in the case of a safe foreign call.
+ -- See Note [safe foreign call convention]
+ --
+ ; sequel <- getSequel
+ ; case sequel of
+ AssignTo assign_to_these _ ->
+ do { emitForeignCall safety assign_to_these call_target
+ call_args CmmMayReturn
+ }
+
+ _something_else ->
+ do { emitForeignCall safety res_regs call_target
+ call_args CmmMayReturn
+ ; emitReturn (map (CmmReg . CmmLocal) res_regs)
+ }
+ }
where
-- in the stdcall calling convention, the symbol needs @size appended
-- to it, where size is the total number of bytes of arguments. We
@@ -83,7 +106,76 @@ cgForeignCall results result_hints (CCall (CCallSpec target cconv safety)) stg_a
| otherwise = Nothing
-- ToDo: this might not be correct for 64-bit API
- arg_size (arg, _) = max (widthInBytes $ typeWidth $ cmmExprType arg) wORD_SIZE
+ arg_size (arg, _) = max (widthInBytes $ typeWidth $ cmmExprType arg)
+ wORD_SIZE
+
+{- Note [safe foreign call convention]
+
+The simple thing to do for a safe foreign call would be the same as an
+unsafe one: just
+
+ emitForeignCall ...
+ emitReturn ...
+
+but consider what happens in this case
+
+ case foo x y z of
+ (# s, r #) -> ...
+
+The sequel is AssignTo [r]. The call to newUnboxedTupleRegs picks [r]
+as the result reg, and we generate
+
+ r = foo(x,y,z) returns to L1 -- emitForeignCall
+ L1:
+ r = r -- emitReturn
+ goto L2
+L2:
+ ...
+
+Now L1 is a proc point (by definition, it is the continuation of the
+safe foreign call). If L2 does a heap check, then L2 will also be a
+proc point.
+
+Furthermore, the stack layout algorithm has to arrange to save r
+somewhere between the call and the jump to L1, which is annoying: we
+would have to treat r differently from the other live variables, which
+have to be saved *before* the call.
+
+So we adopt a special convention for safe foreign calls: the results
+are copied out according to the NativeReturn convention by the call,
+and the continuation of the call should copyIn the results. (The
+copyOut code is actually inserted when the safe foreign call is
+lowered later). The result regs attached to the safe foreign call are
+only used temporarily to hold the results before they are copied out.
+
+We will now generate this:
+
+ r = foo(x,y,z) returns to L1
+ L1:
+ r = R1 -- copyIn, inserted by mkSafeCall
+ goto L2
+ L2:
+ ... r ...
+
+And when the safe foreign call is lowered later (see Note [lower safe
+foreign calls]) we get this:
+
+ suspendThread()
+ r = foo(x,y,z)
+ resumeThread()
+ R1 = r -- copyOut, inserted by lowerSafeForeignCall
+ jump L1
+ L1:
+ r = R1 -- copyIn, inserted by mkSafeCall
+ goto L2
+ L2:
+ ... r ...
+
+Now consider what happens if L2 does a heap check: the Adams
+optimisation kicks in and commons up L1 with the heap-check
+continuation, resulting in just one proc point instead of two. Yay!
+-}
+
emitCCall :: [(CmmFormal,ForeignHint)]
-> CmmExpr
@@ -125,6 +217,7 @@ emitForeignCall safety results target args _ret
(playInterruptible safety)
+
{-
-- THINK ABOUT THIS (used to happen)
-- we might need to load arguments into temporaries before
diff --git a/compiler/codeGen/StgCmmLayout.hs b/compiler/codeGen/StgCmmLayout.hs
index 9ee9192794..16b33d1faf 100644
--- a/compiler/codeGen/StgCmmLayout.hs
+++ b/compiler/codeGen/StgCmmLayout.hs
@@ -67,13 +67,17 @@ import FastString ( mkFastString, FastString, fsLit )
-- Call and return sequences
------------------------------------------------------------------------
-emitReturn :: [CmmExpr] -> FCode ()
--- Return multiple values to the sequel
+-- | Return multiple values to the sequel
+--
+-- If the sequel is @Return@
+--
+-- > return (x,y)
--
--- If the sequel is Return
--- return (x,y)
--- If the sequel is AssignTo [p,q]
--- p=x; q=y;
+-- If the sequel is @AssignTo [p,q]@
+--
+-- > p=x; q=y;
+--
+emitReturn :: [CmmExpr] -> FCode ()
emitReturn results
= do { sequel <- getSequel;
; updfr_off <- getUpdFrameOff
@@ -87,26 +91,24 @@ emitReturn results
; emitMultiAssign regs results }
}
+
+-- | @emitCall conv fun args@ makes a call to the entry-code of @fun@,
+-- using the call/return convention @conv@, passing @args@, and
+-- returning the results to the current sequel.
+--
emitCall :: (Convention, Convention) -> CmmExpr -> [CmmExpr] -> FCode ()
--- (cgCall fun args) makes a call to the entry-code of 'fun',
--- passing 'args', and returning the results to the current sequel
-emitCall convs@(callConv, _) fun args
- = do { adjustHpBackwards
- ; sequel <- getSequel
- ; updfr_off <- getUpdFrameOff
- ; emitComment $ mkFastString ("emitCall: " ++ show sequel)
- ; case sequel of
- Return _ ->
- emit $ mkForeignJump callConv fun args updfr_off
- AssignTo res_regs _ ->
- emit =<< mkCall fun convs res_regs args updfr_off (0,[])
- }
+emitCall convs fun args
+ = emitCallWithExtraStack convs fun args noExtraStack
+
+-- | @emitCallWithExtraStack conv fun args stack@ makes a call to the
+-- entry-code of @fun@, using the call/return convention @conv@,
+-- passing @args@, pushing some extra stack frames described by
+-- @stack@, and returning the results to the current sequel.
+--
emitCallWithExtraStack
:: (Convention, Convention) -> CmmExpr -> [CmmExpr]
-> (ByteOff, [(CmmExpr,ByteOff)]) -> FCode ()
--- (cgCall fun args) makes a call to the entry-code of 'fun',
--- passing 'args', and returning the results to the current sequel
emitCallWithExtraStack convs@(callConv, _) fun args extra_stack
= do { adjustHpBackwards
; sequel <- getSequel
@@ -120,7 +122,6 @@ emitCallWithExtraStack convs@(callConv, _) fun args extra_stack
}
-
adjustHpBackwards :: FCode ()
-- This function adjusts the heap pointer just before a tail call or
-- return. At a call or return, the virtual heap pointer may be less
@@ -171,55 +172,67 @@ directCall :: CLabel -> Arity -> [StgArg] -> FCode ()
-- The function f has arity n, and there are guaranteed at least n args
-- Both arity and args include void args
directCall lbl arity stg_args
- = do { cmm_args <- getNonVoidArgAmodes stg_args
- ; direct_call "directCall" lbl arity cmm_args (argsReps stg_args) }
+ = do { argreps <- getArgRepsAmodes stg_args
+ ; direct_call "directCall" lbl arity argreps }
+
slowCall :: CmmExpr -> [StgArg] -> FCode ()
-- (slowCall fun args) applies fun to args, returning the results to Sequel
slowCall fun stg_args
= do { dflags <- getDynFlags
- ; cmm_args <- getNonVoidArgAmodes stg_args
+ ; argsreps <- getArgRepsAmodes stg_args
+ ; let (rts_fun, arity) = slowCallPattern (map fst argsreps)
; let platform = targetPlatform dflags
; call <- getCode $ direct_call "slow_call"
- (mkRtsApFastLabel rts_fun) arity cmm_args reps
+ (mkRtsApFastLabel rts_fun) arity argsreps
; emitComment $ mkFastString ("slow_call for " ++
showSDoc (pprPlatform platform fun) ++
" with pat " ++ showSDoc (ftext rts_fun))
; emit (mkAssign nodeReg fun <*> call)
}
- where
- reps = argsReps stg_args
- (rts_fun, arity) = slowCallPattern reps
+
--------------
-direct_call :: String -> CLabel -> Arity -> [CmmExpr] -> [ArgRep] -> FCode ()
--- NB1: (length args) may be less than (length reps), because
--- the args exclude the void ones
--- NB2: 'arity' refers to the *reps*
-direct_call caller lbl arity args reps
- | debugIsOn && arity > length reps -- Too few args
+direct_call :: String -> CLabel -> Arity -> [(ArgRep,Maybe CmmExpr)] -> FCode ()
+direct_call caller lbl arity args
+ | debugIsOn && arity > length args -- Too few args
= do -- Caller should ensure that there are enough args!
dflags <- getDynFlags
let platform = targetPlatform dflags
- pprPanic "direct_call" (text caller <+> ppr arity
- <+> pprPlatform platform lbl <+> ppr (length reps)
- <+> pprPlatform platform args <+> ppr reps )
+ pprPanic "direct_call" $
+ text caller <+> ppr arity <+>
+ pprPlatform platform lbl <+> ppr (length args) <+>
+ pprPlatform platform (map snd args) <+> ppr (map fst args)
-
- | null rest_reps -- Precisely the right number of arguments
- = emitCall (NativeDirectCall, NativeReturn) target args
+ | null rest_args -- Precisely the right number of arguments
+ = emitCall (NativeDirectCall, NativeReturn) target (nonVArgs args)
| otherwise -- Note [over-saturated calls]
- = ASSERT( arity == length initial_reps )
- emitCallWithExtraStack (NativeDirectCall, NativeReturn)
- target fast_args (mkStkOffsets stack_args)
+ = emitCallWithExtraStack (NativeDirectCall, NativeReturn)
+ target (nonVArgs fast_args) (mkStkOffsets stack_args)
where
target = CmmLit (CmmLabel lbl)
- (initial_reps, rest_reps) = splitAt arity reps
- arg_arity = count isNonV initial_reps
- (fast_args, rest_args) = splitAt arg_arity args
- stack_args = slowArgs (zip rest_reps rest_args)
+ (fast_args, rest_args) = splitAt arity args
+ stack_args = slowArgs rest_args
+
+-- When constructing calls, it is easier to keep the ArgReps and the
+-- CmmExprs zipped together. However, a void argument has no
+-- representation, so we need to use Maybe CmmExpr (the alternative of
+-- using zeroCLit or even undefined would work, but would be ugly).
+--
+getArgRepsAmodes :: [StgArg] -> FCode [(ArgRep, Maybe CmmExpr)]
+getArgRepsAmodes = mapM getArgRepAmode
+ where getArgRepAmode arg
+ | V <- rep = return (V, Nothing)
+ | otherwise = do expr <- getArgAmode (NonVoid arg)
+ return (rep, Just expr)
+ where rep = toArgRep (argPrimRep arg)
+
+nonVArgs :: [(ArgRep, Maybe CmmExpr)] -> [CmmExpr]
+nonVArgs [] = []
+nonVArgs ((_,Nothing) : args) = nonVArgs args
+nonVArgs ((_,Just arg) : args) = arg : nonVArgs args
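A quick example of how the two fit together, with x and y standing for
arbitrary CmmExprs: getArgRepsAmodes represents a void argument as
(V, Nothing), and nonVArgs then drops it:

    nonVArgs [(P, Just x), (V, Nothing), (N, Just y)]
        == [x, y]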
{-
Note [over-saturated calls]
@@ -259,23 +272,21 @@ just more arguments that we are passing on the stack (cml_args).
-- | 'slowArgs' takes a list of function arguments and prepares them for
-- pushing on the stack for "extra" arguments to a function which requires
-- fewer arguments than we currently have.
-slowArgs :: [(ArgRep,CmmExpr)] -> [(ArgRep,CmmExpr)]
+slowArgs :: [(ArgRep, Maybe CmmExpr)] -> [(ArgRep, Maybe CmmExpr)]
slowArgs [] = []
-slowArgs amodes
- | opt_SccProfilingOn = save_cccs ++ this_pat ++ slowArgs rest
- | otherwise = this_pat ++ slowArgs rest
+slowArgs args -- careful: reps contains voids (V), but args does not
+ | opt_SccProfilingOn = save_cccs ++ this_pat ++ slowArgs rest_args
+ | otherwise = this_pat ++ slowArgs rest_args
where
- (arg_pat, args, rest) = matchSlowPattern amodes
+ (arg_pat, n) = slowCallPattern (map fst args)
+ (call_args, rest_args) = splitAt n args
+
stg_ap_pat = mkCmmRetInfoLabel rtsPackageId arg_pat
- this_pat = (N, mkLblExpr stg_ap_pat) : args
- save_cccs = [(N, mkLblExpr save_cccs_lbl), (N, curCCS)]
+ this_pat = (N, Just (mkLblExpr stg_ap_pat)) : call_args
+ save_cccs = [(N, Just (mkLblExpr save_cccs_lbl)), (N, Just curCCS)]
save_cccs_lbl = mkCmmRetInfoLabel rtsPackageId (fsLit "stg_restore_cccs")
-matchSlowPattern :: [(ArgRep,CmmExpr)]
- -> (FastString, [(ArgRep,CmmExpr)], [(ArgRep,CmmExpr)])
-matchSlowPattern amodes = (arg_pat, these, rest)
- where (arg_pat, n) = slowCallPattern (map fst amodes)
- (these, rest) = splitAt n amodes
+
-- These cases were found to cover about 99% of all slow calls:
slowCallPattern :: [ArgRep] -> (FastString, Arity)
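The equations of slowCallPattern are elided by the diff; as a hedged
reminder of its shape (an excerpt, not the full pattern set), it maps a
prefix of the ArgReps to one of the fixed RTS generic-apply entry points:

    slowCallPattern (P: P: P: _) = (fsLit "stg_ap_ppp", 3)
    slowCallPattern (P: P: _)    = (fsLit "stg_ap_pp",  2)
    slowCallPattern (P: _)       = (fsLit "stg_ap_p",   1)
    slowCallPattern (N: _)       = (fsLit "stg_ap_n",   1)
    slowCallPattern (V: _)       = (fsLit "stg_ap_v",   1)
    -- ...with the longer and mixed patterns (stg_ap_pppp, stg_ap_pv,
    -- and so on) matched first, and the base case visible just below.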
@@ -304,16 +315,16 @@ slowCallPattern [] = (fsLit "stg_ap_0", 0)
-- See Note [over-saturated calls].
mkStkOffsets
- :: [(ArgRep,CmmExpr)] -- things to make offsets for
+ :: [(ArgRep, Maybe CmmExpr)] -- things to make offsets for
-> ( ByteOff -- OUTPUTS: Topmost allocated word
, [(CmmExpr, ByteOff)] ) -- things with offsets (voids filtered out)
mkStkOffsets things
= loop 0 [] (reverse things)
where
loop offset offs [] = (offset,offs)
- loop offset offs ((V,_):things) = loop offset offs things
+ loop offset offs ((_,Nothing):things) = loop offset offs things
-- ignore Void arguments
- loop offset offs ((rep,thing):things)
+ loop offset offs ((rep,Just thing):things)
= loop thing_off ((thing, thing_off):offs) things
where
thing_off = offset + argRepSizeW rep * wORD_SIZE
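A worked example of mkStkOffsets, assuming 8-byte words (x and y are
hypothetical CmmExprs):

    -- mkStkOffsets [(N, Just x), (P, Just y)]
    --   == (16, [(x, 16), (y, 8)])
    -- The input is consumed in reverse, so the last argument y lands at
    -- the smallest offset, and the returned ByteOff (16) is the topmost
    -- allocated word.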
@@ -357,10 +368,7 @@ isNonV :: ArgRep -> Bool
isNonV V = False
isNonV _ = True
-argsReps :: [StgArg] -> [ArgRep]
-argsReps = map (toArgRep . argPrimRep)
-
-argRepSizeW :: ArgRep -> WordOff -- Size in words
+argRepSizeW :: ArgRep -> WordOff -- Size in words
argRepSizeW N = 1
argRepSizeW P = 1
argRepSizeW F = 1
diff --git a/compiler/codeGen/StgCmmMonad.hs b/compiler/codeGen/StgCmmMonad.hs
index ccf0777906..240469c3f2 100644
--- a/compiler/codeGen/StgCmmMonad.hs
+++ b/compiler/codeGen/StgCmmMonad.hs
@@ -77,9 +77,6 @@ import Unique
import UniqSupply
import FastString
import Outputable
-import Util
-
-import Compiler.Hoopl hiding (Unique, (<*>), mkLabel, mkBranch, mkLast)
import Control.Monad
import Data.List
@@ -614,7 +611,7 @@ emitComment :: FastString -> FCode ()
#if 0 /* def DEBUG */
emitComment s = emitCgStmt (CgStmt (CmmComment s))
#else
-emitComment s = return ()
+emitComment _ = return ()
#endif
emitAssign :: CmmReg -> CmmExpr -> FCode ()
@@ -707,12 +704,16 @@ mkSafeCall :: ForeignTarget -> [CmmFormal] -> [CmmActual]
-> FCode CmmAGraph
mkSafeCall t fs as upd i = do
k <- newLabelC
+ let (_off, copyout) = copyInOflow NativeReturn (Young k) fs
+ -- see Note [safe foreign call convention]
return
( mkStore (CmmStackSlot (Young k) (widthInBytes wordWidth))
(CmmLit (CmmBlock k))
- <*> mkLast (CmmForeignCall {tgt=t, res=fs, args=as, succ=k, updfr=upd, intrbl=i})
- <*> mkLabel k)
-
+ <*> mkLast (CmmForeignCall { tgt=t, res=fs, args=as, succ=k
+ , updfr=upd, intrbl=i })
+ <*> mkLabel k
+ <*> copyout
+ )
-- ----------------------------------------------------------------------------
-- CgStmts
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
index c95b1f02ff..9f87271fba 100644
--- a/compiler/codeGen/StgCmmPrim.hs
+++ b/compiler/codeGen/StgCmmPrim.hs
@@ -68,14 +68,9 @@ cgOpApp :: StgOp -- The op
-- Foreign calls
cgOpApp (StgFCallOp fcall _) stg_args res_ty
- = do { (res_regs, res_hints) <- newUnboxedTupleRegs res_ty
- -- Choose result regs r1, r2
- -- Note [Foreign call results]
- ; cgForeignCall res_regs res_hints fcall stg_args
- -- r1, r2 = foo( x, y )
- ; emitReturn (map (CmmReg . CmmLocal) res_regs) }
- -- return (r1, r2)
-
+ = cgForeignCall fcall stg_args res_ty
+ -- Note [Foreign call results]
+
-- tagToEnum# is special: we need to pull the constructor
-- out of the table, and perform an appropriate return.