-rw-r--r--   compiler/cmm/CmmBuildInfoTables.hs |  89
-rw-r--r--   compiler/cmm/CmmLayoutStack.hs     | 233
-rw-r--r--   compiler/cmm/MkGraph.hs            |  15
-rw-r--r--   compiler/codeGen/StgCmmEnv.hs      |   3
-rw-r--r--   compiler/codeGen/StgCmmForeign.hs  | 111
-rw-r--r--   compiler/codeGen/StgCmmLayout.hs   | 142
-rw-r--r--   compiler/codeGen/StgCmmMonad.hs    |  15
-rw-r--r--   compiler/codeGen/StgCmmPrim.hs     |  11
8 files changed, 390 insertions, 229 deletions
diff --git a/compiler/cmm/CmmBuildInfoTables.hs b/compiler/cmm/CmmBuildInfoTables.hs
index 43ff2b0758..7a396ee37b 100644
--- a/compiler/cmm/CmmBuildInfoTables.hs
+++ b/compiler/cmm/CmmBuildInfoTables.hs
@@ -17,7 +17,6 @@ module CmmBuildInfoTables
   , setInfoTableSRT
   , TopSRT, emptySRT, srtToData
   , bundleCAFs
-  , lowerSafeForeignCalls
   , cafTransfers )
 where
@@ -315,91 +314,3 @@ updInfoTbl toVars toSrt info_tbl@(CmmInfoTable {})
     StackRep ls -> StackRep (toVars ls)
     other -> other }
 updInfoTbl _ _ t@CmmNonInfoTable = t
-
-----------------------------------------------------------------
--- Safe foreign calls: We need to insert the code that suspends and resumes
--- the thread before and after a safe foreign call.
--- Why do we do this so late in the pipeline?
--- Because we need this code to appear without interruption: you can't rely
--- on the value of the stack pointer between the call and resetting the thread
--- state; you need to have an infotable on the young end of the stack both
--- when suspending the thread and making the foreign call.
--- All of this is much easier if we insert the suspend and resume calls here.
-
--- At the same time, we prepare for the stages of the compiler that
--- build the proc points. We have to do this at the same time because
--- the safe foreign calls need special treatment with respect to infotables.
--- A safe foreign call needs an infotable even though it isn't
--- a procpoint. The following datatype captures the information
--- needed to generate the infotables along with the Cmm data and procedures.
-
--- JD: Why not do this while splitting procedures?
-lowerSafeForeignCalls :: AreaMap -> CmmDecl -> FuelUniqSM CmmDecl
-lowerSafeForeignCalls _ t@(CmmData _ _) = return t
-lowerSafeForeignCalls areaMap (CmmProc info l g@(CmmGraph {g_entry=entry})) = do
-  let block b mblocks = mblocks >>= lowerSafeCallBlock entry areaMap b
-  blocks <- foldGraphBlocks block (return mapEmpty) g
-  return $ CmmProc info l (ofBlockMap entry blocks)
-
--- If the block ends with a safe call in the block, lower it to an unsafe
--- call (with appropriate saves and restores before and after).
-lowerSafeCallBlock :: BlockId -> AreaMap -> CmmBlock -> BlockEnv CmmBlock
-                   -> FuelUniqSM (BlockEnv CmmBlock)
-lowerSafeCallBlock entry areaMap b blocks =
-  case blockToNodeList b of
-    (JustC (CmmEntry id), m, JustC l@(CmmForeignCall {})) ->
-      lowerSafeForeignCall entry areaMap blocks id m l
-    _ -> return $ insertBlock b blocks
-
--- Late in the code generator, we want to insert the code necessary
--- to lower a safe foreign call to a sequence of unsafe calls.
-lowerSafeForeignCall :: BlockId -> AreaMap -> BlockEnv CmmBlock -> BlockId -> [CmmNode O O] -> CmmNode O C
-                     -> FuelUniqSM (BlockEnv CmmBlock)
-lowerSafeForeignCall entry areaMap blocks bid m
-    (CmmForeignCall {tgt=tgt, res=rs, args=as, succ=succ, updfr = updfr_off, intrbl = intrbl}) =
- do let newTemp rep = getUniqueM >>= \u -> return (LocalReg u rep)
-    -- Both 'id' and 'new_base' are KindNonPtr because they're
-    -- RTS-only objects and are not subject to garbage collection
-    id <- newTemp bWord
-    new_base <- newTemp (cmmRegType (CmmGlobal BaseReg))
-    let (caller_save, caller_load) = callerSaveVolatileRegs
-    load_tso <- newTemp gcWord -- TODO FIXME NOW
-    load_stack <- newTemp gcWord -- TODO FIXME NOW
-    let (<**>) = (M.<*>)
-    let suspendThread = foreignLbl "suspendThread"
-        resumeThread  = foreignLbl "resumeThread"
-        foreignLbl name = CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit name)))
-        suspend = saveThreadState <**>
-                  caller_save <**>
-                  mkUnsafeCall (ForeignTarget suspendThread
-                                  (ForeignConvention CCallConv [AddrHint, NoHint] [AddrHint]))
-                    [id] [CmmReg (CmmGlobal BaseReg), CmmLit (mkIntCLit (fromEnum intrbl))]
-        midCall = mkUnsafeCall tgt rs as
-        resume  = mkUnsafeCall (ForeignTarget resumeThread
-                                  (ForeignConvention CCallConv [AddrHint] [AddrHint]))
-                    [new_base] [CmmReg (CmmLocal id)] <**>
-                  -- Assign the result to BaseReg: we
-                  -- might now have a different Capability!
-                  mkAssign (CmmGlobal BaseReg) (CmmReg (CmmLocal new_base)) <**>
-                  caller_load <**>
-                  loadThreadState load_tso load_stack
-        -- We have to save the return value on the stack because its next use
-        -- may appear in a different procedure due to procpoint splitting...
-        saveRetVals = foldl (<**>) mkNop $ map (M.mkMiddle . spill) rs
-        spill r = CmmStore (regSlot r) (CmmReg $ CmmLocal r)
-        regSlot r@(LocalReg _ _) = CmmRegOff (CmmGlobal Sp) (sp_off - offset)
-          where offset = w + expectJust "lowerForeign" Nothing -- XXX need to fix this: (Map.lookup (RegSlot r) areaMap)
-                sp_off = wORD_SIZE + expectJust "lowerForeign" (Map.lookup area areaMap)
-                area = if succ == entry then Old else Young succ
-                w = widthInBytes $ typeWidth $ localRegType r
-        -- Note: The successor must be a procpoint, and we have already split,
-        -- so we use a jump, not a branch.
-        succLbl = CmmLit (CmmLabel (infoTblLbl succ))
-        jump = CmmCall { cml_target = succLbl, cml_cont = Nothing
-                       , cml_args = widthInBytes wordWidth, cml_ret_args = 0
-                       , cml_ret_off = updfr_off }
-    graph' <- liftUniq $ labelAGraph bid $ catAGraphs (map M.mkMiddle m) <**>
-                         suspend <**> midCall <**>
-                         resume <**> saveRetVals <**> M.mkLast jump
-    return $ blocks `mapUnion` toBlockMap graph'
-lowerSafeForeignCall _ _ _ _ _ _ = panic "lowerSafeForeignCall was passed something else"
-
diff --git a/compiler/cmm/CmmLayoutStack.hs b/compiler/cmm/CmmLayoutStack.hs
index 9a382c0557..87f495aa72 100644
--- a/compiler/cmm/CmmLayoutStack.hs
+++ b/compiler/cmm/CmmLayoutStack.hs
@@ -3,13 +3,20 @@ module CmmLayoutStack (
   cmmLayoutStack, setInfoTableStackMap
 ) where
 
+import StgCmmUtils      ( callerSaveVolatileRegs ) -- XXX
+import StgCmmForeign    ( saveThreadState, loadThreadState ) -- XXX
+
 import Cmm
 import BlockId
+import CLabel
 import CmmUtils
+import MkGraph
+import Module
+import ForeignCall
 import CmmLive
 import CmmProcPoint
 import SMRep
-import Hoopl
+import Hoopl hiding ((<*>), mkLast, mkMiddle)
 import OptimizationFuel
 import Constants
 import UniqSupply
@@ -177,31 +184,49 @@ layout procpoints liveness entry entry_args final_stackmaps final_hwm blocks
     -- a proc point, we must save the live variables, adjust Sp, and
     -- construct the StackMaps for each of the successor blocks.
     -- See handleLastNode for details.
-    (saves, out, sp_off, last1, fixup_blocks)
+    (middle2, sp_off, middle3, last1, fixup_blocks, out)
        <- handleLastNode procpoints liveness cont_info
                          acc_stackmaps stack1 last0
 
-    let hwm' = maximum (acc_hwm : map sm_sp (mapElems out))
-        middle2 = maybeAddSpAdj sp_off $ foldl blockSnoc middle1 saves
-
-        area_off = getAreaOff final_stackmaps
-
-        -- manifest Sp: turn all CmmStackSlots into actual loads
-        adj_middle = mapExpDeep (areaToSp sp0 sp_high area_off)
-        adj_last = optStackCheck .
-                   mapExpDeep (areaToSp (sp0 - sp_off) sp_high area_off)
-
-        middle3 = blockFromList $
-                  map adj_middle $
-                  elimStackStores stack0 final_stackmaps area_off $
-                  blockToList middle2
-
-        newblock = blockJoin entry0 middle3 (adj_last last1)
-
-        fixup_blocks' = map (blockMapNodes3 (id, adj_middle, id)) fixup_blocks
+    -- our block:
+    --    middle1          -- the original middle nodes
+    --    middle2          -- live variable saves from handleLastNode
+    --    Sp = Sp + sp_off -- Sp adjustment goes here
+    --    middle3          -- some more middle nodes from handleLastNode
+    --    last1            -- the last node
+    --
+    -- The next step is to manifest Sp: turn all the CmmStackSlots
+    -- into CmmLoads from Sp.  The adjustment for middle1/middle2
+    -- will be different from that for middle3/last1, because the
+    -- Sp adjustment intervenes.
+    --
+    let area_off = getAreaOff final_stackmaps
+
+        adj_pre_sp, adj_post_sp :: CmmNode e x -> CmmNode e x
+        adj_pre_sp  = mapExpDeep (areaToSp sp0            sp_high area_off)
+        adj_post_sp = mapExpDeep (areaToSp (sp0 - sp_off) sp_high area_off)
+
+        middle_pre = maybeAddSpAdj sp_off $
+                     blockFromList $
+                     map adj_pre_sp $
+                     elimStackStores stack0 final_stackmaps area_off $
+                     blockToList $
+                     foldl blockSnoc middle1 middle2
+
+        middle_post = map adj_post_sp middle3
+
+        final_middle = foldl blockSnoc middle_pre middle_post
+        final_last   = optStackCheck (adj_post_sp last1)
+
+        newblock = blockJoin entry0 final_middle final_last
+
+        fixup_blocks' = map (blockMapNodes3 (id, adj_post_sp, id))
+                            fixup_blocks
 
         stackmaps' = mapUnion acc_stackmaps out
 
+        hwm' = maximum (acc_hwm : map sm_sp (mapElems out))
+    pprTrace "layout(out)" (ppr out) $ return ()
 
     go bs stackmaps' hwm' (newblock : fixup_blocks' ++ acc_blocks)
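The pre/post split above is worth pinning down with a toy model: everything ahead of the Sp adjustment is rewritten against the old depth sp0, and everything after it (the C2 code from handleLastNode, in the next hunk) against sp0 - sp_off. The following sketch is illustrative only; Area and Expr are invented stand-ins for the Cmm types, and the byte-offset convention here is an assumption, not the real areaToSp.

    -- A minimal sketch of the two-phase Sp manifestation, with toy types.
    data Area = Old | Young Int deriving (Eq, Show)

    data Expr
      = StackSlot Area Int   -- abstract: n bytes into an area
      | SpOff Int            -- concrete: address Sp + n
      deriving (Eq, Show)

    -- Given the current virtual Sp depth and a base depth for each area,
    -- turn an abstract slot into an Sp-relative address.
    areaToSp :: Int -> (Area -> Int) -> Expr -> Expr
    areaToSp sp areaOff (StackSlot a n) = SpOff (sp - (areaOff a + n))
    areaToSp _  _       e               = e

    -- Nodes before "Sp = Sp + sp_off" are rewritten with the old depth
    -- sp0, nodes after it with sp0 - sp_off, mirroring the
    -- adj_pre_sp/adj_post_sp pair above.
    manifestSp :: Int -> Int -> (Area -> Int)
               -> ([Expr], [Expr]) -> ([Expr], [Expr])
    manifestSp sp0 sp_off areaOff (pre, post) =
      ( map (areaToSp sp0            areaOff) pre
      , map (areaToSp (sp0 - sp_off) areaOff) post )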
@@ -292,16 +317,33 @@ getStackLoc (Young l) n stackmaps =
 -- -----------------------------------------------------------------------------
 -- Handling stack allocation for a last node
 
+-- We take a single last node and turn it into:
+--
+--    C1 (some statements)
+--    Sp = Sp + N
+--    C2 (some more statements)
+--    call f()          -- the actual last node
+--
+-- plus possibly some more blocks (we may have to add some fixup code
+-- between the last node and the continuation).
+--
+-- C1: is the code for saving the variables across this last node onto
+-- the stack, if the continuation is a call or jumps to a proc point.
+--
+-- C2: if the last node is a safe foreign call, we have to inject some
+-- extra code that goes *after* the Sp adjustment.
+
 handleLastNode
    :: ProcPointSet -> BlockEnv CmmLive -> BlockEnv ByteOff
    -> BlockEnv StackMap -> StackMap
    -> CmmNode O C
    -> UniqSM
-      ( [CmmNode O O]      -- assignments to save live variables
-      , BlockEnv StackMap  -- stackmaps for the continuations
-      , ByteOff            -- amount to adjust Sp before the jump
+      ( [CmmNode O O]      -- nodes to go *before* the Sp adjustment
+      , ByteOff            -- amount to adjust Sp
+      , [CmmNode O O]      -- nodes to go *after* the Sp adjustment
       , CmmNode O C        -- new last node
       , [CmmBlock]         -- new blocks
+      , BlockEnv StackMap  -- stackmaps for the continuations
       )
 
 handleLastNode procpoints liveness cont_info stackmaps
@@ -312,39 +354,45 @@ handleLastNode procpoints liveness cont_info stackmaps
       -- is cml_args, after popping any other junk from the stack.
       CmmCall{ cml_cont = Nothing, .. } -> do
           let sp_off = sp0 - cml_args
-          return ([], mapEmpty, sp_off, last, [])
+          return ([], sp_off, [], last, [], mapEmpty)
 
       -- At each CmmCall with a continuation:
       CmmCall{ cml_cont = Just cont_lbl, .. } ->
-         lastCall cont_lbl cml_args cml_ret_args cml_ret_off
+         lastCall cont_lbl [] cml_args cml_ret_args cml_ret_off
 
-      CmmForeignCall{ succ = cont_lbl, .. } ->
-         lastCall cont_lbl 0{-no args-} 0{-no results-} (sm_ret_off stack0)
+      CmmForeignCall{ succ = cont_lbl, .. } -> do
+          (mids, spoff, _, last', blocks, stackmap') <-
+             lastCall cont_lbl res wORD_SIZE wORD_SIZE (sm_ret_off stack0)
+             -- one word each for args and results: the return address
+          (extra_mids, last'') <- lowerSafeForeignCall last'
+          return (mids, spoff, extra_mids, last'', blocks, stackmap')
 
       CmmBranch{..}     ->  handleProcPoints
       CmmCondBranch{..} ->  handleProcPoints
      CmmSwitch{..}     ->  handleProcPoints
 
   where
-     lastCall :: BlockId -> ByteOff -> ByteOff -> ByteOff
+     lastCall :: BlockId -> [LocalReg] -> ByteOff -> ByteOff -> ByteOff
        -> UniqSM
           ( [CmmNode O O]
-          , BlockEnv StackMap
           , ByteOff
+          , [CmmNode O O]
           , CmmNode O C
           , [CmmBlock]
+          , BlockEnv StackMap
           )
-     lastCall cont_lbl cml_args cml_ret_args cml_ret_off
+     lastCall cont_lbl res_regs cml_args cml_ret_args cml_ret_off
       -- If we have already seen this continuation before, then
       -- we just have to make the stack look the same:
       | Just cont_stack <- mapLookup cont_lbl stackmaps
       =  return ( fixupStack stack0 cont_stack
-                , stackmaps
                 , sp0 - sm_sp cont_stack
+                , []
                 , last
-                , [] )
+                , []
+                , stackmaps )
 
       -- a continuation we haven't seen before:
      -- allocate the stack frame for it.
@@ -353,6 +401,7 @@ handleLastNode procpoints liveness cont_info stackmaps
 
          -- get the set of LocalRegs live in the continuation
          let target_live = mapFindWithDefault Set.empty cont_lbl liveness
+                             `Set.difference` Set.fromList res_regs
 
          -- the stack from the base to cml_ret_off is off-limits.
          -- our new stack frame contains:
@@ -382,18 +431,19 @@ handleLastNode procpoints liveness cont_info stackmaps
          -- emit an Sp adjustment, taking into account the call area
          --
          return ( assigs
-                , mapSingleton cont_lbl cont_stack
                 , sp_off
+                , []
                 , last
                 , [] -- no new blocks
-                )
+                , mapSingleton cont_lbl cont_stack )
 
     handleProcPoints :: UniqSM ( [CmmNode O O]
-                               , BlockEnv StackMap
                                , ByteOff
+                               , [CmmNode O O]
                                , CmmNode O C
-                               , [CmmBlock] )
+                               , [CmmBlock]
+                               , BlockEnv StackMap )
 
     handleProcPoints = do
         pps <- mapM handleProcPoint (successors last)
@@ -401,10 +451,11 @@ handleLastNode procpoints liveness cont_info stackmaps
            lbl_map = mapFromList [ (l,tmp) | (l,tmp,_,_) <- pps ]
            fix_lbl l = mapLookup l lbl_map `orElse` l
        return ( []
-               , mapFromList [ (l, sm) | (l,_,sm,_) <- pps ]
               , 0
+               , []
               , mapSuccessors fix_lbl last
-               , concat [ blk | (_,_,_,blk) <- pps ] )
+               , concat [ blk | (_,_,_,blk) <- pps ]
+               , mapFromList [ (l, sm) | (l,_,sm,_) <- pps ] )
 
    -- For each proc point that is a successor of this block
    -- (a) if the proc point already has a stackmap, we need to
@@ -642,6 +693,112 @@ stackMapToLiveness StackMap{..} =
 
 -- -----------------------------------------------------------------------------
+-- Lowering safe foreign calls
+
+{-
+Note [lower safe foreign calls]
+
+We start with
+
+   Sp[young(L1)] = L1
+ ,-----------------------
+ | r1 = foo(x,y,z) returns to L1
+ '-----------------------
+ L1:
+   R1 = r1 -- copyIn, inserted by mkSafeCall
+   ...
+
+the stack layout algorithm will arrange to save and reload everything
+live across the call.  Our job now is to expand the call so we get
+
+   Sp[young(L1)] = L1
+ ,-----------------------
+ | SAVE_THREAD_STATE()
+ | token = suspendThread(BaseReg, interruptible)
+ | r = foo(x,y,z)
+ | BaseReg = resumeThread(token)
+ | LOAD_THREAD_STATE()
+ | R1 = r -- copyOut
+ | jump L1
+ '-----------------------
+ L1:
+   r = R1 -- copyIn, inserted by mkSafeCall
+   ...
+
+Note the copyOut, which saves the results in the places that L1 is
+expecting them (see Note [safe foreign call convention]).
+-}
+
+
+lowerSafeForeignCall :: CmmNode O C -> UniqSM ([CmmNode O O], CmmNode O C)
+lowerSafeForeignCall CmmForeignCall { .. } =
+  do let
+     -- Both 'id' and 'new_base' are KindNonPtr because they're
+     -- RTS-only objects and are not subject to garbage collection
+     id <- newTemp bWord
+     new_base <- newTemp (cmmRegType (CmmGlobal BaseReg))
+     let (caller_save, caller_load) = callerSaveVolatileRegs
+     load_tso <- newTemp gcWord
+     load_stack <- newTemp gcWord
+     let suspend = saveThreadState <*>
+                   caller_save <*>
+                   mkMiddle (callSuspendThread id intrbl)
+         midCall = mkUnsafeCall tgt res args
+         resume  = mkMiddle (callResumeThread new_base id) <*>
+                   -- Assign the result to BaseReg: we
+                   -- might now have a different Capability!
+                   mkAssign (CmmGlobal BaseReg) (CmmReg (CmmLocal new_base)) <*>
+                   caller_load <*>
+                   loadThreadState load_tso load_stack
+         -- Note: The successor must be a procpoint, and we have already split,
+         -- so we use a jump, not a branch.
+         succLbl = CmmLit (CmmLabel (infoTblLbl succ))
+
+         (ret_args, copyout) = copyOutOflow NativeReturn Jump (Young succ)
+                                  (map (CmmReg . CmmLocal) res)
+                                  updfr (0, [])
+
+         jump = CmmCall { cml_target   = succLbl
+                        , cml_cont     = Just succ
+                        , cml_args     = widthInBytes wordWidth
+                        , cml_ret_args = ret_args
+                        , cml_ret_off  = updfr }
+
+     graph' <- lgraphOfAGraph $ suspend <*>
+                                midCall <*>
+                                resume  <*>
+                                copyout <*>
+                                mkLast jump
+
+     case toBlockList graph' of
+       [one] -> let (_, middle, last) = blockSplit one
+                in return (blockToList middle, last)
+       _ -> panic "lowerSafeForeignCall0"
+
+lowerSafeForeignCall _ = panic "lowerSafeForeignCall1"
+
+
+foreignLbl :: FastString -> CmmExpr
+foreignLbl name = CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId name))
+
+newTemp :: CmmType -> UniqSM LocalReg
+newTemp rep = getUniqueM >>= \u -> return (LocalReg u rep)
+
+callSuspendThread :: LocalReg -> Bool -> CmmNode O O
+callSuspendThread id intrbl =
+  CmmUnsafeForeignCall
+       (ForeignTarget (foreignLbl (fsLit "suspendThread"))
+            (ForeignConvention CCallConv [AddrHint, NoHint] [AddrHint]))
+       [id] [CmmReg (CmmGlobal BaseReg), CmmLit (mkIntCLit (fromEnum intrbl))]
+
+callResumeThread :: LocalReg -> LocalReg -> CmmNode O O
+callResumeThread new_base id =
+  CmmUnsafeForeignCall
+       (ForeignTarget (foreignLbl (fsLit "resumeThread"))
+            (ForeignConvention CCallConv [AddrHint] [AddrHint]))
+       [new_base] [CmmReg (CmmLocal id)]
+
+-- -----------------------------------------------------------------------------
 
 plusW :: ByteOff -> WordOff -> ByteOff
 plusW b w = b + w * wORD_SIZE
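The lowered bracket mirrors the RTS suspendThread/resumeThread protocol. As a rough illustration of why the code must reload BaseReg from resumeThread's result, here is a toy Haskell model; Capability and Token are invented stand-ins, not the real RTS types, and the two functions only mimic the shape of the real C entry points.

    newtype Capability = Capability Int deriving Show
    newtype Token      = Token Capability

    suspendThread :: Capability -> Bool -> IO Token
    suspendThread cap _interruptible = return (Token cap)

    resumeThread :: Token -> IO Capability
    resumeThread (Token cap) = return cap
      -- the real RTS may hand back a *different* Capability here

    -- The bracket generated around a safe call: the Capability returned
    -- by resumeThread becomes the new BaseReg afterwards.
    safeCall :: Capability -> Bool -> IO a -> IO (a, Capability)
    safeCall baseReg intrbl foreignCode = do
      token    <- suspendThread baseReg intrbl -- id = suspendThread(BaseReg, intrbl)
      r        <- foreignCode                  -- the unsafe call proper
      baseReg' <- resumeThread token           -- BaseReg = resumeThread(id)
      return (r, baseReg')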
diff --git a/compiler/cmm/MkGraph.hs b/compiler/cmm/MkGraph.hs
index 922f31e45a..797b785de2 100644
--- a/compiler/cmm/MkGraph.hs
+++ b/compiler/cmm/MkGraph.hs
@@ -12,6 +12,7 @@ module MkGraph
   , mkCbranch, mkSwitch
   , mkReturn, mkReturnSimple, mkComment, mkCallEntry, mkBranch
   , copyInOflow, copyOutOflow
+  , noExtraStack
   , toCall, Transfer(..)
   ) where
@@ -188,8 +189,7 @@ mkJumpGC e actuals updfr_off =
 mkForeignJump :: Convention -> CmmExpr -> [CmmActual] -> UpdFrameOffset
               -> CmmAGraph
 mkForeignJump conv e actuals updfr_off =
-  lastWithArgs Jump Old conv actuals updfr_off $
-    toCall e Nothing updfr_off 0
+  mkForeignJumpExtra conv e actuals updfr_off noExtraStack
 
 mkForeignJumpExtra :: Convention -> CmmExpr -> [CmmActual]
                    -> UpdFrameOffset -> (ByteOff, [(CmmExpr, ByteOff)])
@@ -208,13 +208,11 @@ mkReturn :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph
 mkReturn e actuals updfr_off =
   lastWithArgs Ret Old NativeReturn actuals updfr_off $
     toCall e Nothing updfr_off 0
-   -- where e = CmmLoad (CmmStackSlot Old updfr_off) gcWord
 
 mkReturnSimple :: [CmmActual] -> UpdFrameOffset -> CmmAGraph
 mkReturnSimple actuals updfr_off =
-  lastWithArgs Ret Old NativeReturn actuals updfr_off $
-    toCall e Nothing updfr_off 0
-  where e = CmmLoad (CmmStackSlot Old updfr_off) gcWord
+  mkReturn e actuals updfr_off
+  where e = CmmLoad (CmmStackSlot Old updfr_off) gcWord
 
 mkBranch :: BlockId -> CmmAGraph
 mkBranch bid = mkLast (CmmBranch bid)
@@ -346,9 +344,8 @@ lastWithArgs :: Transfer -> Area -> Convention -> [CmmActual]
              -> (ByteOff -> CmmAGraph)
             -> CmmAGraph
 lastWithArgs transfer area conv actuals updfr_off last =
-  let (outArgs, copies) = copyOutOflow conv transfer area actuals
-                             updfr_off noExtraStack in
-  copies <*> last outArgs
+  lastWithArgsAndExtraStack transfer area conv actuals
+                            updfr_off noExtraStack last
 
 lastWithArgsAndExtraStack :: Transfer -> Area -> Convention -> [CmmActual]
                           -> UpdFrameOffset -> (ByteOff, [(CmmExpr,ByteOff)])
diff --git a/compiler/codeGen/StgCmmEnv.hs b/compiler/codeGen/StgCmmEnv.hs
index d8a7061eec..3b56e2feb6 100644
--- a/compiler/codeGen/StgCmmEnv.hs
+++ b/compiler/codeGen/StgCmmEnv.hs
@@ -27,7 +27,7 @@ module StgCmmEnv (
         bindArgsToRegs, bindToReg, rebindToReg,
         bindArgToReg, idToReg,
-        getArgAmode, getNonVoidArgAmodes,
+        getArgAmode, getNonVoidArgAmodes, getCgIdInfo,
         maybeLetNoEscape,
     ) where
@@ -213,7 +213,6 @@ getNonVoidArgAmodes (arg:args)
        ; amodes <- getNonVoidArgAmodes args
        ; return ( amode : amodes ) }
-
 ------------------------------------------------------------------------
 -- Interface functions for binding and re-binding names
 ------------------------------------------------------------------------
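The MkGraph changes above are a pure refactor: the plain entry points now delegate to their *Extra variants with a neutral extra-stack value, instead of duplicating the call-building code. A toy rendering of that shape, with stand-in types for the real Cmm ones:

    type ByteOff = Int
    type ExtraStack e = (ByteOff, [(e, ByteOff)])  -- bytes to allocate,
                                                   -- plus (value, offset) stores

    noExtraStack :: ExtraStack e
    noExtraStack = (0, [])

    -- The simple variant is the general variant applied to noExtraStack,
    -- the same pattern as mkForeignJump vs mkForeignJumpExtra above.
    emitWith :: ([e] -> ExtraStack e -> r) -> [e] -> r
    emitWith general args = general args noExtraStack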
diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs
index f1a522b37d..d5c9600b38 100644
--- a/compiler/codeGen/StgCmmForeign.hs
+++ b/compiler/codeGen/StgCmmForeign.hs
@@ -22,6 +22,7 @@ import StgCmmEnv
 import StgCmmMonad
 import StgCmmUtils
 import StgCmmClosure
+import StgCmmLayout
 
 import BlockId
 import Cmm
@@ -45,15 +46,16 @@
 -- Code generation for Foreign Calls
 -----------------------------------------------------------------------------
 
-cgForeignCall :: [LocalReg]      -- r1,r2  where to put the results
-              -> [ForeignHint]
-              -> ForeignCall     -- the op
+-- | emit code for a foreign call, and return the results to the sequel.
+--
+cgForeignCall :: ForeignCall     -- the op
               -> [StgArg]        -- x,y    arguments
+              -> Type            -- result type
               -> FCode ()
--- Emits code for an unsafe foreign call:      r1, r2 = foo( x, y, z )
-cgForeignCall results result_hints (CCall (CCallSpec target cconv safety)) stg_args
+cgForeignCall (CCall (CCallSpec target cconv safety)) stg_args res_ty
   = do  { cmm_args <- getFCallArgs stg_args
+        ; (res_regs, res_hints) <- newUnboxedTupleRegs res_ty
        ; let ((call_args, arg_hints), cmm_target)
               = case target of
                  StaticTarget lbl mPkgId
                    -> let labelSource
                             = case mPkgId of
                                  Nothing    -> ForeignLabelInThisPackage
                                  Just pkgId -> ForeignLabelInPackage pkgId
-                           size = call_size cmm_args
+                           size = call_size cmm_args
                       in ( unzip cmm_args
                          , CmmLit (CmmLabel
                                      (mkForeignLabel lbl size labelSource IsFunction)))
 
                  DynamicTarget -> case cmm_args of
                                     (fn,_):rest -> (unzip rest, fn)
                                     [] -> panic "cgForeignCall []"
-              fc = ForeignConvention cconv arg_hints result_hints
+              fc = ForeignConvention cconv arg_hints res_hints
               call_target = ForeignTarget cmm_target fc
 
-        ; emitForeignCall safety results call_target call_args CmmMayReturn }
+        -- we want to emit code for the call, and then emitReturn.
+        -- However, if the sequel is AssignTo, we shortcut a little
+        -- and generate a foreign call that assigns the results
+        -- directly.  Otherwise we end up generating a bunch of
+        -- useless "r = r" assignments, which are not merely annoying:
+        -- they prevent the common block elimination from working correctly
+        -- in the case of a safe foreign call.
+        -- See Note [safe foreign call convention]
+        --
+        ; sequel <- getSequel
+        ; case sequel of
+            AssignTo assign_to_these _ ->
+                do { emitForeignCall safety assign_to_these call_target
+                                     call_args CmmMayReturn
+                   }
+
+            _something_else ->
+                do { emitForeignCall safety res_regs call_target
+                                     call_args CmmMayReturn
+                   ; emitReturn (map (CmmReg . CmmLocal) res_regs)
+                   }
+     }
  where
        -- in the stdcall calling convention, the symbol needs @size appended
        -- to it, where size is the total number of bytes of arguments.  We
@@ -83,7 +106,76 @@
        | otherwise              = Nothing
 
        -- ToDo: this might not be correct for 64-bit API
-       arg_size (arg, _) = max (widthInBytes $ typeWidth $ cmmExprType arg) wORD_SIZE
+       arg_size (arg, _) = max (widthInBytes $ typeWidth $ cmmExprType arg)
+                               wORD_SIZE
+
+{- Note [safe foreign call convention]
+
+The simple thing to do for a safe foreign call would be the same as an
+unsafe one: just
+
+    emitForeignCall ...
+    emitReturn ...
+
+but consider what happens in this case
+
+   case foo x y z of
+     (# s, r #) -> ...
+
+The sequel is AssignTo [r].  The call to newUnboxedTupleRegs picks [r]
+as the result reg, and we generate
+
+  r = foo(x,y,z) returns to L1  -- emitForeignCall
+ L1:
+  r = r  -- emitReturn
+  goto L2
+ L2:
+  ...
+
+Now L1 is a proc point (by definition, it is the continuation of the
+safe foreign call).  If L2 does a heap check, then L2 will also be a
+proc point.
+
+Furthermore, the stack layout algorithm has to arrange to save r
+somewhere between the call and the jump to L1, which is annoying: we
+would have to treat r differently from the other live variables, which
+have to be saved *before* the call.
+
+So we adopt a special convention for safe foreign calls: the results
+are copied out according to the NativeReturn convention by the call,
+and the continuation of the call should copyIn the results.  (The
+copyOut code is actually inserted when the safe foreign call is
+lowered later).  The result regs attached to the safe foreign call are
+only used temporarily to hold the results before they are copied out.
+
+We will now generate this:
+
+  r = foo(x,y,z) returns to L1
+ L1:
+  r = R1  -- copyIn, inserted by mkSafeCall
+  goto L2
+ L2:
+  ... r ...
+
+And when the safe foreign call is lowered later (see Note [lower safe
+foreign calls]) we get this:
+
+  suspendThread()
+  r = foo(x,y,z)
+  resumeThread()
+  R1 = r  -- copyOut, inserted by lowerSafeForeignCall
+  jump L1
+ L1:
+  r = R1  -- copyIn, inserted by mkSafeCall
+  goto L2
+ L2:
+  ... r ...
+
+Now consider what happens if L2 does a heap check: the Adams
+optimisation kicks in and commons up L1 with the heap-check
+continuation, resulting in just one proc point instead of two. Yay!
+-}
+
 
 emitCCall :: [(CmmFormal,ForeignHint)]
           -> CmmExpr
@@ -125,6 +217,7 @@ emitForeignCall safety results target args _ret
                           (playInterruptible safety)
 
+
 {-
 -- THINK ABOUT THIS (used to happen)
 -- we might need to load arguments into temporaries before
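The stdcall @size suffix computed by call_size/arg_size in the hunk above is easy to check by hand: each argument occupies at least one word on the stack. This standalone transcription assumes a 32-bit word and the argument widths shown; the function name is hypothetical.

    stdcallSuffix :: Int -> [Int] -> Int
    stdcallSuffix wordSize argWidths = sum [ max w wordSize | w <- argWidths ]

    -- e.g. foo(int, double) on a 32-bit target: the label is printed foo@12
    main :: IO ()
    main = print (stdcallSuffix 4 [4, 8])  -- 12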
diff --git a/compiler/codeGen/StgCmmLayout.hs b/compiler/codeGen/StgCmmLayout.hs
index 9ee9192794..16b33d1faf 100644
--- a/compiler/codeGen/StgCmmLayout.hs
+++ b/compiler/codeGen/StgCmmLayout.hs
@@ -67,13 +67,17 @@ import FastString ( mkFastString, FastString, fsLit )
 -- Call and return sequences
 ------------------------------------------------------------------------
 
-emitReturn :: [CmmExpr] -> FCode ()
--- Return multiple values to the sequel
+-- | Return multiple values to the sequel
+--
+-- If the sequel is @Return@
+--
+-- >     return (x,y)
 --
--- If the sequel is Return
--- 	return (x,y)
--- If the sequel is AssignTo [p,q]
--- 	p=x; q=y;
+-- If the sequel is @AssignTo [p,q]@
+--
+-- >     p=x; q=y;
+--
+emitReturn :: [CmmExpr] -> FCode ()
 emitReturn results
   = do { sequel <- getSequel;
        ; updfr_off <- getUpdFrameOff
@@ -87,26 +91,24 @@ emitReturn results
            ; emitMultiAssign regs results }
     }
 
+
+-- | @emitCall conv fun args@ makes a call to the entry-code of @fun@,
+-- using the call/return convention @conv@, passing @args@, and
+-- returning the results to the current sequel.
+--
 emitCall :: (Convention, Convention) -> CmmExpr -> [CmmExpr] -> FCode ()
--- (cgCall fun args) makes a call to the entry-code of 'fun',
--- passing 'args', and returning the results to the current sequel
-emitCall convs@(callConv, _) fun args
-  = do { adjustHpBackwards
-       ; sequel <- getSequel
-       ; updfr_off <- getUpdFrameOff
-       ; emitComment $ mkFastString ("emitCall: " ++ show sequel)
-       ; case sequel of
-           Return _ ->
-             emit $ mkForeignJump callConv fun args updfr_off
-           AssignTo res_regs _ ->
-             emit =<< mkCall fun convs res_regs args updfr_off (0,[])
-       }
+emitCall convs fun args
+  = emitCallWithExtraStack convs fun args noExtraStack
+
+-- | @emitCallWithExtraStack conv fun args stack@ makes a call to the
+-- entry-code of @fun@, using the call/return convention @conv@,
+-- passing @args@, pushing some extra stack frames described by
+-- @stack@, and returning the results to the current sequel.
+--
 emitCallWithExtraStack
    :: (Convention, Convention) -> CmmExpr -> [CmmExpr]
    -> (ByteOff, [(CmmExpr,ByteOff)]) -> FCode ()
--- (cgCall fun args) makes a call to the entry-code of 'fun',
--- passing 'args', and returning the results to the current sequel
 emitCallWithExtraStack convs@(callConv, _) fun args extra_stack
   = do { adjustHpBackwards
       ; sequel <- getSequel
@@ -120,7 +122,6 @@ emitCallWithExtraStack convs@(callConv, _) fun args extra_stack
    }
 
 
-
 adjustHpBackwards :: FCode ()
 -- This function adjusts the heap pointer just before a tail call or
 -- return.  At a call or return, the virtual heap pointer may be less
@@ -171,55 +172,67 @@ directCall :: CLabel -> Arity -> [StgArg] -> FCode ()
 -- The function f has arity n, and there are guaranteed at least n args
 -- Both arity and args include void args
 directCall lbl arity stg_args
-  = do  { cmm_args <- getNonVoidArgAmodes stg_args
-        ; direct_call "directCall" lbl arity cmm_args (argsReps stg_args) }
+  = do  { argreps <- getArgRepsAmodes stg_args
+        ; direct_call "directCall" lbl arity argreps }
+
 
 slowCall :: CmmExpr -> [StgArg] -> FCode ()
 -- (slowCall fun args) applies fun to args, returning the results to Sequel
 slowCall fun stg_args
   = do  { dflags <- getDynFlags
-        ; cmm_args <- getNonVoidArgAmodes stg_args
+        ; argsreps <- getArgRepsAmodes stg_args
+        ; let (rts_fun, arity) = slowCallPattern (map fst argsreps)
        ; let platform = targetPlatform dflags
        ; call <- getCode $ direct_call "slow_call"
-                (mkRtsApFastLabel rts_fun) arity cmm_args reps
+                (mkRtsApFastLabel rts_fun) arity argsreps
        ; emitComment $ mkFastString ("slow_call for " ++
                                      showSDoc (pprPlatform platform fun) ++
                                      " with pat " ++ showSDoc (ftext rts_fun))
        ; emit (mkAssign nodeReg fun <*> call) }
-  where
-    reps = argsReps stg_args
-    (rts_fun, arity) = slowCallPattern reps
+
 
 --------------
-direct_call :: String -> CLabel -> Arity -> [CmmExpr] -> [ArgRep] -> FCode ()
--- NB1: (length args) may be less than (length reps), because
--- the args exclude the void ones
--- NB2: 'arity' refers to the *reps*
-direct_call caller lbl arity args reps
-  | debugIsOn && arity > length reps  -- Too few args
+direct_call :: String -> CLabel -> Arity -> [(ArgRep,Maybe CmmExpr)] -> FCode ()
+direct_call caller lbl arity args
+  | debugIsOn && arity > length args  -- Too few args
   = do -- Caller should ensure that there are enough args!
       dflags <- getDynFlags
       let platform = targetPlatform dflags
-      pprPanic "direct_call" (text caller <+> ppr arity
-                              <+> pprPlatform platform lbl <+> ppr (length reps)
-                              <+> pprPlatform platform args <+> ppr reps )
+      pprPanic "direct_call" $
+           text caller <+> ppr arity <+>
+           pprPlatform platform lbl <+> ppr (length args) <+>
+           pprPlatform platform (map snd args) <+> ppr (map fst args)
 
-  | null rest_reps  -- Precisely the right number of arguments
-  = emitCall (NativeDirectCall, NativeReturn) target args
+  | null rest_args  -- Precisely the right number of arguments
+  = emitCall (NativeDirectCall, NativeReturn) target (nonVArgs args)
 
   | otherwise  -- Note [over-saturated calls]
-  = ASSERT( arity == length initial_reps )
-    emitCallWithExtraStack (NativeDirectCall, NativeReturn)
-                           target fast_args (mkStkOffsets stack_args)
+  = emitCallWithExtraStack (NativeDirectCall, NativeReturn)
+        target (nonVArgs fast_args) (mkStkOffsets stack_args)
  where
    target = CmmLit (CmmLabel lbl)
-    (initial_reps, rest_reps) = splitAt arity reps
-    arg_arity = count isNonV initial_reps
-    (fast_args, rest_args) = splitAt arg_arity args
-    stack_args = slowArgs (zip rest_reps rest_args)
+    (fast_args, rest_args) = splitAt arity args
+    stack_args = slowArgs rest_args
+
+-- When constructing calls, it is easier to keep the ArgReps and the
+-- CmmExprs zipped together.  However, a void argument has no
+-- representation, so we need to use Maybe CmmExpr (the alternative of
+-- using zeroCLit or even undefined would work, but would be ugly).
+--
+getArgRepsAmodes :: [StgArg] -> FCode [(ArgRep, Maybe CmmExpr)]
+getArgRepsAmodes = mapM getArgRepAmode
+  where getArgRepAmode arg
+           | V <- rep  = return (V, Nothing)
+           | otherwise = do expr <- getArgAmode (NonVoid arg)
+                            return (rep, Just expr)
+           where rep = toArgRep (argPrimRep arg)
+
+nonVArgs :: [(ArgRep, Maybe CmmExpr)] -> [CmmExpr]
+nonVArgs [] = []
+nonVArgs ((_,Nothing)  : args) = nonVArgs args
+nonVArgs ((_,Just arg) : args) = arg : nonVArgs args
 
 {-
 Note [over-saturated calls]
 ...
 just more arguments that we are passing on the stack (cml_args).
 
 -- | 'slowArgs' takes a list of function arguments and prepares them for
 -- pushing on the stack for "extra" arguments to a function which requires
 -- fewer arguments than we currently have.
-slowArgs :: [(ArgRep,CmmExpr)] -> [(ArgRep,CmmExpr)]
+slowArgs :: [(ArgRep, Maybe CmmExpr)] -> [(ArgRep, Maybe CmmExpr)]
 slowArgs [] = []
-slowArgs amodes
-  | opt_SccProfilingOn = save_cccs ++ this_pat ++ slowArgs rest
-  | otherwise          = this_pat ++ slowArgs rest
+slowArgs args -- careful: reps contains voids (V), but args does not
+  | opt_SccProfilingOn = save_cccs ++ this_pat ++ slowArgs rest_args
+  | otherwise          = this_pat ++ slowArgs rest_args
   where
-    (arg_pat, args, rest) = matchSlowPattern amodes
+    (arg_pat, n)           = slowCallPattern (map fst args)
+    (call_args, rest_args) = splitAt n args
+
     stg_ap_pat = mkCmmRetInfoLabel rtsPackageId arg_pat
-    this_pat   = (N, mkLblExpr stg_ap_pat) : args
-    save_cccs  = [(N, mkLblExpr save_cccs_lbl), (N, curCCS)]
+    this_pat   = (N, Just (mkLblExpr stg_ap_pat)) : call_args
+    save_cccs  = [(N, Just (mkLblExpr save_cccs_lbl)), (N, Just curCCS)]
     save_cccs_lbl = mkCmmRetInfoLabel rtsPackageId (fsLit "stg_restore_cccs")
 
-matchSlowPattern :: [(ArgRep,CmmExpr)]
-                 -> (FastString, [(ArgRep,CmmExpr)], [(ArgRep,CmmExpr)])
-matchSlowPattern amodes = (arg_pat, these, rest)
-  where (arg_pat, n) = slowCallPattern (map fst amodes)
-        (these, rest) = splitAt n amodes
+
 -- These cases were found to cover about 99% of all slow calls:
 slowCallPattern :: [ArgRep] -> (FastString, Arity)
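A hand-run of the new over-saturated call path may help. The slowCallPattern equations themselves are unchanged context that this diff does not show, so the pattern table in this runnable miniature is a cut-down stand-in based on the RTS stg_ap_* family, and the argument names are invented.

    data ArgRep = P | N | V deriving (Eq, Show)

    slowCallPattern :: [ArgRep] -> (String, Int)
    slowCallPattern (P:P:_) = ("stg_ap_pp", 2)  -- assumed RTS patterns
    slowCallPattern (P:_)   = ("stg_ap_p",  1)
    slowCallPattern (N:_)   = ("stg_ap_n",  1)
    slowCallPattern (V:_)   = ("stg_ap_v",  1)
    slowCallPattern []      = ("stg_ap_0",  0)

    -- the non-profiling branch of slowArgs above
    slowArgs :: [(ArgRep, Maybe String)] -> [(ArgRep, Maybe String)]
    slowArgs [] = []
    slowArgs args = (N, Just pat) : call_args ++ slowArgs rest_args
      where (pat, n)               = slowCallPattern (map fst args)
            (call_args, rest_args) = splitAt n args

    -- f has arity 1 but is applied to three pointers: one goes by the
    -- direct convention, the other two become a stg_ap_pp stack frame.
    demo :: ([(ArgRep, Maybe String)], [(ArgRep, Maybe String)])
    demo = (fast, slowArgs rest)
      where (fast, rest) = splitAt 1 [(P, Just "x"), (P, Just "p"), (P, Just "q")]
    -- demo == ( [(P,Just "x")]
    --         , [(N,Just "stg_ap_pp"), (P,Just "p"), (P,Just "q")] )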
@@ -304,16 +315,16 @@ slowCallPattern []            = (fsLit "stg_ap_0", 0)
 
 -- See Note [over-saturated calls].
 mkStkOffsets
-  :: [(ArgRep,CmmExpr)]          -- things to make offsets for
+  :: [(ArgRep, Maybe CmmExpr)]   -- things to make offsets for
   -> ( ByteOff                   -- OUTPUTS: Topmost allocated word
      , [(CmmExpr, ByteOff)] )    -- things with offsets (voids filtered out)
 mkStkOffsets things
     = loop 0 [] (reverse things)
   where
     loop offset offs [] = (offset,offs)
-    loop offset offs ((V,_):things) = loop offset offs things
+    loop offset offs ((_,Nothing):things) = loop offset offs things
        -- ignore Void arguments
-    loop offset offs ((rep,thing):things)
+    loop offset offs ((rep,Just thing):things)
        = loop thing_off ((thing, thing_off):offs) things
       where thing_off = offset + argRepSizeW rep * wORD_SIZE
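mkStkOffsets assigns offsets cumulatively over the reversed list, skipping voids via the new Maybe encoding. A self-contained transcription for checking the offsets by hand — the 8-byte word size and one-word N/P sizes are assumptions of the example, not part of the diff:

    data ArgRep = P | N | V deriving (Eq, Show)

    wordSize :: Int
    wordSize = 8

    argRepSizeW :: ArgRep -> Int
    argRepSizeW V = 0
    argRepSizeW _ = 1

    mkStkOffsets :: [(ArgRep, Maybe String)] -> (Int, [(String, Int)])
    mkStkOffsets things = loop 0 [] (reverse things)
      where
        loop offset offs [] = (offset, offs)
        loop offset offs ((_, Nothing) : rest) = loop offset offs rest
        loop offset offs ((rep, Just thing) : rest)
          = loop thing_off ((thing, thing_off) : offs) rest
          where thing_off = offset + argRepSizeW rep * wordSize

    -- ghci> mkStkOffsets [(N, Just "a"), (V, Nothing), (P, Just "b")]
    -- (16,[("a",16),("b",8)])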
@@ -357,10 +368,7 @@ isNonV :: ArgRep -> Bool
 isNonV V = False
 isNonV _ = True
 
-argsReps :: [StgArg] -> [ArgRep]
-argsReps = map (toArgRep . argPrimRep)
-
-argRepSizeW :: ArgRep -> WordOff -- Size in words
+argRepSizeW :: ArgRep -> WordOff  -- Size in words
 argRepSizeW N = 1
 argRepSizeW P = 1
 argRepSizeW F = 1
diff --git a/compiler/codeGen/StgCmmMonad.hs b/compiler/codeGen/StgCmmMonad.hs
index ccf0777906..240469c3f2 100644
--- a/compiler/codeGen/StgCmmMonad.hs
+++ b/compiler/codeGen/StgCmmMonad.hs
@@ -77,9 +77,6 @@ import Unique
 import UniqSupply
 import FastString
 import Outputable
-import Util
-
-import Compiler.Hoopl hiding (Unique, (<*>), mkLabel, mkBranch, mkLast)
 
 import Control.Monad
 import Data.List
@@ -614,7 +611,7 @@ emitComment :: FastString -> FCode ()
 #if 0 /* def DEBUG */
 emitComment s = emitCgStmt (CgStmt (CmmComment s))
 #else
-emitComment s = return ()
+emitComment _ = return ()
 #endif
 
 emitAssign :: CmmReg -> CmmExpr -> FCode ()
@@ -707,12 +704,16 @@ mkSafeCall :: ForeignTarget -> [CmmFormal] -> [CmmActual] -> FCode CmmAGraph
 mkSafeCall t fs as upd i = do
   k <- newLabelC
+  let (_off, copyout) = copyInOflow NativeReturn (Young k) fs
+        -- see Note [safe foreign call convention]
   return
      (    mkStore (CmmStackSlot (Young k) (widthInBytes wordWidth))
                   (CmmLit (CmmBlock k))
-      <*> mkLast (CmmForeignCall {tgt=t, res=fs, args=as, succ=k, updfr=upd, intrbl=i})
-      <*> mkLabel k)
-
+      <*> mkLast (CmmForeignCall { tgt=t, res=fs, args=as, succ=k
+                                 , updfr=upd, intrbl=i })
+      <*> mkLabel k
+      <*> copyout
+     )
 
 -- ----------------------------------------------------------------------------
 -- CgStmts
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
index c95b1f02ff..9f87271fba 100644
--- a/compiler/codeGen/StgCmmPrim.hs
+++ b/compiler/codeGen/StgCmmPrim.hs
@@ -68,14 +68,9 @@ cgOpApp :: StgOp        -- The op
 
 -- Foreign calls
 cgOpApp (StgFCallOp fcall _) stg_args res_ty
-  = do  { (res_regs, res_hints) <- newUnboxedTupleRegs res_ty
-                -- Choose result regs r1, r2
-                -- Note [Foreign call results]
-        ; cgForeignCall res_regs res_hints fcall stg_args
-                -- r1, r2 = foo( x, y )
-        ; emitReturn (map (CmmReg . CmmLocal) res_regs) }
-                -- return (r1, r2)
-
+  = cgForeignCall fcall stg_args res_ty
+      -- Note [Foreign call results]
+
 -- tagToEnum# is special: we need to pull the constructor
 -- out of the table, and perform an appropriate return.
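Putting the pieces together: after the mkSafeCall change above, every safe foreign call block carries its copyIn right after the continuation label, matching the blocks drawn in Note [safe foreign call convention]. A small sketch of that shape — strings stand in for real Cmm nodes, and mapping result i to Ri is a simplification of the NativeReturn convention:

    safeCallShape :: String -> [String] -> [String]
    safeCallShape k rs =
         [ "Sp[young(" ++ k ++ ")] = " ++ k      -- store the return info pointer
         , "foreign call ... returns to " ++ k   -- the CmmForeignCall last node
         , k ++ ":"                              -- mkLabel k
         ]
      ++ [ r ++ " = R" ++ show i                 -- copyInOflow NativeReturn
         | (r, i) <- zip rs [(1 :: Int) ..] ]

    -- e.g. safeCallShape "L1" ["r"] ends with ["L1:", "r = R1"]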