diff options
Diffstat (limited to 'ghc/compiler/codeGen')
26 files changed, 6938 insertions, 4358 deletions
diff --git a/ghc/compiler/codeGen/CgBindery.lhs b/ghc/compiler/codeGen/CgBindery.lhs index b195b5c864..0f858777c2 100644 --- a/ghc/compiler/codeGen/CgBindery.lhs +++ b/ghc/compiler/codeGen/CgBindery.lhs @@ -8,49 +8,49 @@ module CgBindery ( CgBindings, CgIdInfo, StableLoc, VolatileLoc, - stableAmodeIdInfo, heapIdInfo, + cgIdInfoId, cgIdInfoArgRep, cgIdInfoLF, + + stableIdInfo, heapIdInfo, letNoEscapeIdInfo, idInfoToAmode, addBindC, addBindsC, nukeVolatileBinds, nukeDeadBindings, + getLiveStackSlots, - bindNewToStack, rebindToStack, + bindArgsToStack, rebindToStack, bindNewToNode, bindNewToReg, bindArgsToRegs, bindNewToTemp, - getArgAmode, getArgAmodes, - getCAddrModeAndInfo, getCAddrMode, + getArgAmode, getArgAmodes, + getCgIdInfo, getCAddrModeIfVolatile, getVolatileRegs, - - buildContLivenessMask + maybeLetNoEscape, ) where #include "HsVersions.h" -import AbsCSyn import CgMonad - -import CgUsages ( getHpRelOffset, getSpRelOffset, getRealSp ) -import CgStackery ( freeStackSlots, getStackFrame ) -import CLabel ( mkClosureLabel, - mkBitmapLabel, pprCLabel ) +import CgHeapery ( getHpRelOffset ) +import CgStackery ( freeStackSlots, getSpRelOffset ) +import CgUtils ( cgLit, cmmOffsetW ) +import CLabel ( mkClosureLabel, pprCLabel ) import ClosureInfo ( mkLFImported, mkLFArgument, LambdaFormInfo ) -import Bitmap -import PrimRep ( isFollowableRep, getPrimRepSize ) -import Id ( Id, idPrimRep, idType ) -import Type ( typePrimRep ) + +import Cmm +import PprCmm ( {- instance Outputable -} ) +import SMRep ( CgRep(..), WordOff, isFollowableArg, + isVoidArg, cgRepSizeW, argMachRep, + idCgRep, typeCgRep ) +import Id ( Id, idName ) import VarEnv import VarSet ( varSetElems ) -import Literal ( Literal ) -import Maybes ( catMaybes, maybeToBool, seqMaybe ) -import Name ( isInternalName, NamedThing(..) ) -import PprAbsC ( pprAmode, pprMagicId ) -import PrimRep ( PrimRep(..) 
) +import Literal ( literalType ) +import Maybes ( catMaybes ) +import Name ( isExternalName ) import StgSyn ( StgArg, StgLiveVars, GenStgArg(..), isStgTypeArg ) -import Unique ( Unique, Uniquable(..) ) +import Unique ( Uniquable(..) ) import UniqSet ( elementOfUniqSet ) -import Util ( zipWithEqual, sortLt ) import Outputable \end{code} @@ -73,22 +73,30 @@ environment. So there can be two bindings for a given name. type CgBindings = IdEnv CgIdInfo data CgIdInfo - = MkCgIdInfo Id -- Id that this is the info for - VolatileLoc - StableLoc - LambdaFormInfo + = CgIdInfo + { cg_id :: Id -- Id that this is the info for + -- Can differ from the Id at occurrence sites by + -- virtue of being externalised, for splittable C + , cg_rep :: CgRep + , cg_vol :: VolatileLoc + , cg_stb :: StableLoc + , cg_lf :: LambdaFormInfo } + +mkCgIdInfo id vol stb lf + = CgIdInfo { cg_id = id, cg_vol = vol, cg_stb = stb, + cg_lf = lf, cg_rep = idCgRep id } + +voidIdInfo id = CgIdInfo { cg_id = id, cg_vol = NoVolatileLoc + , cg_stb = VoidLoc, cg_lf = mkLFArgument id + , cg_rep = VoidArg } + -- Used just for VoidRep things data VolatileLoc = NoVolatileLoc - | TempVarLoc Unique - - | RegLoc MagicId -- in one of the magic registers - -- (probably {Int,Float,Char,etc}Reg) - - | VirHpLoc VirtualHeapOffset -- Hp+offset (address of closure) - - | VirNodeLoc VirtualHeapOffset -- Cts of offset indirect from Node - -- ie *(Node+offset) + | RegLoc CmmReg -- In one of the registers (global or local) + | VirHpLoc VirtualHpOffset -- Hp+offset (address of closure) + | VirNodeLoc VirtualHpOffset -- Cts of offset indirect from Node + -- ie *(Node+offset) \end{code} @StableLoc@ encodes where an Id can be found, used by @@ -97,33 +105,37 @@ the @CgBindings@ environment in @CgBindery@. 
\begin{code} data StableLoc = NoStableLoc - | VirStkLoc VirtualSpOffset - | LitLoc Literal - | StableAmodeLoc CAddrMode --- these are so StableLoc can be abstract: + | VirStkLoc VirtualSpOffset -- The thing is held in this + -- stack slot -maybeStkLoc (VirStkLoc offset) = Just offset -maybeStkLoc _ = Nothing + | VirStkLNE VirtualSpOffset -- A let-no-escape thing; the + -- value is this stack pointer + -- (as opposed to the contents of the slot) + + | StableLoc CmmExpr + | VoidLoc -- Used only for VoidRep variables. They never need to + -- be saved, so it makes sense to treat treat them as + -- having a stable location \end{code} \begin{code} instance Outputable CgIdInfo where - ppr (MkCgIdInfo id vol stb lf) + ppr (CgIdInfo id rep vol stb lf) = ppr id <+> ptext SLIT("-->") <+> vcat [ppr vol, ppr stb] instance Outputable VolatileLoc where ppr NoVolatileLoc = empty - ppr (TempVarLoc u) = ptext SLIT("tmp") <+> ppr u - ppr (RegLoc r) = ptext SLIT("reg") <+> pprMagicId r - ppr (VirHpLoc v) = ptext SLIT("vh") <+> ppr v - ppr (VirNodeLoc v) = ptext SLIT("vn") <+> ppr v + ppr (RegLoc r) = ptext SLIT("reg") <+> ppr r + ppr (VirHpLoc v) = ptext SLIT("vh") <+> ppr v + ppr (VirNodeLoc v) = ptext SLIT("vn") <+> ppr v instance Outputable StableLoc where - ppr NoStableLoc = empty - ppr (VirStkLoc v) = ptext SLIT("vs") <+> ppr v - ppr (LitLoc l) = ptext SLIT("lit") <+> ppr l - ppr (StableAmodeLoc a) = ptext SLIT("amode") <+> pprAmode a + ppr NoStableLoc = empty + ppr VoidLoc = ptext SLIT("void") + ppr (VirStkLoc v) = ptext SLIT("vs") <+> ppr v + ppr (VirStkLNE v) = ptext SLIT("lne") <+> ppr v + ppr (StableLoc a) = ptext SLIT("amode") <+> ppr a \end{code} %************************************************************************ @@ -133,41 +145,49 @@ instance Outputable StableLoc where %************************************************************************ \begin{code} -stableAmodeIdInfo i amode lf_info = MkCgIdInfo i NoVolatileLoc (StableAmodeLoc amode) lf_info -heapIdInfo i 
offset lf_info = MkCgIdInfo i (VirHpLoc offset) NoStableLoc lf_info -tempIdInfo i uniq lf_info = MkCgIdInfo i (TempVarLoc uniq) NoStableLoc lf_info - -letNoEscapeIdInfo i sp lf_info - = MkCgIdInfo i NoVolatileLoc (StableAmodeLoc (CJoinPoint sp)) lf_info - -idInfoToAmode :: PrimRep -> CgIdInfo -> FCode CAddrMode -idInfoToAmode kind (MkCgIdInfo _ vol stab _) = idInfoPiecesToAmode kind vol stab - -idInfoPiecesToAmode :: PrimRep -> VolatileLoc -> StableLoc -> FCode CAddrMode - -idInfoPiecesToAmode kind (TempVarLoc uniq) stable_loc = returnFC (CTemp uniq kind) -idInfoPiecesToAmode kind (RegLoc magic_id) stable_loc = returnFC (CReg magic_id) - -idInfoPiecesToAmode kind NoVolatileLoc (LitLoc lit) = returnFC (CLit lit) -idInfoPiecesToAmode kind NoVolatileLoc (StableAmodeLoc amode) = returnFC amode +stableIdInfo id amode lf_info = mkCgIdInfo id NoVolatileLoc (StableLoc amode) lf_info +heapIdInfo id offset lf_info = mkCgIdInfo id (VirHpLoc offset) NoStableLoc lf_info +letNoEscapeIdInfo id sp lf_info = mkCgIdInfo id NoVolatileLoc (VirStkLNE sp) lf_info +stackIdInfo id sp lf_info = mkCgIdInfo id NoVolatileLoc (VirStkLoc sp) lf_info +nodeIdInfo id offset lf_info = mkCgIdInfo id (VirNodeLoc offset) NoStableLoc lf_info +regIdInfo id reg lf_info = mkCgIdInfo id (RegLoc reg) NoStableLoc lf_info + +idInfoToAmode :: CgIdInfo -> FCode CmmExpr +idInfoToAmode info + = case cg_vol info of { + RegLoc reg -> returnFC (CmmReg reg) ; + VirNodeLoc nd_off -> returnFC (CmmLoad (cmmOffsetW (CmmReg nodeReg) nd_off) mach_rep) ; + VirHpLoc hp_off -> getHpRelOffset hp_off ; + NoVolatileLoc -> + + case cg_stb info of + StableLoc amode -> returnFC amode + VirStkLoc sp_off -> do { sp_rel <- getSpRelOffset sp_off + ; return (CmmLoad sp_rel mach_rep) } + + VirStkLNE sp_off -> getSpRelOffset sp_off ; + + VoidLoc -> return $ pprPanic "idInfoToAmode: void" (ppr (cg_id info)) + -- We return a 'bottom' amode, rather than panicing now + -- In this way getArgAmode returns a pair of (VoidArg, bottom) + -- and 
that's exactly what we want + + NoStableLoc -> pprPanic "idInfoToAmode: no loc" (ppr (cg_id info)) + } + where + mach_rep = argMachRep (cg_rep info) -idInfoPiecesToAmode kind (VirNodeLoc nd_off) stable_loc - = returnFC (CVal (nodeRel nd_off) kind) - -- Virtual offsets from Node increase into the closures, - -- and so do Node-relative offsets (which we want in the CVal), - -- so there is no mucking about to do to the offset. +cgIdInfoId :: CgIdInfo -> Id +cgIdInfoId = cg_id -idInfoPiecesToAmode kind (VirHpLoc hp_off) stable_loc - = getHpRelOffset hp_off `thenFC` \ rel_hp -> - returnFC (CAddr rel_hp) +cgIdInfoLF :: CgIdInfo -> LambdaFormInfo +cgIdInfoLF = cg_lf -idInfoPiecesToAmode kind NoVolatileLoc (VirStkLoc i) - = getSpRelOffset i `thenFC` \ rel_sp -> - returnFC (CVal rel_sp kind) +cgIdInfoArgRep :: CgIdInfo -> CgRep +cgIdInfoArgRep = cg_rep -#ifdef DEBUG -idInfoPiecesToAmode kind NoVolatileLoc NoStableLoc = panic "idInfoPiecesToAmode: no loc" -#endif +maybeLetNoEscape (CgIdInfo { cg_stb = VirStkLNE sp_off }) = Just sp_off +maybeLetNoEscape other = Nothing \end{code} %************************************************************************ @@ -176,8 +196,8 @@ idInfoPiecesToAmode kind NoVolatileLoc NoStableLoc = panic "idInfoPiecesToAmode: %* * %************************************************************************ -There are three basic routines, for adding (@addBindC@), modifying -(@modifyBindC@) and looking up (@lookupBindC@) bindings. +.There are three basic routines, for adding (@addBindC@), modifying +(@modifyBindC@) and looking up (@getCgIdInfo@) bindings. A @Id@ is bound to a @(VolatileLoc, StableLoc)@ triple. The name should not already be bound. (nice ASSERT, eh?) 
@@ -192,8 +212,8 @@ addBindsC :: [(Id, CgIdInfo)] -> Code addBindsC new_bindings = do binds <- getBinds let new_binds = foldl (\ binds (name,info) -> extendVarEnv binds name info) - binds - new_bindings + binds + new_bindings setBinds new_binds modifyBindC :: Id -> (CgIdInfo -> CgIdInfo) -> Code @@ -201,19 +221,34 @@ modifyBindC name mangle_fn = do binds <- getBinds setBinds $ modifyVarEnv mangle_fn binds name -lookupBindC :: Id -> FCode CgIdInfo -lookupBindC id = do maybe_info <- lookupBindC_maybe id - case maybe_info of - Just info -> return info - Nothing -> cgLookupPanic id - -lookupBindC_maybe :: Id -> FCode (Maybe CgIdInfo) -lookupBindC_maybe id - = do static_binds <- getStaticBinds - local_binds <- getBinds - return (lookupVarEnv local_binds id - `seqMaybe` - lookupVarEnv static_binds id) +getCgIdInfo :: Id -> FCode CgIdInfo +getCgIdInfo id + = do { -- Try local bindings first + ; local_binds <- getBinds + ; case lookupVarEnv local_binds id of { + Just info -> return info ; + Nothing -> do + + { -- Try top-level bindings + static_binds <- getStaticBinds + ; case lookupVarEnv static_binds id of { + Just info -> return info ; + Nothing -> + + -- Should be imported; make up a CgIdInfo for it + if isExternalName name then + return (stableIdInfo id ext_lbl (mkLFImported id)) + else + if isVoidArg (idCgRep id) then + -- Void things are never in the environment + return (voidIdInfo id) + else + -- Bug + cgLookupPanic id + }}}} + where + name = idName id + ext_lbl = CmmLit (CmmLabel (mkClosureLabel name)) cgLookupPanic :: Id -> FCode a cgLookupPanic id @@ -223,9 +258,9 @@ cgLookupPanic id pprPanic "cgPanic" (vcat [ppr id, ptext SLIT("static binds for:"), - vcat [ ppr i | (MkCgIdInfo i _ _ _) <- rngVarEnv static_binds ], + vcat [ ppr (cg_id info) | info <- rngVarEnv static_binds ], ptext SLIT("local binds for:"), - vcat [ ppr i | (MkCgIdInfo i _ _ _) <- rngVarEnv local_binds ], + vcat [ ppr (cg_id info) | info <- rngVarEnv local_binds ], ptext SLIT("SRT label") <+> 
pprCLabel srt ]) \end{code} @@ -244,9 +279,9 @@ nukeVolatileBinds :: CgBindings -> CgBindings nukeVolatileBinds binds = mkVarEnv (foldr keep_if_stable [] (rngVarEnv binds)) where - keep_if_stable (MkCgIdInfo i _ NoStableLoc entry_info) acc = acc - keep_if_stable (MkCgIdInfo i _ stable_loc entry_info) acc - = (i, MkCgIdInfo i NoVolatileLoc stable_loc entry_info) : acc + keep_if_stable (CgIdInfo { cg_stb = NoStableLoc }) acc = acc + keep_if_stable info acc + = (cg_id info, info { cg_vol = NoVolatileLoc }) : acc \end{code} @@ -256,46 +291,15 @@ nukeVolatileBinds binds %* * %************************************************************************ -I {\em think} all looking-up is done through @getCAddrMode(s)@. - \begin{code} -getCAddrModeAndInfo :: Id -> FCode (Id, CAddrMode, LambdaFormInfo) - -getCAddrModeAndInfo id - = do - maybe_cg_id_info <- lookupBindC_maybe id - case maybe_cg_id_info of - - -- Nothing => not in the environment, so should be imported - Nothing | isInternalName name -> cgLookupPanic id - | otherwise -> returnFC (id, global_amode, mkLFImported id) - - Just (MkCgIdInfo id' volatile_loc stable_loc lf_info) - -> do amode <- idInfoPiecesToAmode kind volatile_loc stable_loc - return (id', amode, lf_info) - where - name = getName id - global_amode = CLbl (mkClosureLabel name) kind - kind = idPrimRep id - -getCAddrMode :: Id -> FCode CAddrMode -getCAddrMode name = do - (_, amode, _) <- getCAddrModeAndInfo name - return amode -\end{code} - -\begin{code} -getCAddrModeIfVolatile :: Id -> FCode (Maybe CAddrMode) -getCAddrModeIfVolatile name --- | toplevelishId name = returnFC Nothing --- | otherwise - = do - (MkCgIdInfo _ volatile_loc stable_loc lf_info) <- lookupBindC name - case stable_loc of +getCAddrModeIfVolatile :: Id -> FCode (Maybe CmmExpr) +getCAddrModeIfVolatile id + = do { info <- getCgIdInfo id + ; case cg_stb info of NoStableLoc -> do -- Aha! So it is volatile! 
- amode <- idInfoPiecesToAmode (idPrimRep name) volatile_loc NoStableLoc + amode <- idInfoToAmode info return $ Just amode - a_stable_loc -> return Nothing + a_stable_loc -> return Nothing } \end{code} @getVolatileRegs@ gets a set of live variables, and returns a list of @@ -306,56 +310,57 @@ stable one (notably, on the stack), we modify the current bindings to forget the volatile one. \begin{code} -getVolatileRegs :: StgLiveVars -> FCode [MagicId] +getVolatileRegs :: StgLiveVars -> FCode [GlobalReg] getVolatileRegs vars = do - stuff <- mapFCs snaffle_it (varSetElems vars) - returnFC $ catMaybes stuff - where - snaffle_it var = do - (MkCgIdInfo _ volatile_loc stable_loc lf_info) <- lookupBindC var - let + do { stuff <- mapFCs snaffle_it (varSetElems vars) + ; returnFC $ catMaybes stuff } + where + snaffle_it var = do + { info <- getCgIdInfo var + ; let -- commoned-up code... - consider_reg reg = - if not (isVolatileReg reg) then - -- Potentially dies across C calls - -- For now, that's everything; we leave - -- it to the save-macros to decide which - -- regs *really* need to be saved. - returnFC Nothing - else - case stable_loc of - NoStableLoc -> returnFC (Just reg) -- got one! - is_a_stable_loc -> do - -- has both volatile & stable locations; - -- force it to rely on the stable location - modifyBindC var nuke_vol_bind - return Nothing - in - case volatile_loc of - RegLoc reg -> consider_reg reg - VirNodeLoc _ -> consider_reg node - non_reg_loc -> returnFC Nothing - - nuke_vol_bind (MkCgIdInfo i _ stable_loc lf_info) - = MkCgIdInfo i NoVolatileLoc stable_loc lf_info + consider_reg reg + = -- We assume that all regs can die across C calls + -- We leave it to the save-macros to decide which + -- regs *really* need to be saved. + case cg_stb info of + NoStableLoc -> returnFC (Just reg) -- got one! 
+ is_a_stable_loc -> do + { -- has both volatile & stable locations; + -- force it to rely on the stable location + modifyBindC var nuke_vol_bind + ; return Nothing } + + ; case cg_vol info of + RegLoc (CmmGlobal reg) -> consider_reg reg + VirNodeLoc _ -> consider_reg node + other_loc -> returnFC Nothing -- Local registers + } + + nuke_vol_bind info = info { cg_vol = NoVolatileLoc } \end{code} \begin{code} -getArgAmodes :: [StgArg] -> FCode [CAddrMode] -getArgAmodes [] = returnFC [] -getArgAmodes (atom:atoms) - | isStgTypeArg atom - = getArgAmodes atoms - | otherwise = do - amode <- getArgAmode atom - amodes <- getArgAmodes atoms - return ( amode : amodes ) +getArgAmode :: StgArg -> FCode (CgRep, CmmExpr) +getArgAmode (StgVarArg var) + = do { info <- getCgIdInfo var + ; amode <- idInfoToAmode info + ; return (cgIdInfoArgRep info, amode ) } -getArgAmode :: StgArg -> FCode CAddrMode +getArgAmode (StgLitArg lit) + = do { cmm_lit <- cgLit lit + ; return (typeCgRep (literalType lit), CmmLit cmm_lit) } -getArgAmode (StgVarArg var) = getCAddrMode var -- The common case -getArgAmode (StgLitArg lit) = returnFC (CLit lit) +getArgAmode (StgTypeArg _) = panic "getArgAmode: type arg" + +getArgAmodes :: [StgArg] -> FCode [(CgRep, CmmExpr)] +getArgAmodes [] = returnFC [] +getArgAmodes (atom:atoms) + | isStgTypeArg atom = getArgAmodes atoms + | otherwise = do { amode <- getArgAmode atom + ; amodes <- getArgAmodes atoms + ; return ( amode : amodes ) } \end{code} %************************************************************************ @@ -365,43 +370,40 @@ getArgAmode (StgLitArg lit) = returnFC (CLit lit) %************************************************************************ \begin{code} -bindNewToStack :: (Id, VirtualSpOffset) -> Code -bindNewToStack (name, offset) - = addBindC name info +bindArgsToStack :: [(Id, VirtualSpOffset)] -> Code +bindArgsToStack args + = mapCs bind args where - info = MkCgIdInfo name NoVolatileLoc (VirStkLoc offset) (mkLFArgument name) + bind(id, 
offset) = addBindC id (stackIdInfo id offset (mkLFArgument id)) -bindNewToNode :: Id -> VirtualHeapOffset -> LambdaFormInfo -> Code -bindNewToNode name offset lf_info - = addBindC name info +bindArgsToRegs :: [(Id, GlobalReg)] -> Code +bindArgsToRegs args + = mapCs bind args where - info = MkCgIdInfo name (VirNodeLoc offset) NoStableLoc lf_info + bind (arg, reg) = bindNewToReg arg (CmmGlobal reg) (mkLFArgument arg) + +bindNewToNode :: Id -> VirtualHpOffset -> LambdaFormInfo -> Code +bindNewToNode id offset lf_info + = addBindC id (nodeIdInfo id offset lf_info) -- Create a new temporary whose unique is that in the id, -- bind the id to it, and return the addressing mode for the -- temporary. -bindNewToTemp :: Id -> FCode CAddrMode +bindNewToTemp :: Id -> FCode CmmReg bindNewToTemp id - = do addBindC id id_info - return temp_amode - where - uniq = getUnique id - temp_amode = CTemp uniq (idPrimRep id) - id_info = tempIdInfo id uniq lf_info - lf_info = mkLFArgument id -- Always used of things we - -- know nothing about - -bindNewToReg :: Id -> MagicId -> LambdaFormInfo -> Code -bindNewToReg name magic_id lf_info - = addBindC name info + = do addBindC id (regIdInfo id temp_reg lf_info) + return temp_reg where - info = MkCgIdInfo name (RegLoc magic_id) NoStableLoc lf_info + uniq = getUnique id + temp_reg = CmmLocal (LocalReg uniq (argMachRep (idCgRep id))) + lf_info = mkLFArgument id -- Always used of things we + -- know nothing about -bindArgsToRegs :: [Id] -> [MagicId] -> Code -bindArgsToRegs args regs - = listCs (zipWithEqual "bindArgsToRegs" bind args regs) +bindNewToReg :: Id -> CmmReg -> LambdaFormInfo -> Code +bindNewToReg name reg lf_info + = addBindC name info where - arg `bind` reg = bindNewToReg arg reg (mkLFArgument arg) + info = mkCgIdInfo name (RegLoc reg) NoStableLoc lf_info \end{code} \begin{code} @@ -409,69 +411,7 @@ rebindToStack :: Id -> VirtualSpOffset -> Code rebindToStack name offset = modifyBindC name replace_stable_fn where - replace_stable_fn 
(MkCgIdInfo i vol stab einfo) - = MkCgIdInfo i vol (VirStkLoc offset) einfo -\end{code} - -%************************************************************************ -%* * -\subsection[CgBindery-liveness]{Build a liveness mask for the current stack} -%* * -%************************************************************************ - -There are four kinds of things on the stack: - - - pointer variables (bound in the environment) - - non-pointer variables (boudn in the environment) - - free slots (recorded in the stack free list) - - non-pointer data slots (recorded in the stack free list) - -We build up a bitmap of non-pointer slots by searching the environment -for all the pointer variables, and subtracting these from a bitmap -with initially all bits set (up to the size of the stack frame). - -\begin{code} -buildLivenessMask - :: VirtualSpOffset -- size of the stack frame - -> VirtualSpOffset -- offset from which the bitmap should start - -> FCode Bitmap -- mask for free/unlifted slots - -buildLivenessMask size sp = do { - -- find all live stack-resident pointers - binds <- getBinds; - ((vsp, _, free, _, _), heap_usage) <- getUsage; - - let { - rel_slots = sortLt (<) - [ sp - ofs -- get slots relative to top of frame - | (MkCgIdInfo id _ (VirStkLoc ofs) _) <- rngVarEnv binds, - isFollowableRep (idPrimRep id) - ]; - }; - - WARN( not (all (>=0) rel_slots), ppr size $$ ppr sp $$ ppr rel_slots $$ ppr binds ) - return (intsToReverseBitmap size rel_slots) - } - --- In a continuation, we want a liveness mask that starts from just after --- the return address, which is on the stack at realSp. - -buildContLivenessMask :: Id -> FCode Liveness - -- The Id is used just for its unique to make a label -buildContLivenessMask id = do - realSp <- getRealSp - - frame_sp <- getStackFrame - -- realSp points to the frame-header for the current stack frame, - -- and the end of this frame is frame_sp. The size is therefore - -- realSp - frame_sp - 1 (subtract one for the frame-header). 
- let frame_size = realSp - frame_sp - 1 - - mask <- buildLivenessMask frame_size (realSp-1) - - let liveness = Liveness (mkBitmapLabel (getName id)) frame_size mask - absC (maybeLargeBitmap liveness) - return liveness + replace_stable_fn info = info { cg_stb = VirStkLoc offset } \end{code} %************************************************************************ @@ -503,7 +443,7 @@ nukeDeadBindings live_vars = do let (dead_stk_slots, bs') = dead_slots live_vars [] [] - [ (i, b) | b@(MkCgIdInfo i _ _ _) <- rngVarEnv binds ] + [ (cg_id b, b) | b <- rngVarEnv binds ] setBinds $ mkVarEnv bs' freeStackSlots dead_stk_slots \end{code} @@ -529,19 +469,23 @@ dead_slots live_vars fbs ds ((v,i):bs) -- Instead keep it in the filtered bindings | otherwise - = case i of - MkCgIdInfo _ _ stable_loc _ - | is_stk_loc && size > 0 -> - dead_slots live_vars fbs ([offset-size+1 .. offset] ++ ds) bs - where - maybe_stk_loc = maybeStkLoc stable_loc - is_stk_loc = maybeToBool maybe_stk_loc - (Just offset) = maybe_stk_loc + = case cg_stb i of + VirStkLoc offset + | size > 0 + -> dead_slots live_vars fbs ([offset-size+1 .. offset] ++ ds) bs _ -> dead_slots live_vars fbs ds bs where + size :: WordOff + size = cgRepSizeW (cg_rep i) +\end{code} - size :: Int - size = (getPrimRepSize . typePrimRep . 
idType) v - +\begin{code} +getLiveStackSlots :: FCode [VirtualSpOffset] +-- Return the offsets of slots in stack containig live pointers +getLiveStackSlots + = do { binds <- getBinds + ; return [off | CgIdInfo { cg_stb = VirStkLoc off, + cg_rep = rep } <- rngVarEnv binds, + isFollowableArg rep] } \end{code} diff --git a/ghc/compiler/codeGen/CgCallConv.hs b/ghc/compiler/codeGen/CgCallConv.hs new file mode 100644 index 0000000000..fa98f96378 --- /dev/null +++ b/ghc/compiler/codeGen/CgCallConv.hs @@ -0,0 +1,507 @@ +----------------------------------------------------------------------------- +-- +-- CgCallConv +-- +-- The datatypes and functions here encapsulate the +-- calling and return conventions used by the code generator. +-- +-- (c) The University of Glasgow 2004 +-- +----------------------------------------------------------------------------- + + +module CgCallConv ( + -- Argument descriptors + mkArgDescr, argDescrType, + + -- Liveness + isBigLiveness, buildContLiveness, mkRegLiveness, + smallLiveness, mkLivenessCLit, + + -- Register assignment + assignCallRegs, assignReturnRegs, assignPrimOpCallRegs, + + -- Calls + constructSlowCall, slowArgs, slowCallPattern, + + -- Returns + CtrlReturnConvention(..), + ctrlReturnConvAlg, + dataReturnConvPrim, + getSequelAmode + ) where + +#include "HsVersions.h" + +import CgUtils ( emitRODataLits, mkWordCLit ) +import CgMonad + +import Constants ( mAX_FAMILY_SIZE_FOR_VEC_RETURNS, + mAX_Vanilla_REG, mAX_Float_REG, + mAX_Double_REG, mAX_Long_REG, + mAX_Real_Vanilla_REG, mAX_Real_Float_REG, + mAX_Real_Double_REG, mAX_Real_Long_REG, + bITMAP_BITS_SHIFT + ) + +import ClosureInfo ( ArgDescr(..), Liveness(..) 
) +import CgStackery ( getSpRelOffset ) +import SMRep +import MachOp ( wordRep ) +import Cmm ( CmmExpr(..), GlobalReg(..), CmmLit(..), CmmReg(..), node ) +import CmmUtils ( mkLblExpr ) +import CLabel +import Maybes ( mapCatMaybes ) +import Id ( Id ) +import Name ( Name ) +import TyCon ( TyCon, tyConFamilySize ) +import Bitmap ( Bitmap, mAX_SMALL_BITMAP_SIZE, + mkBitmap, intsToReverseBitmap ) +import Util ( isn'tIn, sortLt ) +import CmdLineOpts ( opt_Unregisterised ) +import FastString ( LitString ) +import Outputable +import DATA_BITS + + +------------------------------------------------------------------------- +-- +-- Making argument descriptors +-- +-- An argument descriptor describes the layout of args on the stack, +-- both for * GC (stack-layout) purposes, and +-- * saving/restoring registers when a heap-check fails +-- +-- Void arguments aren't important, therefore (contrast constructSlowCall) +-- +------------------------------------------------------------------------- + +-- bring in ARG_P, ARG_N, etc. 
+#include "../includes/StgFun.h" + +------------------------- +argDescrType :: ArgDescr -> Int +-- The "argument type" RTS field type +argDescrType (ArgSpec n) = n +argDescrType (ArgGen liveness) + | isBigLiveness liveness = ARG_GEN_BIG + | otherwise = ARG_GEN + + +mkArgDescr :: Name -> [Id] -> FCode ArgDescr +mkArgDescr nm args + = case stdPattern arg_reps of + Just spec_id -> return (ArgSpec spec_id) + Nothing -> do { liveness <- mkLiveness nm size bitmap + ; return (ArgGen liveness) } + where + arg_reps = filter nonVoidArg (map idCgRep args) + -- Getting rid of voids eases matching of standard patterns + + bitmap = mkBitmap arg_bits + arg_bits = argBits arg_reps + size = length arg_bits + +argBits :: [CgRep] -> [Bool] -- True for non-ptr, False for ptr +argBits [] = [] +argBits (PtrArg : args) = False : argBits args +argBits (arg : args) = take (cgRepSizeW arg) (repeat True) ++ argBits args + +stdPattern :: [CgRep] -> Maybe Int +stdPattern [PtrArg] = Just ARG_P +stdPattern [FloatArg] = Just ARG_F +stdPattern [DoubleArg] = Just ARG_D +stdPattern [LongArg] = Just ARG_L +stdPattern [NonPtrArg] = Just ARG_N + +stdPattern [NonPtrArg,NonPtrArg] = Just ARG_NN +stdPattern [NonPtrArg,PtrArg] = Just ARG_NP +stdPattern [PtrArg,NonPtrArg] = Just ARG_PN +stdPattern [PtrArg,PtrArg] = Just ARG_PP + +stdPattern [NonPtrArg,NonPtrArg,NonPtrArg] = Just ARG_NNN +stdPattern [NonPtrArg,NonPtrArg,PtrArg] = Just ARG_NNP +stdPattern [NonPtrArg,PtrArg,NonPtrArg] = Just ARG_NPN +stdPattern [NonPtrArg,PtrArg,PtrArg] = Just ARG_NPP +stdPattern [PtrArg,NonPtrArg,NonPtrArg] = Just ARG_PNN +stdPattern [PtrArg,NonPtrArg,PtrArg] = Just ARG_PNP +stdPattern [PtrArg,PtrArg,NonPtrArg] = Just ARG_PPN +stdPattern [PtrArg,PtrArg,PtrArg] = Just ARG_PPP + +stdPattern [PtrArg,PtrArg,PtrArg,PtrArg] = Just ARG_PPPP +stdPattern [PtrArg,PtrArg,PtrArg,PtrArg,PtrArg] = Just ARG_PPPPP +stdPattern [PtrArg,PtrArg,PtrArg,PtrArg,PtrArg,PtrArg] = Just ARG_PPPPPP +stdPattern other = Nothing + + 
+------------------------------------------------------------------------- +-- +-- Liveness info +-- +------------------------------------------------------------------------- + +mkLiveness :: Name -> Int -> Bitmap -> FCode Liveness +mkLiveness name size bits + | size > mAX_SMALL_BITMAP_SIZE -- Bitmap does not fit in one word + = do { let lbl = mkBitmapLabel name + ; emitRODataLits lbl ( mkWordCLit (fromIntegral size) + : map mkWordCLit bits) + ; return (BigLiveness lbl) } + + | otherwise -- Bitmap fits in one word + = let + small_bits = case bits of + [] -> 0 + [b] -> fromIntegral b + _ -> panic "livenessToAddrMode" + in + return (smallLiveness size small_bits) + +smallLiveness :: Int -> StgWord -> Liveness +smallLiveness size small_bits = SmallLiveness bits + where bits = fromIntegral size .|. (small_bits `shiftL` bITMAP_BITS_SHIFT) + +------------------- +isBigLiveness :: Liveness -> Bool +isBigLiveness (BigLiveness _) = True +isBigLiveness (SmallLiveness _) = False + +------------------- +mkLivenessCLit :: Liveness -> CmmLit +mkLivenessCLit (BigLiveness lbl) = CmmLabel lbl +mkLivenessCLit (SmallLiveness bits) = mkWordCLit bits + + +------------------------------------------------------------------------- +-- +-- Bitmap describing register liveness +-- across GC when doing a "generic" heap check +-- (a RET_DYN stack frame). +-- +-- NB. Must agree with these macros (currently in StgMacros.h): +-- GET_NON_PTRS(), GET_PTRS(), GET_LIVENESS(). +------------------------------------------------------------------------- + +mkRegLiveness :: [(Id, GlobalReg)] -> Int -> Int -> StgWord +mkRegLiveness regs ptrs nptrs + = (fromIntegral nptrs `shiftL` 16) .|. + (fromIntegral ptrs `shiftL` 24) .|. + all_non_ptrs `xor` reg_bits regs + where + all_non_ptrs = 0xff + + reg_bits [] = 0 + reg_bits ((id, VanillaReg i) : regs) | isFollowableArg (idCgRep id) + = (1 `shiftL` (i - 1)) .|. 
reg_bits regs + reg_bits (_ : regs) + = reg_bits regs + +------------------------------------------------------------------------- +-- +-- Pushing the arguments for a slow call +-- +------------------------------------------------------------------------- + +-- For a slow call, we must take a bunch of arguments and intersperse +-- some stg_ap_<pattern>_ret_info return addresses. +constructSlowCall :: [(CgRep,CmmExpr)] -> (CLabel, [(CgRep,CmmExpr)]) + -- don't forget the zero case +constructSlowCall [] + = (stg_ap_0, []) + where + stg_ap_0 = enterRtsRetLabel SLIT("stg_ap_0") + +constructSlowCall amodes + = (stg_ap_pat, these ++ slowArgs rest) + where + stg_ap_pat = enterRtsRetLabel arg_pat + (arg_pat, these, rest) = matchSlowPattern amodes + +enterRtsRetLabel arg_pat + | tablesNextToCode = mkRtsRetInfoLabel arg_pat + | otherwise = mkRtsRetLabel arg_pat + +-- | 'slowArgs' takes a list of function arguments and prepares them for +-- pushing on the stack for "extra" arguments to a function which requires +-- fewer arguments than we currently have. 
+slowArgs :: [(CgRep,CmmExpr)] -> [(CgRep,CmmExpr)] +slowArgs [] = [] +slowArgs amodes = (NonPtrArg, mkLblExpr stg_ap_pat) : args ++ slowArgs rest + where (arg_pat, args, rest) = matchSlowPattern amodes + stg_ap_pat = mkRtsRetInfoLabel arg_pat + +matchSlowPattern :: [(CgRep,CmmExpr)] + -> (LitString, [(CgRep,CmmExpr)], [(CgRep,CmmExpr)]) +matchSlowPattern amodes = (arg_pat, these, rest) + where (arg_pat, n) = slowCallPattern (map fst amodes) + (these, rest) = splitAt n amodes + +-- These cases were found to cover about 99% of all slow calls: +slowCallPattern (PtrArg: PtrArg: PtrArg: PtrArg: PtrArg: PtrArg: _) = (SLIT("stg_ap_pppppp"), 6) +slowCallPattern (PtrArg: PtrArg: PtrArg: PtrArg: PtrArg: _) = (SLIT("stg_ap_ppppp"), 5) +slowCallPattern (PtrArg: PtrArg: PtrArg: PtrArg: _) = (SLIT("stg_ap_pppp"), 4) +slowCallPattern (PtrArg: PtrArg: PtrArg: VoidArg: _) = (SLIT("stg_ap_pppv"), 4) +slowCallPattern (PtrArg: PtrArg: PtrArg: _) = (SLIT("stg_ap_ppp"), 3) +slowCallPattern (PtrArg: PtrArg: VoidArg: _) = (SLIT("stg_ap_ppv"), 3) +slowCallPattern (PtrArg: PtrArg: _) = (SLIT("stg_ap_pp"), 2) +slowCallPattern (PtrArg: VoidArg: _) = (SLIT("stg_ap_pv"), 2) +slowCallPattern (PtrArg: _) = (SLIT("stg_ap_p"), 1) +slowCallPattern (VoidArg: _) = (SLIT("stg_ap_v"), 1) +slowCallPattern (NonPtrArg: _) = (SLIT("stg_ap_n"), 1) +slowCallPattern (FloatArg: _) = (SLIT("stg_ap_f"), 1) +slowCallPattern (DoubleArg: _) = (SLIT("stg_ap_d"), 1) +slowCallPattern (LongArg: _) = (SLIT("stg_ap_l"), 1) +slowCallPattern _ = panic "CgStackery.slowCallPattern" + +------------------------------------------------------------------------- +-- +-- Return conventions +-- +------------------------------------------------------------------------- + +-- A @CtrlReturnConvention@ says how {\em control} is returned. 
+ +data CtrlReturnConvention + = VectoredReturn Int -- size of the vector table (family size) + | UnvectoredReturn Int -- family size + +ctrlReturnConvAlg :: TyCon -> CtrlReturnConvention +ctrlReturnConvAlg tycon + = case (tyConFamilySize tycon) of + size -> -- we're supposed to know... + if (size > (1::Int) && size <= mAX_FAMILY_SIZE_FOR_VEC_RETURNS) then + VectoredReturn size + else + UnvectoredReturn size + -- NB: unvectored returns Include size 0 (no constructors), so that + -- the following perverse code compiles (it crashed GHC in 5.02) + -- data T1 + -- data T2 = T2 !T1 Int + -- The only value of type T1 is bottom, which never returns anyway. + +dataReturnConvPrim :: CgRep -> CmmReg +dataReturnConvPrim PtrArg = CmmGlobal (VanillaReg 1) +dataReturnConvPrim NonPtrArg = CmmGlobal (VanillaReg 1) +dataReturnConvPrim LongArg = CmmGlobal (LongReg 1) +dataReturnConvPrim FloatArg = CmmGlobal (FloatReg 1) +dataReturnConvPrim DoubleArg = CmmGlobal (DoubleReg 1) +dataReturnConvPrim VoidArg = panic "dataReturnConvPrim: void" + + +-- getSequelAmode returns an amode which refers to an info table. The info +-- table will always be of the RET(_VEC)?_(BIG|SMALL) kind. We're careful +-- not to handle real code pointers, just in case we're compiling for +-- an unregisterised/untailcallish architecture, where info pointers and +-- code pointers aren't the same. +-- DIRE WARNING. +-- The OnStack case of sequelToAmode delivers an Amode which is only +-- valid just before the final control transfer, because it assumes +-- that Sp is pointing to the top word of the return address. This +-- seems unclean but there you go. 
+
+getSequelAmode :: FCode CmmExpr
+getSequelAmode
+  = do { EndOfBlockInfo virt_sp sequel <- getEndOfBlockInfo
+ ; case sequel of
+ OnStack -> do { sp_rel <- getSpRelOffset virt_sp
+ ; returnFC (CmmLoad sp_rel wordRep) }
+
+ UpdateCode -> returnFC (CmmLit (CmmLabel mkUpdInfoLabel))
+ CaseAlts lbl _ _ True -> returnFC (CmmLit (CmmLabel mkSeqInfoLabel))
+ CaseAlts lbl _ _ False -> returnFC (CmmLit (CmmLabel lbl))
+ }
+
+-------------------------------------------------------------------------
+--
+-- Build a liveness mask for the current stack
+--
+-------------------------------------------------------------------------
+
+-- There are four kinds of things on the stack:
+--
+-- - pointer variables (bound in the environment)
+-- - non-pointer variables (bound in the environment)
+-- - free slots (recorded in the stack free list)
+-- - non-pointer data slots (recorded in the stack free list)
+--
+-- We build up a bitmap of non-pointer slots by searching the environment
+-- for all the pointer variables, and subtracting these from a bitmap
+-- with initially all bits set (up to the size of the stack frame).
+
+buildContLiveness :: Name -- Basis for label (only)
+ -> [VirtualSpOffset] -- Live stack slots
+ -> FCode Liveness
+buildContLiveness name live_slots
+ = do { stk_usg <- getStkUsage
+ ; let StackUsage { realSp = real_sp,
+ frameSp = frame_sp } = stk_usg
+
+ start_sp :: VirtualSpOffset
+ start_sp = real_sp - retAddrSizeW
+ -- In a continuation, we want a liveness mask that
+ -- starts from just after the return address, which is
+ -- on the stack at real_sp.
+
+ frame_size :: WordOff
+ frame_size = start_sp - frame_sp
+ -- real_sp points to the frame-header for the current
+ -- stack frame, and the end of this frame is frame_sp.
+ -- The size is therefore real_sp - frame_sp - retAddrSizeW
+ -- (subtract one for the frame-header = return address). 
+ + rel_slots :: [WordOff] + rel_slots = sortLt (<) + [ start_sp - ofs -- Get slots relative to top of frame + | ofs <- live_slots ] + + bitmap = intsToReverseBitmap frame_size rel_slots + + ; WARN( not (all (>=0) rel_slots), + ppr name $$ ppr live_slots $$ ppr frame_size $$ ppr start_sp $$ ppr rel_slots ) + mkLiveness name frame_size bitmap } + + +------------------------------------------------------------------------- +-- +-- Register assignment +-- +------------------------------------------------------------------------- + +-- How to assign registers for +-- +-- 1) Calling a fast entry point. +-- 2) Returning an unboxed tuple. +-- 3) Invoking an out-of-line PrimOp. +-- +-- Registers are assigned in order. +-- +-- If we run out, we don't attempt to assign any further registers (even +-- though we might have run out of only one kind of register); we just +-- return immediately with the left-overs specified. +-- +-- The alternative version @assignAllRegs@ uses the complete set of +-- registers, including those that aren't mapped to real machine +-- registers. This is used for calling special RTS functions and PrimOps +-- which expect their arguments to always be in the same registers. 
+
+assignCallRegs, assignPrimOpCallRegs, assignReturnRegs
+ :: [(CgRep,a)] -- Arg or result values to assign
+ -> ([(a, GlobalReg)], -- Register assignment in same order
+ -- for *initial segment of* input list
+ -- (but reversed; doesn't matter)
+ -- VoidRep args do not appear here
+ [(CgRep,a)]) -- Leftover arg or result values
+
+assignCallRegs args
+ = assign_regs args (mkRegTbl [node])
+ -- The entry convention for a function closure
+ -- never uses Node for argument passing; instead
+ -- Node points to the function closure itself
+
+assignPrimOpCallRegs args
+ = assign_regs args (mkRegTbl_allRegs [])
+ -- For primops, *all* arguments must be passed in registers
+
+assignReturnRegs args
+ = assign_regs args (mkRegTbl [])
+ -- For returning unboxed tuples etc,
+ -- we use all regs
+
+assign_regs :: [(CgRep,a)] -- Arg or result values to assign
+ -> AvailRegs -- Regs still avail: Vanilla, Float, Double, Longs
+ -> ([(a, GlobalReg)], [(CgRep, a)])
+assign_regs args supply
+ = go args [] supply
+ where
+ go [] acc supply = (acc, []) -- Return the results reversed (doesn't matter)
+ go ((VoidArg,_) : args) acc supply -- Skip void arguments; they aren't passed, and
+ = go args acc supply -- there's nothing to bind them to
+ go ((rep,arg) : args) acc supply
+ = case assign_reg rep supply of
+ Just (reg, supply') -> go args ((arg,reg):acc) supply'
+ Nothing -> (acc, (rep,arg):args) -- No more regs
+
+assign_reg :: CgRep -> AvailRegs -> Maybe (GlobalReg, AvailRegs)
+assign_reg FloatArg (vs, f:fs, ds, ls) = Just (FloatReg f, (vs, fs, ds, ls))
+assign_reg DoubleArg (vs, fs, d:ds, ls) = Just (DoubleReg d, (vs, fs, ds, ls))
+assign_reg LongArg (vs, fs, ds, l:ls) = Just (LongReg l, (vs, fs, ds, ls))
+assign_reg PtrArg (v:vs, fs, ds, ls) = Just (VanillaReg v, (vs, fs, ds, ls))
+assign_reg NonPtrArg (v:vs, fs, ds, ls) = Just (VanillaReg v, (vs, fs, ds, ls))
+ -- PtrArg and NonPtrArg both go in a vanilla register
+assign_reg other not_enough_regs = Nothing
+
+ 
+------------------------------------------------------------------------- +-- +-- Register supplies +-- +------------------------------------------------------------------------- + +-- Vanilla registers can contain pointers, Ints, Chars. +-- Floats and doubles have separate register supplies. +-- +-- We take these register supplies from the *real* registers, i.e. those +-- that are guaranteed to map to machine registers. + +useVanillaRegs | opt_Unregisterised = 0 + | otherwise = mAX_Real_Vanilla_REG +useFloatRegs | opt_Unregisterised = 0 + | otherwise = mAX_Real_Float_REG +useDoubleRegs | opt_Unregisterised = 0 + | otherwise = mAX_Real_Double_REG +useLongRegs | opt_Unregisterised = 0 + | otherwise = mAX_Real_Long_REG + +vanillaRegNos, floatRegNos, doubleRegNos, longRegNos :: [Int] +vanillaRegNos = regList useVanillaRegs +floatRegNos = regList useFloatRegs +doubleRegNos = regList useDoubleRegs +longRegNos = regList useLongRegs + +allVanillaRegNos, allFloatRegNos, allDoubleRegNos, allLongRegNos :: [Int] +allVanillaRegNos = regList mAX_Vanilla_REG +allFloatRegNos = regList mAX_Float_REG +allDoubleRegNos = regList mAX_Double_REG +allLongRegNos = regList mAX_Long_REG + +regList 0 = [] +regList n = [1 .. n] + +type AvailRegs = ( [Int] -- available vanilla regs. 
+ , [Int] -- floats + , [Int] -- doubles + , [Int] -- longs (int64 and word64) + ) + +mkRegTbl :: [GlobalReg] -> AvailRegs +mkRegTbl regs_in_use + = mkRegTbl' regs_in_use vanillaRegNos floatRegNos doubleRegNos longRegNos + +mkRegTbl_allRegs :: [GlobalReg] -> AvailRegs +mkRegTbl_allRegs regs_in_use + = mkRegTbl' regs_in_use allVanillaRegNos allFloatRegNos allDoubleRegNos allLongRegNos + +mkRegTbl' regs_in_use vanillas floats doubles longs + = (ok_vanilla, ok_float, ok_double, ok_long) + where + ok_vanilla = mapCatMaybes (select VanillaReg) vanillas + ok_float = mapCatMaybes (select FloatReg) floats + ok_double = mapCatMaybes (select DoubleReg) doubles + ok_long = mapCatMaybes (select LongReg) longs + -- rep isn't looked at, hence we can use any old rep. + + select :: (Int -> GlobalReg) -> Int{-cand-} -> Maybe Int + -- one we've unboxed the Int, we make a GlobalReg + -- and see if it is already in use; if not, return its number. + + select mk_reg_fun cand + = let + reg = mk_reg_fun cand + in + if reg `not_elem` regs_in_use + then Just cand + else Nothing + where + not_elem = isn'tIn "mkRegTbl" + + diff --git a/ghc/compiler/codeGen/CgCase.lhs b/ghc/compiler/codeGen/CgCase.lhs index c805aaa413..c7b03ef13a 100644 --- a/ghc/compiler/codeGen/CgCase.lhs +++ b/ghc/compiler/codeGen/CgCase.lhs @@ -1,7 +1,7 @@ % % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 % -% $Id: CgCase.lhs,v 1.68 2004/08/10 09:02:38 simonmar Exp $ +% $Id: CgCase.lhs,v 1.69 2004/08/13 13:05:51 simonmar Exp $ % %******************************************************** %* * @@ -11,7 +11,7 @@ \begin{code} module CgCase ( cgCase, saveVolatileVarsAndRegs, - mkRetDirectTarget, restoreCurrentCostCentre + restoreCurrentCostCentre ) where #include "HsVersions.h" @@ -20,43 +20,42 @@ import {-# SOURCE #-} CgExpr ( cgExpr ) import CgMonad import StgSyn -import AbsCSyn - -import AbsCUtils ( mkAbstractCs, mkAbsCStmts, mkAlgAltsCSwitch, - getAmodeRep, shimFCallArg ) -import CgBindery ( getVolatileRegs, 
getArgAmodes, +import CgBindery ( getArgAmodes, bindNewToReg, bindNewToTemp, - getCAddrModeAndInfo, - rebindToStack, getCAddrMode, getCAddrModeIfVolatile, - buildContLivenessMask, nukeDeadBindings, + getCgIdInfo, getArgAmode, + rebindToStack, getCAddrModeIfVolatile, + nukeDeadBindings, idInfoToAmode ) import CgCon ( bindConArgs, bindUnboxedTupleComponents ) import CgHeapery ( altHeapCheck, unbxTupleHeapCheck ) -import CgRetConv ( dataReturnConvPrim, ctrlReturnConvAlg, +import CgCallConv ( dataReturnConvPrim, ctrlReturnConvAlg, CtrlReturnConvention(..) ) -import CgStackery ( allocPrimStack, allocStackTop, - deAllocStackTop, freeStackSlots, dataStackSlots +import CgStackery ( allocPrimStack, allocStackTop, getSpRelOffset, + deAllocStackTop, freeStackSlots ) import CgTailCall ( performTailCall ) -import CgUsages ( getSpRelOffset ) -import CLabel ( mkVecTblLabel, mkClosureTblLabel, - mkDefaultLabel, mkAltLabel, mkReturnInfoLabel - ) +import CgPrimOp ( cgPrimOp ) +import CgForeignCall ( cgForeignCall ) +import CgUtils ( newTemp, cgLit, emitLitSwitch, emitSwitch, + tagToClosure ) +import CgProf ( curCCS, curCCSAddr ) +import CgInfoTbls ( emitDirectReturnTarget, emitAlgReturnTarget, + dataConTagZ ) +import SMRep ( CgRep(..), retAddrSizeW, nonVoidArg, isVoidArg, + idCgRep, tyConCgRep, typeHint ) +import CmmUtils ( CmmStmts, noStmts, oneStmt, plusStmts ) +import Cmm +import MachOp ( wordRep ) import ClosureInfo ( mkLFArgument ) import CmdLineOpts ( opt_SccProfilingOn ) -import Id ( Id, idName, isDeadBinder ) -import DataCon ( dataConTag, fIRST_TAG, ConTag ) +import Id ( Id, idName, isDeadBinder, idType ) +import ForeignCall ( ForeignCall(..), CCallSpec(..), playSafe ) import VarSet ( varSetElems ) import CoreSyn ( AltCon(..) ) -import PrimOp ( primOpOutOfLine, PrimOp(..) ) -import PrimRep ( getPrimRepSize, retPrimRepSize, PrimRep(..) 
- ) -import TyCon ( TyCon, isEnumerationTyCon, tyConPrimRep ) -import Unique ( Unique, Uniquable(..), newTagUnique ) -import ForeignCall -import Util ( only ) -import List ( sortBy ) +import PrimOp ( PrimOp(..), primOpOutOfLine ) +import TyCon ( isEnumerationTyCon, tyConFamilySize ) +import Util ( isSingleton ) import Outputable \end{code} @@ -122,10 +121,11 @@ Special case #1: case of literal. \begin{code} cgCase (StgLit lit) live_in_whole_case live_in_alts bndr srt - alt_type@(PrimAlt tycon) alts - = bindNewToTemp bndr `thenFC` \ tmp_amode -> - absC (CAssign tmp_amode (CLit lit)) `thenC` - cgPrimAlts NoGC tmp_amode alts alt_type + alt_type@(PrimAlt tycon) alts + = do { tmp_reg <- bindNewToTemp bndr + ; cm_lit <- cgLit lit + ; stmtC (CmmAssign tmp_reg (CmmLit cm_lit)) + ; cgPrimAlts NoGC alt_type tmp_reg alts } \end{code} Special case #2: scrutinising a primitive-typed variable. No @@ -138,15 +138,15 @@ eliminate a heap check altogether. \begin{code} cgCase (StgApp v []) live_in_whole_case live_in_alts bndr srt alt_type@(PrimAlt tycon) alts - - = -- Careful! we can't just bind the default binder to the same thing - -- as the scrutinee, since it might be a stack location, and having - -- two bindings pointing at the same stack locn doesn't work (it - -- confuses nukeDeadBindings). Hence, use a new temp. - getCAddrMode v `thenFC` \ amode -> - bindNewToTemp bndr `thenFC` \ tmp_amode -> - absC (CAssign tmp_amode amode) `thenC` - cgPrimAlts NoGC tmp_amode alts alt_type + = do { -- Careful! we can't just bind the default binder to the same thing + -- as the scrutinee, since it might be a stack location, and having + -- two bindings pointing at the same stack locn doesn't work (it + -- confuses nukeDeadBindings). Hence, use a new temp. + v_info <- getCgIdInfo v + ; amode <- idInfoToAmode v_info + ; tmp_reg <- bindNewToTemp bndr + ; stmtC (CmmAssign tmp_reg amode) + ; cgPrimAlts NoGC alt_type tmp_reg alts } \end{code} Special case #3: inline PrimOps and foreign calls. 
@@ -154,85 +154,8 @@ Special case #3: inline PrimOps and foreign calls. \begin{code} cgCase (StgOpApp op args _) live_in_whole_case live_in_alts bndr srt alt_type alts - | inline_primop - = -- Get amodes for the arguments and results - getArgAmodes args `thenFC` \ arg_amodes1 -> - let - arg_amodes - | StgFCallOp{} <- op = zipWith shimFCallArg args arg_amodes1 - | otherwise = arg_amodes1 - in - getVolatileRegs live_in_alts `thenFC` \ vol_regs -> - - case alt_type of - PrimAlt tycon -- PRIMITIVE ALTS - -> bindNewToTemp bndr `thenFC` \ tmp_amode -> - absC (COpStmt [tmp_amode] op arg_amodes vol_regs) `thenC` - -- Note: no liveness arg - cgPrimAlts NoGC tmp_amode alts alt_type - - UbxTupAlt tycon -- UNBOXED TUPLE ALTS - -> -- No heap check, no yield, just get in there and do it. - -- NB: the case binder isn't bound to anything; - -- it has a unboxed tuple type - mapFCs bindNewToTemp res_ids `thenFC` \ res_tmps -> - absC (COpStmt res_tmps op arg_amodes vol_regs) `thenC` - cgExpr rhs - where - [(_, res_ids, _, rhs)] = alts - - AlgAlt tycon -- ENUMERATION TYPE RETURN - | StgPrimOp primop <- op - -> ASSERT( isEnumerationTyCon tycon ) - let - do_enum_primop :: PrimOp -> FCode CAddrMode -- Returns amode for result - do_enum_primop TagToEnumOp -- No code! - = returnFC (only arg_amodes) - - do_enum_primop primop - = absC (COpStmt [tag_amode] op arg_amodes vol_regs) `thenC` - returnFC tag_amode - where - tag_amode = CTemp (newTagUnique (getUnique bndr) 'C') IntRep - -- Being a bit short of uniques for temporary - -- variables here, we use newTagUnique to - -- generate a new unique from the case binder. - -- The case binder's unique will presumably - -- have the 'c' tag (generated by CoreToStg), - -- so we just change its tag to 'C' (for - -- 'case') to ensure it doesn't clash with - -- anything else. 
We can't use the unique - -- from the case binder, becaus e this is used - -- to hold the actual result closure (via the - -- call to bindNewToTemp) - in - do_enum_primop primop `thenFC` \ tag_amode -> - - -- Bind the default binder if necessary - -- (avoiding it avoids the assignment) - -- The deadness info is set by StgVarInfo - (if (isDeadBinder bndr) - then nopC - else bindNewToTemp bndr `thenFC` \ tmp_amode -> - absC (CAssign tmp_amode (tagToClosure tycon tag_amode)) - ) `thenC` - - -- Compile the alts - cgAlgAlts NoGC (getUnique bndr) - Nothing{-cc_slot-} False{-no semi-tagging-} - (AlgAlt tycon) alts `thenFC` \ tagged_alts -> - - -- Do the switch - absC (mkAlgAltsCSwitch tag_amode tagged_alts) - - other -> pprPanic "cgCase: case of primop has strange alt type" (ppr alt_type) - where - inline_primop = case op of - StgPrimOp primop -> not (primOpOutOfLine primop) - --StgFCallOp (CCall (CCallSpec _ _ PlayRisky)) _ -> True - -- unsafe foreign calls are "inline" - _otherwise -> False - + | not (primOpOutOfLine primop) + = cgInlinePrimOp primop args bndr alt_type live_in_alts alts \end{code} TODO: Case-of-case of primop can probably be done inline too (but @@ -240,6 +163,30 @@ maybe better to translate it out beforehand). See ghc/lib/misc/PackedString.lhs for examples where this crops up (with 4.02). +Special case #4: inline foreign calls: an unsafe foreign call can be done +right here, just like an inline primop. + +\begin{code} +cgCase (StgOpApp op@(StgFCallOp fcall _) args _) + live_in_whole_case live_in_alts bndr srt alt_type alts + | unsafe_foreign_call + = ASSERT( isSingleton alts ) + do -- *must* be an unboxed tuple alt. + -- exactly like the cgInlinePrimOp case for unboxed tuple alts.. 
+ { res_tmps <- mapFCs bindNewToTemp non_void_res_ids + ; let res_hints = map (typeHint.idType) non_void_res_ids + ; cgForeignCall (zip res_tmps res_hints) fcall args live_in_alts + ; cgExpr rhs } + where + (_, res_ids, _, rhs) = head alts + non_void_res_ids = filter (nonVoidArg . idCgRep) res_ids + + unsafe_foreign_call + = case fcall of + CCall (CCallSpec _ _ s) -> not (playSafe s) + _other -> False +\end{code} + Special case: scrutinising a non-primitive variable. This can be done a little better than the general case, because we can reuse/trim the stack slot holding the variable (if it is in one). @@ -247,8 +194,8 @@ we can reuse/trim the stack slot holding the variable (if it is in one). \begin{code} cgCase (StgApp fun args) live_in_whole_case live_in_alts bndr srt alt_type alts - = getCAddrModeAndInfo fun `thenFC` \ (fun', fun_amode, lf_info) -> - getArgAmodes args `thenFC` \ arg_amodes -> + = do { fun_info <- getCgIdInfo fun + ; arg_amodes <- getArgAmodes args -- Nuking dead bindings *before* calculating the saves is the -- value-add here. We might end up freeing up some slots currently @@ -256,19 +203,18 @@ cgCase (StgApp fun args) -- NOTE: we need to look up the variables used in the call before -- doing this, because some of them may not be in the environment -- afterward. 
- nukeDeadBindings live_in_alts `thenC` - saveVolatileVarsAndRegs live_in_alts - `thenFC` \ (save_assts, alts_eob_info, maybe_cc_slot) -> - - forkEval alts_eob_info - ( allocStackTop retPrimRepSize - `thenFC` \_ -> nopC ) - ( deAllocStackTop retPrimRepSize `thenFC` \_ -> - cgEvalAlts maybe_cc_slot bndr srt alt_type alts ) - `thenFC` \ scrut_eob_info -> - - setEndOfBlockInfo (maybeReserveSeqFrame alt_type scrut_eob_info) $ - performTailCall fun' fun_amode lf_info arg_amodes save_assts + ; nukeDeadBindings live_in_alts + ; (save_assts, alts_eob_info, maybe_cc_slot) + <- saveVolatileVarsAndRegs live_in_alts + + ; scrut_eob_info + <- forkEval alts_eob_info + (allocStackTop retAddrSizeW >> nopC) + (do { deAllocStackTop retAddrSizeW + ; cgEvalAlts maybe_cc_slot bndr srt alt_type alts }) + + ; setEndOfBlockInfo (maybeReserveSeqFrame alt_type scrut_eob_info) + (performTailCall fun_info arg_amodes save_assts) } \end{code} Note about return addresses: we *always* push a return address, even @@ -286,26 +232,27 @@ Finally, here is the general case. \begin{code} cgCase expr live_in_whole_case live_in_alts bndr srt alt_type alts - = -- Figure out what volatile variables to save - nukeDeadBindings live_in_whole_case `thenC` + = do { -- Figure out what volatile variables to save + nukeDeadBindings live_in_whole_case - saveVolatileVarsAndRegs live_in_alts - `thenFC` \ (save_assts, alts_eob_info, maybe_cc_slot) -> - - -- Save those variables right now! 
- absC save_assts `thenC` - - -- generate code for the alts - forkEval alts_eob_info - (nukeDeadBindings live_in_alts `thenC` - allocStackTop retPrimRepSize -- space for retn address - `thenFC` \_ -> nopC - ) - (deAllocStackTop retPrimRepSize `thenFC` \_ -> - cgEvalAlts maybe_cc_slot bndr srt alt_type alts) `thenFC` \ scrut_eob_info -> - - setEndOfBlockInfo (maybeReserveSeqFrame alt_type scrut_eob_info) $ - cgExpr expr + ; (save_assts, alts_eob_info, maybe_cc_slot) + <- saveVolatileVarsAndRegs live_in_alts + + -- Save those variables right now! + ; emitStmts save_assts + + -- generate code for the alts + ; scrut_eob_info + <- forkEval alts_eob_info + (do { nukeDeadBindings live_in_alts + ; allocStackTop retAddrSizeW -- space for retn address + ; nopC }) + (do { deAllocStackTop retAddrSizeW + ; cgEvalAlts maybe_cc_slot bndr srt alt_type alts }) + + ; setEndOfBlockInfo (maybeReserveSeqFrame alt_type scrut_eob_info) + (cgExpr expr) + } \end{code} There's a lot of machinery going on behind the scenes to manage the @@ -329,25 +276,93 @@ because we don't reserve it until just before the eval. TODO!! Problem: however, we have to save the current cost centre stack somewhere, because at the eval point the current CCS might be -different. So we pick a free stack slot and save CCCS in it. The -problem with this is that this slot isn't recorded as free/unboxed in -the environment, so a case expression in the scrutinee will have the -wrong bitmap attached. Fortunately we don't ever seem to see -case-of-case at the back end. One solution might be to shift the -saved CCS to the correct place in the activation record just before -the jump. - --SDM - -(one consequence of the above is that activation records on the stack -don't follow the layout of closures when we're profiling. The CCS -could be anywhere within the record). +different. So we pick a free stack slot and save CCCS in it. 
One +consequence of this is that activation records on the stack don't +follow the layout of closures when we're profiling. The CCS could be +anywhere within the record). \begin{code} -maybeReserveSeqFrame PolyAlt (EndOfBlockInfo args_sp (CaseAlts amode stuff _)) - = EndOfBlockInfo (args_sp + retPrimRepSize) (CaseAlts amode stuff True) +maybeReserveSeqFrame PolyAlt (EndOfBlockInfo args_sp (CaseAlts amode stuff bndr _)) + = EndOfBlockInfo (args_sp + retAddrSizeW) (CaseAlts amode stuff bndr True) maybeReserveSeqFrame other scrut_eob_info = scrut_eob_info \end{code} + +%************************************************************************ +%* * + Inline primops +%* * +%************************************************************************ + +\begin{code} +cgInlinePrimOp primop args bndr (PrimAlt tycon) live_in_alts alts + | isVoidArg (idCgRep bndr) + = ASSERT( con == DEFAULT && isSingleton alts && null bs ) + do { -- VOID RESULT; just sequencing, + -- so get in there and do it + cgPrimOp [] primop args live_in_alts + ; cgExpr rhs } + where + (con,bs,_,rhs) = head alts + +cgInlinePrimOp primop args bndr (PrimAlt tycon) live_in_alts alts + = do { -- PRIMITIVE ALTS, with non-void result + tmp_reg <- bindNewToTemp bndr + ; cgPrimOp [tmp_reg] primop args live_in_alts + ; cgPrimAlts NoGC (PrimAlt tycon) tmp_reg alts } + +cgInlinePrimOp primop args bndr (UbxTupAlt tycon) live_in_alts alts + = ASSERT( isSingleton alts ) + do { -- UNBOXED TUPLE ALTS + -- No heap check, no yield, just get in there and do it. + -- NB: the case binder isn't bound to anything; + -- it has a unboxed tuple type + + res_tmps <- mapFCs bindNewToTemp non_void_res_ids + ; cgPrimOp res_tmps primop args live_in_alts + ; cgExpr rhs } + where + (_, res_ids, _, rhs) = head alts + non_void_res_ids = filter (nonVoidArg . idCgRep) res_ids + +cgInlinePrimOp primop args bndr (AlgAlt tycon) live_in_alts alts + = do { -- ENUMERATION TYPE RETURN + -- Typical: case a ># b of { True -> ..; False -> .. 
} + -- The primop itself returns an index into the table of + -- closures for the enumeration type. + tag_amode <- ASSERT( isEnumerationTyCon tycon ) + do_enum_primop primop + + -- Bind the default binder if necessary + -- (avoiding it avoids the assignment) + -- The deadness info is set by StgVarInfo + ; whenC (not (isDeadBinder bndr)) + (do { tmp_reg <- bindNewToTemp bndr + ; stmtC (CmmAssign tmp_reg (tagToClosure tycon tag_amode)) }) + + -- Compile the alts + ; (branches, mb_deflt) <- cgAlgAlts NoGC Nothing{-cc_slot-} + (AlgAlt tycon) alts + + -- Do the switch + ; emitSwitch tag_amode branches mb_deflt 0 (tyConFamilySize tycon - 1) + } + where + + do_enum_primop :: PrimOp -> FCode CmmExpr -- Returns amode for result + do_enum_primop TagToEnumOp -- No code! + | [arg] <- args = do + (_,e) <- getArgAmode arg + return e + do_enum_primop primop + = do tmp <- newTemp wordRep + cgPrimOp [tmp] primop args live_in_alts + returnFC (CmmReg tmp) + +cgInlinePrimOp primop arg_amodes bndr PolyAlt live_in_alts alts + = pprPanic "cgCase: case of primop has polymorphic type" (ppr bndr) +\end{code} + %************************************************************************ %* * \subsection[CgCase-alts]{Alternatives} @@ -368,6 +383,21 @@ cgEvalAlts :: Maybe VirtualSpOffset -- Offset of cost-centre to be restored, if -- to be a label so that we can duplicate it -- without risk of duplicating code +cgEvalAlts cc_slot bndr srt alt_type@(PrimAlt tycon) alts + = do { let rep = tyConCgRep tycon + reg = dataReturnConvPrim rep -- Bottom for voidRep + + ; abs_c <- forkProc $ do + { -- Bind the case binder, except if it's void + -- (reg is bottom in that case) + whenC (nonVoidArg rep) $ + bindNewToReg bndr reg (mkLFArgument bndr) + ; restoreCurrentCostCentre cc_slot True + ; cgPrimAlts GCMayHappen alt_type reg alts } + + ; lbl <- emitDirectReturnTarget (idName bndr) abs_c srt + ; returnFC (CaseAlts lbl Nothing bndr False) } + cgEvalAlts cc_slot bndr srt (UbxTupAlt _) [(con,args,_,rhs)] = -- 
Unboxed tuple case -- By now, the simplifier should have have turned it @@ -376,38 +406,24 @@ cgEvalAlts cc_slot bndr srt (UbxTupAlt _) [(con,args,_,rhs)] -- case e of DEFAULT -> e ASSERT2( case con of { DataAlt _ -> True; other -> False }, text "cgEvalAlts: dodgy case of unboxed tuple type" ) - - forkAbsC ( -- forkAbsC for the RHS, so that the envt is - -- not changed for the mkRetDirect call - bindUnboxedTupleComponents args `thenFC` \ (live_regs, ptrs, nptrs, _) -> - -- restore the CC *after* binding the tuple components, so that we - -- get the stack offset of the saved CC right. - restoreCurrentCostCentre cc_slot True `thenC` - -- Generate a heap check if necessary - unbxTupleHeapCheck live_regs ptrs nptrs AbsCNop ( - -- And finally the code for the alternative - cgExpr rhs - )) `thenFC` \ abs_c -> - mkRetDirectTarget bndr abs_c srt `thenFC` \ lbl -> - returnFC (CaseAlts lbl Nothing False) - -cgEvalAlts cc_slot bndr srt alt_type@(PrimAlt tycon) alts - = forkAbsC ( -- forkAbsC for the RHS, so that the envt is - -- not changed for the mkRetDirect call - restoreCurrentCostCentre cc_slot True `thenC` - bindNewToReg bndr reg (mkLFArgument bndr) `thenC` - cgPrimAlts GCMayHappen (CReg reg) alts alt_type - ) `thenFC` \ abs_c -> - mkRetDirectTarget bndr abs_c srt `thenFC` \ lbl -> - returnFC (CaseAlts lbl Nothing False) - where - reg = dataReturnConvPrim kind - kind = tyConPrimRep tycon + do { -- forkAbsC for the RHS, so that the envt is + -- not changed for the emitDirectReturn call + abs_c <- forkProc $ do + { (live_regs, ptrs, nptrs, _) <- bindUnboxedTupleComponents args + -- Restore the CC *after* binding the tuple components, + -- so that we get the stack offset of the saved CC right. 
+ ; restoreCurrentCostCentre cc_slot True + -- Generate a heap check if necessary + -- and finally the code for the alternative + ; unbxTupleHeapCheck live_regs ptrs nptrs noStmts + (cgExpr rhs) } + ; lbl <- emitDirectReturnTarget (idName bndr) abs_c srt + ; returnFC (CaseAlts lbl Nothing bndr False) } cgEvalAlts cc_slot bndr srt alt_type alts = -- Algebraic and polymorphic case - -- Bind the default binder - bindNewToReg bndr node (mkLFArgument bndr) `thenC` + do { -- Bind the default binder + bindNewToReg bndr nodeReg (mkLFArgument bndr) -- Generate sequel info for use downstream -- At the moment, we only do it if the type is vector-returnable. @@ -418,25 +434,16 @@ cgEvalAlts cc_slot bndr srt alt_type alts -- -- which is worse than having the alt code in the switch statement - let ret_conv = case alt_type of - AlgAlt tc -> ctrlReturnConvAlg tc - PolyAlt -> UnvectoredReturn 0 - - use_labelled_alts = case ret_conv of - VectoredReturn _ -> True - _ -> False - - semi_tagged_stuff = cgSemiTaggedAlts use_labelled_alts bndr alts - - in - cgAlgAlts GCMayHappen (getUnique bndr) - cc_slot use_labelled_alts - alt_type alts `thenFC` \ tagged_alt_absCs -> + ; (alts, mb_deflt) <- cgAlgAlts GCMayHappen cc_slot alt_type alts - mkRetVecTarget bndr tagged_alt_absCs - srt ret_conv `thenFC` \ return_vec -> + ; (lbl, branches) <- emitAlgReturnTarget (idName bndr) + alts mb_deflt srt ret_conv - returnFC (CaseAlts return_vec semi_tagged_stuff False) + ; returnFC (CaseAlts lbl branches bndr False) } + where + ret_conv = case alt_type of + AlgAlt tc -> ctrlReturnConvAlg tc + PolyAlt -> UnvectoredReturn 0 \end{code} @@ -462,94 +469,42 @@ are inlined alternatives. 
\begin{code} cgAlgAlts :: GCFlag - -> Unique -> Maybe VirtualSpOffset - -> Bool -- True <=> branches must be labelled - -- (used for semi-tagging) - -> AltType -- ** AlgAlt or PolyAlt only ** - -> [StgAlt] -- The alternatives - -> FCode [(AltCon, AbstractC)] -- The branches + -> AltType -- ** AlgAlt or PolyAlt only ** + -> [StgAlt] -- The alternatives + -> FCode ( [(ConTagZ, CgStmts)], -- The branches + Maybe CgStmts ) -- The default case + +cgAlgAlts gc_flag cc_slot alt_type alts + = do alts <- forkAlts [ cgAlgAlt gc_flag cc_slot alt_type alt | alt <- alts] + let + mb_deflt = case alts of -- DEFAULT is always first, if present + ((DEFAULT,blks) : _) -> Just blks + other -> Nothing + + branches = [(dataConTagZ con, blks) + | (DataAlt con, blks) <- alts] + -- in + return (branches, mb_deflt) -cgAlgAlts gc_flag uniq restore_cc must_label_branches alt_type alts - = forkAlts [ cgAlgAlt gc_flag uniq restore_cc must_label_branches alt_type alt - | alt <- alts] cgAlgAlt :: GCFlag - -> Unique -> Maybe VirtualSpOffset -> Bool -- turgid state - -> AltType -- ** AlgAlt or PolyAlt only ** + -> Maybe VirtualSpOffset -- Turgid state + -> AltType -- ** AlgAlt or PolyAlt only ** -> StgAlt - -> FCode (AltCon, AbstractC) - -cgAlgAlt gc_flag uniq cc_slot must_label_branch - alt_type (con, args, use_mask, rhs) - = getAbsC (bind_con_args con args `thenFC` \ _ -> - restoreCurrentCostCentre cc_slot True `thenC` - maybeAltHeapCheck gc_flag alt_type (cgExpr rhs) - ) `thenFC` \ abs_c -> - let - final_abs_c | must_label_branch = CCodeBlock lbl abs_c - | otherwise = abs_c - in - returnFC (con, final_abs_c) + -> FCode (AltCon, CgStmts) + +cgAlgAlt gc_flag cc_slot alt_type (con, args, use_mask, rhs) + = do { abs_c <- getCgStmts $ do + { bind_con_args con args + ; restoreCurrentCostCentre cc_slot True + ; maybeAltHeapCheck gc_flag alt_type (cgExpr rhs) } + ; return (con, abs_c) } where - lbl = case con of - DataAlt dc -> mkAltLabel uniq (dataConTag dc) - DEFAULT -> mkDefaultLabel uniq - other -> 
pprPanic "cgAlgAlt" (ppr con) - bind_con_args DEFAULT args = nopC bind_con_args (DataAlt dc) args = bindConArgs dc args \end{code} -%************************************************************************ -%* * -\subsection[CgCase-semi-tagged-alts]{The code to deal with sem-tagging} -%* * -%************************************************************************ - -Turgid-but-non-monadic code to conjure up the required info from -algebraic case alternatives for semi-tagging. - -\begin{code} -cgSemiTaggedAlts :: Bool -- True <=> use semitagging: each alt will be labelled - -> Id - -> [StgAlt] - -> SemiTaggingStuff - -cgSemiTaggedAlts False binder alts - = Nothing -cgSemiTaggedAlts True binder alts - = Just ([st_alt con args | (DataAlt con, args, _, _) <- alts], - case head alts of - (DEFAULT, _, _, _) -> Just st_deflt - other -> Nothing) - where - uniq = getUnique binder - - st_deflt = (binder, - (CCallProfCtrMacro FSLIT("RET_SEMI_BY_DEFAULT") [], -- ToDo: monadise? - mkDefaultLabel uniq)) - - st_alt con args -- Ha! Nothing to do; Node already points to the thing - = (con_tag, - (CCallProfCtrMacro FSLIT("RET_SEMI_IN_HEAP") -- ToDo: monadise? - [mkIntCLit (length args)], -- how big the thing in the heap is - join_label) - ) - where - con_tag = dataConTag con - join_label = mkAltLabel uniq con_tag - - -tagToClosure :: TyCon -> CAddrMode -> CAddrMode --- Primops returning an enumeration type (notably Bool) --- actually return an index into --- the table of closures for the enumeration type -tagToClosure tycon tag_amode - = CVal (CIndex closure_tbl tag_amode PtrRep) PtrRep - where - closure_tbl = CLbl (mkClosureTblLabel tycon) PtrRep -\end{code} %************************************************************************ %* * @@ -566,29 +521,31 @@ As usual, no binders in the alternatives are yet bound. 
\begin{code}
 cgPrimAlts :: GCFlag
- -> CAddrMode -- Scrutinee
+ -> AltType -- Always PrimAlt, but passed to maybeAltHeapCheck
+ -> CmmReg -- Scrutinee
 -> [StgAlt] -- Alternatives
- -> AltType
 -> Code
+-- NB: cgPrimAlts emits code that does the case analysis.
+-- It's often used in inline situations, rather than to generate
+-- a labelled return point. That's why its interface is a little
+-- different to cgAlgAlts
+--
 -- INVARIANT: the default binder is already bound
-cgPrimAlts gc_flag scrutinee alts alt_type
- = forkAlts (map (cgPrimAlt gc_flag alt_type) alts) `thenFC` \ tagged_absCs ->
- let
- ((DEFAULT, deflt_absC) : others) = tagged_absCs -- There is always a default
- alt_absCs = [(lit,rhs) | (LitAlt lit, rhs) <- others]
- in
- absC (CSwitch scrutinee alt_absCs deflt_absC)
- -- CSwitch does sensible things with one or zero alternatives
+cgPrimAlts gc_flag alt_type scrutinee alts
+ = do { tagged_absCs <- forkAlts (map (cgPrimAlt gc_flag alt_type) alts)
+ ; let ((DEFAULT, deflt_absC) : others) = tagged_absCs -- There is always a default
+ alt_absCs = [(lit,rhs) | (LitAlt lit, rhs) <- others]
+ ; emitLitSwitch (CmmReg scrutinee) alt_absCs deflt_absC }
 
 cgPrimAlt :: GCFlag
 -> AltType
- -> StgAlt -- The alternative
- -> FCode (AltCon, AbstractC) -- Its compiled form
+ -> StgAlt -- The alternative
+ -> FCode (AltCon, CgStmts) -- Its compiled form
 
 cgPrimAlt gc_flag alt_type (con, [], [], rhs)
 = ASSERT( case con of { DEFAULT -> True; LitAlt _ -> True; other -> False } )
- getAbsC (maybeAltHeapCheck gc_flag alt_type (cgExpr rhs)) `thenFC` \ abs_c ->
- returnFC (con, abs_c)
+ do { abs_c <- getCgStmts (maybeAltHeapCheck gc_flag alt_type (cgExpr rhs))
+ ; returnFC (con, abs_c) }
 \end{code}
 
 
@@ -605,52 +562,42 @@ maybeAltHeapCheck
 -> Code -- Continuation
 -> Code
 maybeAltHeapCheck NoGC _ code = code
-maybeAltHeapCheck 
GCMayHappen alt_type code = altHeapCheck alt_type code saveVolatileVarsAndRegs :: StgLiveVars -- Vars which should be made safe - -> FCode (AbstractC, -- Assignments to do the saves + -> FCode (CmmStmts, -- Assignments to do the saves EndOfBlockInfo, -- sequel for the alts Maybe VirtualSpOffset) -- Slot for current cost centre saveVolatileVarsAndRegs vars - = saveVolatileVars vars `thenFC` \ var_saves -> - saveCurrentCostCentre `thenFC` \ (maybe_cc_slot, cc_save) -> - getEndOfBlockInfo `thenFC` \ eob_info -> - returnFC (mkAbstractCs [var_saves, cc_save], - eob_info, - maybe_cc_slot) + = do { var_saves <- saveVolatileVars vars + ; (maybe_cc_slot, cc_save) <- saveCurrentCostCentre + ; eob_info <- getEndOfBlockInfo + ; returnFC (var_saves `plusStmts` cc_save, + eob_info, + maybe_cc_slot) } saveVolatileVars :: StgLiveVars -- Vars which should be made safe - -> FCode AbstractC -- Assignments to to the saves + -> FCode CmmStmts -- Assignments to to the saves saveVolatileVars vars - = save_em (varSetElems vars) + = do { stmts_s <- mapFCs save_it (varSetElems vars) + ; return (foldr plusStmts noStmts stmts_s) } where - save_em [] = returnFC AbsCNop - - save_em (var:vars) - = getCAddrModeIfVolatile var `thenFC` \ v -> - case v of - Nothing -> save_em vars -- Non-volatile, so carry on - - - Just vol_amode -> -- Aha! It's volatile - save_var var vol_amode `thenFC` \ abs_c -> - save_em vars `thenFC` \ abs_cs -> - returnFC (abs_c `mkAbsCStmts` abs_cs) + save_it var + = do { v <- getCAddrModeIfVolatile var + ; case v of + Nothing -> return noStmts -- Non-volatile + Just vol_amode -> save_var var vol_amode -- Aha! 
It's volatile + } save_var var vol_amode - = allocPrimStack (getPrimRepSize kind) `thenFC` \ slot -> - rebindToStack var slot `thenC` - getSpRelOffset slot `thenFC` \ sp_rel -> - returnFC (CAssign (CVal sp_rel kind) vol_amode) - where - kind = getAmodeRep vol_amode + = do { slot <- allocPrimStack (idCgRep var) + ; rebindToStack var slot + ; sp_rel <- getSpRelOffset slot + ; returnFC (oneStmt (CmmStore sp_rel vol_amode)) } \end{code} --------------------------------------------------------------------------- @@ -663,123 +610,24 @@ virtual offset of the location, to pass on to the alternatives, and \begin{code} saveCurrentCostCentre :: FCode (Maybe VirtualSpOffset, -- Where we decide to store it - AbstractC) -- Assignment to save it + CmmStmts) -- Assignment to save it saveCurrentCostCentre - = if not opt_SccProfilingOn then - returnFC (Nothing, AbsCNop) - else - allocPrimStack (getPrimRepSize CostCentreRep) `thenFC` \ slot -> - dataStackSlots [slot] `thenC` - getSpRelOffset slot `thenFC` \ sp_rel -> - returnFC (Just slot, - CAssign (CVal sp_rel CostCentreRep) (CReg CurCostCentre)) + | not opt_SccProfilingOn + = returnFC (Nothing, noStmts) + | otherwise + = do { slot <- allocPrimStack PtrArg + ; sp_rel <- getSpRelOffset slot + ; returnFC (Just slot, + oneStmt (CmmStore sp_rel curCCS)) } -- Sometimes we don't free the slot containing the cost centre after restoring it -- (see CgLetNoEscape.cgLetNoEscapeBody). restoreCurrentCostCentre :: Maybe VirtualSpOffset -> Bool -> Code restoreCurrentCostCentre Nothing _freeit = nopC restoreCurrentCostCentre (Just slot) freeit - = getSpRelOffset slot `thenFC` \ sp_rel -> - (if freeit then freeStackSlots [slot] else nopC) `thenC` - absC (CCallProfCCMacro FSLIT("RESTORE_CCCS") [CVal sp_rel CostCentreRep]) - -- we use the RESTORE_CCCS macro, rather than just - -- assigning into CurCostCentre, in case RESTORE_CCCS - -- has some sanity-checking in it. 
-\end{code} - -%************************************************************************ -%* * -\subsection[CgCase-return-vec]{Building a return vector} -%* * -%************************************************************************ - -Build a return vector, and return a suitable label addressing -mode for it. - -\begin{code} -mkRetDirectTarget :: Id -- Used for labelling only - -> AbstractC -- Return code - -> SRT -- Live CAFs in return code - -> FCode CAddrMode -- Emit the labelled return block, - -- and return its label -mkRetDirectTarget bndr abs_c srt - = buildContLivenessMask bndr `thenFC` \ liveness -> - getSRTInfo name srt `thenFC` \ srt_info -> - absC (CRetDirect uniq abs_c srt_info liveness) `thenC` - return lbl - where - name = idName bndr - uniq = getUnique name - lbl = CLbl (mkReturnInfoLabel uniq) RetRep + = do { sp_rel <- getSpRelOffset slot + ; whenC freeit (freeStackSlots [slot]) + ; stmtC (CmmStore curCCSAddr (CmmLoad sp_rel wordRep)) } \end{code} -\begin{code} -mkRetVecTarget :: Id -- Just for its unique - -> [(AltCon, AbstractC)] -- Branch codes - -> SRT -- Continuation's SRT - -> CtrlReturnConvention - -> FCode CAddrMode - -mkRetVecTarget bndr tagged_alt_absCs srt (UnvectoredReturn 0) - = ASSERT( null other_alts ) - mkRetDirectTarget bndr deflt_absC srt - where - ((DEFAULT, deflt_absC) : other_alts) = tagged_alt_absCs - -mkRetVecTarget bndr tagged_alt_absCs srt (UnvectoredReturn n) - = mkRetDirectTarget bndr switch_absC srt - where - -- Find the tag explicitly rather than using tag_reg for now. - -- on architectures with lots of regs the tag will be loaded - -- into tag_reg by the code doing the returning. 
- tag = CMacroExpr WordRep GET_TAG [CVal (nodeRel 0) DataPtrRep] - switch_absC = mkAlgAltsCSwitch tag tagged_alt_absCs - - -mkRetVecTarget bndr tagged_alt_absCs srt (VectoredReturn table_size) - = buildContLivenessMask bndr `thenFC` \ liveness -> - getSRTInfo name srt `thenFC` \ srt_info -> - let - ret_vector = CRetVector vtbl_lbl vector_table srt_info liveness - in - absC (mkAbstractCs alts_absCs `mkAbsCStmts` ret_vector) `thenC` - -- Alts come first, because we don't want to declare all the symbols - - return (CLbl vtbl_lbl DataPtrRep) - where - tags = [fIRST_TAG .. (table_size+fIRST_TAG-1)] - vector_table = map mk_vector_entry tags - alts_absCs = map snd (sortBy cmp tagged_alt_absCs) - -- The sort is unnecessary; just there for now - -- to make the new order the same as the old - (DEFAULT,_) `cmp` (DEFAULT,_) = EQ - (DEFAULT,_) `cmp` _ = GT - (DataAlt d1,_) `cmp` (DataAlt d2,_) = dataConTag d1 `compare` dataConTag d2 - (DataAlt d1,_) `cmp` (DEFAULT, _) = LT - -- Others impossible - - name = idName bndr - uniq = getUnique name - vtbl_lbl = mkVecTblLabel uniq - - deflt_lbl :: CAddrMode - deflt_lbl = case tagged_alt_absCs of - (DEFAULT, abs_c) : _ -> get_block_label abs_c - other -> mkIntCLit 0 - -- 'other' case: the simplifier might have eliminated a case - -- so we may have e.g. 
case xs of - -- [] -> e - -- In that situation the default should never be taken, - -- so we just use '0' (=> seg fault if used) - - mk_vector_entry :: ConTag -> CAddrMode - mk_vector_entry tag - = case [ absC | (DataAlt d, absC) <- tagged_alt_absCs, dataConTag d == tag ] of - -- The comprehension neatly, and correctly, ignores the DEFAULT - [] -> deflt_lbl - [abs_c] -> get_block_label abs_c - _ -> panic "mkReturnVector: too many" - - get_block_label (CCodeBlock lbl _) = CLbl lbl CodePtrRep -\end{code} diff --git a/ghc/compiler/codeGen/CgClosure.lhs b/ghc/compiler/codeGen/CgClosure.lhs index 6e77dc7853..dc5e9eae35 100644 --- a/ghc/compiler/codeGen/CgClosure.lhs +++ b/ghc/compiler/codeGen/CgClosure.lhs @@ -1,7 +1,7 @@ % % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 % -% $Id: CgClosure.lhs,v 1.62 2003/11/17 14:23:31 simonmar Exp $ +% $Id: CgClosure.lhs,v 1.63 2004/08/13 13:05:54 simonmar Exp $ % \section[CgClosure]{Code generation for closures} @@ -13,6 +13,7 @@ with {\em closures} on the RHSs of let(rec)s. 
See also module CgClosure ( cgTopRhsClosure, cgStdRhsClosure, cgRhsClosure, + emitBlackHoleCode, ) where #include "HsVersions.h" @@ -21,37 +22,38 @@ import {-# SOURCE #-} CgExpr ( cgExpr ) import CgMonad import CgBindery -import CgUpdate ( pushUpdateFrame ) import CgHeapery -import CgStackery -import CgUsages +import CgStackery ( mkVirtStkOffsets, pushUpdateFrame, getVirtSp, + setRealAndVirtualSp ) +import CgProf ( chooseDynCostCentres, ldvEnter, enterCostCentre, + costCentreFrom ) +import CgTicky +import CgParallel ( granYield, granFetchAndReschedule ) +import CgInfoTbls ( emitClosureCodeAndInfoTable, getSRTInfo ) +import CgCallConv ( assignCallRegs, mkArgDescr ) +import CgUtils ( emitDataLits, addIdReps, cmmRegOffW, + emitRtsCallWithVols ) import ClosureInfo -- lots and lots of stuff - -import AbsCUtils ( getAmodeRep, mkAbstractCs ) -import AbsCSyn -import CLabel - +import SMRep ( CgRep, cgRepSizeW, argMachRep, fixedHdrSize, WordOff, + idCgRep ) +import MachOp ( MachHint(..) ) +import Cmm +import CmmUtils ( CmmStmts, mkStmts, oneStmt, plusStmts, noStmts, + mkLblExpr ) +import CLabel ( mkRtsDataLabel, mkClosureLabel, mkRednCountsLabel, + mkSlowEntryLabel, mkIndStaticInfoLabel ) import StgSyn -import CmdLineOpts ( opt_GranMacros, opt_SccProfilingOn, opt_DoTickyProfiling ) +import CmdLineOpts ( opt_DoTickyProfiling ) import CostCentre -import Id ( Id, idName, idType, idPrimRep ) -import Name ( Name, isInternalName ) +import Id ( Id, idName, idType ) +import Name ( Name ) import Module ( Module, pprModule ) import ListSetOps ( minusList ) -import PrimRep ( PrimRep(..), getPrimRepSize ) -import Util ( isIn, splitAtList ) -import CmdLineOpts ( opt_SccProfilingOn ) +import Util ( isIn, mapAccumL, zipWithEqual ) +import BasicTypes ( TopLevelFlag(..) 
) +import Constants ( oFFSET_StgInd_indirectee, wORD_SIZE ) import Outputable import FastString - -import Name ( nameOccName ) -import OccName ( occNameFS ) - --- Turgid imports for showTypeCategory -import PrelNames -import TcType ( Type, isDictTy, tcSplitTyConApp_maybe, tcSplitFunTy_maybe ) -import TyCon ( isPrimTyCon, isTupleTyCon, isEnumerationTyCon, maybeTyConSingleCon ) -import Maybe \end{code} %******************************************************** @@ -68,45 +70,29 @@ cgTopRhsClosure :: Id -> CostCentreStack -- Optional cost centre annotation -> StgBinderInfo -> SRT + -> UpdateFlag -> [Id] -- Args -> StgExpr - -> LambdaFormInfo -> FCode (Id, CgIdInfo) -cgTopRhsClosure id ccs binder_info srt args body lf_info - = - let - name = idName id - in - -- LAY OUT THE OBJECT - getSRTInfo name srt `thenFC` \ srt_info -> - moduleName `thenFC` \ mod_name -> - let - name = idName id - descr = closureDescription mod_name name - closure_info = layOutStaticNoFVClosure id lf_info srt_info descr +cgTopRhsClosure id ccs binder_info srt upd_flag args body = do + { -- LAY OUT THE OBJECT + let name = idName id + ; lf_info <- mkClosureLFInfo id TopLevel [] upd_flag args + ; srt_info <- getSRTInfo name srt + ; mod_name <- moduleName + ; let descr = closureDescription mod_name name + closure_info = mkClosureInfo True id lf_info 0 0 srt_info descr closure_label = mkClosureLabel name - cg_id_info = stableAmodeIdInfo id (CLbl closure_label PtrRep) lf_info - in - - -- BUILD THE OBJECT (IF NECESSARY) - ( - ({- if staticClosureRequired name binder_info lf_info - then -} - absC (mkStaticClosure closure_label closure_info ccs [] True) - {- else - nopC -} - ) - `thenC` - - -- GENERATE THE INFO TABLE (IF NECESSARY) - forkClosureBody (closureCodeBody binder_info closure_info - ccs args body) + cg_id_info = stableIdInfo id (mkLblExpr closure_label) lf_info + closure_rep = mkStaticClosureFields closure_info ccs True [] - ) `thenC` - - returnFC (id, cg_id_info) + -- BUILD THE OBJECT, AND 
GENERATE INFO TABLE (IF NECESSARY) + ; emitDataLits closure_label closure_rep + ; forkClosureBody (closureCodeBody binder_info closure_info + ccs args body) + ; returnFC (id, cg_id_info) } \end{code} %******************************************************** @@ -129,29 +115,26 @@ cgStdRhsClosure -> [StgArg] -- payload -> FCode (Id, CgIdInfo) -cgStdRhsClosure binder cc binder_info fvs args body lf_info payload - -- AHA! A STANDARD-FORM THUNK - = ( - -- LAY OUT THE OBJECT - getArgAmodes payload `thenFC` \ amodes -> - moduleName `thenFC` \ mod_name -> - let - descr = closureDescription mod_name (idName binder) - - (closure_info, amodes_w_offsets) - = layOutDynClosure binder getAmodeRep amodes lf_info NoC_SRT descr - -- No SRT for a standard-form closure - - (use_cc, blame_cc) = chooseDynCostCentres cc args fvs body - in +cgStdRhsClosure bndr cc bndr_info fvs args body lf_info payload + = do -- AHA! A STANDARD-FORM THUNK + { -- LAY OUT THE OBJECT + amodes <- getArgAmodes payload + ; mod_name <- moduleName + ; let (tot_wds, ptr_wds, amodes_w_offsets) = mkVirtHeapOffsets amodes + + descr = closureDescription mod_name (idName bndr) + closure_info = mkClosureInfo False -- Not static + bndr lf_info tot_wds ptr_wds + NoC_SRT -- No SRT for a std-form closure + descr + + ; (use_cc, blame_cc) <- chooseDynCostCentres cc args body -- BUILD THE OBJECT - allocDynClosure closure_info use_cc blame_cc amodes_w_offsets - ) - `thenFC` \ heap_offset -> + ; heap_offset <- allocDynClosure closure_info use_cc blame_cc amodes_w_offsets -- RETURN - returnFC (binder, heapIdInfo binder heap_offset lf_info) + ; returnFC (bndr, heapIdInfo bndr heap_offset lf_info) } \end{code} Here's the general case. 
@@ -162,15 +145,13 @@ cgRhsClosure :: Id -> StgBinderInfo -> SRT -> [Id] -- Free vars + -> UpdateFlag -> [Id] -- Args -> StgExpr - -> LambdaFormInfo -> FCode (Id, CgIdInfo) -cgRhsClosure binder cc binder_info srt fvs args body lf_info - = ( - -- LAY OUT THE OBJECT - -- +cgRhsClosure bndr cc bndr_info srt fvs upd_flag args body = do + { -- LAY OUT THE OBJECT -- If the binder is itself a free variable, then don't store -- it in the closure. Instead, just bind it to Node on entry. -- NB we can be sure that Node will point to it, because we @@ -179,62 +160,63 @@ cgRhsClosure binder cc binder_info srt fvs args body lf_info -- stored in the closure itself, so it will make sure that -- Node points to it... let - is_elem = isIn "cgRhsClosure" - - binder_is_a_fv = binder `is_elem` fvs - reduced_fvs = if binder_is_a_fv - then fvs `minusList` [binder] - else fvs - - name = idName binder - in - - mapFCs getCAddrModeAndInfo reduced_fvs `thenFC` \ fvs_w_amodes_and_info -> - getSRTInfo name srt `thenFC` \ srt_info -> - moduleName `thenFC` \ mod_name -> - let - descr = closureDescription mod_name (idName binder) - - closure_info :: ClosureInfo - bind_details :: [((Id, CAddrMode, LambdaFormInfo), VirtualHeapOffset)] - - (closure_info, bind_details) - = layOutDynClosure binder get_kind - fvs_w_amodes_and_info lf_info srt_info descr - - bind_fv ((id, _, lf_info), offset) = bindNewToNode id offset lf_info - - amodes_w_offsets = [(amode,offset) | ((_,amode,_), offset) <- bind_details] - - get_kind (id, _, _) = idPrimRep id - in + name = idName bndr + is_elem = isIn "cgRhsClosure" + bndr_is_a_fv = bndr `is_elem` fvs + reduced_fvs | bndr_is_a_fv = fvs `minusList` [bndr] + | otherwise = fvs + + ; lf_info <- mkClosureLFInfo bndr NotTopLevel fvs upd_flag args + ; fv_infos <- mapFCs getCgIdInfo reduced_fvs + ; srt_info <- getSRTInfo name srt + ; mod_name <- moduleName + ; let bind_details :: [(CgIdInfo, VirtualHpOffset)] + (tot_wds, ptr_wds, bind_details) = mkVirtHeapOffsets (map add_rep 
fv_infos) + add_rep info = (cgIdInfoArgRep info, info) + + descr = closureDescription mod_name name + closure_info = mkClosureInfo False -- Not static + bndr lf_info tot_wds ptr_wds + srt_info descr -- BUILD ITS INFO TABLE AND CODE - forkClosureBody ( - -- Bind the fvs - mapCs bind_fv bind_details `thenC` + ; forkClosureBody (do + { -- Bind the fvs + let bind_fv (info, offset) + = bindNewToNode (cgIdInfoId info) offset (cgIdInfoLF info) + ; mapCs bind_fv bind_details -- Bind the binder itself, if it is a free var - (if binder_is_a_fv then - bindNewToReg binder node lf_info - else - nopC) `thenC` - + ; whenC bndr_is_a_fv (bindNewToReg bndr nodeReg lf_info) + -- Compile the body - closureCodeBody binder_info closure_info cc args body - ) `thenC` + ; closureCodeBody bndr_info closure_info cc args body }) -- BUILD THE OBJECT - let - (use_cc, blame_cc) = chooseDynCostCentres cc args fvs body - in - allocDynClosure closure_info use_cc blame_cc amodes_w_offsets - ) `thenFC` \ heap_offset -> + ; let + to_amode (info, offset) = do { amode <- idInfoToAmode info + ; return (amode, offset) } + ; (use_cc, blame_cc) <- chooseDynCostCentres cc args body + ; amodes_w_offsets <- mapFCs to_amode bind_details + ; heap_offset <- allocDynClosure closure_info use_cc blame_cc amodes_w_offsets -- RETURN - returnFC (binder, heapIdInfo binder heap_offset lf_info) + ; returnFC (bndr, heapIdInfo bndr heap_offset lf_info) } + + +mkClosureLFInfo :: Id -- The binder + -> TopLevelFlag -- True of top level + -> [Id] -- Free vars + -> UpdateFlag -- Update flag + -> [Id] -- Args + -> FCode LambdaFormInfo +mkClosureLFInfo bndr top fvs upd_flag args + | null args = return (mkLFThunk (idType bndr) top fvs upd_flag) + | otherwise = do { arg_descr <- mkArgDescr (idName bndr) args + ; return (mkLFReEntrant top fvs args arg_descr) } \end{code} + %************************************************************************ %* * \subsection[code-for-closures]{The code for closures} @@ -253,32 +235,23 @@ 
closureCodeBody :: StgBinderInfo There are two main cases for the code for closures. If there are {\em no arguments}, then the closure is a thunk, and not in normal form. So it should set up an update frame (if it is shared). +NB: Thunks cannot have a primitive type! \begin{code} -closureCodeBody binder_info closure_info cc [] body - = -- thunks cannot have a primitive type! - getAbsC body_code `thenFC` \ body_absC -> - - absC (CClosureInfoAndCode closure_info body_absC) - where - is_box = case body of { StgApp fun [] -> True; _ -> False } - - ticky_ent_lit = if (isStaticClosure closure_info) - then FSLIT("TICK_ENT_STATIC_THK") - else FSLIT("TICK_ENT_DYN_THK") - - body_code = profCtrC ticky_ent_lit [] `thenC` - -- node always points when profiling, so this is ok: - ldvEnter `thenC` - thunkWrapper closure_info ( - -- We only enter cc after setting up update so - -- that cc of enclosing scope will be recorded - -- in update frame CAF/DICT functions will be - -- subsumed by this enclosing cc - enterCostCentreCode closure_info cc IsThunk is_box `thenC` - cgExpr body - ) - +closureCodeBody binder_info cl_info cc [] body = do + { body_absC <- getCgStmts $ do + { tickyEnterThunk cl_info + ; ldvEnter (CmmReg nodeReg) -- NB: Node always points when profiling + ; thunkWrapper cl_info $ do + -- We only enter cc after setting up update so + -- that cc of enclosing scope will be recorded + -- in update frame CAF/DICT functions will be + -- subsumed by this enclosing cc + { enterCostCentre cl_info cc body + ; cgExpr body } + } + + ; emitClosureCodeAndInfoTable cl_info [] body_absC } \end{code} If there is /at least one argument/, then this closure is in @@ -289,105 +262,60 @@ argSatisfactionCheck (by calling fetchAndReschedule). There info if Node points to closure is available. 
-- HWL \begin{code} -closureCodeBody binder_info closure_info cc all_args body - = let arg_reps = map idPrimRep all_args in - - getEntryConvention name lf_info arg_reps `thenFC` \ entry_conv -> - - let - -- Arg mapping for the entry point; as many args as poss in - -- registers; the rest on the stack - -- arg_regs are the registers used for arg passing - -- stk_args are the args which are passed on the stack - -- - -- Args passed on the stack are not tagged. - -- - arg_regs = case entry_conv of - DirectEntry lbl arity regs -> regs - _ -> panic "closureCodeBody" - in - - -- If this function doesn't have a specialised ArgDescr, we need - -- to generate the function's arg bitmap, slow-entry code, and - -- register-save code for the heap-check failure - -- - (case closureFunInfo closure_info of - Just (_, ArgGen slow_lbl liveness) -> - absC (maybeLargeBitmap liveness) `thenC` - absC (mkSlowEntryCode name slow_lbl arg_regs arg_reps) `thenC` - returnFC (mkRegSaveCode arg_regs arg_reps) - - other -> returnFC AbsCNop - ) - `thenFC` \ reg_save_code -> - - -- get the current virtual Sp (it might not be zero, eg. if we're - -- compiling a let-no-escape). - getVirtSp `thenFC` \vSp -> - - let - (reg_args, stk_args) = splitAtList arg_regs all_args - - (sp_stk_args, stk_offsets) - = mkVirtStkOffsets vSp idPrimRep stk_args - - entry_code = do - mod_name <- moduleName - profCtrC FSLIT("TICK_CTR") [ - CLbl ticky_ctr_label DataPtrRep, - mkCString (mkFastString (ppr_for_ticky_name mod_name name)), - mkIntCLit stg_arity, -- total # of args - mkIntCLit sp_stk_args, -- # passed on stk - mkCString (mkFastString (map (showTypeCategory . idType) all_args)) - ] - let prof = - profCtrC ticky_ent_lit [ - CLbl ticky_ctr_label DataPtrRep - ] - - -- Bind args to regs/stack as appropriate, and - -- record expected position of sps. 
- bindArgsToRegs reg_args arg_regs - mapCs bindNewToStack stk_offsets - setRealAndVirtualSp sp_stk_args - - -- Enter the closures cc, if required - enterCostCentreCode closure_info cc IsFunction False - - -- Do the business - funWrapper closure_info arg_regs reg_save_code - (prof >> cgExpr body) - in - - setTickyCtrLabel ticky_ctr_label ( - - forkAbsC entry_code `thenFC` \ entry_abs_c -> - moduleName `thenFC` \ mod_name -> - - -- Now construct the info table - absC (CClosureInfoAndCode closure_info entry_abs_c) - ) - where - ticky_ctr_label = mkRednCountsLabel name - - ticky_ent_lit = - if (isStaticClosure closure_info) - then FSLIT("TICK_ENT_STATIC_FUN_DIRECT") - else FSLIT("TICK_ENT_DYN_FUN_DIRECT") - - stg_arity = length all_args - lf_info = closureLFInfo closure_info - - -- Manufacture labels - name = closureName closure_info - - --- When printing the name of a thing in a ticky file, we want to --- give the module name even for *local* things. We print --- just "x (M)" rather that "M.x" to distinguish them from the global kind. -ppr_for_ticky_name mod_name name - | isInternalName name = showSDocDebug (ppr name <+> (parens (ppr mod_name))) - | otherwise = showSDocDebug (ppr name) +closureCodeBody binder_info cl_info cc args body = do + { -- Get the current virtual Sp (it might not be zero, + -- eg. if we're compiling a let-no-escape). 
+ vSp <- getVirtSp + ; let (reg_args, other_args) = assignCallRegs (addIdReps args) + (sp_top, stk_args) = mkVirtStkOffsets vSp other_args + + -- Allocate the global ticky counter + ; let ticky_ctr_lbl = mkRednCountsLabel (closureName cl_info) + ; emitTickyCounter cl_info args sp_top + + -- ...and establish the ticky-counter + -- label for this block + ; setTickyCtrLabel ticky_ctr_lbl $ do + + -- Emit the slow-entry code + { reg_save_code <- mkSlowEntryCode cl_info reg_args + + -- Emit the main entry code + ; blks <- forkProc $ + mkFunEntryCode cl_info cc reg_args stk_args + sp_top reg_save_code body + ; emitClosureCodeAndInfoTable cl_info [] blks + }} + + + +mkFunEntryCode :: ClosureInfo + -> CostCentreStack + -> [(Id,GlobalReg)] -- Args in regs + -> [(Id,VirtualSpOffset)] -- Args on stack + -> VirtualSpOffset -- Last allocated word on stack + -> CmmStmts -- Register-save code in case of GC + -> StgExpr + -> Code +-- The main entry code for the closure +mkFunEntryCode cl_info cc reg_args stk_args sp_top reg_save_code body = do + { -- Bind args to regs/stack as appropriate, + -- and record expected position of sps + ; bindArgsToRegs reg_args + ; bindArgsToStack stk_args + ; setRealAndVirtualSp sp_top + + -- Enter the cost-centre, if required + -- ToDo: It's not clear why this is outside the funWrapper, + -- but the tickyEnterFun is inside. Perhaps we can put + -- them together? + ; enterCostCentre cl_info cc body + + -- Do the business + ; funWrapper cl_info reg_args reg_save_code $ do + { tickyEnterFun cl_info + ; cgExpr body } + } \end{code} The "slow entry" code for a function. 
This entry point takes its @@ -402,84 +330,45 @@ The slow entry point is used in two places: (b) returning from a heap-check failure \begin{code} -mkSlowEntryCode :: Name -> CLabel -> [MagicId] -> [PrimRep] -> AbstractC -mkSlowEntryCode name lbl regs reps - = CCodeBlock lbl ( - mkAbstractCs [assts, stk_adj, jump] - ) +mkSlowEntryCode :: ClosureInfo -> [(Id,GlobalReg)] -> FCode CmmStmts +-- If this function doesn't have a specialised ArgDescr, we need +-- to generate the function's arg bitmap, slow-entry code, and +-- register-save code for the heap-check failure +-- Here, we emit the slow-entry code, and +-- return the register-save assignments +mkSlowEntryCode cl_info reg_args + | Just (_, ArgGen _) <- closureFunInfo cl_info + = do { emitSimpleProc slow_lbl (emitStmts load_stmts) + ; return save_stmts } + | otherwise = return noStmts where - stk_offsets = scanl (\off rep -> off - getPrimRepSize rep) 0 reps - - assts = mkAbstractCs (zipWith3 mk_asst reps regs stk_offsets) - mk_asst rep reg offset = CAssign (CReg reg) (CVal (spRel 0 offset) rep) - - stk_adj = CAssign (CReg Sp) (CAddr (spRel 0 stk_final_offset)) - stk_final_offset = head (drop (length regs) stk_offsets) - - jump = CJump (CLbl (mkEntryLabel name) CodePtrRep) - -mkRegSaveCode :: [MagicId] -> [PrimRep] -> AbstractC -mkRegSaveCode regs reps - = mkAbstractCs [stk_adj, assts] - where - stk_adj = CAssign (CReg Sp) (CAddr (spRel 0 (negate stk_final_offset))) - - stk_final_offset = head (drop (length regs) stk_offsets) - stk_offsets = scanl (\off rep -> off - getPrimRepSize rep) 0 reps - - assts = mkAbstractCs (zipWith3 mk_asst reps regs stk_offsets) - mk_asst rep reg offset = CAssign (CVal (spRel 0 offset) rep) (CReg reg) + name = closureName cl_info + slow_lbl = mkSlowEntryLabel name + + load_stmts = mkStmts load_assts `plusStmts` mkStmts [stk_adj_pop, jump_to_entry] + save_stmts = oneStmt stk_adj_push `plusStmts` mkStmts save_assts + + reps_w_regs :: [(CgRep,GlobalReg)] + reps_w_regs = [(idCgRep id, reg) | 
(id,reg) <- reverse reg_args] + (final_stk_offset, stk_offsets) + = mapAccumL (\off (rep,_) -> (off + cgRepSizeW rep, off)) + 0 reps_w_regs + + load_assts = zipWithEqual "mk_load" mk_load reps_w_regs stk_offsets + mk_load (rep,reg) offset = CmmAssign (CmmGlobal reg) + (CmmLoad (cmmRegOffW spReg offset) + (argMachRep rep)) + + save_assts = zipWithEqual "mk_save" mk_save reps_w_regs stk_offsets + mk_save (rep,reg) offset = ASSERT( argMachRep rep == globalRegRep reg ) + CmmStore (cmmRegOffW spReg offset) + (CmmReg (CmmGlobal reg)) + + stk_adj_pop = CmmAssign spReg (cmmRegOffW spReg final_stk_offset) + stk_adj_push = CmmAssign spReg (cmmRegOffW spReg (- final_stk_offset)) + jump_to_entry = CmmJump (mkLblExpr (enterIdLabel name)) [] \end{code} -For lexically scoped profiling we have to load the cost centre from -the closure entered, if the costs are not supposed to be inherited. -This is done immediately on entering the fast entry point. - -Load current cost centre from closure, if not inherited. -Node is guaranteed to point to it, if profiling and not inherited. 
- -\begin{code} -data IsThunk = IsThunk | IsFunction -- Bool-like, local --- #ifdef DEBUG - deriving Eq --- #endif - -enterCostCentreCode - :: ClosureInfo -> CostCentreStack - -> IsThunk - -> Bool -- is_box: this closure is a special box introduced by SCCfinal - -> Code - -enterCostCentreCode closure_info ccs is_thunk is_box - = if not opt_SccProfilingOn then - nopC - else - ASSERT2(not (noCCSAttached ccs), ppr (closureName closure_info) <+> ppr ccs) - - if isSubsumedCCS ccs then - ASSERT(isToplevClosure closure_info) - ASSERT(is_thunk == IsFunction) - costCentresC FSLIT("ENTER_CCS_FSUB") [] - - else if isDerivedFromCurrentCCS ccs then - if re_entrant && not is_box - then costCentresC FSLIT("ENTER_CCS_FCL") [CReg node] - else costCentresC FSLIT("ENTER_CCS_TCL") [CReg node] - - else if isCafCCS ccs then - ASSERT(isToplevClosure closure_info) - ASSERT(is_thunk == IsThunk) - -- might be a PAP, in which case we want to subsume costs - if re_entrant - then costCentresC FSLIT("ENTER_CCS_FSUB") [] - else costCentresC FSLIT("ENTER_CCS_CAF") c_ccs - - else panic "enterCostCentreCode" - - where - c_ccs = [mkCCostCentreStack ccs] - re_entrant = closureReEntrant closure_info -\end{code} %************************************************************************ %* * @@ -489,62 +378,42 @@ enterCostCentreCode closure_info ccs is_thunk is_box \begin{code} thunkWrapper:: ClosureInfo -> Code -> Code -thunkWrapper closure_info thunk_code - = -- Stack and heap overflow checks - nodeMustPointToIt (closureLFInfo closure_info) `thenFC` \ node_points -> +thunkWrapper closure_info thunk_code = do + { let node_points = nodeMustPointToIt (closureLFInfo closure_info) -- HWL: insert macros for GrAnSim; 2 versions depending on liveness of node -- (we prefer fetchAndReschedule-style context switches to yield ones) - (if opt_GranMacros - then if node_points - then fetchAndReschedule [] node_points - else yield [] node_points - else absC AbsCNop) `thenC` - - let closure_lbl - | node_points = 
Nothing - | otherwise = Just (closureLabelFromCI closure_info) - in - - -- stack and/or heap checks - thunkChecks closure_lbl ( - - -- Overwrite with black hole if necessary - blackHoleIt closure_info node_points `thenC` - - setupUpdate closure_info ( -- setupUpdate *encloses* the rest - - -- Finally, do the business - thunk_code - )) + ; if node_points + then granFetchAndReschedule [] node_points + else granYield [] node_points + + -- Stack and/or heap checks + ; thunkEntryChecks closure_info $ do + { -- Overwrite with black hole if necessary + whenC (blackHoleOnEntry closure_info && node_points) + (blackHoleIt closure_info) + ; setupUpdate closure_info thunk_code } + -- setupUpdate *encloses* the thunk_code + } funWrapper :: ClosureInfo -- Closure whose code body this is - -> [MagicId] -- List of argument registers (if any) - -> AbstractC -- reg saves for the heap check failure + -> [(Id,GlobalReg)] -- List of argument registers (if any) + -> CmmStmts -- reg saves for the heap check failure -> Code -- Body of function being compiled -> Code -funWrapper closure_info arg_regs reg_save_code fun_body - = -- Stack overflow check - nodeMustPointToIt (closureLFInfo closure_info) `thenFC` \ node_points -> - - -- enter for Ldv profiling - (if node_points then ldvEnter else nopC) `thenC` +funWrapper closure_info arg_regs reg_save_code fun_body = do + { let node_points = nodeMustPointToIt (closureLFInfo closure_info) - (if opt_GranMacros - then yield arg_regs node_points - else absC AbsCNop) `thenC` + -- Enter for Ldv profiling + ; whenC node_points (ldvEnter (CmmReg nodeReg)) - let closure_lbl - | node_points = Nothing - | otherwise = Just (closureLabelFromCI closure_info) - in + -- GranSim yeild poin + ; granYield arg_regs node_points - -- heap and/or stack checks - funEntryChecks closure_lbl reg_save_code ( - - -- Finally, do the business - fun_body - ) + -- Heap and/or stack checks wrap the function body + ; funEntryChecks closure_info reg_save_code + fun_body + } 
\end{code} @@ -556,78 +425,150 @@ funWrapper closure_info arg_regs reg_save_code fun_body \begin{code} -blackHoleIt :: ClosureInfo -> Bool -> Code -- Only called for closures with no args - -blackHoleIt closure_info node_points - = if blackHoleOnEntry closure_info && node_points - then - let - info_label = infoTableLabelFromCI closure_info - args = [ CLbl info_label DataPtrRep ] - in - absC (if closureSingleEntry(closure_info) then - CMacroStmt UPD_BH_SINGLE_ENTRY args - else - CMacroStmt UPD_BH_UPDATABLE args) - else +blackHoleIt :: ClosureInfo -> Code +-- Only called for closures with no args +-- Node points to the closure +blackHoleIt closure_info = emitBlackHoleCode (closureSingleEntry closure_info) + +emitBlackHoleCode :: Bool -> Code +emitBlackHoleCode is_single_entry + | eager_blackholing = do + tickyBlackHole (not is_single_entry) + stmtC (CmmStore (CmmReg nodeReg) (CmmLit (CmmLabel bh_lbl))) + | otherwise = nopC + where + bh_lbl | is_single_entry = mkRtsDataLabel SLIT("stg_SE_BLACKHOLE_info") + | otherwise = mkRtsDataLabel SLIT("stg_BLACKHOLE_info") + + -- If we wanted to do eager blackholing with slop filling, + -- we'd need to do it at the *end* of a basic block, otherwise + -- we overwrite the free variables in the thunk that we still + -- need. We have a patch for this from Andy Cheadle, but not + -- incorporated yet. --SDM [6/2004] + -- + -- Profiling needs slop filling (to support LDV profiling), so + -- currently eager blackholing doesn't work with profiling. + -- + -- TICKY_TICKY needs EAGER_BLACKHOLING to verify no double-entries of + -- single-entry thunks. 
+ eager_blackholing + | opt_DoTickyProfiling = True + | otherwise = False + \end{code} \begin{code} setupUpdate :: ClosureInfo -> Code -> Code -- Only called for closures with no args -- Nota Bene: this function does not change Node (even if it's a CAF), -- so that the cost centre in the original closure can still be - -- extracted by a subsequent ENTER_CC_TCL - --- I've tidied up the code for this function, but it should still do the same as --- it did before (modulo ticky stuff). KSW 1999-04. + -- extracted by a subsequent enterCostCentre setupUpdate closure_info code - = if closureReEntrant closure_info - then - code - else - case (closureUpdReqd closure_info, isStaticClosure closure_info) of - (False,False) -> profCtrC FSLIT("TICK_UPDF_OMITTED") [] `thenC` - code - (False,True ) -> (if opt_DoTickyProfiling - then - -- blackhole the SE CAF - link_caf seCafBlackHoleClosureInfo `thenFC` \ _ -> nopC - else - nopC) `thenC` - profCtrC FSLIT("TICK_UPD_CAF_BH_SINGLE_ENTRY") [mkCString cl_name] `thenC` - profCtrC FSLIT("TICK_UPDF_OMITTED") [] `thenC` - code - (True ,False) -> pushUpdateFrame (CReg node) code - (True ,True ) -> -- blackhole the (updatable) CAF: - link_caf cafBlackHoleClosureInfo `thenFC` \ update_closure -> - profCtrC FSLIT("TICK_UPD_CAF_BH_UPDATABLE") [mkCString cl_name] `thenC` - pushUpdateFrame update_closure code - where - cl_name :: FastString - cl_name = (occNameFS . nameOccName . closureName) closure_info - - link_caf :: (ClosureInfo -> ClosureInfo) -- function yielding BH closure_info - -> FCode CAddrMode -- Returns amode for closure to be updated - link_caf bhCI - = -- To update a CAF we must allocate a black hole, link the CAF onto the - -- CAF list, then update the CAF to point to the fresh black hole. - -- This function returns the address of the black hole, so it can be - -- updated with the new value when available. 
- - -- Alloc black hole specifying CC_HDR(Node) as the cost centre - let - use_cc = CMacroExpr PtrRep CCS_HDR [nodeReg] - blame_cc = use_cc - in - allocDynClosure (bhCI closure_info) use_cc blame_cc [] `thenFC` \ heap_offset -> - getHpRelOffset heap_offset `thenFC` \ hp_rel -> - let amode = CAddr hp_rel - in - absC (CMacroStmt UPD_CAF [CReg node, amode]) `thenC` - returnFC amode + | closureReEntrant closure_info + = code + + | not (isStaticClosure closure_info) + = if closureUpdReqd closure_info + then do { tickyPushUpdateFrame; pushUpdateFrame (CmmReg nodeReg) code } + else do { tickyUpdateFrameOmitted; code } + + | otherwise -- A static closure + = do { tickyUpdateBhCaf closure_info + + ; if closureUpdReqd closure_info + then do -- Blackhole the (updatable) CAF: + { upd_closure <- link_caf closure_info True + ; pushUpdateFrame upd_closure code } + else do + { -- No update reqd, you'd think we don't need to + -- black-hole it. But when ticky-ticky is on, we + -- black-hole it regardless, to catch errors in which + -- an allegedly single-entry closure is entered twice + -- + -- We discard the pointer returned by link_caf, because + -- we don't push an update frame + whenC opt_DoTickyProfiling -- Blackhole even a SE CAF + (link_caf closure_info False >> nopC) + ; tickyUpdateFrameOmitted + ; code } + } + + +----------------------------------------------------------------------------- +-- Entering a CAF +-- +-- When a CAF is first entered, it creates a black hole in the heap, +-- and updates itself with an indirection to this new black hole. +-- +-- We update the CAF with an indirection to a newly-allocated black +-- hole in the heap. We also set the blocking queue on the newly +-- allocated black hole to be empty. +-- +-- Why do we make a black hole in the heap when we enter a CAF? 
+-- +-- - for a generational garbage collector, which needs a fast +-- test for whether an updatee is in an old generation or not +-- +-- - for the parallel system, which can implement updates more +-- easily if the updatee is always in the heap. (allegedly). +-- +-- When debugging, we maintain a separate CAF list so we can tell when +-- a CAF has been garbage collected. + +-- newCAF must be called before the itbl ptr is overwritten, since +-- newCAF records the old itbl ptr in order to do CAF reverting +-- (which Hugs needs to do in order that combined mode works right.) +-- + +-- ToDo [Feb 04] This entire link_caf nonsense could all be moved +-- into the "newCAF" RTS procedure, which we call anyway, including +-- the allocation of the black-hole indirection closure. +-- That way, code size would fall, the CAF-handling code would +-- be closer together, and the compiler wouldn't need to know +-- about off_indirectee etc. + +link_caf :: ClosureInfo + -> Bool -- True <=> updatable, False <=> single-entry + -> FCode CmmExpr -- Returns amode for closure to be updated +-- To update a CAF we must allocate a black hole, link the CAF onto the +-- CAF list, then update the CAF to point to the fresh black hole. +-- This function returns the address of the black hole, so it can be +-- updated with the new value when available. The reason for all of this +-- is that we only want to update dynamic heap objects, not static ones, +-- so that generational GC is easier. 
+link_caf cl_info is_upd = do + { -- Alloc black hole specifying CC_HDR(Node) as the cost centre + ; let use_cc = costCentreFrom (CmmReg nodeReg) + blame_cc = use_cc + ; hp_offset <- allocDynClosure bh_cl_info use_cc blame_cc [] + ; hp_rel <- getHpRelOffset hp_offset + + -- Call the RTS function newCAF to add the CAF to the CafList + -- so that the garbage collector can find them + -- This must be done *before* the info table pointer is overwritten, + -- because the old info table ptr is needed for reversion + ; emitRtsCallWithVols SLIT("newCAF") [(CmmReg nodeReg,PtrHint)] [node] + -- node is live, so save it. + + -- Overwrite the closure with a (static) indirection + -- to the newly-allocated black hole + ; stmtsC [ CmmStore (cmmRegOffW nodeReg off_indirectee) hp_rel + , CmmStore (CmmReg nodeReg) ind_static_info ] + + ; returnFC hp_rel } + where + bh_cl_info :: ClosureInfo + bh_cl_info | is_upd = cafBlackHoleClosureInfo cl_info + | otherwise = seCafBlackHoleClosureInfo cl_info + + ind_static_info :: CmmExpr + ind_static_info = mkLblExpr mkIndStaticInfoLabel + + off_indirectee :: WordOff + off_indirectee = fixedHdrSize + oFFSET_StgInd_indirectee*wORD_SIZE \end{code} + %************************************************************************ %* * \subsection[CgClosure-Description]{Profiling Closure Description.} @@ -635,99 +576,17 @@ setupUpdate closure_info code %************************************************************************ For "global" data constructors the description is simply occurrence -name of the data constructor itself (see \ref{CgConTbls-info-tables}). - -Otherwise it is determind by @closureDescription@ from the let -binding information. +name of the data constructor itself. Otherwise it is determined by +@closureDescription@ from the let binding information. 
\begin{code} closureDescription :: Module -- Module -> Name -- Id of closure binding -> String - -- Not called for StgRhsCon which have global info tables built in -- CgConTbls.lhs with a description generated from the data constructor - closureDescription mod_name name - = showSDoc ( - hcat [char '<', - pprModule mod_name, - char '.', - ppr name, - char '>']) + = showSDoc (hcat [char '<', pprModule mod_name, + char '.', ppr name, char '>']) \end{code} -\begin{code} -chooseDynCostCentres ccs args fvs body - = let - use_cc -- cost-centre we record in the object - = if currentOrSubsumedCCS ccs - then CReg CurCostCentre - else mkCCostCentreStack ccs - - blame_cc -- cost-centre on whom we blame the allocation - = case (args, fvs, body) of - ([], _, StgApp fun [{-no args-}]) - -> mkCCostCentreStack overheadCCS - _ -> use_cc - - -- if it's an utterly trivial RHS, then it must be - -- one introduced by boxHigherOrderArgs for profiling, - -- so we charge it to "OVERHEAD". - - -- This looks like a HACK to me --SDM - in - (use_cc, blame_cc) -\end{code} - - -\begin{code} -showTypeCategory :: Type -> Char - {- - {C,I,F,D} char, int, float, double - T tuple - S other single-constructor type - {c,i,f,d} unboxed ditto - t *unpacked* tuple - s *unpacked" single-cons... - - v void# - a primitive array - - E enumeration type - + dictionary, unless it's a ... - L List - > function - M other (multi-constructor) data-con type - . other type - - reserved for others to mark as "uninteresting" - -} -showTypeCategory ty - = if isDictTy ty - then '+' - else - case tcSplitTyConApp_maybe ty of - Nothing -> if isJust (tcSplitFunTy_maybe ty) - then '>' - else '.' 
- - Just (tycon, _) -> - let utc = getUnique tycon in - if utc == charDataConKey then 'C' - else if utc == intDataConKey then 'I' - else if utc == floatDataConKey then 'F' - else if utc == doubleDataConKey then 'D' - else if utc == smallIntegerDataConKey || - utc == largeIntegerDataConKey then 'J' - else if utc == charPrimTyConKey then 'c' - else if (utc == intPrimTyConKey || utc == wordPrimTyConKey - || utc == addrPrimTyConKey) then 'i' - else if utc == floatPrimTyConKey then 'f' - else if utc == doublePrimTyConKey then 'd' - else if isPrimTyCon tycon {- array, we hope -} then 'A' -- Bogus - else if isEnumerationTyCon tycon then 'E' - else if isTupleTyCon tycon then 'T' - else if isJust (maybeTyConSingleCon tycon) then 'S' - else if utc == listTyConKey then 'L' - else 'M' -- oh, well... -\end{code} diff --git a/ghc/compiler/codeGen/CgCon.lhs b/ghc/compiler/codeGen/CgCon.lhs index 4b8e8c2bac..3cd67e4294 100644 --- a/ghc/compiler/codeGen/CgCon.lhs +++ b/ghc/compiler/codeGen/CgCon.lhs @@ -11,49 +11,53 @@ with {\em constructors} on the RHSs of let(rec)s. 
See also module CgCon ( cgTopRhsCon, buildDynCon, bindConArgs, bindUnboxedTupleComponents, - cgReturnDataCon + cgReturnDataCon, + cgTyCon ) where #include "HsVersions.h" import CgMonad -import AbsCSyn import StgSyn -import AbsCUtils ( getAmodeRep ) import CgBindery ( getArgAmodes, bindNewToNode, - bindArgsToRegs, - idInfoToAmode, stableAmodeIdInfo, - heapIdInfo, CgIdInfo, bindNewToStack + bindArgsToRegs, idInfoToAmode, stableIdInfo, + heapIdInfo, CgIdInfo, bindArgsToStack ) -import CgStackery ( mkVirtStkOffsets, freeStackSlots ) -import CgUsages ( getRealSp, getVirtSp, setRealAndVirtualSp ) -import CgRetConv ( assignRegs ) +import CgStackery ( mkVirtStkOffsets, freeStackSlots, + getRealSp, getVirtSp, setRealAndVirtualSp ) +import CgUtils ( addIdReps, cmmLabelOffW, emitRODataLits, emitDataLits ) +import CgCallConv ( assignReturnRegs ) import Constants ( mAX_INTLIKE, mIN_INTLIKE, mAX_CHARLIKE, mIN_CHARLIKE ) -import CgHeapery ( allocDynClosure ) -import CgTailCall ( performReturn, mkStaticAlgReturnCode, - returnUnboxedTuple ) -import CLabel ( mkClosureLabel ) -import ClosureInfo ( mkConLFInfo, mkLFArgument, layOutDynConstr, - layOutStaticConstr, mkStaticClosure - ) +import CgHeapery ( allocDynClosure, layOutDynConstr, + layOutStaticConstr, mkStaticClosureFields ) +import CgTailCall ( performReturn, emitKnownConReturnCode, returnUnboxedTuple ) +import CgProf ( mkCCostCentreStack, ldvEnter, curCCS ) +import CgTicky +import CgInfoTbls ( emitClosureCodeAndInfoTable, dataConTagZ ) +import CLabel ( mkClosureLabel, mkRtsDataLabel, mkClosureTblLabel ) +import ClosureInfo ( mkConLFInfo, mkLFArgument ) +import CmmUtils ( mkLblExpr ) +import Cmm +import SMRep ( WordOff, CgRep, separateByPtrFollowness, + fixedHdrSize, typeCgRep ) import CostCentre ( currentOrSubsumedCCS, dontCareCCS, CostCentreStack, currentCCS ) -import DataCon ( DataCon, dataConTag, +import Constants ( mIN_INTLIKE, mAX_INTLIKE, mIN_CHARLIKE, mAX_CHARLIKE ) +import TyCon ( TyCon, tyConDataCons, 
isEnumerationTyCon, tyConName ) +import DataCon ( DataCon, dataConRepArgTys, isNullaryDataCon, isUnboxedTupleCon, dataConWorkId, dataConName, dataConRepArity ) -import Id ( Id, idName, idPrimRep, isDeadBinder ) -import Literal ( Literal(..) ) +import Id ( Id, idName, isDeadBinder ) +import Type ( Type ) import PrelInfo ( maybeCharLikeCon, maybeIntLikeCon ) -import PrimRep ( PrimRep(..), isFollowableRep ) -import Util import Outputable - -import List ( partition ) -import Char ( ord ) +import Util ( lengthIs ) +import ListSetOps ( assocMaybe ) \end{code} + %************************************************************************ %* * \subsection[toplevel-constructors]{Top-level constructors} @@ -68,34 +72,32 @@ cgTopRhsCon :: Id -- Name of thing bound to this RHS cgTopRhsCon id con args = ASSERT( not (isDllConApp con args) ) ASSERT( args `lengthIs` dataConRepArity con ) + do { -- LAY IT OUT + ; amodes <- getArgAmodes args + + ; let + name = idName id + lf_info = mkConLFInfo con + closure_label = mkClosureLabel name + caffy = any stgArgHasCafRefs args + (closure_info, amodes_w_offsets) = layOutStaticConstr con amodes + closure_rep = mkStaticClosureFields + closure_info + dontCareCCS -- Because it's static data + caffy -- Has CAF refs + payload + + payload = map get_lit amodes_w_offsets + get_lit (CmmLit lit, _offset) = lit + get_lit other = pprPanic "CgCon.get_lit" (ppr other) + -- NB1: amodes_w_offsets is sorted into ptrs first, then non-ptrs + -- NB2: all the amodes should be Lits! 
+ + -- BUILD THE OBJECT + ; emitDataLits closure_label closure_rep - -- LAY IT OUT - getArgAmodes args `thenFC` \ amodes -> - - let - name = idName id - lf_info = mkConLFInfo con - closure_label = mkClosureLabel name - (closure_info, amodes_w_offsets) - = layOutStaticConstr con getAmodeRep amodes - caffy = any stgArgHasCafRefs args - in - - -- BUILD THE OBJECT - absC (mkStaticClosure - closure_label - closure_info - dontCareCCS -- because it's static data - (map fst amodes_w_offsets) -- Sorted into ptrs first, then nonptrs - caffy -- has CAF refs - ) `thenC` - -- NOTE: can't use idCafInfo instead of nonEmptySRT above, - -- because top-level constructors that were floated by - -- CorePrep don't have CafInfo attached. The SRT is more - -- reliable. - - -- RETURN - returnFC (id, stableAmodeIdInfo id (CLbl closure_label PtrRep) lf_info) + -- RETURN + ; returnFC (id, stableIdInfo id (mkLblExpr closure_label) lf_info) } \end{code} %************************************************************************ @@ -106,13 +108,13 @@ cgTopRhsCon id con args \subsection[code-for-constructors]{The code for constructors} \begin{code} -buildDynCon :: Id -- Name of the thing to which this constr will - -- be bound - -> CostCentreStack -- Where to grab cost centre from; - -- current CCS if currentOrSubsumedCCS - -> DataCon -- The data constructor - -> [CAddrMode] -- Its args - -> FCode CgIdInfo -- Return details about how to find it +buildDynCon :: Id -- Name of the thing to which this constr will + -- be bound + -> CostCentreStack -- Where to grab cost centre from; + -- current CCS if currentOrSubsumedCCS + -> DataCon -- The data constructor + -> [(CgRep,CmmExpr)] -- Its args + -> FCode CgIdInfo -- Return details about how to find it -- We used to pass a boolean indicating whether all the -- args were of size zero, so we could use a static @@ -135,9 +137,9 @@ at all. 
\begin{code} buildDynCon binder cc con [] - = returnFC (stableAmodeIdInfo binder - (CLbl (mkClosureLabel (dataConName con)) PtrRep) - (mkConLFInfo con)) + = returnFC (stableIdInfo binder + (mkLblExpr (mkClosureLabel (dataConName con))) + (mkConLFInfo con)) \end{code} The following three paragraphs about @Char@-like and @Int@-like @@ -163,36 +165,41 @@ Because of this, we use can safely return an addressing mode. \begin{code} buildDynCon binder cc con [arg_amode] - | maybeIntLikeCon con && in_range_int_lit arg_amode - = returnFC (stableAmodeIdInfo binder (CIntLike arg_amode) (mkConLFInfo con)) - where - in_range_int_lit (CLit (MachInt val)) = val <= mAX_INTLIKE && val >= mIN_INTLIKE - in_range_int_lit _other_amode = False + | maybeIntLikeCon con + , (_, CmmLit (CmmInt val _)) <- arg_amode + , let val_int = (fromIntegral val) :: Int + , val_int <= mAX_INTLIKE && val_int >= mIN_INTLIKE + = do { let intlike_lbl = mkRtsDataLabel SLIT("stg_INTLIKE_closure") + offsetW = (val_int - mIN_INTLIKE) * (fixedHdrSize + 1) + -- INTLIKE closures consist of a header and one word payload + intlike_amode = CmmLit (cmmLabelOffW intlike_lbl offsetW) + ; returnFC (stableIdInfo binder intlike_amode (mkConLFInfo con)) } buildDynCon binder cc con [arg_amode] - | maybeCharLikeCon con && in_range_char_lit arg_amode - = returnFC (stableAmodeIdInfo binder (CCharLike arg_amode) (mkConLFInfo con)) - where - in_range_char_lit (CLit (MachChar val)) = - ord val <= mAX_CHARLIKE && ord val >= mIN_CHARLIKE - in_range_char_lit _other_amode = False + | maybeCharLikeCon con + , (_, CmmLit (CmmInt val _)) <- arg_amode + , let val_int = (fromIntegral val) :: Int + , val_int <= mAX_CHARLIKE && val_int >= mIN_CHARLIKE + = do { let charlike_lbl = mkRtsDataLabel SLIT("stg_CHARLIKE_closure") + offsetW = (val_int - mIN_CHARLIKE) * (fixedHdrSize + 1) + -- CHARLIKE closures consist of a header and one word payload + charlike_amode = CmmLit (cmmLabelOffW charlike_lbl offsetW) + ; returnFC (stableIdInfo binder 
charlike_amode (mkConLFInfo con)) } \end{code} Now the general case. \begin{code} buildDynCon binder ccs con args - = allocDynClosure closure_info use_cc blame_cc amodes_w_offsets `thenFC` \ hp_off -> - returnFC (heapIdInfo binder hp_off lf_info) + = do { hp_off <- allocDynClosure closure_info use_cc blame_cc amodes_w_offsets + ; returnFC (heapIdInfo binder hp_off lf_info) } where lf_info = mkConLFInfo con - - (closure_info, amodes_w_offsets) = layOutDynConstr con getAmodeRep args + (closure_info, amodes_w_offsets) = layOutDynConstr con args use_cc -- cost-centre to stick in the object - = if currentOrSubsumedCCS ccs - then CReg CurCostCentre - else mkCCostCentreStack ccs + | currentOrSubsumedCCS ccs = curCCS + | otherwise = CmmLit (mkCCostCentreStack ccs) blame_cc = use_cc -- cost-centre on which to blame the alloc (same) \end{code} @@ -211,16 +218,13 @@ binders $args$, assuming that we have just returned from a @case@ which found a $con$. \begin{code} -bindConArgs - :: DataCon -> [Id] -- Constructor and args - -> Code - +bindConArgs :: DataCon -> [Id] -> Code bindConArgs con args = ASSERT(not (isUnboxedTupleCon con)) mapCs bind_arg args_w_offsets where bind_arg (arg, offset) = bindNewToNode arg offset (mkLFArgument arg) - (_, args_w_offsets) = layOutDynConstr con idPrimRep args + (_, args_w_offsets) = layOutDynConstr con (addIdReps args) \end{code} Unboxed tuples are handled slightly differently - the object is @@ -228,56 +232,53 @@ returned in registers and on the stack instead of the heap. 
\begin{code} bindUnboxedTupleComponents - :: [Id] -- Aargs - -> FCode ([MagicId], -- Regs assigned - Int, -- Number of pointer stack slots - Int, -- Number of non-pointer stack slots + :: [Id] -- Args + -> FCode ([(Id,GlobalReg)], -- Regs assigned + WordOff, -- Number of pointer stack slots + WordOff, -- Number of non-pointer stack slots VirtualSpOffset) -- Offset of return address slot -- (= realSP on entry) bindUnboxedTupleComponents args - = -- Assign as many components as possible to registers - let (arg_regs, _leftovers) = assignRegs [] (map idPrimRep args) - (reg_args, stk_args) = splitAtList arg_regs args - - -- separate the rest of the args into pointers and non-pointers - (ptr_args, nptr_args) = - partition (isFollowableRep . idPrimRep) stk_args - in + = do { + vsp <- getVirtSp + ; rsp <- getRealSp + + -- Assign as many components as possible to registers + ; let (reg_args, stk_args) = assignReturnRegs (addIdReps args) + + -- Separate the rest of the args into pointers and non-pointers + (ptr_args, nptr_args) = separateByPtrFollowness stk_args - -- Allocate the rest on the stack - -- The real SP points to the return address, above which any - -- leftover unboxed-tuple components will be allocated - getVirtSp `thenFC` \ vsp -> - getRealSp `thenFC` \ rsp -> - let - (ptr_sp, ptr_offsets) = mkVirtStkOffsets rsp idPrimRep ptr_args - (nptr_sp, nptr_offsets) = mkVirtStkOffsets ptr_sp idPrimRep nptr_args - ptrs = ptr_sp - rsp - nptrs = nptr_sp - ptr_sp - in - - -- The stack pointer points to the last stack-allocated component - setRealAndVirtualSp nptr_sp `thenC` - - -- We have just allocated slots starting at real SP + 1, and set the new - -- virtual SP to the topmost allocated slot. 
- -- If the virtual SP started *below* the real SP, we've just jumped over - -- some slots that won't be in the free-list, so put them there - -- This commonly happens because we've freed the return-address slot - -- (trimming back the virtual SP), but the real SP still points to that slot - freeStackSlots [vsp+1,vsp+2 .. rsp] `thenC` - - bindArgsToRegs reg_args arg_regs `thenC` - mapCs bindNewToStack ptr_offsets `thenC` - mapCs bindNewToStack nptr_offsets `thenC` - - returnFC (arg_regs, ptrs, nptrs, rsp) + -- Allocate the rest on the stack + -- The real SP points to the return address, above which any + -- leftover unboxed-tuple components will be allocated + (ptr_sp, ptr_offsets) = mkVirtStkOffsets rsp ptr_args + (nptr_sp, nptr_offsets) = mkVirtStkOffsets ptr_sp nptr_args + ptrs = ptr_sp - rsp + nptrs = nptr_sp - ptr_sp + + -- The stack pointer points to the last stack-allocated component + ; setRealAndVirtualSp nptr_sp + + -- We have just allocated slots starting at real SP + 1, and set the new + -- virtual SP to the topmost allocated slot. + -- If the virtual SP started *below* the real SP, we've just jumped over + -- some slots that won't be in the free-list, so put them there + -- This commonly happens because we've freed the return-address slot + -- (trimming back the virtual SP), but the real SP still points to that slot + ; freeStackSlots [vsp+1,vsp+2 .. 
rsp] + + ; bindArgsToRegs reg_args + ; bindArgsToStack ptr_offsets + ; bindArgsToStack nptr_offsets + + ; returnFC (reg_args, ptrs, nptrs, rsp) } \end{code} %************************************************************************ %* * -\subsubsection[CgRetConv-cgReturnDataCon]{Actually generate code for a constructor return} + Actually generate code for a constructor return %* * %************************************************************************ @@ -285,47 +286,41 @@ bindUnboxedTupleComponents args Note: it's the responsibility of the @cgReturnDataCon@ caller to be sure the @amodes@ passed don't conflict with each other. \begin{code} -cgReturnDataCon :: DataCon -> [CAddrMode] -> Code +cgReturnDataCon :: DataCon -> [(CgRep, CmmExpr)] -> Code cgReturnDataCon con amodes = ASSERT( amodes `lengthIs` dataConRepArity con ) - getEndOfBlockInfo `thenFC` \ (EndOfBlockInfo args_sp sequel) -> - - case sequel of - - CaseAlts _ (Just (alts, Just (deflt_bndr, (_,deflt_lbl)))) False - | not (dataConTag con `is_elem` map fst alts) - -> - -- Special case! We're returning a constructor to the default case - -- of an enclosing case. For example: - -- - -- case (case e of (a,b) -> C a b) of - -- D x -> ... - -- y -> ...<returning here!>... - -- - -- In this case, - -- if the default is a non-bind-default (ie does not use y), - -- then we should simply jump to the default join point; - - if isDeadBinder deflt_bndr - then performReturn AbsCNop {- No reg assts -} jump_to_join_point - else build_it_then jump_to_join_point - where - is_elem = isIn "cgReturnDataCon" - jump_to_join_point sequel = absC (CJump (CLbl deflt_lbl CodePtrRep)) - -- Ignore the sequel: we've already looked at it above - - other_sequel -- The usual case - | isUnboxedTupleCon con -> returnUnboxedTuple amodes - | otherwise -> build_it_then (mkStaticAlgReturnCode con) - + do { EndOfBlockInfo _ sequel <- getEndOfBlockInfo + ; case sequel of + CaseAlts _ (Just (alts, deflt_lbl)) bndr _ + -> -- Ho! 
We know the constructor so we can + -- go straight to the right alternative + case assocMaybe alts (dataConTagZ con) of { + Just join_lbl -> build_it_then (jump_to join_lbl) ; + Nothing + -- Special case! We're returning a constructor to the default case + -- of an enclosing case. For example: + -- + -- case (case e of (a,b) -> C a b) of + -- D x -> ... + -- y -> ...<returning here!>... + -- + -- In this case, + -- if the default is a non-bind-default (ie does not use y), + -- then we should simply jump to the default join point; + + | isDeadBinder bndr -> performReturn (jump_to deflt_lbl) + | otherwise -> build_it_then (jump_to deflt_lbl) } + + other_sequel -- The usual case + | isUnboxedTupleCon con -> returnUnboxedTuple amodes + | otherwise -> build_it_then (emitKnownConReturnCode con) + } where - move_to_reg :: CAddrMode -> MagicId -> AbstractC - move_to_reg src_amode dest_reg = CAssign (CReg dest_reg) src_amode - - build_it_then return = - -- BUILD THE OBJECT IN THE HEAP - -- The first "con" says that the name bound to this + jump_to lbl = stmtC (CmmJump (CmmLit (CmmLabel lbl)) []) + build_it_then return_code + = do { -- BUILD THE OBJECT IN THE HEAP + -- The first "con" says that the name bound to this -- closure is "con", which is a bit of a fudge, but it only -- affects profiling @@ -333,12 +328,108 @@ cgReturnDataCon con amodes -- temporary variable, if the closure is a CHARLIKE. -- funnily enough, this makes the unique always come -- out as '54' :-) - buildDynCon (dataConWorkId con) currentCCS con amodes `thenFC` \ idinfo -> - idInfoToAmode PtrRep idinfo `thenFC` \ amode -> + tickyReturnNewCon (length amodes) + ; idinfo <- buildDynCon (dataConWorkId con) currentCCS con amodes + ; amode <- idInfoToAmode idinfo + ; checkedAbsC (CmmAssign nodeReg amode) + ; performReturn return_code } +\end{code} - -- RETURN - profCtrC FSLIT("TICK_RET_NEW") [mkIntCLit (length amodes)] `thenC` - -- could use doTailCall here. 
- performReturn (move_to_reg amode node) return +%************************************************************************ +%* * + Generating static stuff for algebraic data types +%* * +%************************************************************************ + + [These comments are rather out of date] + +\begin{tabular}{lll} +Info tbls & Macro & Kind of constructor \\ +\hline +info & @CONST_INFO_TABLE@& Zero arity (no info -- compiler uses static closure)\\ +info & @CHARLIKE_INFO_TABLE@& Charlike (no info -- compiler indexes fixed array)\\ +info & @INTLIKE_INFO_TABLE@& Intlike; the one macro generates both info tbls\\ +info & @SPEC_INFO_TABLE@& SPECish, and bigger than or equal to @MIN_UPD_SIZE@\\ +info & @GEN_INFO_TABLE@& GENish (hence bigger than or equal to @MIN_UPD_SIZE@)\\ +\end{tabular} + +Possible info tables for constructor con: + +\begin{description} +\item[@_con_info@:] +Used for dynamically let(rec)-bound occurrences of +the constructor, and for updates. For constructors +which are int-like, char-like or nullary, when GC occurs, +the closure tries to get rid of itself. + +\item[@_static_info@:] +Static occurrences of the constructor +macro: @STATIC_INFO_TABLE@. +\end{description} + +For zero-arity constructors, \tr{con}, we NO LONGER generate a static closure; +it's place is taken by the top level defn of the constructor. + +For charlike and intlike closures there is a fixed array of static +closures predeclared. + +\begin{code} +cgTyCon :: TyCon -> FCode [Cmm] -- each constructor gets a separate Cmm +cgTyCon tycon + = do { constrs <- mapM (getCmm . 
cgDataCon) (tyConDataCons tycon) + + -- Generate a table of static closures for an enumeration type + -- Put the table after the data constructor decls, because the + -- datatype closure table (for enumeration types) + -- to (say) PrelBase_$wTrue_closure, which is defined in code_stuff + ; extra <- + if isEnumerationTyCon tycon then do + tbl <- getCmm (emitRODataLits (mkClosureTblLabel + (tyConName tycon)) + [ CmmLabel (mkClosureLabel (dataConName con)) + | con <- tyConDataCons tycon]) + return [tbl] + else + return [] + + ; return (extra ++ constrs) + } +\end{code} + +Generate the entry code, info tables, and (for niladic constructor) the +static closure, for a constructor. + +\begin{code} +cgDataCon :: DataCon -> Code +cgDataCon data_con + = do { -- Don't need any dynamic closure code for zero-arity constructors + whenC (not (isNullaryDataCon data_con)) + (emit_info dyn_cl_info tickyEnterDynCon) + + -- Dynamic-Closure first, to reduce forward references + ; emit_info static_cl_info tickyEnterStaticCon } + + where + emit_info cl_info ticky_code + = do { code_blks <- getCgStmts the_code + ; emitClosureCodeAndInfoTable cl_info [] code_blks } + where + the_code = do { ticky_code + ; ldvEnter (CmmReg nodeReg) + ; body_code } + + arg_reps :: [(CgRep, Type)] + arg_reps = [(typeCgRep ty, ty) | ty <- dataConRepArgTys data_con] + + -- To allow the debuggers, interpreters, etc to cope with static + -- data structures (ie those built at compile time), we take care that + -- info-table contains the information we need. 
+ (static_cl_info, _) = layOutStaticConstr data_con arg_reps + (dyn_cl_info, arg_things) = layOutDynConstr data_con arg_reps + + body_code = do { -- NB: We don't set CC when entering data (WDP 94/06) + tickyReturnOldCon (length arg_things) + ; performReturn (emitKnownConReturnCode data_con) } + -- noStmts: Ptr to thing already in Node \end{code} diff --git a/ghc/compiler/codeGen/CgConTbls.lhs b/ghc/compiler/codeGen/CgConTbls.lhs deleted file mode 100644 index 37ced1ee2b..0000000000 --- a/ghc/compiler/codeGen/CgConTbls.lhs +++ /dev/null @@ -1,163 +0,0 @@ -% -% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 -% -\section[CgConTbls]{Info tables and update bits for constructors} - -\begin{code} -module CgConTbls ( genStaticConBits ) where - -#include "HsVersions.h" - -import AbsCSyn -import CgMonad - -import AbsCUtils ( mkAbstractCs, mkAbsCStmts ) -import CgTailCall ( performReturn, mkStaticAlgReturnCode ) -import ClosureInfo ( layOutStaticConstr, layOutDynConstr, ClosureInfo ) -import DataCon ( DataCon, dataConRepArgTys, isNullaryDataCon ) -import TyCon ( tyConDataCons, isEnumerationTyCon, TyCon ) -import Type ( typePrimRep ) -import CmdLineOpts -\end{code} - -For every constructor we generate the following info tables: - A static info table, for static instances of the constructor, - - Plus: - -\begin{tabular}{lll} -Info tbls & Macro & Kind of constructor \\ -\hline -info & @CONST_INFO_TABLE@& Zero arity (no info -- compiler uses static closure)\\ -info & @CHARLIKE_INFO_TABLE@& Charlike (no info -- compiler indexes fixed array)\\ -info & @INTLIKE_INFO_TABLE@& Intlike; the one macro generates both info tbls\\ -info & @SPEC_INFO_TABLE@& SPECish, and bigger than or equal to @MIN_UPD_SIZE@\\ -info & @GEN_INFO_TABLE@& GENish (hence bigger than or equal to @MIN_UPD_SIZE@)\\ -\end{tabular} - -Possible info tables for constructor con: - -\begin{description} -\item[@_con_info@:] -Used for dynamically let(rec)-bound occurrences of -the constructor, and for updates. 
For constructors -which are int-like, char-like or nullary, when GC occurs, -the closure tries to get rid of itself. - -\item[@_static_info@:] -Static occurrences of the constructor -macro: @STATIC_INFO_TABLE@. -\end{description} - - -For zero-arity constructors, \tr{con}, we NO LONGER generate a static closure; -it's place is taken by the top level defn of the constructor. - -For charlike and intlike closures there is a fixed array of static -closures predeclared. - -\begin{code} -genStaticConBits :: CompilationInfo -- global info about the compilation - -> [TyCon] -- tycons to generate - -> AbstractC -- output - -genStaticConBits comp_info gen_tycons - = -- for each type constructor: - -- grab all its data constructors; - -- for each one, generate an info table - -- for each specialised type constructor - -- for each specialisation of the type constructor - -- grab data constructors, and generate info tables - - -- ToDo: for tycons and specialisations which are not - -- declared in this module we must ensure that the - -- C labels are local to this module i.e. 
static - -- since they may be duplicated in other modules - - mkAbstractCs [ gen_for_tycon tc `mkAbsCStmts` enum_closure_table tc - | tc <- gen_tycons ] - where - gen_for_tycon :: TyCon -> AbstractC - gen_for_tycon tycon = mkAbstractCs [ genConInfo comp_info data_con - | data_con <- tyConDataCons tycon ] - - enum_closure_table tycon - | isEnumerationTyCon tycon = CClosureTbl tycon - | otherwise = AbsCNop - -- Put the table after the data constructor decls, because the - -- datatype closure table (for enumeration types) - -- to (say) PrelBase_$wTrue_closure, which is defined in code_stuff -\end{code} - - -%************************************************************************ -%* * -\subsection[CgConTbls-info-tables]{Generating info tables for constructors} -%* * -%************************************************************************ - -Generate the entry code, info tables, and (for niladic constructor) the -static closure, for a constructor. - -\begin{code} -genConInfo :: CompilationInfo -> DataCon -> AbstractC - -genConInfo comp_info data_con - = -- Order of things is to reduce forward references - mkAbstractCs [if opt_EnsureSplittableC then CSplitMarker else AbsCNop, - closure_code, - static_code] - where - (closure_info, body_code) = mkConCodeAndInfo data_con - - -- To allow the debuggers, interpreters, etc to cope with static - -- data structures (ie those built at compile time), we take care that - -- info-table contains the information we need. 
- (static_ci,_) = layOutStaticConstr data_con typePrimRep arg_tys - - static_body = initC comp_info ( - profCtrC FSLIT("TICK_ENT_STATIC_CON") [CReg node] `thenC` - ldv_enter_and_body_code) - - closure_body = initC comp_info ( - profCtrC FSLIT("TICK_ENT_DYN_CON") [CReg node] `thenC` - ldv_enter_and_body_code) - - ldv_enter_and_body_code = ldvEnter `thenC` body_code - - -- Don't need any dynamic closure code for zero-arity constructors - closure_code = if zero_arity_con then - AbsCNop - else - CClosureInfoAndCode closure_info closure_body - - static_code = CClosureInfoAndCode static_ci static_body - - zero_arity_con = isNullaryDataCon data_con - -- We used to check that all the arg-sizes were zero, but we don't - -- really have any constructors with only zero-size args, and it's - -- just one more thing to go wrong. - - arg_tys = dataConRepArgTys data_con -\end{code} - -\begin{code} -mkConCodeAndInfo :: DataCon -- Data constructor - -> (ClosureInfo, Code) -- The info table - -mkConCodeAndInfo con - = let - arg_tys = dataConRepArgTys con - - (closure_info, arg_things) = layOutDynConstr con typePrimRep arg_tys - - body_code - = -- NB: We don't set CC when entering data (WDP 94/06) - profCtrC FSLIT("TICK_RET_OLD") - [mkIntCLit (length arg_things)] `thenC` - - performReturn AbsCNop -- Ptr to thing already in Node - (mkStaticAlgReturnCode con) - in - (closure_info, body_code) -\end{code} diff --git a/ghc/compiler/codeGen/CgExpr.lhs b/ghc/compiler/codeGen/CgExpr.lhs index 88771b911c..d72c7c5a4c 100644 --- a/ghc/compiler/codeGen/CgExpr.lhs +++ b/ghc/compiler/codeGen/CgExpr.lhs @@ -1,7 +1,7 @@ % % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 % -% $Id: CgExpr.lhs,v 1.58 2004/08/10 09:02:41 simonmar Exp $ +% $Id: CgExpr.lhs,v 1.59 2004/08/13 13:05:58 simonmar Exp $ % %******************************************************** %* * @@ -17,38 +17,39 @@ module CgExpr ( cgExpr ) where import Constants ( mAX_SPEC_SELECTEE_SIZE, mAX_SPEC_AP_SIZE ) import StgSyn import 
CgMonad -import AbsCSyn -import AbsCUtils ( mkAbstractCs, getAmodeRep, shimFCallArg ) -import CLabel ( mkClosureTblLabel ) -import SMRep ( fixedHdrSize ) +import SMRep ( fixedHdrSize, isFollowableArg, CgRep(..), argMachRep, + nonVoidArg, idCgRep, typeCgRep, typeHint, + primRepToCgRep ) import CoreSyn ( AltCon(..) ) +import CgProf ( emitSetCCC ) +import CgHeapery ( layOutDynConstr ) import CgBindery ( getArgAmodes, getArgAmode, CgIdInfo, nukeDeadBindings, addBindC, addBindsC ) import CgCase ( cgCase, saveVolatileVarsAndRegs ) import CgClosure ( cgRhsClosure, cgStdRhsClosure ) import CgCon ( buildDynCon, cgReturnDataCon ) import CgLetNoEscape ( cgLetNoEscapeClosure ) -import CgRetConv ( dataReturnConvPrim ) -import CgTailCall ( cgTailCall, performReturn, performPrimReturn, - mkDynamicAlgReturnCode, mkPrimReturnCode, - tailCallPrimOp, ccallReturnUnboxedTuple - ) -import ClosureInfo ( mkClosureLFInfo, mkSelectorLFInfo, - mkApLFInfo, layOutDynConstr ) -import CostCentre ( sccAbleCostCentre, isSccCountCostCentre ) -import Id ( idPrimRep, Id ) +import CgCallConv ( dataReturnConvPrim ) +import CgTailCall +import CgInfoTbls ( emitDirectReturnInstr ) +import CgForeignCall ( emitForeignCall, shimForeignCallArg ) +import CgPrimOp ( cgPrimOp ) +import CgUtils ( addIdReps, newTemp, assignTemp, cgLit, tagToClosure ) +import ClosureInfo ( mkSelectorLFInfo, mkApLFInfo ) +import Cmm ( CmmExpr(..), CmmStmt(..), CmmReg, nodeReg ) +import MachOp ( wordRep, MachHint ) import VarSet +import Literal ( literalType ) import PrimOp ( primOpOutOfLine, getPrimOpResultInfo, PrimOp(..), PrimOpResultInfo(..) 
) -import PrimRep ( PrimRep(..), isFollowableRep ) +import Id ( Id ) import TyCon ( isUnboxedTupleTyCon, isEnumerationTyCon ) -import Type ( Type, typePrimRep, tyConAppArgs, - tyConAppTyCon, repType ) +import Type ( Type, tyConAppArgs, tyConAppTyCon, repType, + PrimRep(VoidRep) ) import Maybes ( maybeToBool ) import ListSetOps ( assocMaybe ) -import Unique ( mkBuiltinUnique ) -import BasicTypes ( TopLevelFlag(..), RecFlag(..) ) +import BasicTypes ( RecFlag(..) ) import Util ( lengthIs ) import Outputable \end{code} @@ -84,8 +85,8 @@ cgExpr (StgApp fun args) = cgTailCall fun args \begin{code} cgExpr (StgConApp con args) - = getArgAmodes args `thenFC` \ amodes -> - cgReturnDataCon con amodes + = do { amodes <- getArgAmodes args + ; cgReturnDataCon con amodes } \end{code} Literals are similar to constructors; they return by putting @@ -94,99 +95,100 @@ top of the stack. \begin{code} cgExpr (StgLit lit) - = performPrimReturn (text "literal" <+> ppr lit) (CLit lit) + = do { cmm_lit <- cgLit lit + ; performPrimReturn rep (CmmLit cmm_lit) } + where + rep = typeCgRep (literalType lit) \end{code} %******************************************************** %* * -%* STG PrimApps (unboxed primitive ops) * +%* PrimOps and foreign calls. %* * %******************************************************** -Here is where we insert real live machine instructions. - -NOTE about _ccall_GC_: +NOTE about "safe" foreign calls: a safe foreign call is never compiled +inline in a case expression. When we see -A _ccall_GC_ is treated as an out-of-line primop (returns True -for primOpOutOfLine) so that when we see the call in case context case (ccall ...) of { ... } -we get a proper stack frame on the stack when we perform it. When we -get in a tail-call position, however, we need to actually perform the -call, so we treat it as an inline primop. 
+ +We generate a proper return address for the alternatives and push the +stack frame before doing the call, so that in the event that the call +re-enters the RTS the stack is in a sane state. \begin{code} -cgExpr (StgOpApp op@(StgFCallOp _ _) args res_ty) - = primRetUnboxedTuple op args res_ty +cgExpr (StgOpApp (StgFCallOp fcall _) stg_args res_ty) = do + {- + First, copy the args into temporaries. We're going to push + a return address right before doing the call, so the args + must be out of the way. + -} + reps_n_amodes <- getArgAmodes stg_args + let + -- Get the *non-void* args, and jiggle them with shimForeignCall + arg_exprs = [ shimForeignCallArg stg_arg expr + | (stg_arg, (rep,expr)) <- stg_args `zip` reps_n_amodes, + nonVoidArg rep] + -- in + arg_tmps <- mapM assignTemp arg_exprs + let + arg_hints = zip arg_tmps (map (typeHint.stgArgType) stg_args) + -- in + {- + Now, allocate some result regs. + -} + (res_reps,res_regs,res_hints) <- newUnboxedTupleRegs res_ty + ccallReturnUnboxedTuple (zip res_reps (map CmmReg res_regs)) $ + emitForeignCall (zip res_regs res_hints) fcall + arg_hints emptyVarSet{-no live vars-} + -- tagToEnum# is special: we need to pull the constructor out of the table, -- and perform an appropriate return. cgExpr (StgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty) = ASSERT(isEnumerationTyCon tycon) - getArgAmode arg `thenFC` \amode -> - -- save the tag in a temporary in case amode overlaps - -- with node. 
- absC (CAssign dyn_tag amode) `thenC` - performReturn ( - CAssign (CReg node) - (CVal (CIndex - (CLbl (mkClosureTblLabel tycon) PtrRep) - dyn_tag PtrRep) PtrRep)) - (\ sequel -> mkDynamicAlgReturnCode tycon dyn_tag sequel) + do { (_,amode) <- getArgAmode arg + ; amode' <- assignTemp amode -- We're going to use it twice, + -- so save in a temp if non-trivial + ; stmtC (CmmAssign nodeReg (tagToClosure tycon amode')) + ; performReturn (emitAlgReturnCode tycon amode') } where - dyn_tag = CTemp (mkBuiltinUnique 0) IntRep - -- The '0' is just to get a random spare temp - -- - -- if you're reading this code in the attempt to figure + -- If you're reading this code in the attempt to figure -- out why the compiler panic'ed here, it is probably because -- you used tagToEnum# in a non-monomorphic setting, e.g., -- intToTg :: Enum a => Int -> a ; intToTg (I# x#) = tagToEnum# x# - -- -- That won't work. - -- tycon = tyConAppTyCon res_ty cgExpr x@(StgOpApp op@(StgPrimOp primop) args res_ty) - | primOpOutOfLine primop - = tailCallPrimOp primop args - - | otherwise - = getArgAmodes args `thenFC` \ arg_amodes -> - - case (getPrimOpResultInfo primop) of - - ReturnsPrim kind -> - let result_amode = CReg (dataReturnConvPrim kind) in - performReturn - (COpStmt [result_amode] op arg_amodes [{-no vol_regs-}]) - (mkPrimReturnCode (text "primapp)" <+> ppr x)) - - -- otherwise, must be returning an enumerated type (eg. Bool). - -- we've only got the tag in R2, so we have to load the constructor - -- itself into R1. - - ReturnsAlg tycon - | isUnboxedTupleTyCon tycon -> primRetUnboxedTuple op args res_ty - - | isEnumerationTyCon tycon -> - performReturn - (COpStmt [dyn_tag] op arg_amodes [{-no vol_regs-}]) - (\ sequel -> - absC (CAssign (CReg node) closure_lbl) `thenC` - mkDynamicAlgReturnCode tycon dyn_tag sequel) - - where - -- Pull a unique out of thin air to put the tag in. - -- It shouldn't matter if this overlaps with anything - we're - -- about to return anyway. 
- dyn_tag = CTemp (mkBuiltinUnique 0) IntRep - - closure_lbl = CVal (CIndex - (CLbl (mkClosureTblLabel tycon) PtrRep) - dyn_tag PtrRep) PtrRep - + | primOpOutOfLine primop + = tailCallPrimOp primop args + + | ReturnsPrim VoidRep <- result_info + = do cgPrimOp [] primop args emptyVarSet + performReturn emitDirectReturnInstr + + | ReturnsPrim rep <- result_info + = do cgPrimOp [dataReturnConvPrim (primRepToCgRep rep)] + primop args emptyVarSet + performReturn emitDirectReturnInstr + + | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon + = do (reps, regs, _hints) <- newUnboxedTupleRegs res_ty + cgPrimOp regs primop args emptyVarSet{-no live vars-} + returnUnboxedTuple (zip reps (map CmmReg regs)) + + | ReturnsAlg tycon <- result_info, isEnumerationTyCon tycon + -- c.f. cgExpr (...TagToEnumOp...) + = do tag_reg <- newTemp wordRep + cgPrimOp [tag_reg] primop args emptyVarSet + stmtC (CmmAssign nodeReg (tagToClosure tycon (CmmReg tag_reg))) + performReturn (emitAlgReturnCode tycon (CmmReg tag_reg)) + where + result_info = getPrimOpResultInfo primop \end{code} %******************************************************** @@ -227,20 +229,21 @@ cgExpr (StgLet (StgRec pairs) expr) \begin{code} cgExpr (StgLetNoEscape live_in_whole_let live_in_rhss bindings body) - = -- Figure out what volatile variables to save - nukeDeadBindings live_in_whole_let `thenC` - saveVolatileVarsAndRegs live_in_rhss - `thenFC` \ (save_assts, rhs_eob_info, maybe_cc_slot) -> + = do { -- Figure out what volatile variables to save + ; nukeDeadBindings live_in_whole_let + ; (save_assts, rhs_eob_info, maybe_cc_slot) + <- saveVolatileVarsAndRegs live_in_rhss -- Save those variables right now! 
- absC save_assts `thenC` + ; emitStmts save_assts -- Produce code for the rhss -- and add suitable bindings to the environment - cgLetNoEscapeBindings live_in_rhss rhs_eob_info maybe_cc_slot bindings `thenC` + ; cgLetNoEscapeBindings live_in_rhss rhs_eob_info + maybe_cc_slot bindings -- Do the body - setEndOfBlockInfo rhs_eob_info (cgExpr body) + ; setEndOfBlockInfo rhs_eob_info (cgExpr body) } \end{code} @@ -252,18 +255,11 @@ cgExpr (StgLetNoEscape live_in_whole_let live_in_rhss bindings body) SCC expressions are treated specially. They set the current cost centre. + \begin{code} -cgExpr (StgSCC cc expr) - = ASSERT(sccAbleCostCentre cc) - costCentresC - FSLIT("SET_CCC") - [mkCCostCentre cc, mkIntCLit (if isSccCountCostCentre cc then 1 else 0)] - `thenC` - cgExpr expr +cgExpr (StgSCC cc expr) = do emitSetCCC cc; cgExpr expr \end{code} -ToDo: counting of dict sccs ... - %******************************************************** %* * %* Non-top-level bindings * @@ -279,9 +275,9 @@ cgRhs :: Id -> StgRhs -> FCode (Id, CgIdInfo) -- the Id is passed along so a binding can be set up cgRhs name (StgRhsCon maybe_cc con args) - = getArgAmodes args `thenFC` \ amodes -> - buildDynCon name maybe_cc con amodes `thenFC` \ idinfo -> - returnFC (name, idinfo) + = do { amodes <- getArgAmodes args + ; idinfo <- buildDynCon name maybe_cc con amodes + ; returnFC (name, idinfo) } cgRhs name (StgRhsClosure cc bi fvs upd_flag srt args body) = mkRhsClosure name cc bi srt fvs upd_flag args body @@ -328,7 +324,7 @@ mkRhsClosure bndr cc bi srt cgStdRhsClosure bndr cc bi [the_fv] [] body lf_info [StgVarArg the_fv] where lf_info = mkSelectorLFInfo bndr offset_into_int (isUpdatable upd_flag) - (_, params_w_offsets) = layOutDynConstr con idPrimRep params + (_, params_w_offsets) = layOutDynConstr con (addIdReps params) -- Just want the layout maybe_offset = assocMaybe params_w_offsets selectee Just the_offset = maybe_offset @@ -359,7 +355,7 @@ mkRhsClosure bndr cc bi srt body@(StgApp fun_id args) 
| args `lengthIs` (arity-1) - && all isFollowableRep (map idPrimRep fvs) + && all isFollowableArg (map idCgRep fvs) && isUpdatable upd_flag && arity <= mAX_SPEC_AP_SIZE @@ -370,17 +366,15 @@ mkRhsClosure bndr cc bi srt lf_info = mkApLFInfo bndr upd_flag arity -- the payload has to be in the correct order, hence we can't -- just use the fvs. - payload = StgVarArg fun_id : args - arity = length fvs + payload = StgVarArg fun_id : args + arity = length fvs \end{code} The default case ~~~~~~~~~~~~~~~~ \begin{code} mkRhsClosure bndr cc bi srt fvs upd_flag args body - = cgRhsClosure bndr cc bi srt fvs args body lf_info - where - lf_info = mkClosureLFInfo bndr NotTopLevel fvs upd_flag args + = cgRhsClosure bndr cc bi srt fvs upd_flag args body \end{code} @@ -392,20 +386,19 @@ mkRhsClosure bndr cc bi srt fvs upd_flag args body \begin{code} cgLetNoEscapeBindings live_in_rhss rhs_eob_info maybe_cc_slot (StgNonRec binder rhs) - = cgLetNoEscapeRhs live_in_rhss rhs_eob_info maybe_cc_slot - NonRecursive binder rhs - `thenFC` \ (binder, info) -> - addBindC binder info + = do { (binder,info) <- cgLetNoEscapeRhs live_in_rhss rhs_eob_info + maybe_cc_slot + NonRecursive binder rhs + ; addBindC binder info } cgLetNoEscapeBindings live_in_rhss rhs_eob_info maybe_cc_slot (StgRec pairs) - = fixC (\ new_bindings -> - addBindsC new_bindings `thenC` - listFCs [ cgLetNoEscapeRhs full_live_in_rhss + = do { new_bindings <- fixC (\ new_bindings -> do + { addBindsC new_bindings + ; listFCs [ cgLetNoEscapeRhs full_live_in_rhss rhs_eob_info maybe_cc_slot Recursive b e - | (b,e) <- pairs ] - ) `thenFC` \ new_bindings -> + | (b,e) <- pairs ] }) - addBindsC new_bindings + ; addBindsC new_bindings } where -- We add the binders to the live-in-rhss set so that we don't -- delete the bindings for the binder from the environment! @@ -443,41 +436,15 @@ cgLetNoEscapeRhs full_live_in_rhss rhs_eob_info maybe_cc_slot rec binder Little helper for primitives that return unboxed tuples. 
- \begin{code} -primRetUnboxedTuple :: StgOp -> [StgArg] -> Type -> Code -primRetUnboxedTuple op args res_ty - = getArgAmodes args `thenFC` \ arg_amodes1 -> - {- - For a foreign call, we might need to fiddle with some of the args: - for example, when passing a ByteArray#, we pass a ptr to the goods - rather than the heap object. - -} - let - arg_amodes - | StgFCallOp{} <- op = zipWith shimFCallArg args arg_amodes1 - | otherwise = arg_amodes1 - in - {- - put all the arguments in temporaries so they don't get stomped when - we push the return address. - -} - let - n_args = length args - arg_uniqs = map mkBuiltinUnique [0 .. n_args-1] - arg_reps = map getAmodeRep arg_amodes - arg_temps = zipWith CTemp arg_uniqs arg_reps - in - absC (mkAbstractCs (zipWith CAssign arg_temps arg_amodes)) `thenC` - {- - allocate some temporaries for the return values. - -} - let - ty_args = tyConAppArgs (repType res_ty) - prim_reps = map typePrimRep ty_args - temp_uniqs = map mkBuiltinUnique [ n_args .. n_args + length ty_args - 1] - temp_amodes = zipWith CTemp temp_uniqs prim_reps - in - ccallReturnUnboxedTuple temp_amodes - (absC (COpStmt temp_amodes op arg_temps [])) +newUnboxedTupleRegs :: Type -> FCode ([CgRep], [CmmReg], [MachHint]) +newUnboxedTupleRegs res_ty = + let + ty_args = tyConAppArgs (repType res_ty) + (reps,hints) = unzip [ (rep, typeHint ty) | ty <- ty_args, + let rep = typeCgRep ty, + nonVoidArg rep ] + in do + regs <- mapM (newTemp . argMachRep) reps + return (reps,regs,hints) \end{code} diff --git a/ghc/compiler/codeGen/CgForeignCall.hs b/ghc/compiler/codeGen/CgForeignCall.hs new file mode 100644 index 0000000000..9a8ef9e0c4 --- /dev/null +++ b/ghc/compiler/codeGen/CgForeignCall.hs @@ -0,0 +1,216 @@ +----------------------------------------------------------------------------- +-- +-- Code generation for foreign calls. 
+-- +-- (c) The University of Glasgow 2004 +-- +----------------------------------------------------------------------------- + +module CgForeignCall ( + emitForeignCall, + cgForeignCall, + shimForeignCallArg, + emitSaveThreadState, -- will be needed by the Cmm parser + emitLoadThreadState, -- ditto + emitCloseNursery, + emitOpenNursery, + ) where + +#include "HsVersions.h" + +import StgSyn ( StgLiveVars, StgArg, stgArgType ) +import CgProf ( curCCS, curCCSAddr ) +import CgBindery ( getVolatileRegs, getArgAmodes ) +import CgMonad +import CgUtils ( cmmOffsetW, cmmOffsetB, cmmLoadIndexW, newTemp ) +import Type ( tyConAppTyCon, repType ) +import TysPrim +import CLabel ( mkForeignLabel, mkRtsCodeLabel ) +import Cmm +import CmmUtils +import MachOp +import SMRep +import ForeignCall +import Constants +import CmdLineOpts ( opt_SccProfilingOn ) +import Outputable + +import Monad ( when ) + +-- ----------------------------------------------------------------------------- +-- Code generation for Foreign Calls + +cgForeignCall + :: [(CmmReg,MachHint)] -- where to put the results + -> ForeignCall -- the op + -> [StgArg] -- arguments + -> StgLiveVars -- live vars, in case we need to save them + -> Code +cgForeignCall results fcall stg_args live + = do + reps_n_amodes <- getArgAmodes stg_args + let + -- Get the *non-void* args, and jiggle them with shimForeignCall + arg_exprs = [ shimForeignCallArg stg_arg expr + | (stg_arg, (rep,expr)) <- stg_args `zip` reps_n_amodes, + nonVoidArg rep] + + arg_hints = zip arg_exprs (map (typeHint.stgArgType) stg_args) + -- in + emitForeignCall results fcall arg_hints live + + +emitForeignCall + :: [(CmmReg,MachHint)] -- where to put the results + -> ForeignCall -- the op + -> [(CmmExpr,MachHint)] -- arguments + -> StgLiveVars -- live vars, in case we need to save them + -> Code + +emitForeignCall results (CCall (CCallSpec target cconv safety)) args live + | not (playSafe safety) + = do + vols <- getVolatileRegs live + stmtC (the_call vols) + + | 
otherwise -- it's a safe foreign call + = do + vols <- getVolatileRegs live + id <- newTemp wordRep + emitSaveThreadState + stmtC (CmmCall (CmmForeignCall suspendThread CCallConv) [(id,NoHint)] + [ (CmmReg (CmmGlobal BaseReg), NoHint) ] + Nothing{-save all; ToDo-} + ) + stmtC (the_call vols) + stmtC (CmmCall (CmmForeignCall resumeThread CCallConv) [] + [ (CmmReg id, NoHint) ] (Just vols) + ) + emitLoadThreadState + + where + (call_args, cmm_target) + = case target of + StaticTarget lbl -> (args, CmmLit (CmmLabel + (mkForeignLabel lbl Nothing False))) + -- ToDo: what about the size here? + -- it is currently tacked on by the NCG. + DynamicTarget -> case args of (fn,_):rest -> (rest, fn) + + the_call vols = CmmCall (CmmForeignCall cmm_target cconv) + results call_args (Just vols) + + +emitForeignCall results (DNCall _) args live + = panic "emitForeignCall: DNCall" + +suspendThread = CmmLit (CmmLabel (mkRtsCodeLabel SLIT("suspendThread"))) +resumeThread = CmmLit (CmmLabel (mkRtsCodeLabel SLIT("resumeThread"))) + +-- ----------------------------------------------------------------------------- +-- Save/restore the thread state in the TSO + +-- This stuff can't be done in suspendThread/resumeThread, because it +-- refers to global registers which aren't available in the C world. 
+ +emitSaveThreadState = do + -- CurrentTSO->sp = Sp; + stmtC $ CmmStore (cmmOffset stgCurrentTSO tso_SP) stgSp + emitCloseNursery + -- and save the current cost centre stack in the TSO when profiling: + when opt_SccProfilingOn $ + stmtC (CmmStore (cmmOffset stgCurrentTSO tso_CCCS) curCCS) + + -- CurrentNursery->free = Hp+1; +emitCloseNursery = stmtC $ CmmStore nursery_bdescr_free (cmmOffsetW stgHp 1) + +emitLoadThreadState = do + tso <- newTemp wordRep + stmtsC [ + -- tso = CurrentTSO; + CmmAssign tso stgCurrentTSO, + -- Sp = tso->sp; + CmmAssign sp (CmmLoad (cmmOffset (CmmReg tso) tso_SP) + wordRep), + -- SpLim = tso->stack + RESERVED_STACK_WORDS; + CmmAssign spLim (cmmOffsetW (cmmOffset (CmmReg tso) tso_STACK) + rESERVED_STACK_WORDS) + ] + emitOpenNursery + -- and load the current cost centre stack from the TSO when profiling: + when opt_SccProfilingOn $ + stmtC (CmmStore curCCSAddr + (CmmLoad (cmmOffset (CmmReg tso) tso_CCCS) wordRep)) + +emitOpenNursery = stmtsC [ + -- Hp = CurrentNursery->free - 1; + CmmAssign hp (cmmOffsetW (CmmLoad nursery_bdescr_free wordRep) (-1)), + + -- HpLim = CurrentNursery->start + + -- CurrentNursery->blocks*BLOCK_SIZE_W - 1; + CmmAssign hpLim + (cmmOffsetExpr + (CmmLoad nursery_bdescr_start wordRep) + (cmmOffset + (CmmMachOp mo_wordMul [ + CmmMachOp (MO_S_Conv I32 wordRep) + [CmmLoad nursery_bdescr_blocks I32], + CmmLit (mkIntCLit bLOCK_SIZE) + ]) + (-1) + ) + ) + ] + + +nursery_bdescr_free = cmmOffset stgCurrentNursery oFFSET_bdescr_free +nursery_bdescr_start = cmmOffset stgCurrentNursery oFFSET_bdescr_start +nursery_bdescr_blocks = cmmOffset stgCurrentNursery oFFSET_bdescr_blocks + +tso_SP = tsoFieldB oFFSET_StgTSO_sp +tso_STACK = tsoFieldB oFFSET_StgTSO_stack +tso_CCCS = tsoProfFieldB oFFSET_StgTSO_CCCS + +-- The TSO struct has a variable header, and an optional StgTSOProfInfo in +-- the middle. The fields we're interested in are after the StgTSOProfInfo. 
+tsoFieldB :: ByteOff -> ByteOff +tsoFieldB off + | opt_SccProfilingOn = off + sIZEOF_StgTSOProfInfo + fixedHdrSize * wORD_SIZE + | otherwise = off + fixedHdrSize * wORD_SIZE + +tsoProfFieldB :: ByteOff -> ByteOff +tsoProfFieldB off = off + fixedHdrSize * wORD_SIZE + +stgSp = CmmReg sp +stgHp = CmmReg hp +stgCurrentTSO = CmmReg currentTSO +stgCurrentNursery = CmmReg currentNursery + +sp = CmmGlobal Sp +spLim = CmmGlobal SpLim +hp = CmmGlobal Hp +hpLim = CmmGlobal HpLim +currentTSO = CmmGlobal CurrentTSO +currentNursery = CmmGlobal CurrentNursery + +-- ----------------------------------------------------------------------------- +-- For certain types passed to foreign calls, we adjust the actual +-- value passed to the call. Two main cases: for ForeignObj# we pass +-- the pointer inside the ForeignObj# closure, and for ByteArray#/Array# we +-- pass the address of the actual array, not the address of the heap object. + +shimForeignCallArg :: StgArg -> CmmExpr -> CmmExpr +shimForeignCallArg arg expr + | tycon == foreignObjPrimTyCon + = cmmLoadIndexW expr fixedHdrSize + + | tycon == arrayPrimTyCon || tycon == mutableArrayPrimTyCon + = cmmOffsetB expr arrPtrsHdrSize + + | tycon == byteArrayPrimTyCon || tycon == mutableByteArrayPrimTyCon + = cmmOffsetB expr arrWordsHdrSize + + | otherwise = expr + where + -- should be a tycon app, since this is a foreign call + tycon = tyConAppTyCon (repType (stgArgType arg)) diff --git a/ghc/compiler/codeGen/CgHeapery.lhs b/ghc/compiler/codeGen/CgHeapery.lhs index 2329dcb6d2..6abffe72dc 100644 --- a/ghc/compiler/codeGen/CgHeapery.lhs +++ b/ghc/compiler/codeGen/CgHeapery.lhs @@ -1,43 +1,234 @@ % % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 % -% $Id: CgHeapery.lhs,v 1.39 2003/07/28 16:05:35 simonmar Exp $ +% $Id: CgHeapery.lhs,v 1.40 2004/08/13 13:06:00 simonmar Exp $ % \section[CgHeapery]{Heap management functions} \begin{code} module CgHeapery ( - funEntryChecks, altHeapCheck, unbxTupleHeapCheck, thunkChecks, - 
allocDynClosure, + initHeapUsage, getVirtHp, setVirtHp, setRealHp, + getHpRelOffset, hpRel, - -- new functions, basically inserting macro calls into Code -- HWL - ,fetchAndReschedule, yield + funEntryChecks, thunkEntryChecks, + altHeapCheck, unbxTupleHeapCheck, + hpChkGen, hpChkNodePointsAssignSp0, + stkChkGen, stkChkNodePoints, + + layOutDynConstr, layOutStaticConstr, + mkVirtHeapOffsets, mkStaticClosureFields, mkStaticClosure, + + allocDynClosure, emitSetDynHdr ) where #include "HsVersions.h" -import AbsCSyn +import Constants ( mIN_UPD_SIZE ) import StgSyn ( AltType(..) ) -import CLabel +import CLabel ( CLabel, mkRtsCodeLabel ) +import CgUtils ( mkWordCLit, cmmRegOffW, cmmOffsetW, + cmmOffsetExprB ) import CgMonad -import CgStackery ( getFinalStackHW ) -import AbsCUtils ( mkAbstractCs, getAmodeRep ) -import CgUsages ( getVirtAndRealHp, getRealSp, setVirtHp, setRealHp, - initHeapUsage - ) -import CgRetConv ( dataReturnConvPrim ) -import ClosureInfo ( closureSize, closureGoodStuffSize, - slopSize, allocProfilingMsg, ClosureInfo - ) +import CgProf ( staticProfHdr, profDynAlloc, dynProfHdr ) +import CgTicky ( staticTickyHdr, tickyDynAlloc, tickyAllocHeap ) +import CgParallel ( staticGranHdr, staticParHdr, doGranAllocate ) +import CgStackery ( getFinalStackHW, getRealSp ) +import CgCallConv ( mkRegLiveness ) +import ClosureInfo ( closureSize, closureUpdReqd, + staticClosureNeedsLink, + mkConInfo, + infoTableLabelFromCI, closureLabelFromCI, + nodeMustPointToIt, closureLFInfo, + ClosureInfo ) +import SMRep ( CgRep(..), cgRepSizeW, separateByPtrFollowness, + WordOff, fixedHdrSize, isVoidArg, primRepToCgRep ) + +import Cmm ( CmmLit(..), CmmStmt(..), CmmExpr(..), GlobalReg(..), + CmmReg(..), hpReg, nodeReg, spReg ) +import MachOp ( mo_wordULt, mo_wordUGt, mo_wordSub ) +import CmmUtils ( mkIntCLit, CmmStmts, noStmts, oneStmt, plusStmts, + mkStmts ) +import Id ( Id ) +import DataCon ( DataCon ) import TyCon ( tyConPrimRep ) -import PrimRep ( PrimRep(..), isFollowableRep ) 
-import CmdLineOpts ( opt_GranMacros ) +import CostCentre ( CostCentreStack ) +import Util ( mapAccumL, filterOut ) +import Constants ( wORD_SIZE ) import Outputable -#ifdef DEBUG -import PprAbsC ( pprMagicId ) -#endif import GLAEXTS + +\end{code} + + +%************************************************************************ +%* * +\subsection[CgUsages-heapery]{Monad things for fiddling with heap usage} +%* * +%************************************************************************ + +The heap always grows upwards, so hpRel is easy + +\begin{code} +hpRel :: VirtualHpOffset -- virtual offset of Hp + -> VirtualHpOffset -- virtual offset of The Thing + -> WordOff -- integer word offset +hpRel hp off = off - hp +\end{code} + +@initHeapUsage@ applies a function to the amount of heap that it uses. +It initialises the heap usage to zeros, and passes on an unchanged +heap usage. + +It is usually a prelude to performing a GC check, so everything must +be in a tidy and consistent state. + +rje: Note the slightly suble fixed point behaviour needed here + +\begin{code} +initHeapUsage :: (VirtualHpOffset -> Code) -> Code +initHeapUsage fcode + = do { orig_hp_usage <- getHpUsage + ; setHpUsage initHpUsage + ; fixC (\heap_usage2 -> do + { fcode (heapHWM heap_usage2) + ; getHpUsage }) + ; setHpUsage orig_hp_usage } + +setVirtHp :: VirtualHpOffset -> Code +setVirtHp new_virtHp + = do { hp_usage <- getHpUsage + ; setHpUsage (hp_usage {virtHp = new_virtHp}) } + +getVirtHp :: FCode VirtualHpOffset +getVirtHp + = do { hp_usage <- getHpUsage + ; return (virtHp hp_usage) } + +setRealHp :: VirtualHpOffset -> Code +setRealHp new_realHp + = do { hp_usage <- getHpUsage + ; setHpUsage (hp_usage {realHp = new_realHp}) } + +getHpRelOffset :: VirtualHpOffset -> FCode CmmExpr +getHpRelOffset virtual_offset + = do { hp_usg <- getHpUsage + ; return (cmmRegOffW hpReg (hpRel (realHp hp_usg) virtual_offset)) } +\end{code} + + +%************************************************************************ 
+%* * + Layout of heap objects +%* * +%************************************************************************ + +\begin{code} +layOutDynConstr, layOutStaticConstr + :: DataCon + -> [(CgRep,a)] + -> (ClosureInfo, + [(a,VirtualHpOffset)]) + +layOutDynConstr = layOutConstr False +layOutStaticConstr = layOutConstr True + +layOutConstr is_static data_con args + = (mkConInfo is_static data_con tot_wds ptr_wds, + things_w_offsets) + where + (tot_wds, -- #ptr_wds + #nonptr_wds + ptr_wds, -- #ptr_wds + things_w_offsets) = mkVirtHeapOffsets args +\end{code} + +@mkVirtHeapOffsets@ always returns boxed things with smaller offsets +than the unboxed things, and furthermore, the offsets in the result +list + +\begin{code} +mkVirtHeapOffsets + :: [(CgRep,a)] -- Things to make offsets for + -> (WordOff, -- *Total* number of words allocated + WordOff, -- Number of words allocated for *pointers* + [(a, VirtualHpOffset)]) + -- Things with their offsets from start of + -- object in order of increasing offset + +-- First in list gets lowest offset, which is initial offset + 1. + +mkVirtHeapOffsets things + = let non_void_things = filterOut (isVoidArg . fst) things + (ptrs, non_ptrs) = separateByPtrFollowness non_void_things + (wds_of_ptrs, ptrs_w_offsets) = mapAccumL computeOffset 0 ptrs + (tot_wds, non_ptrs_w_offsets) = mapAccumL computeOffset wds_of_ptrs non_ptrs + in + (tot_wds, wds_of_ptrs, ptrs_w_offsets ++ non_ptrs_w_offsets) + where + computeOffset wds_so_far (rep, thing) + = (wds_so_far + cgRepSizeW rep, (thing, fixedHdrSize + wds_so_far)) +\end{code} + + +%************************************************************************ +%* * + Lay out a static closure +%* * +%************************************************************************ + +Make a static closure, adding on any extra padding needed for CAFs, +and adding a static link field if necessary. 
+ +\begin{code} +mkStaticClosureFields + :: ClosureInfo + -> CostCentreStack + -> Bool -- Has CAF refs + -> [CmmLit] -- Payload + -> [CmmLit] -- The full closure +mkStaticClosureFields cl_info ccs caf_refs payload + = mkStaticClosure info_lbl ccs payload padding_wds static_link_field + where + info_lbl = infoTableLabelFromCI cl_info + + upd_reqd = closureUpdReqd cl_info + + -- for the purposes of laying out the static closure, we consider all + -- thunks to be "updatable", so that the static link field is always + -- in the same place. + padding_wds + | not upd_reqd = [] + | otherwise = replicate n (mkIntCLit 0) -- a bunch of 0s + where n = max 0 (mIN_UPD_SIZE - length payload) + + -- We always have a static link field for a thunk, it's used to + -- save the closure's info pointer when we're reverting CAFs + -- (see comment in Storage.c) + static_link_field + | upd_reqd || staticClosureNeedsLink cl_info = [static_link_value] + | otherwise = [] + + -- for a static constructor which has NoCafRefs, we set the + -- static link field to a non-zero value so the garbage + -- collector will ignore it. + static_link_value + | caf_refs = mkIntCLit 0 + | otherwise = mkIntCLit 1 + +mkStaticClosure :: CLabel -> CostCentreStack -> [CmmLit] + -> [CmmLit] -> [CmmLit] -> [CmmLit] +mkStaticClosure info_lbl ccs payload padding_wds static_link_field + = [CmmLabel info_lbl] + ++ variable_header_words + ++ payload + ++ padding_wds + ++ static_link_field + where + variable_header_words + = staticGranHdr + ++ staticParHdr + ++ staticProfHdr ccs + ++ staticTickyHdr \end{code} %************************************************************************ @@ -54,86 +245,53 @@ beginning of every slow entry code in order to simulate the fetching of closures. If fetching is necessary (i.e. current closure is not local) then an automatic context switch is done. 
------------------------------------------------------------------------------ +-------------------------------------------------------------- A heap/stack check at a function or thunk entry point. \begin{code} -funEntryChecks :: Maybe CLabel -> AbstractC -> Code -> Code -funEntryChecks closure_lbl reg_save_code code - = hpStkCheck closure_lbl True reg_save_code code - -thunkChecks :: Maybe CLabel -> Code -> Code -thunkChecks closure_lbl code - = hpStkCheck closure_lbl False AbsCNop code - -hpStkCheck - :: Maybe CLabel -- function closure - -> Bool -- is a function? (not a thunk) - -> AbstractC -- register saves - -> Code - -> Code - -hpStkCheck closure_lbl is_fun reg_save_code code - = getFinalStackHW (\ spHw -> - getRealSp `thenFC` \ sp -> - let stk_words = spHw - sp in - initHeapUsage (\ hHw -> - - getTickyCtrLabel `thenFC` \ ticky_ctr -> - - absC (checking_code stk_words hHw ticky_ctr) `thenC` - - setRealHp hHw `thenC` - code)) - +funEntryChecks :: ClosureInfo -> CmmStmts -> Code -> Code +funEntryChecks cl_info reg_save_code code + = hpStkCheck cl_info True reg_save_code code + +thunkEntryChecks :: ClosureInfo -> Code -> Code +thunkEntryChecks cl_info code + = hpStkCheck cl_info False noStmts code + +hpStkCheck :: ClosureInfo -- Function closure + -> Bool -- Is a function? 
(not a thunk) + -> CmmStmts -- Register saves + -> Code + -> Code + +hpStkCheck cl_info is_fun reg_save_code code + = getFinalStackHW $ \ spHw -> do + { sp <- getRealSp + ; let stk_words = spHw - sp + ; initHeapUsage $ \ hpHw -> do + { -- Emit heap checks, but be sure to do it lazily so + -- that the conditionals on hpHw don't cause a black hole + codeOnly $ do + { do_checks stk_words hpHw full_save_code rts_label + ; tickyAllocHeap hpHw } + ; setRealHp hpHw + ; code } + } where - node_asst - | Just lbl <- closure_lbl = CAssign nodeReg (CLbl lbl PtrRep) - | otherwise = AbsCNop - - save_code = mkAbstractCs [node_asst, reg_save_code] - - checking_code stk hp ctr - = mkAbstractCs - [ if is_fun - then do_checks_fun stk hp save_code - else do_checks_np stk hp save_code, - if hp == 0 - then AbsCNop - else profCtrAbsC FSLIT("TICK_ALLOC_HEAP") - [ mkIntCLit hp, CLbl ctr DataPtrRep ] - ] - - --- For functions: - -do_checks_fun - :: Int -- stack headroom - -> Int -- heap headroom - -> AbstractC -- assignments to perform on failure - -> AbstractC -do_checks_fun 0 0 _ = AbsCNop -do_checks_fun 0 hp_words assts = - CCheck HP_CHK_FUN [ mkIntCLit hp_words ] assts -do_checks_fun stk_words 0 assts = - CCheck STK_CHK_FUN [ mkIntCLit stk_words ] assts -do_checks_fun stk_words hp_words assts = - CCheck HP_STK_CHK_FUN [ mkIntCLit stk_words, mkIntCLit hp_words ] assts - --- For thunks: - -do_checks_np - :: Int -- stack headroom - -> Int -- heap headroom - -> AbstractC -- assignments to perform on failure - -> AbstractC -do_checks_np 0 0 _ = AbsCNop -do_checks_np 0 hp_words assts = - CCheck HP_CHK_NP [ mkIntCLit hp_words ] assts -do_checks_np stk_words 0 assts = - CCheck STK_CHK_NP [ mkIntCLit stk_words ] assts -do_checks_np stk_words hp_words assts = - CCheck HP_STK_CHK_NP [ mkIntCLit stk_words, mkIntCLit hp_words ] assts + node_asst + | nodeMustPointToIt (closureLFInfo cl_info) + = noStmts + | otherwise + = oneStmt (CmmAssign nodeReg (CmmLit (CmmLabel closure_lbl))) + closure_lbl = 
closureLabelFromCI cl_info + + full_save_code = node_asst `plusStmts` reg_save_code + + rts_label | is_fun = CmmReg (CmmGlobal GCFun) + -- Function entry point + | otherwise = CmmReg (CmmGlobal GCEnter1) + -- Thunk or case return + -- In the thunk/case-return case, R1 points to a closure + -- which should be (re)-entered after GC \end{code} Heap checks in a case alternative are nice and easy, provided this is @@ -153,12 +311,6 @@ For primitive returns, we have an unlifted value in some register (either R1 or FloatReg1 or DblReg1). This means using specialised heap-check code for these cases. -For unboxed tuple returns, there are an arbitrary number of possibly -unboxed return values, some of which will be in registers, and the -others will be on the stack. We always organise the stack-resident -fields into pointers & non-pointers, and pass the number of each to -the heap check code. - \begin{code} altHeapCheck :: AltType -- PolyAlt, PrimAlt, AlgAlt, but *not* UbxTupAlt @@ -166,150 +318,183 @@ altHeapCheck -> Code -- Continuation -> Code altHeapCheck alt_type code - = initHeapUsage (\ hHw -> - do_heap_chk hHw `thenC` - setRealHp hHw `thenC` - code) + = initHeapUsage $ \ hpHw -> do + { codeOnly $ do + { do_checks 0 {- no stack chk -} hpHw + noStmts {- nothign to save -} + (rts_label alt_type) + ; tickyAllocHeap hpHw } + ; setRealHp hpHw + ; code } where - do_heap_chk :: HeapOffset -> Code - do_heap_chk words_required - = getTickyCtrLabel `thenFC` \ ctr -> - absC ( -- NB The conditional is inside the absC, - -- so the monadic stuff doesn't depend on - -- the value of words_required! - if words_required == 0 - then AbsCNop - else mkAbstractCs - [ CCheck (checking_code alt_type) - [mkIntCLit words_required] AbsCNop, - profCtrAbsC FSLIT("TICK_ALLOC_HEAP") - [ mkIntCLit words_required, CLbl ctr DataPtrRep ] - ]) - - checking_code PolyAlt - = HP_CHK_UNPT_R1 -- Do *not* enter R1 after a heap check in - -- a polymorphic case. 
It might be a function - -- and the entry code for a function (currently) - -- applies it - -- - -- However R1 is guaranteed to be a pointer - - checking_code (AlgAlt tc) - = HP_CHK_NP -- Enter R1 after the heap check; it's a pointer - -- The "NP" is short for "Node (R1) Points to it" + rts_label PolyAlt = CmmLit (CmmLabel (mkRtsCodeLabel SLIT( "stg_gc_unpt_r1"))) + -- Do *not* enter R1 after a heap check in + -- a polymorphic case. It might be a function + -- and the entry code for a function (currently) + -- applies it + -- + -- However R1 is guaranteed to be a pointer + + rts_label (AlgAlt tc) = stg_gc_enter1 + -- Enter R1 after the heap check; it's a pointer - checking_code (PrimAlt tc) - = case dataReturnConvPrim (tyConPrimRep tc) of - VoidReg -> HP_CHK_NOREGS - FloatReg 1# -> HP_CHK_F1 - DoubleReg 1# -> HP_CHK_D1 - LongReg _ 1# -> HP_CHK_L1 - VanillaReg rep 1# - | isFollowableRep rep -> HP_CHK_UNPT_R1 -- R1 is boxed but unlifted: - | otherwise -> HP_CHK_UNBX_R1 -- R1 is unboxed -#ifdef DEBUG - other_reg -> pprPanic "CgHeapery.altHeapCheck" (ppr tc <+> pprMagicId other_reg) -#endif - --- Unboxed tuple alternatives and let-no-escapes (the two most annoying --- constructs to generate code for!): + rts_label (PrimAlt tc) + = CmmLit $ CmmLabel $ + case primRepToCgRep (tyConPrimRep tc) of + VoidArg -> mkRtsCodeLabel SLIT( "stg_gc_noregs") + FloatArg -> mkRtsCodeLabel SLIT( "stg_gc_f1") + DoubleArg -> mkRtsCodeLabel SLIT( "stg_gc_d1") + LongArg -> mkRtsCodeLabel SLIT( "stg_gc_l1") + -- R1 is boxed but unlifted: + PtrArg -> mkRtsCodeLabel SLIT( "stg_gc_unpt_r1") + -- R1 is unboxed: + NonPtrArg -> mkRtsCodeLabel SLIT( "stg_gc_unbx_r1") + + rts_label (UbxTupAlt _) = panic "altHeapCheck" +\end{code} + +Unboxed tuple alternatives and let-no-escapes (the two most annoying +constructs to generate code for!) For unboxed tuple returns, there +are an arbitrary number of possibly unboxed return values, some of +which will be in registers, and the others will be on the stack. 
We +always organise the stack-resident fields into pointers & +non-pointers, and pass the number of each to the heap check code. + +\begin{code} unbxTupleHeapCheck - :: [MagicId] -- live registers - -> Int -- no. of stack slots containing ptrs - -> Int -- no. of stack slots containing nonptrs - -> AbstractC -- code to insert in the failure path + :: [(Id, GlobalReg)] -- Live registers + -> WordOff -- no. of stack slots containing ptrs + -> WordOff -- no. of stack slots containing nonptrs + -> CmmStmts -- code to insert in the failure path -> Code -> Code unbxTupleHeapCheck regs ptrs nptrs fail_code code - -- we can't manage more than 255 pointers/non-pointers in a generic - -- heap check. + -- We can't manage more than 255 pointers/non-pointers + -- in a generic heap check. | ptrs > 255 || nptrs > 255 = panic "altHeapCheck" - | otherwise = initHeapUsage (\ hHw -> do_heap_chk hHw `thenC` code) + | otherwise + = initHeapUsage $ \ hpHw -> do + { codeOnly $ do { do_checks 0 {- no stack check -} hpHw + full_fail_code rts_label + ; tickyAllocHeap hpHw } + ; setRealHp hpHw + ; code } where - do_heap_chk words_required - = getTickyCtrLabel `thenFC` \ ctr -> - absC ( if words_required == 0 - then AbsCNop - else mkAbstractCs - [ checking_code words_required, - profCtrAbsC FSLIT("TICK_ALLOC_HEAP") - [ mkIntCLit words_required, CLbl ctr DataPtrRep ] - ] - ) `thenC` - setRealHp words_required - - liveness = I# (word2Int# (mkRegLiveness regs ptrs nptrs)) - checking_code words_required = CCheck HP_CHK_UNBX_TUPLE - [mkIntCLit words_required, - mkIntCLit liveness] - fail_code - --- build up a bitmap of the live pointer registers - -#if __GLASGOW_HASKELL__ >= 503 -shiftL = uncheckedShiftL# -#else -shiftL = shiftL# -#endif - -mkRegLiveness :: [MagicId] -> Int -> Int -> Word# -mkRegLiveness [] (I# ptrs) (I# nptrs) = - (int2Word# nptrs `shiftL` 16#) `or#` (int2Word# ptrs `shiftL` 24#) -mkRegLiveness (VanillaReg rep i : regs) ptrs nptrs | isFollowableRep rep - = ((int2Word# 1#) `shiftL` 
(i -# 1#)) `or#` mkRegLiveness regs ptrs nptrs -mkRegLiveness (_ : regs) ptrs nptrs = mkRegLiveness regs ptrs nptrs - --- The two functions below are only used in a GranSim setup --- Emit macro for simulating a fetch and then reschedule - -fetchAndReschedule :: [MagicId] -- Live registers - -> Bool -- Node reqd? - -> Code - -fetchAndReschedule regs node_reqd = - if (node `elem` regs || node_reqd) - then fetch_code `thenC` reschedule_code - else absC AbsCNop - where - liveness_mask = mkRegLiveness regs 0 0 - reschedule_code = absC (CMacroStmt GRAN_RESCHEDULE [ - mkIntCLit (I# (word2Int# liveness_mask)), - mkIntCLit (if node_reqd then 1 else 0)]) - - --HWL: generate GRAN_FETCH macro for GrAnSim - -- currently GRAN_FETCH and GRAN_FETCH_AND_RESCHEDULE are miai - fetch_code = absC (CMacroStmt GRAN_FETCH []) + full_fail_code = fail_code `plusStmts` oneStmt assign_liveness + assign_liveness = CmmAssign (CmmGlobal (VanillaReg 9)) -- Ho ho ho! + (CmmLit (mkWordCLit liveness)) + liveness = mkRegLiveness regs ptrs nptrs + rts_label = CmmLit (CmmLabel (mkRtsCodeLabel SLIT("stg_gc_ut"))) + \end{code} -The @GRAN_YIELD@ macro is taken from JSM's code for Concurrent Haskell. It -allows to context-switch at places where @node@ is not alive (it uses the -@Continue@ rather than the @EnterNodeCode@ function in the RTS). We emit -this kind of macro at the beginning of the following kinds of basic bocks: -\begin{itemize} - \item Slow entry code where node is not alive (see @CgClosure.lhs@). Normally - we use @fetchAndReschedule@ at a slow entry code. - \item Fast entry code (see @CgClosure.lhs@). - \item Alternatives in case expressions (@CLabelledCode@ structures), provided - that they are not inlined (see @CgCases.lhs@). These alternatives will - be turned into separate functions. -\end{itemize} + +%************************************************************************ +%* * + Heap/Stack Checks. 
+%* * +%************************************************************************ + +When failing a check, we save a return address on the stack and +jump to a pre-compiled code fragment that saves the live registers +and returns to the scheduler. + +The return address in most cases will be the beginning of the basic +block in which the check resides, since we need to perform the check +again on re-entry because someone else might have stolen the resource +in the meantime. \begin{code} -yield :: [MagicId] -- Live registers - -> Bool -- Node reqd? - -> Code - -yield regs node_reqd = - if opt_GranMacros && node_reqd - then yield_code - else absC AbsCNop - where - liveness_mask = mkRegLiveness regs 0 0 - yield_code = - absC (CMacroStmt GRAN_YIELD - [mkIntCLit (I# (word2Int# liveness_mask))]) +do_checks :: WordOff -- Stack headroom + -> WordOff -- Heap headroom + -> CmmStmts -- Assignments to perform on failure + -> CmmExpr -- Rts address to jump to on failure + -> Code +do_checks 0 0 _ _ = nopC +do_checks stk hp reg_save_code rts_lbl + = do_checks' (CmmLit (mkIntCLit (stk*wORD_SIZE))) + (CmmLit (mkIntCLit (hp*wORD_SIZE))) + (stk /= 0) (hp /= 0) reg_save_code rts_lbl + +-- The offsets are now in *bytes* +do_checks' stk_expr hp_expr stk_nonzero hp_nonzero reg_save_code rts_lbl + = do { doGranAllocate hp_expr + + -- Emit a block for the heap-check-failure code + ; blk_id <- forkLabelledCode $ do + { whenC hp_nonzero $ + stmtC (CmmAssign (CmmGlobal HpAlloc) hp_expr) + ; emitStmts reg_save_code + ; stmtC (CmmJump rts_lbl []) } + + -- Check for stack overflow *FIRST*; otherwise + -- we might bumping Hp and then failing stack oflo + ; whenC stk_nonzero + (stmtC (CmmCondBranch stk_oflo blk_id)) + + ; whenC hp_nonzero + (stmtsC [CmmAssign hpReg + (cmmOffsetExprB (CmmReg hpReg) hp_expr), + CmmCondBranch hp_oflo blk_id]) + -- Bump heap pointer, and test for heap exhaustion + -- Note that we don't move the heap pointer unless the + -- stack check succeeds. 
Otherwise we might end up + -- with slop at the end of the current block, which can + -- confuse the LDV profiler. + } + where + -- Stk overflow if (Sp - stk_bytes < SpLim) + stk_oflo = CmmMachOp mo_wordULt + [CmmMachOp mo_wordSub [CmmReg spReg, stk_expr], + CmmReg (CmmGlobal SpLim)] + + -- Hp overflow if (Hpp > HpLim) + -- (Hp has been incremented by now) + -- HpLim points to the LAST WORD of valid allocation space. + hp_oflo = CmmMachOp mo_wordUGt + [CmmReg hpReg, CmmReg (CmmGlobal HpLim)] +\end{code} + +%************************************************************************ +%* * + Generic Heap/Stack Checks - used in the RTS +%* * +%************************************************************************ + +\begin{code} +hpChkGen :: CmmExpr -> CmmExpr -> CmmExpr -> Code +hpChkGen bytes liveness reentry + = do_checks' (CmmLit (mkIntCLit 0)) bytes False True assigns stg_gc_gen + where + assigns = mkStmts [ + CmmAssign (CmmGlobal (VanillaReg 9)) liveness, + CmmAssign (CmmGlobal (VanillaReg 10)) reentry + ] + +-- a heap check where R1 points to the closure to enter on return, and +-- we want to assign to Sp[0] on failure (used in AutoApply.cmm:BUILD_PAP). 
+hpChkNodePointsAssignSp0 :: CmmExpr -> CmmExpr -> Code +hpChkNodePointsAssignSp0 bytes sp0 + = do_checks' (CmmLit (mkIntCLit 0)) bytes False True assign stg_gc_enter1 + where assign = oneStmt (CmmStore (CmmReg spReg) sp0) + +stkChkGen :: CmmExpr -> CmmExpr -> CmmExpr -> Code +stkChkGen bytes liveness reentry + = do_checks' bytes (CmmLit (mkIntCLit 0)) True False assigns stg_gc_gen + where + assigns = mkStmts [ + CmmAssign (CmmGlobal (VanillaReg 9)) liveness, + CmmAssign (CmmGlobal (VanillaReg 10)) reentry + ] + +stkChkNodePoints :: CmmExpr -> Code +stkChkNodePoints bytes + = do_checks' bytes (CmmLit (mkIntCLit 0)) True False noStmts stg_gc_enter1 + +stg_gc_gen = CmmLit (CmmLabel (mkRtsCodeLabel SLIT("stg_gc_gen"))) +stg_gc_enter1 = CmmReg (CmmGlobal GCEnter1) \end{code} %************************************************************************ @@ -324,47 +509,65 @@ to account for this. \begin{code} allocDynClosure :: ClosureInfo - -> CAddrMode -- Cost Centre to stick in the object - -> CAddrMode -- Cost Centre to blame for this alloc + -> CmmExpr -- Cost Centre to stick in the object + -> CmmExpr -- Cost Centre to blame for this alloc -- (usually the same; sometimes "OVERHEAD") - -> [(CAddrMode, VirtualHeapOffset)] -- Offsets from start of the object - -- ie Info ptr has offset zero. - -> FCode VirtualHeapOffset -- Returns virt offset of object + -> [(CmmExpr, VirtualHpOffset)] -- Offsets from start of the object + -- ie Info ptr has offset zero. + -> FCode VirtualHpOffset -- Returns virt offset of object -allocDynClosure closure_info use_cc blame_cc amodes_with_offsets - = getVirtAndRealHp `thenFC` \ (virtHp, realHp) -> +allocDynClosure cl_info use_cc blame_cc amodes_with_offsets + = do { virt_hp <- getVirtHp -- FIND THE OFFSET OF THE INFO-PTR WORD - -- virtHp points to last allocated word, ie 1 *before* the - -- info-ptr word of new object. 
- let info_offset = virtHp + 1 - - -- do_move IS THE ASSIGNMENT FUNCTION - do_move (amode, offset_from_start) - = CAssign (CVal (hpRel realHp - (info_offset + offset_from_start)) - (getAmodeRep amode)) - amode - in + ; let info_offset = virt_hp + 1 + -- info_offset is the VirtualHpOffset of the first + -- word of the new object + -- Remember, virtHp points to last allocated word, + -- ie 1 *before* the info-ptr word of new object. + + info_ptr = CmmLit (CmmLabel (infoTableLabelFromCI cl_info)) + hdr_w_offsets = initDynHdr info_ptr use_cc `zip` [0..] + -- SAY WHAT WE ARE ABOUT TO DO - profCtrC (allocProfilingMsg closure_info) - [mkIntCLit (closureGoodStuffSize closure_info), - mkIntCLit slop_size] `thenC` + ; profDynAlloc cl_info use_cc + -- ToDo: This is almost certainly wrong + -- We're ignoring blame_cc. But until we've + -- fixed the boxing hack in chooseDynCostCentres etc, + -- we're worried about making things worse by "fixing" + -- this part to use blame_cc! - -- GENERATE THE CODE - absC ( mkAbstractCs ( - [ CInitHdr closure_info - (CAddr (hpRel realHp info_offset)) - use_cc closure_size ] - ++ (map do_move amodes_with_offsets))) `thenC` + ; tickyDynAlloc cl_info - -- BUMP THE VIRTUAL HEAP POINTER - setVirtHp (virtHp + closure_size) `thenC` + -- ALLOCATE THE OBJECT + ; base <- getHpRelOffset info_offset + ; hpStore base (hdr_w_offsets ++ amodes_with_offsets) + -- BUMP THE VIRTUAL HEAP POINTER + ; setVirtHp (virt_hp + closureSize cl_info) + -- RETURN PTR TO START OF OBJECT - returnFC info_offset - where - closure_size = closureSize closure_info - slop_size = slopSize closure_info + ; returnFC info_offset } + + +initDynHdr :: CmmExpr + -> CmmExpr -- Cost centre to put in object + -> [CmmExpr] +initDynHdr info_ptr cc + = [info_ptr] + -- ToDo: Gransim stuff + -- ToDo: Parallel stuff + ++ dynProfHdr cc + -- No ticky header + +hpStore :: CmmExpr -> [(CmmExpr, VirtualHpOffset)] -> Code +-- Store the item (expr,off) in base[off] +hpStore base es + = stmtsC [ CmmStore 
(cmmOffsetW base off) val + | (val, off) <- es ] + +emitSetDynHdr :: CmmExpr -> CmmExpr -> CmmExpr -> Code +emitSetDynHdr base info_ptr ccs + = hpStore base (zip (initDynHdr info_ptr ccs) [0..]) \end{code} diff --git a/ghc/compiler/codeGen/CgInfoTbls.hs b/ghc/compiler/codeGen/CgInfoTbls.hs new file mode 100644 index 0000000000..2f1007384f --- /dev/null +++ b/ghc/compiler/codeGen/CgInfoTbls.hs @@ -0,0 +1,538 @@ +----------------------------------------------------------------------------- +-- +-- Building info tables. +-- +-- (c) The University of Glasgow 2004 +-- +----------------------------------------------------------------------------- + +module CgInfoTbls ( + emitClosureCodeAndInfoTable, + emitInfoTableAndCode, + dataConTagZ, + getSRTInfo, + emitDirectReturnTarget, emitAlgReturnTarget, + emitDirectReturnInstr, emitVectoredReturnInstr, + mkRetInfoTable, + mkStdInfoTable, + mkFunGenInfoExtraBits, + entryCode, closureInfoPtr, + getConstrTag, + infoTable, infoTableClosureType, + infoTablePtrs, infoTableNonPtrs, + funInfoTable, + vectorSlot, + ) where + + +#include "HsVersions.h" + +import ClosureInfo ( ClosureInfo, closureTypeDescr, closureName, + infoTableLabelFromCI, Liveness, + closureValDescr, closureSRT, closureSMRep, + closurePtrsSize, closureNonHdrSize, closureFunInfo, + C_SRT(..), needsSRT, isConstrClosure_maybe, + ArgDescr(..) ) +import SMRep ( StgHalfWord, hALF_WORD_SIZE_IN_BITS, hALF_WORD_SIZE, + WordOff, ByteOff, + smRepClosureTypeInt, tablesNextToCode, + rET_BIG, rET_SMALL, rET_VEC_BIG, rET_VEC_SMALL ) +import CgBindery ( getLiveStackSlots ) +import CgCallConv ( isBigLiveness, mkLivenessCLit, buildContLiveness, + argDescrType, getSequelAmode, + CtrlReturnConvention(..) 
) +import CgUtils ( mkStringCLit, packHalfWordsCLit, mkWordCLit, + cmmOffsetB, cmmOffsetExprW, cmmLabelOffW, cmmOffsetW, + emitDataLits, emitRODataLits, emitSwitch, cmmNegate ) +import CgMonad + +import CmmUtils ( mkIntCLit, zeroCLit ) +import Cmm ( CmmStmt(..), CmmExpr(..), CmmLit(..), LocalReg, + CmmBasicBlock, nodeReg ) +import MachOp ( MachOp(..), wordRep, halfWordRep ) +import CLabel +import StgSyn ( SRT(..) ) +import Name ( Name ) +import DataCon ( DataCon, dataConTag, fIRST_TAG ) +import Unique ( Uniquable(..) ) +import CmdLineOpts ( opt_SccProfilingOn ) +import ListSetOps ( assocDefault ) +import Maybes ( isJust ) +import Constants ( wORD_SIZE, sIZEOF_StgFunInfoExtra ) +import Outputable + + +------------------------------------------------------------------------- +-- +-- Generating the info table and code for a closure +-- +------------------------------------------------------------------------- + +-- Here we make a concrete info table, represented as a list of CmmAddr +-- (it can't be simply a list of Word, because the SRT field is +-- represented by a label+offset expression). 
+ +-- With tablesNextToCode, the layout is +-- <reversed variable part> +-- <normal forward StgInfoTable, but without +-- an entry point at the front> +-- <code> +-- +-- Without tablesNextToCode, the layout of an info table is +-- <entry label> +-- <normal forward rest of StgInfoTable> +-- <forward variable part> +-- +-- See includes/InfoTables.h + +emitClosureCodeAndInfoTable :: ClosureInfo -> [LocalReg] -> CgStmts -> Code +emitClosureCodeAndInfoTable cl_info args body + = do { ty_descr_lit <- + if opt_SccProfilingOn + then mkStringCLit (closureTypeDescr cl_info) + else return (mkIntCLit 0) + ; cl_descr_lit <- + if opt_SccProfilingOn + then mkStringCLit cl_descr_string + else return (mkIntCLit 0) + ; let std_info = mkStdInfoTable ty_descr_lit cl_descr_lit + cl_type srt_len layout_lit + + ; blks <- cgStmtsToBlocks body + ; emitInfoTableAndCode info_lbl std_info extra_bits args blks } + where + info_lbl = infoTableLabelFromCI cl_info + + cl_descr_string = closureValDescr cl_info + cl_type = smRepClosureTypeInt (closureSMRep cl_info) + + srt = closureSRT cl_info + needs_srt = needsSRT srt + + mb_con = isConstrClosure_maybe cl_info + is_con = isJust mb_con + + (srt_label,srt_len) + = case mb_con of + Just con -> -- Constructors don't have an SRT + -- We keep the *zero-indexed* tag in the srt_len + -- field of the info table. 
+ (mkIntCLit 0, fromIntegral (dataConTagZ con)) + + Nothing -> -- Not a constructor + srtLabelAndLength srt + + ptrs = closurePtrsSize cl_info + nptrs = size - ptrs + size = closureNonHdrSize cl_info + layout_lit = packHalfWordsCLit ptrs nptrs + + extra_bits + | is_fun = fun_extra_bits + | is_con = [] + | needs_srt = [srt_label] + | otherwise = [] + + maybe_fun_stuff = closureFunInfo cl_info + is_fun = isJust maybe_fun_stuff + (Just (arity, arg_descr)) = maybe_fun_stuff + + fun_extra_bits + | ArgGen liveness <- arg_descr + = [ fun_amode, + srt_label, + mkLivenessCLit liveness, + CmmLabel (mkSlowEntryLabel (closureName cl_info)) ] + | needs_srt = [fun_amode, srt_label] + | otherwise = [fun_amode] + + fun_amode = packHalfWordsCLit fun_type arity + fun_type = argDescrType arg_descr + +-- We keep the *zero-indexed* tag in the srt_len field of the info +-- table of a data constructor. +dataConTagZ :: DataCon -> ConTagZ +dataConTagZ con = dataConTag con - fIRST_TAG + +-- A low-level way to generate the variable part of a fun-style info table. +-- (must match fun_extra_bits above). Used by the C-- parser. 
+mkFunGenInfoExtraBits :: Int -> Int -> CmmLit -> CmmLit -> CmmLit -> [CmmLit] +mkFunGenInfoExtraBits fun_type arity srt_label liveness slow_entry + = [ packHalfWordsCLit fun_type arity, + srt_label, + liveness, + slow_entry ] + +------------------------------------------------------------------------- +-- +-- Generating the info table and code for a return point +-- +------------------------------------------------------------------------- + +-- Here's the layout of a return-point info table +-- +-- Tables next to code: +-- +-- <reversed vector table> +-- <srt slot> +-- <standard info table> +-- ret-addr --> <entry code (if any)> +-- +-- Not tables-next-to-code: +-- +-- ret-addr --> <ptr to entry code> +-- <standard info table> +-- <srt slot> +-- <forward vector table> +-- +-- * The vector table is only present for vectored returns +-- +-- * The SRT slot is only there if either +-- (a) there is SRT info to record, OR +-- (b) if the return is vectored +-- The latter (b) is necessary so that the vector is in a +-- predictable place + +vectorSlot :: CmmExpr -> CmmExpr -> CmmExpr +-- Get the vector slot from the info pointer +vectorSlot info_amode zero_indexed_tag + | tablesNextToCode + = cmmOffsetExprW (cmmOffsetW info_amode (- (stdInfoTableSizeW + 2))) + (cmmNegate zero_indexed_tag) + -- The "2" is one for the SRT slot, and one more + -- to get to the first word of the vector + + | otherwise + = cmmOffsetExprW (cmmOffsetW info_amode (stdInfoTableSizeW + 2)) + zero_indexed_tag + -- The "2" is one for the entry-code slot and one for the SRT slot + + +emitReturnTarget + :: Name + -> CgStmts -- The direct-return code (if any) + -- (empty for vectored returns) + -> [CLabel] -- Vector of return points + -- (empty for non-vectored returns) + -> SRT + -> FCode CLabel +emitReturnTarget name stmts vector srt + = do { live_slots <- getLiveStackSlots + ; liveness <- buildContLiveness name live_slots + ; srt_info <- getSRTInfo name srt + + ; let + cl_type = case (null vector, 
isBigLiveness liveness) of + (True, True) -> rET_BIG + (True, False) -> rET_SMALL + (False, True) -> rET_VEC_BIG + (False, False) -> rET_VEC_SMALL + + (std_info, extra_bits) = + mkRetInfoTable liveness srt_info cl_type vector + + ; blks <- cgStmtsToBlocks stmts + ; emitInfoTableAndCode info_lbl std_info extra_bits args blks + ; return info_lbl } + where + args = trace "emitReturnTarget: missing args" [] + uniq = getUnique name + info_lbl = mkReturnInfoLabel uniq + + +mkRetInfoTable + :: Liveness -- liveness + -> C_SRT -- SRT Info + -> Int -- type (eg. rET_SMALL) + -> [CLabel] -- vector + -> ([CmmLit],[CmmLit]) +mkRetInfoTable liveness srt_info cl_type vector + = (std_info, extra_bits) + where + (srt_label, srt_len) = srtLabelAndLength srt_info + + srt_slot | need_srt = [srt_label] + | otherwise = [] + + need_srt = needsSRT srt_info || not (null vector) + -- If there's a vector table then we must allocate + -- an SRT slot, so that the vector table is at a + -- known offset from the info pointer + + liveness_lit = mkLivenessCLit liveness + std_info = mkStdInfoTable zeroCLit zeroCLit cl_type srt_len liveness_lit + extra_bits = srt_slot ++ map CmmLabel vector + + +emitDirectReturnTarget + :: Name + -> CgStmts -- The direct-return code + -> SRT + -> FCode CLabel +emitDirectReturnTarget name code srt + = emitReturnTarget name code [] srt + +emitAlgReturnTarget + :: Name -- Just for its unique + -> [(ConTagZ, CgStmts)] -- Tagged branches + -> Maybe CgStmts -- Default branch (if any) + -> SRT -- Continuation's SRT + -> CtrlReturnConvention + -> FCode (CLabel, SemiTaggingStuff) + +emitAlgReturnTarget name branches mb_deflt srt ret_conv + = case ret_conv of + UnvectoredReturn fam_sz -> do + { blks <- getCgStmts $ + emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1) + -- NB: tag_expr is zero-based + ; lbl <- emitDirectReturnTarget name blks srt + ; return (lbl, Nothing) } + -- Nothing: the internal branches in the switch don't have + -- global labels, so we can't use them 
at the 'call site' + + VectoredReturn fam_sz -> do + { tagged_lbls <- mapFCs emit_alt branches + ; deflt_lbl <- emit_deflt mb_deflt + ; let vector = [ assocDefault deflt_lbl tagged_lbls i + | i <- [0..fam_sz-1]] + ; lbl <- emitReturnTarget name noCgStmts vector srt + ; return (lbl, Just (tagged_lbls, deflt_lbl)) } + where + uniq = getUnique name + tag_expr = getConstrTag (CmmReg nodeReg) + + emit_alt :: (Int, CgStmts) -> FCode (Int, CLabel) + -- Emit the code for the alternative as a top-level + -- code block returning a label for it + emit_alt (tag, stmts) = do { let lbl = mkAltLabel uniq tag + ; blks <- cgStmtsToBlocks stmts + ; emitProc [] lbl [] blks + ; return (tag, lbl) } + + emit_deflt (Just stmts) = do { let lbl = mkDefaultLabel uniq + ; blks <- cgStmtsToBlocks stmts + ; emitProc [] lbl [] blks + ; return lbl } + emit_deflt Nothing = return mkErrorStdEntryLabel + -- Nothing case: the simplifier might have eliminated a case + -- so we may have e.g. case xs of + -- [] -> e + -- In that situation the default should never be taken, + -- so we just use mkErrorStdEntryLabel + +-------------------------------- +emitDirectReturnInstr :: Code +emitDirectReturnInstr + = do { info_amode <- getSequelAmode + ; stmtC (CmmJump (entryCode info_amode) []) } + +emitVectoredReturnInstr :: CmmExpr -- *Zero-indexed* constructor tag + -> Code +emitVectoredReturnInstr zero_indexed_tag + = do { info_amode <- getSequelAmode + ; let slot = vectorSlot info_amode zero_indexed_tag + ; stmtC (CmmJump (CmmLoad slot wordRep) []) } + + + +------------------------------------------------------------------------- +-- +-- Generating a standard info table +-- +------------------------------------------------------------------------- + +-- The standard bits of an info table. This part of the info table +-- corresponds to the StgInfoTable type defined in InfoTables.h. 
+-- +-- Its shape varies with ticky/profiling/tables next to code etc +-- so we can't use constant offsets from Constants + +mkStdInfoTable + :: CmmLit -- closure type descr (profiling) + -> CmmLit -- closure descr (profiling) + -> Int -- closure type + -> StgHalfWord -- SRT length + -> CmmLit -- layout field + -> [CmmLit] + +mkStdInfoTable type_descr closure_descr cl_type srt_len layout_lit + = -- Parallel revertible-black hole field + prof_info + -- Ticky info (none at present) + -- Debug info (none at present) + ++ [layout_lit, type_lit] + + where + prof_info + | opt_SccProfilingOn = [closure_descr, type_descr] + | otherwise = [] + + type_lit = packHalfWordsCLit cl_type srt_len + +stdInfoTableSizeW :: WordOff +-- The size of a standard info table varies with profiling/ticky etc, +-- so we can't get it from Constants +-- It must vary in sync with mkStdInfoTable +stdInfoTableSizeW + = size_fixed + size_prof + where + size_fixed = 2 -- layout, type + size_prof | opt_SccProfilingOn = 2 + | otherwise = 0 + +stdInfoTableSizeB = stdInfoTableSizeW * wORD_SIZE :: ByteOff + +stdSrtBitmapOffset :: ByteOff +-- Byte offset of the SRT bitmap half-word which is +-- in the *higher-addressed* part of the type_lit +stdSrtBitmapOffset = stdInfoTableSizeB - hALF_WORD_SIZE + +stdClosureTypeOffset :: ByteOff +-- Byte offset of the closure type half-word +stdClosureTypeOffset = stdInfoTableSizeB - wORD_SIZE + +stdPtrsOffset, stdNonPtrsOffset :: ByteOff +stdPtrsOffset = stdInfoTableSizeB - 2*wORD_SIZE +stdNonPtrsOffset = stdInfoTableSizeB - 2*wORD_SIZE + hALF_WORD_SIZE + +------------------------------------------------------------------------- +-- +-- Accessing fields of an info table +-- +------------------------------------------------------------------------- + +closureInfoPtr :: CmmExpr -> CmmExpr +-- Takes a closure pointer and returns the info table pointer +closureInfoPtr e = CmmLoad e wordRep + +entryCode :: CmmExpr -> CmmExpr +-- Takes an info pointer (the first word of a 
closure) +-- and returns its entry code +entryCode e | tablesNextToCode = e + | otherwise = CmmLoad e wordRep + +getConstrTag :: CmmExpr -> CmmExpr +-- Takes a closure pointer, and return the *zero-indexed* +-- constructor tag obtained from the info table +-- This lives in the SRT field of the info table +-- (constructors don't need SRTs). +getConstrTag closure_ptr + = CmmMachOp (MO_U_Conv halfWordRep wordRep) [infoTableConstrTag info_table] + where + info_table = infoTable (closureInfoPtr closure_ptr) + +infoTable :: CmmExpr -> CmmExpr +-- Takes an info pointer (the first word of a closure) +-- and returns a pointer to the first word of the standard-form +-- info table, excluding the entry-code word (if present) +infoTable info_ptr + | tablesNextToCode = cmmOffsetB info_ptr (- stdInfoTableSizeB) + | otherwise = cmmOffsetW info_ptr 1 -- Past the entry code pointer + +infoTableConstrTag :: CmmExpr -> CmmExpr +-- Takes an info table pointer (from infoTable) and returns the constr tag +-- field of the info table (same as the srt_bitmap field) +infoTableConstrTag = infoTableSrtBitmap + +infoTableSrtBitmap :: CmmExpr -> CmmExpr +-- Takes an info table pointer (from infoTable) and returns the srt_bitmap +-- field of the info table +infoTableSrtBitmap info_tbl + = CmmLoad (cmmOffsetB info_tbl stdSrtBitmapOffset) halfWordRep + +infoTableClosureType :: CmmExpr -> CmmExpr +-- Takes an info table pointer (from infoTable) and returns the closure type +-- field of the info table. 
+infoTableClosureType info_tbl + = CmmLoad (cmmOffsetB info_tbl stdClosureTypeOffset) halfWordRep + +infoTablePtrs :: CmmExpr -> CmmExpr +infoTablePtrs info_tbl + = CmmLoad (cmmOffsetB info_tbl stdPtrsOffset) halfWordRep + +infoTableNonPtrs :: CmmExpr -> CmmExpr +infoTableNonPtrs info_tbl + = CmmLoad (cmmOffsetB info_tbl stdNonPtrsOffset) halfWordRep + +funInfoTable :: CmmExpr -> CmmExpr +-- Takes the info pointer of a function, +-- and returns a pointer to the first word of the StgFunInfoExtra struct +-- in the info table. +funInfoTable info_ptr + | tablesNextToCode + = cmmOffsetB info_ptr (- stdInfoTableSizeB - sIZEOF_StgFunInfoExtra) + | otherwise + = cmmOffsetW info_ptr (1 + stdInfoTableSizeW) + -- Past the entry code pointer + +------------------------------------------------------------------------- +-- +-- Emit the code for a closure (or return address) +-- and its associated info table +-- +------------------------------------------------------------------------- + +-- The complication here concerns whether or not we can +-- put the info table next to the code + +emitInfoTableAndCode + :: CLabel -- Label of info table + -> [CmmLit] -- ...its invariant part + -> [CmmLit] -- ...and its variant part + -> [LocalReg] -- ...args + -> [CmmBasicBlock] -- ...and body + -> Code + +emitInfoTableAndCode info_lbl std_info extra_bits args blocks + | tablesNextToCode -- Reverse the extra_bits; and emit the top-level proc + = emitProc (reverse extra_bits ++ std_info) + entry_lbl args blocks + -- NB: the info_lbl is discarded + + | null blocks -- No actual code; only the info table is significant + = -- Use a zero place-holder in place of the + -- entry-label in the info table + emitRODataLits info_lbl (zeroCLit : std_info ++ extra_bits) + + | otherwise -- Separately emit info table (with the function entry + = -- point as first entry) and the entry code + do { emitDataLits info_lbl (CmmLabel entry_lbl : std_info ++ extra_bits) + ; emitProc [] entry_lbl args blocks } + + 
where + entry_lbl = infoLblToEntryLbl info_lbl + +------------------------------------------------------------------------- +-- +-- Static reference tables +-- +------------------------------------------------------------------------- + +-- There is just one SRT for each top level binding; all the nested +-- bindings use sub-sections of this SRT. The label is passed down to +-- the nested bindings via the monad. + +getSRTInfo :: Name -> SRT -> FCode C_SRT +getSRTInfo id NoSRT = return NoC_SRT +getSRTInfo id (SRT off len bmp) + | len > hALF_WORD_SIZE_IN_BITS || bmp == [fromIntegral srt_escape] + = do { srt_lbl <- getSRTLabel + ; let srt_desc_lbl = mkSRTDescLabel id + ; emitRODataLits srt_desc_lbl + ( cmmLabelOffW srt_lbl off + : mkWordCLit (fromIntegral len) + : map mkWordCLit bmp) + ; return (C_SRT srt_desc_lbl 0 srt_escape) } + + | otherwise + = do { srt_lbl <- getSRTLabel + ; return (C_SRT srt_lbl off (fromIntegral (head bmp))) } + -- The fromIntegral converts to StgHalfWord + +srt_escape = (-1) :: StgHalfWord + +srtLabelAndLength :: C_SRT -> (CmmLit, StgHalfWord) +srtLabelAndLength NoC_SRT = (zeroCLit, 0) +srtLabelAndLength (C_SRT lbl off bitmap) = (cmmLabelOffW lbl off, bitmap) + diff --git a/ghc/compiler/codeGen/CgLetNoEscape.lhs b/ghc/compiler/codeGen/CgLetNoEscape.lhs index 80b80ee6b2..3ea05974f6 100644 --- a/ghc/compiler/codeGen/CgLetNoEscape.lhs +++ b/ghc/compiler/codeGen/CgLetNoEscape.lhs @@ -1,7 +1,7 @@ % % (c) The GRASP/AQUA Project, Glasgow University, 1993-1998 % -% $Id: CgLetNoEscape.lhs,v 1.24 2003/07/21 11:01:07 simonmar Exp $ +% $Id: CgLetNoEscape.lhs,v 1.25 2004/08/13 13:06:03 simonmar Exp $ % %******************************************************** %* * @@ -18,21 +18,23 @@ import {-# SOURCE #-} CgExpr ( cgExpr ) import StgSyn import CgMonad -import AbsCSyn import CgBindery ( CgIdInfo, letNoEscapeIdInfo, nukeDeadBindings ) -import CgCase ( mkRetDirectTarget, restoreCurrentCostCentre ) +import CgCase ( restoreCurrentCostCentre ) import CgCon ( 
bindUnboxedTupleComponents ) import CgHeapery ( unbxTupleHeapCheck ) -import CgStackery ( allocStackTop, deAllocStackTop ) -import CgUsages ( getSpRelOffset ) +import CgInfoTbls ( emitDirectReturnTarget ) +import CgStackery ( allocStackTop, deAllocStackTop, getSpRelOffset ) +import Cmm ( CmmStmt(..) ) +import CmmUtils ( mkLblExpr, oneStmt ) import CLabel ( mkReturnInfoLabel ) import ClosureInfo ( mkLFLetNoEscape ) import CostCentre ( CostCentreStack ) -import Id ( Id ) +import Id ( Id, idName ) import Var ( idUnique ) -import PrimRep ( PrimRep(..), retPrimRepSize ) +import SMRep ( retAddrSizeW ) import BasicTypes ( RecFlag(..) ) +import Outputable \end{code} %************************************************************************ @@ -156,25 +158,23 @@ cgLetNoEscapeClosure arity = length args lf_info = mkLFLetNoEscape arity in - -- saveVolatileVarsAndRegs done earlier in cgExpr. - forkEvalHelp - rhs_eob_info + do { (vSp, _) <- forkEvalHelp rhs_eob_info + + (do { allocStackTop retAddrSizeW + ; nukeDeadBindings full_live_in_rhss }) - (allocStackTop retPrimRepSize `thenFC` \_ -> - nukeDeadBindings full_live_in_rhss) + (do { deAllocStackTop retAddrSizeW + ; abs_c <- forkProc $ cgLetNoEscapeBody bndr cc + cc_slot args body - (deAllocStackTop retPrimRepSize `thenFC` \_ -> - forkAbsC ( - cgLetNoEscapeBody bndr cc cc_slot args body - ) `thenFC` \ abs_c -> - mkRetDirectTarget bndr abs_c srt - -- Ignore the label that comes back from - -- mkRetDirectTarget. It must be conjured up elswhere - ) `thenFC` \ (vSp, _) -> + -- Ignore the label that comes back from + -- mkRetDirectTarget. 
It must be conjured up elswhere + ; emitDirectReturnTarget (idName bndr) abs_c srt + ; return () }) - returnFC (bndr, letNoEscapeIdInfo bndr vSp lf_info) + ; returnFC (bndr, letNoEscapeIdInfo bndr vSp lf_info) } \end{code} \begin{code} @@ -185,28 +185,28 @@ cgLetNoEscapeBody :: Id -- Name of the joint point -> StgExpr -- Body -> Code -cgLetNoEscapeBody bndr cc cc_slot all_args body - = bindUnboxedTupleComponents all_args `thenFC` \ (arg_regs, ptrs, nptrs, ret_slot) -> +cgLetNoEscapeBody bndr cc cc_slot all_args body = do + { (arg_regs, ptrs, nptrs, ret_slot) <- bindUnboxedTupleComponents all_args -- restore the saved cost centre. BUT: we must not free the stack slot -- containing the cost centre, because it might be needed for a -- recursive call to this let-no-escape. - restoreCurrentCostCentre cc_slot False{-don't free-} `thenC` + ; restoreCurrentCostCentre cc_slot False{-don't free-} -- Enter the closures cc, if required - --enterCostCentreCode closure_info cc IsFunction `thenC` + ; -- enterCostCentreCode closure_info cc IsFunction -- The "return address" slot doesn't have a return address in it; -- but the heap-check needs it filled in if the heap-check fails. 
-- So we pass code to fill it in to the heap-check macro - getSpRelOffset ret_slot `thenFC` \ sp_rel -> - let lbl = mkReturnInfoLabel (idUnique bndr) - frame_hdr_asst = CAssign (CVal sp_rel RetRep) (CLbl lbl RetRep) - in + ; sp_rel <- getSpRelOffset ret_slot + + ; let lbl = mkReturnInfoLabel (idUnique bndr) + frame_hdr_asst = oneStmt (CmmStore sp_rel (mkLblExpr lbl)) -- Do heap check [ToDo: omit for non-recursive case by recording in -- in envt and absorbing at call site] - unbxTupleHeapCheck arg_regs ptrs nptrs frame_hdr_asst ( - cgExpr body - ) + ; unbxTupleHeapCheck arg_regs ptrs nptrs frame_hdr_asst + (cgExpr body) + } \end{code} diff --git a/ghc/compiler/codeGen/CgMonad.lhs b/ghc/compiler/codeGen/CgMonad.lhs index 88083f7536..003be9701c 100644 --- a/ghc/compiler/codeGen/CgMonad.lhs +++ b/ghc/compiler/codeGen/CgMonad.lhs @@ -1,7 +1,7 @@ % % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 % -% $Id: CgMonad.lhs,v 1.39 2003/07/02 13:12:38 simonpj Exp $ +% $Id: CgMonad.lhs,v 1.40 2004/08/13 13:06:03 simonmar Exp $ % \section[CgMonad]{The code generation monad} @@ -14,56 +14,64 @@ module CgMonad ( FCode, -- type initC, thenC, thenFC, listCs, listFCs, mapCs, mapFCs, - returnFC, fixC, absC, nopC, getAbsC, + returnFC, fixC, checkedAbsC, + stmtC, stmtsC, labelC, emitStmts, nopC, whenC, newLabelC, + newUnique, newUniqSupply, + CgStmts, emitCgStmts, forkCgStmts, cgStmtsToBlocks, + getCgStmts', getCgStmts, + noCgStmts, oneCgStmt, consCgStmt, + + getCmm, + emitData, emitProc, emitSimpleProc, + + forkLabelledCode, forkClosureBody, forkStatics, forkAlts, forkEval, - forkEvalHelp, forkAbsC, - SemiTaggingStuff, + forkEvalHelp, forkProc, codeOnly, + SemiTaggingStuff, ConTagZ, EndOfBlockInfo(..), setEndOfBlockInfo, getEndOfBlockInfo, - setSRTLabel, getSRTLabel, getSRTInfo, + setSRTLabel, getSRTLabel, setTickyCtrLabel, getTickyCtrLabel, - StackUsage, Slot(..), HeapUsage, - - profCtrC, profCtrAbsC, ldvEnter, + StackUsage(..), HeapUsage(..), + VirtualSpOffset, 
VirtualHpOffset, + initStkUsage, initHpUsage, + getHpUsage, setHpUsage, + heapHWM, - costCentresC, moduleName, + moduleName, Sequel(..), -- ToDo: unabstract? - sequelToAmode, -- ideally we wouldn't export these, but some other modules access internal state getState, setState, getInfoDown, -- more localised access to monad state - getUsage, setUsage, + getStkUsage, setStkUsage, getBinds, setBinds, getStaticBinds, -- out of general friendliness, we also export ... - CgInfoDownwards(..), CgState(..), -- non-abstract - CompilationInfo(..) + CgInfoDownwards(..), CgState(..) -- non-abstract ) where #include "HsVersions.h" import {-# SOURCE #-} CgBindery ( CgBindings, nukeVolatileBinds ) -import {-# SOURCE #-} CgUsages ( getSpRelOffset ) -import AbsCSyn +import Cmm +import CmmUtils ( CmmStmts, isNopStmt ) import CLabel -import StgSyn ( SRT(..) ) -import AbsCUtils ( mkAbsCStmts ) -import CmdLineOpts ( opt_SccProfilingOn, opt_DoTickyProfiling ) +import SMRep ( WordOff ) import Module ( Module ) -import DataCon ( ConTag ) import Id ( Id ) -import Name ( Name ) import VarEnv -import PrimRep ( PrimRep(..) ) -import SMRep ( StgHalfWord, hALF_WORD ) +import OrdList +import Unique ( Unique ) +import Util ( mapAccumL ) +import UniqSupply ( UniqSupply, mkSplitUniqSupply, splitUniqSupply, uniqFromSupply ) import FastString import Outputable @@ -83,29 +91,46 @@ along. 
\begin{code} data CgInfoDownwards -- information only passed *downwards* by the monad - = MkCgInfoDown - CompilationInfo -- COMPLETELY STATIC info about this compilation - -- (e.g., what flags were passed to the compiler) - - CgBindings -- [Id -> info] : static environment - - CLabel -- label of the current SRT - - CLabel -- current destination for ticky counts - - EndOfBlockInfo -- Info for stuff to do at end of basic block: - - -data CompilationInfo - = MkCompInfo - Module -- the module name + = MkCgInfoDown { + cgd_mod :: Module, -- Module being compiled + cgd_statics :: CgBindings, -- [Id -> info] : static environment + cgd_srt :: CLabel, -- label of the current SRT + cgd_ticky :: CLabel, -- current destination for ticky counts + cgd_eob :: EndOfBlockInfo -- Info for stuff to do at end of basic block: + } + +initCgInfoDown :: Module -> CgInfoDownwards +initCgInfoDown mod + = MkCgInfoDown { cgd_mod = mod, + cgd_statics = emptyVarEnv, + cgd_srt = error "initC: srt", + cgd_ticky = mkTopTickyCtrLabel, + cgd_eob = initEobInfo } data CgState - = MkCgState - AbstractC -- code accumulated so far - CgBindings -- [Id -> info] : *local* bindings environment - -- Bindings for top-level things are given in the info-down part - CgStksAndHeapUsage + = MkCgState { + cgs_stmts :: OrdList CgStmt, -- Current proc + cgs_tops :: OrdList CmmTop, + -- Other procedures and data blocks in this compilation unit + -- Both the latter two are ordered only so that we can + -- reduce forward references, when it's easy to do so + + cgs_binds :: CgBindings, -- [Id -> info] : *local* bindings environment + -- Bindings for top-level things are given in + -- the info-down part + + cgs_stk_usg :: StackUsage, + cgs_hp_usg :: HeapUsage, + + cgs_uniqs :: UniqSupply } + +initCgState :: UniqSupply -> CgState +initCgState uniqs + = MkCgState { cgs_stmts = nilOL, cgs_tops = nilOL, + cgs_binds = emptyVarEnv, + cgs_stk_usg = initStkUsage, + cgs_hp_usg = initHpUsage, + cgs_uniqs = uniqs } \end{code} 
@EndOfBlockInfo@ tells what to do at the end of this block of code or, @@ -123,7 +148,7 @@ data EndOfBlockInfo -- by a case alternative. Sequel -initEobInfo = EndOfBlockInfo 0 (OnStack 0) +initEobInfo = EndOfBlockInfo 0 OnStack \end{code} Any addressing modes inside @Sequel@ must be ``robust,'' in the sense @@ -132,105 +157,164 @@ block. \begin{code} data Sequel - = OnStack - VirtualSpOffset -- Continuation is on the stack, at the - -- specified location - - | UpdateCode + = OnStack -- Continuation is on the stack + | UpdateCode -- Continuation is update | CaseAlts - CAddrMode -- Jump to this; if the continuation is for a vectored - -- case this might be the label of a return - -- vector Guaranteed to be a non-volatile - -- addressing mode (I think) + CLabel -- Jump to this; if the continuation is for a vectored + -- case this might be the label of a return vector SemiTaggingStuff - + Id -- The case binder, only used to see if it's dead Bool -- True <=> polymorphic, push a SEQ frame too - type SemiTaggingStuff - = Maybe -- Maybe[1] we don't have any semi-tagging stuff... - ([(ConTag, JoinDetails)], -- Alternatives - Maybe (Id, JoinDetails) -- Default (but Maybe[2] we don't have one) - -- The default branch expects a - -- it expects a ptr to the thing - -- in Node, bound to b - ) - -type JoinDetails - = (AbstractC, CLabel) -- Code to load regs from heap object + profiling macros, - -- and join point label - --- The abstract C is executed only from a successful semitagging + = Maybe -- Maybe[1] we don't have any semi-tagging stuff... + ([(ConTagZ, CLabel)], -- Alternatives + CLabel) -- Default (will be a can't happen RTS label if can't happen) + +type ConTagZ = Int -- A *zero-indexed* contructor tag + +-- The case branch is executed only from a successful semitagging -- venture, when a case has looked at a variable, found that it's -- evaluated, and wants to load up the contents and go to the join -- point. 
+\end{code} + +%************************************************************************ +%* * + CgStmt type +%* * +%************************************************************************ + +The CgStmts type is what the code generator outputs: it is a tree of +statements, including in-line labels. The job of flattenCgStmts is to +turn this into a list of basic blocks, each of which ends in a jump +statement (either a local branch or a non-local jump). + +\begin{code} +type CgStmts = OrdList CgStmt + +data CgStmt + = CgStmt CmmStmt + | CgLabel BlockId + | CgFork BlockId CgStmts + +flattenCgStmts :: BlockId -> CgStmts -> [CmmBasicBlock] +flattenCgStmts id stmts = + case flatten (fromOL stmts) of + ([],blocks) -> blocks + (block,blocks) -> BasicBlock id block : blocks + where + flatten [] = ([],[]) + + -- A label at the end of a function or fork: this label must not be reachable, + -- but it might be referred to from another BB that also isn't reachable. + -- Eliminating these has to be done with a dead-code analysis. For now, + -- we just make it into a well-formed block by adding a recursive jump. + flatten [CgLabel id] + = ( [], [BasicBlock id [CmmBranch id]] ) + + -- A jump/branch: throw away all the code up to the next label, because + -- it is unreachable. Be careful to keep forks that we find on the way. 
+ flatten (CgStmt stmt : stmts) + | isJump stmt + = case dropWhile isOrdinaryStmt stmts of + [] -> ( [stmt], [] ) + [CgLabel id] -> ( [stmt], [BasicBlock id [CmmBranch id]]) + (CgLabel id : stmts) -> ( [stmt], BasicBlock id block : blocks ) + where (block,blocks) = flatten stmts + (CgFork fork_id stmts : ss) -> + flatten (CgFork fork_id stmts : CgStmt stmt : ss) + + flatten (s:ss) = + case s of + CgStmt stmt -> (stmt:block,blocks) + CgLabel id -> ([CmmBranch id],BasicBlock id block:blocks) + CgFork fork_id stmts -> + (block, BasicBlock fork_id fork_block : fork_blocks ++ blocks) + where (fork_block, fork_blocks) = flatten (fromOL stmts) + where (block,blocks) = flatten ss + +isJump (CmmJump _ _) = True +isJump (CmmBranch _) = True +isJump _ = False + +isOrdinaryStmt (CgStmt _) = True +isOrdinaryStmt _ = False +\end{code} + +%************************************************************************ +%* * + Stack and heap models +%* * +%************************************************************************ --- DIRE WARNING. --- The OnStack case of sequelToAmode delivers an Amode which is only --- valid just before the final control transfer, because it assumes --- that Sp is pointing to the top word of the return address. This --- seems unclean but there you go. - --- sequelToAmode returns an amode which refers to an info table. The info --- table will always be of the RET(_VEC)?_(BIG|SMALL) kind. We're careful --- not to handle real code pointers, just in case we're compiling for --- an unregisterised/untailcallish architecture, where info pointers and --- code pointers aren't the same. 
- -sequelToAmode :: Sequel -> FCode CAddrMode - -sequelToAmode (OnStack virt_sp_offset) - = getSpRelOffset virt_sp_offset `thenFC` \ sp_rel -> - returnFC (CVal sp_rel RetRep) - -sequelToAmode UpdateCode = returnFC (CLbl mkUpdInfoLabel RetRep) - -sequelToAmode (CaseAlts amode _ False) = returnFC amode -sequelToAmode (CaseAlts amode _ True) = returnFC (CLbl mkSeqInfoLabel RetRep) - -type CgStksAndHeapUsage -- stacks and heap usage information - = (StackUsage, HeapUsage) - -data Slot = Free | NonPointer - deriving -#ifdef DEBUG - (Eq,Show) -#else - Eq -#endif - -type StackUsage = - (Int, -- virtSp: Virtual offset of topmost allocated slot - Int, -- frameSp: End of the current stack frame - [(Int,Slot)], -- free: List of free slots, in increasing order - Int, -- realSp: Virtual offset of real stack pointer - Int) -- hwSp: Highest value ever taken by virtSp - --- ToDo (SDM, 7 Jan 2003): I'm not sure that the distinction between --- Free and NonPointer in the free list is needed any more. It used --- to be needed because we constructed bitmaps from the free list, but --- now we construct bitmaps by finding all the live pointer bindings --- instead. Non-pointer stack slots (i.e. saved cost centres) can --- just be removed from the free list instead of being recorded as a --- NonPointer. - -type HeapUsage = - (HeapOffset, -- virtHp: Virtual offset of highest-allocated word - HeapOffset) -- realHp: Virtual offset of real heap ptr +\begin{code} +type VirtualHpOffset = WordOff -- Both are in +type VirtualSpOffset = WordOff -- units of words + +data StackUsage + = StackUsage { + virtSp :: VirtualSpOffset, + -- Virtual offset of topmost allocated slot + + frameSp :: VirtualSpOffset, + -- Virtual offset of the return address of the enclosing frame. 
+ -- This RA describes the liveness/pointedness of + -- all the stack from frameSp downwards + -- INVARIANT: less than or equal to virtSp + + freeStk :: [VirtualSpOffset], + -- List of free slots, in *increasing* order + -- INVARIANT: all <= virtSp + -- All slots <= virtSp are taken except these ones + + realSp :: VirtualSpOffset, + -- Virtual offset of real stack pointer register + + hwSp :: VirtualSpOffset + } -- Highest value ever taken by virtSp + +-- INVARAINT: The environment contains no Stable references to +-- stack slots below (lower offset) frameSp +-- It can contain volatile references to this area though. + +data HeapUsage = + HeapUsage { + virtHp :: VirtualHpOffset, -- Virtual offset of highest-allocated word + realHp :: VirtualHpOffset -- realHp: Virtual offset of real heap ptr + } \end{code} -NB: absolutely every one of the above Ints is really -a VirtualOffset of some description (the code generator -works entirely in terms of VirtualOffsets). +The heap high water mark is the larger of virtHp and hwHp. The latter is +only records the high water marks of forked-off branches, so to find the +heap high water mark you have to take the max of virtHp and hwHp. Remember, +virtHp never retreats! -Initialisation. +Note Jan 04: ok, so why do we only look at the virtual Hp?? \begin{code} -initialStateC = MkCgState AbsCNop emptyVarEnv initUsage +heapHWM :: HeapUsage -> VirtualHpOffset +heapHWM = virtHp +\end{code} -initUsage :: CgStksAndHeapUsage -initUsage = ((0,0,[],0,0), (0,0)) +Initialisation. + +\begin{code} +initStkUsage :: StackUsage +initStkUsage = StackUsage { + virtSp = 0, + frameSp = 0, + freeStk = [], + realSp = 0, + hwSp = 0 + } + +initHpUsage :: HeapUsage +initHpUsage = HeapUsage { + virtHp = 0, + realHp = 0 + } \end{code} @stateIncUsage@$~e_1~e_2$ incorporates in $e_1$ the stack and heap high water @@ -238,24 +322,42 @@ marks found in $e_2$. 
\begin{code} stateIncUsage :: CgState -> CgState -> CgState +stateIncUsage s1 s2@(MkCgState { cgs_stk_usg = stk_usg, cgs_hp_usg = hp_usg }) + = s1 { cgs_hp_usg = cgs_hp_usg s1 `maxHpHw` virtHp hp_usg, + cgs_stk_usg = cgs_stk_usg s1 `maxStkHw` hwSp stk_usg } + `addCodeBlocksFrom` s2 + +stateIncUsageEval :: CgState -> CgState -> CgState +stateIncUsageEval s1 s2 + = s1 { cgs_stk_usg = cgs_stk_usg s1 `maxStkHw` hwSp (cgs_stk_usg s2) } + `addCodeBlocksFrom` s2 + -- We don't max the heap high-watermark because stateIncUsageEval is + -- used only in forkEval, which in turn is only used for blocks of code + -- which do their own heap-check. -stateIncUsage (MkCgState abs_c bs ((v,t,f,r,h1),(vH1,rH1))) - (MkCgState _ _ ((_,_,_,_,h2),(vH2, _))) - = MkCgState abs_c - bs - ((v,t,f,r,h1 `max` h2), - (vH1 `max` vH2, rH1)) +addCodeBlocksFrom :: CgState -> CgState -> CgState +-- Add code blocks from the latter to the former +-- (The cgs_stmts will often be empty, but not always; see codeOnly) +s1 `addCodeBlocksFrom` s2 + = s1 { cgs_stmts = cgs_stmts s1 `appOL` cgs_stmts s2, + cgs_tops = cgs_tops s1 `appOL` cgs_tops s2 } + +maxHpHw :: HeapUsage -> VirtualHpOffset -> HeapUsage +hp_usg `maxHpHw` hw = hp_usg { virtHp = virtHp hp_usg `max` hw } + +maxStkHw :: StackUsage -> VirtualSpOffset -> StackUsage +stk_usg `maxStkHw` hw = stk_usg { hwSp = hwSp stk_usg `max` hw } \end{code} %************************************************************************ %* * -\subsection[CgMonad-basics]{Basic code-generation monad magic} + The FCode monad %* * %************************************************************************ \begin{code} newtype FCode a = FCode (CgInfoDownwards -> CgState -> (a, CgState)) -type Code = FCode () +type Code = FCode () instance Monad FCode where (>>=) = thenFC @@ -268,17 +370,13 @@ instance Monad FCode where The Abstract~C is not in the environment so as to improve strictness. 
\begin{code} -initC :: CompilationInfo -> Code -> AbstractC - -initC cg_info (FCode code) - = case (code (MkCgInfoDown - cg_info - emptyVarEnv -- (error "initC: statics") - (error "initC: srt") - (mkTopTickyCtrLabel) - initEobInfo) - initialStateC) of - ((),MkCgState abc _ _) -> abc +initC :: Module -> FCode a -> IO a + +initC mod (FCode code) + = do { uniqs <- mkSplitUniqSupply 'c' + ; case code (initCgInfoDown mod) (initCgState uniqs) of + (res, _) -> return res + } returnFC :: a -> FCode a returnFC val = FCode (\info_down state -> (val, state)) @@ -332,9 +430,12 @@ fixC fcode = FCode ( ) \end{code} -Operators for getting and setting the state and "info_down". -To maximise encapsulation, code should try to only get and set the -state it actually uses. +%************************************************************************ +%* * + Operators for getting and setting the state and "info_down". + +%* * +%************************************************************************ \begin{code} getState :: FCode CgState @@ -343,35 +444,58 @@ getState = FCode $ \info_down state -> (state,state) setState :: CgState -> FCode () setState state = FCode $ \info_down _ -> ((),state) -getUsage :: FCode CgStksAndHeapUsage -getUsage = do - MkCgState absC binds usage <- getState - return usage +getStkUsage :: FCode StackUsage +getStkUsage = do + state <- getState + return $ cgs_stk_usg state -setUsage :: CgStksAndHeapUsage -> FCode () -setUsage newusage = do - MkCgState absC binds usage <- getState - setState $ MkCgState absC binds newusage +setStkUsage :: StackUsage -> Code +setStkUsage new_stk_usg = do + state <- getState + setState $ state {cgs_stk_usg = new_stk_usg} + +getHpUsage :: FCode HeapUsage +getHpUsage = do + state <- getState + return $ cgs_hp_usg state + +setHpUsage :: HeapUsage -> Code +setHpUsage new_hp_usg = do + state <- getState + setState $ state {cgs_hp_usg = new_hp_usg} getBinds :: FCode CgBindings getBinds = do - MkCgState absC binds usage <- getState - 
return binds + state <- getState + return $ cgs_binds state setBinds :: CgBindings -> FCode () -setBinds newbinds = do - MkCgState absC binds usage <- getState - setState $ MkCgState absC newbinds usage +setBinds new_binds = do + state <- getState + setState $ state {cgs_binds = new_binds} getStaticBinds :: FCode CgBindings getStaticBinds = do - (MkCgInfoDown _ static_binds _ _ _) <- getInfoDown - return static_binds + info <- getInfoDown + return (cgd_statics info) withState :: FCode a -> CgState -> FCode (a,CgState) withState (FCode fcode) newstate = FCode $ \info_down state -> let (retval, state2) = fcode info_down newstate in ((retval,state2), state) +newUniqSupply :: FCode UniqSupply +newUniqSupply = do + state <- getState + let (us1, us2) = splitUniqSupply (cgs_uniqs state) + setState $ state { cgs_uniqs = us1 } + return us2 + +newUnique :: FCode Unique +newUnique = do + us <- newUniqSupply + return (uniqFromSupply us) + +------------------ getInfoDown :: FCode CgInfoDownwards getInfoDown = FCode $ \info_down state -> (info_down,state) @@ -383,16 +507,22 @@ doFCode (FCode fcode) info_down state = fcode info_down state \end{code} +%************************************************************************ +%* * + Forking +%* * +%************************************************************************ + @forkClosureBody@ takes a code, $c$, and compiles it in a completely fresh environment, except that: - compilation info and statics are passed in unchanged. The current environment is passed on completely unaltered, except that abstract C from the fork is incorporated. -@forkAbsC@ takes a code and compiles it in the current environment, -returning the abstract C thus constructed. The current environment -is passed on completely unchanged. It is pretty similar to @getAbsC@, -except that the latter does affect the environment. ToDo: combine? +@forkProc@ takes a code and compiles it in the current environment, +returning the basic blocks thus constructed. 
The current environment +is passed on completely unchanged. It is pretty similar to +@getBlocks@, except that the latter does affect the environment. @forkStatics@ $fc$ compiles $fc$ in an environment whose statics come from the current bindings, but which is otherwise freshly initialised. @@ -401,40 +531,57 @@ bindings and usage information is otherwise unchanged. \begin{code} forkClosureBody :: Code -> Code - -forkClosureBody (FCode code) = do - (MkCgInfoDown cg_info statics srt ticky _) <- getInfoDown - (MkCgState absC_in binds un_usage) <- getState - let body_info_down = MkCgInfoDown cg_info statics srt ticky initEobInfo - let ((),fork_state) = code body_info_down initialStateC - let MkCgState absC_fork _ _ = fork_state - setState $ MkCgState (AbsCStmts absC_in absC_fork) binds un_usage +forkClosureBody body_code + = do { info <- getInfoDown + ; us <- newUniqSupply + ; state <- getState + ; let body_info_down = info { cgd_eob = initEobInfo } + ((),fork_state) = doFCode body_code body_info_down + (initCgState us) + ; ASSERT( isNilOL (cgs_stmts fork_state) ) + setState $ state `addCodeBlocksFrom` fork_state } forkStatics :: FCode a -> FCode a - -forkStatics (FCode fcode) = FCode ( - \(MkCgInfoDown cg_info _ srt ticky _) - (MkCgState absC_in statics un_usage) - -> - let - (result, state) = fcode rhs_info_down initialStateC - MkCgState absC_fork _ _ = state -- Don't merge these this line with the one - -- above or it becomes too strict! 
- rhs_info_down = MkCgInfoDown cg_info statics srt ticky initEobInfo - in - (result, MkCgState (AbsCStmts absC_in absC_fork) statics un_usage) - ) - -forkAbsC :: Code -> FCode AbstractC -forkAbsC (FCode code) = - do - info_down <- getInfoDown - (MkCgState absC1 bs usage) <- getState - let ((),MkCgState absC2 _ ((_, _, _, _,h2), _)) = code info_down (MkCgState AbsCNop bs usage) - let ((v, t, f, r, h1), heap_usage) = usage - let new_usage = ((v, t, f, r, h1 `max` h2), heap_usage) - setState $ MkCgState absC1 bs new_usage - return absC2 +forkStatics body_code + = do { info <- getInfoDown + ; us <- newUniqSupply + ; state <- getState + ; let rhs_info_down = info { cgd_statics = cgs_binds state, + cgd_eob = initEobInfo } + (result, fork_state_out) = doFCode body_code rhs_info_down + (initCgState us) + ; ASSERT( isNilOL (cgs_stmts fork_state_out) ) + setState (state `addCodeBlocksFrom` fork_state_out) + ; return result } + +forkProc :: Code -> FCode CgStmts +forkProc body_code + = do { info_down <- getInfoDown + ; us <- newUniqSupply + ; state <- getState + ; let fork_state_in = (initCgState us) + { cgs_binds = cgs_binds state, + cgs_stk_usg = cgs_stk_usg state, + cgs_hp_usg = cgs_hp_usg state } + -- ToDo: is the hp usage necesary? 
+ (code_blks, fork_state_out) = doFCode (getCgStmts body_code) + info_down fork_state_in + ; setState $ state `stateIncUsageEval` fork_state_out + ; return code_blks } + +codeOnly :: Code -> Code +-- Emit any code from the inner thing into the outer thing +-- Do not affect anything else in the outer state +-- Used in almost-circular code to prevent false loop dependencies +codeOnly body_code + = do { info_down <- getInfoDown + ; us <- newUniqSupply + ; state <- getState + ; let fork_state_in = (initCgState us) { cgs_binds = cgs_binds state, + cgs_stk_usg = cgs_stk_usg state, + cgs_hp_usg = cgs_hp_usg state } + ((), fork_state_out) = doFCode body_code info_down fork_state_in + ; setState $ state `addCodeBlocksFrom` fork_state_out } \end{code} @forkAlts@ $bs~d$ takes fcodes $bs$ for the branches of a @case@, and @@ -448,13 +595,23 @@ that forkAlts :: [FCode a] -> FCode [a] forkAlts branch_fcodes - = do info_down <- getInfoDown - in_state <- getState - let compile (FCode fc) = fc info_down in_state - let (branch_results, branch_out_states) = unzip (map compile branch_fcodes) - setState $ foldl stateIncUsage in_state branch_out_states - -- NB foldl. in_state is the *left* argument to stateIncUsage - return branch_results + = do { info_down <- getInfoDown + ; us <- newUniqSupply + ; state <- getState + ; let compile us branch + = (us2, doFCode branch info_down branch_state) + where + (us1,us2) = splitUniqSupply us + branch_state = (initCgState us1) { + cgs_binds = cgs_binds state, + cgs_stk_usg = cgs_stk_usg state, + cgs_hp_usg = cgs_hp_usg state } + + (_us, results) = mapAccumL compile us branch_fcodes + (branch_results, branch_out_states) = unzip results + ; setState $ foldl stateIncUsage state branch_out_states + -- NB foldl. state is the *left* argument to stateIncUsage + ; return branch_results } \end{code} @forkEval@ takes two blocks of code. 
@@ -479,162 +636,204 @@ forkEval :: EndOfBlockInfo -- For the body -> FCode EndOfBlockInfo -- The new end of block info forkEval body_eob_info env_code body_code - = forkEvalHelp body_eob_info env_code body_code `thenFC` \ (v, sequel) -> - returnFC (EndOfBlockInfo v sequel) + = do { (v, sequel) <- forkEvalHelp body_eob_info env_code body_code + ; returnFC (EndOfBlockInfo v sequel) } forkEvalHelp :: EndOfBlockInfo -- For the body -> Code -- Code to set environment -> FCode a -- The code to do after the eval - -> FCode (Int, -- Sp - a) -- Result of the FCode - -forkEvalHelp body_eob_info env_code body_code = - do - info_down@(MkCgInfoDown cg_info statics srt ticky _) <- getInfoDown - state <- getState - let info_down_for_body = MkCgInfoDown cg_info statics srt ticky body_eob_info - let (_,MkCgState _ binds ((v,t,f,_,_),_)) = - doFCode env_code info_down_for_body state - let state_for_body = MkCgState AbsCNop - (nukeVolatileBinds binds) - ((v,t,f,v,v), (0,0)) - let (value_returned, state_at_end_return) = - doFCode body_code info_down_for_body state_for_body - setState $ state `stateIncUsageEval` state_at_end_return - return (v,value_returned) - -stateIncUsageEval :: CgState -> CgState -> CgState -stateIncUsageEval (MkCgState absC1 bs ((v,t,f,r,h1),heap_usage)) - (MkCgState absC2 _ ((_,_,_,_,h2), _)) - = MkCgState (absC1 `mkAbsCStmts` absC2) - -- The AbsC coming back should consist only of nested declarations, + -> FCode (VirtualSpOffset, -- Sp + a) -- Result of the FCode + -- A disturbingly complicated function +forkEvalHelp body_eob_info env_code body_code + = do { info_down@(MkCgInfoDown cg_info statics srt ticky _) <- getInfoDown + ; us <- newUniqSupply + ; state <- getState + ; let { info_down_for_body = info_down {cgd_eob = body_eob_info} + ; (_, env_state) = doFCode env_code info_down_for_body + (state {cgs_uniqs = us}) + ; state_for_body = (initCgState (cgs_uniqs env_state)) + { cgs_binds = binds_for_body, + cgs_stk_usg = stk_usg_for_body } + ; binds_for_body = 
nukeVolatileBinds (cgs_binds env_state) + ; stk_usg_from_env = cgs_stk_usg env_state + ; virtSp_from_env = virtSp stk_usg_from_env + ; stk_usg_for_body = stk_usg_from_env {realSp = virtSp_from_env, + hwSp = virtSp_from_env} + ; (value_returned, state_at_end_return) + = doFCode body_code info_down_for_body state_for_body + } + ; ASSERT( isNilOL (cgs_stmts state_at_end_return) ) + -- The code coming back should consist only of nested declarations, -- notably of the return vector! - bs - ((v,t,f,r,h1 `max` h2), heap_usage) - -- We don't max the heap high-watermark because stateIncUsageEval is - -- used only in forkEval, which in turn is only used for blocks of code - -- which do their own heap-check. -\end{code} + setState $ state `stateIncUsageEval` state_at_end_return + ; return (virtSp_from_env, value_returned) } -%************************************************************************ -%* * -\subsection[CgMonad-spitting-AbstractC]{Spitting out @AbstractC@} -%* * -%************************************************************************ -@nopC@ is the no-op for the @Code@ monad; it adds no Abstract~C to the -environment; @absC@ glues @ab_C@ onto the Abstract~C collected so far. -\begin{code} +-- ---------------------------------------------------------------------------- +-- Combinators for emitting code + nopC :: Code nopC = return () -absC :: AbstractC -> Code -absC more_absC = do - state@(MkCgState absC binds usage) <- getState - setState $ MkCgState (mkAbsCStmts absC more_absC) binds usage -\end{code} - -These two are just like @absC@, except they examine the compilation -info (whether SCC profiling or profiling-ctrs going) and possibly emit -nothing. 
- -\begin{code} -costCentresC :: FastString -> [CAddrMode] -> Code -costCentresC macro args - | opt_SccProfilingOn = absC (CCallProfCCMacro macro args) - | otherwise = nopC - -profCtrC :: FastString -> [CAddrMode] -> Code -profCtrC macro args - | opt_DoTickyProfiling = absC (CCallProfCtrMacro macro args) - | otherwise = nopC - -profCtrAbsC :: FastString -> [CAddrMode] -> AbstractC -profCtrAbsC macro args - | opt_DoTickyProfiling = CCallProfCtrMacro macro args - | otherwise = AbsCNop - -ldvEnter :: Code -ldvEnter = costCentresC FSLIT("LDV_ENTER") [CReg node] - -{- Try to avoid adding too many special compilation strategies here. - It's better to modify the header files as necessary for particular - targets, so that we can get away with as few variants of .hc files - as possible. --} -\end{code} - -@getAbsC@ compiles the code in the current environment, and returns -the abstract C thus constructed (leaving the abstract C being carried -around in the state untouched). @getAbsC@ does not generate any -in-line Abstract~C itself, but the environment it returns is that -obtained from the compilation. +whenC :: Bool -> Code -> Code +whenC True code = code +whenC False code = nopC + +stmtC :: CmmStmt -> Code +stmtC stmt = emitCgStmt (CgStmt stmt) + +labelC :: BlockId -> Code +labelC id = emitCgStmt (CgLabel id) + +newLabelC :: FCode BlockId +newLabelC = do { id <- newUnique; return (BlockId id) } + +checkedAbsC :: CmmStmt -> Code +-- Emit code, eliminating no-ops +checkedAbsC stmt = emitStmts (if isNopStmt stmt then nilOL + else unitOL stmt) + +stmtsC :: [CmmStmt] -> Code +stmtsC stmts = emitStmts (toOL stmts) + +-- Emit code; no no-op checking +emitStmts :: CmmStmts -> Code +emitStmts stmts = emitCgStmts (fmap CgStmt stmts) + +-- forkLabelledCode is for emitting a chunk of code with a label, outside +-- of the current instruction stream. 
+forkLabelledCode :: Code -> FCode BlockId +forkLabelledCode code = getCgStmts code >>= forkCgStmts + +emitCgStmt :: CgStmt -> Code +emitCgStmt stmt + = do { state <- getState + ; setState $ state { cgs_stmts = cgs_stmts state `snocOL` stmt } + } + +emitData :: Section -> [CmmStatic] -> Code +emitData sect lits + = do { state <- getState + ; setState $ state { cgs_tops = cgs_tops state `snocOL` data_block } } + where + data_block = CmmData sect lits + +emitProc :: [CmmLit] -> CLabel -> [LocalReg] -> [CmmBasicBlock] -> Code +emitProc lits lbl args blocks + = do { let proc_block = CmmProc (map CmmStaticLit lits) lbl args blocks + ; state <- getState + ; setState $ state { cgs_tops = cgs_tops state `snocOL` proc_block } } + +emitSimpleProc :: CLabel -> Code -> Code +-- Emit a procedure whose body is the specified code; no info table +emitSimpleProc lbl code + = do { stmts <- getCgStmts code + ; blks <- cgStmtsToBlocks stmts + ; emitProc [] lbl [] blks } + +getCmm :: Code -> FCode Cmm +-- Get all the CmmTops (there should be no stmts) +getCmm code + = do { state1 <- getState + ; ((), state2) <- withState code (state1 { cgs_tops = nilOL }) + ; setState $ state2 { cgs_tops = cgs_tops state1 } + ; return (Cmm (fromOL (cgs_tops state2))) } + +-- ---------------------------------------------------------------------------- +-- CgStmts + +-- These functions deal in terms of CgStmts, which is an abstract type +-- representing the code in the current proc. + + +-- emit CgStmts into the current instruction stream +emitCgStmts :: CgStmts -> Code +emitCgStmts stmts + = do { state <- getState + ; setState $ state { cgs_stmts = cgs_stmts state `appOL` stmts } } + +-- emit CgStmts outside the current instruction stream, and return a label +forkCgStmts :: CgStmts -> FCode BlockId +forkCgStmts stmts + = do { id <- newLabelC + ; emitCgStmt (CgFork id stmts) + ; return id + } + +-- turn CgStmts into [CmmBasicBlock], for making a new proc. 
+cgStmtsToBlocks :: CgStmts -> FCode [CmmBasicBlock] +cgStmtsToBlocks stmts + = do { id <- newLabelC + ; return (flattenCgStmts id stmts) + } + +-- collect the code emitted by an FCode computation +getCgStmts' :: FCode a -> FCode (a, CgStmts) +getCgStmts' fcode + = do { state1 <- getState + ; (a, state2) <- withState fcode (state1 { cgs_stmts = nilOL }) + ; setState $ state2 { cgs_stmts = cgs_stmts state1 } + ; return (a, cgs_stmts state2) } + +getCgStmts :: FCode a -> FCode CgStmts +getCgStmts fcode = do { (_,stmts) <- getCgStmts' fcode; return stmts } + +-- Simple ways to construct CgStmts: +noCgStmts :: CgStmts +noCgStmts = nilOL + +oneCgStmt :: CmmStmt -> CgStmts +oneCgStmt stmt = unitOL (CgStmt stmt) + +consCgStmt :: CmmStmt -> CgStmts -> CgStmts +consCgStmt stmt stmts = CgStmt stmt `consOL` stmts + +-- ---------------------------------------------------------------------------- +-- Get the current module name -\begin{code} -getAbsC :: Code -> FCode AbstractC -getAbsC code = do - MkCgState absC binds usage <- getState - ((),MkCgState absC2 binds2 usage2) <- withState code (MkCgState AbsCNop binds usage) - setState $ MkCgState absC binds2 usage2 - return absC2 -\end{code} - -\begin{code} moduleName :: FCode Module -moduleName = do - (MkCgInfoDown (MkCompInfo mod_name) _ _ _ _) <- getInfoDown - return mod_name -\end{code} +moduleName = do { info <- getInfoDown; return (cgd_mod info) } + +-- ---------------------------------------------------------------------------- +-- Get/set the end-of-block info -\begin{code} setEndOfBlockInfo :: EndOfBlockInfo -> Code -> Code setEndOfBlockInfo eob_info code = do - (MkCgInfoDown c_info statics srt ticky _) <- getInfoDown - withInfoDown code (MkCgInfoDown c_info statics srt ticky eob_info) + info <- getInfoDown + withInfoDown code (info {cgd_eob = eob_info}) getEndOfBlockInfo :: FCode EndOfBlockInfo getEndOfBlockInfo = do - (MkCgInfoDown c_info statics _ _ eob_info) <- getInfoDown - return eob_info -\end{code} + info <- 
getInfoDown + return (cgd_eob info) -There is just one SRT for each top level binding; all the nested -bindings use sub-sections of this SRT. The label is passed down to -the nested bindings via the monad. +-- ---------------------------------------------------------------------------- +-- Get/set the current SRT label -\begin{code} -getSRTInfo :: Name -> SRT -> FCode C_SRT -getSRTInfo id NoSRT = return NoC_SRT -getSRTInfo id (SRT off len bmp) - | len > hALF_WORD || bmp == [fromIntegral srt_escape] = do - srt_lbl <- getSRTLabel - let srt_desc_lbl = mkSRTDescLabel id - absC (CSRTDesc srt_desc_lbl srt_lbl off len bmp) - return (C_SRT srt_desc_lbl 0 srt_escape) - | otherwise = do - srt_lbl <- getSRTLabel - return (C_SRT srt_lbl off (fromIntegral (head bmp))) - -srt_escape = (-1) :: StgHalfWord +-- There is just one SRT for each top level binding; all the nested +-- bindings use sub-sections of this SRT. The label is passed down to +-- the nested bindings via the monad. getSRTLabel :: FCode CLabel -- Used only by cgPanic -getSRTLabel = do MkCgInfoDown _ _ srt_lbl _ _ <- getInfoDown - return srt_lbl +getSRTLabel = do info <- getInfoDown + return (cgd_srt info) setSRTLabel :: CLabel -> FCode a -> FCode a setSRTLabel srt_lbl code - = do MkCgInfoDown c_info statics _ ticky eob_info <- getInfoDown - withInfoDown code (MkCgInfoDown c_info statics srt_lbl ticky eob_info) -\end{code} + = do info <- getInfoDown + withInfoDown code (info { cgd_srt = srt_lbl}) + +-- ---------------------------------------------------------------------------- +-- Get/set the current ticky counter label -\begin{code} getTickyCtrLabel :: FCode CLabel getTickyCtrLabel = do - (MkCgInfoDown _ _ _ ticky _) <- getInfoDown - return ticky + info <- getInfoDown + return (cgd_ticky info) setTickyCtrLabel :: CLabel -> Code -> Code setTickyCtrLabel ticky code = do - (MkCgInfoDown c_info statics srt _ eob_info) <- getInfoDown - withInfoDown code (MkCgInfoDown c_info statics srt ticky eob_info) + info <- 
getInfoDown + withInfoDown code (info {cgd_ticky = ticky}) \end{code} diff --git a/ghc/compiler/codeGen/CgParallel.hs b/ghc/compiler/codeGen/CgParallel.hs new file mode 100644 index 0000000000..74cbeb5fda --- /dev/null +++ b/ghc/compiler/codeGen/CgParallel.hs @@ -0,0 +1,90 @@ +-- Code generation relaed to GpH +-- (a) parallel +-- (b) GranSim + +module CgParallel( + staticGranHdr,staticParHdr, + granFetchAndReschedule, granYield, + doGranAllocate + ) where + +import CgMonad +import CgCallConv ( mkRegLiveness ) +import Id ( Id ) +import Cmm ( CmmLit, GlobalReg(..), node, CmmExpr ) +import CmdLineOpts ( opt_GranMacros ) +import Outputable + +staticParHdr :: [CmmLit] +-- Parallel header words in a static closure +staticParHdr = [] + +-------------------------------------------------------- +-- GranSim stuff +-------------------------------------------------------- + +staticGranHdr :: [CmmLit] +-- Gransim header words in a static closure +staticGranHdr = [] + +doGranAllocate :: CmmExpr -> Code +-- macro DO_GRAN_ALLOCATE +doGranAllocate hp + | not opt_GranMacros = nopC + | otherwise = panic "doGranAllocate" + + + +------------------------- +granFetchAndReschedule :: [(Id,GlobalReg)] -- Live registers + -> Bool -- Node reqd? + -> Code +-- Emit code for simulating a fetch and then reschedule. 
+granFetchAndReschedule regs node_reqd + | opt_GranMacros && (node `elem` map snd regs || node_reqd) + = do { fetch + ; reschedule liveness node_reqd } + | otherwise + = nopC + where + liveness = mkRegLiveness regs 0 0 + +fetch = panic "granFetch" + -- Was: absC (CMacroStmt GRAN_FETCH []) + --HWL: generate GRAN_FETCH macro for GrAnSim + -- currently GRAN_FETCH and GRAN_FETCH_AND_RESCHEDULE are miai + +reschedule liveness node_reqd = panic "granReschedule" + -- Was: absC (CMacroStmt GRAN_RESCHEDULE [ + -- mkIntCLit (I# (word2Int# liveness_mask)), + -- mkIntCLit (if node_reqd then 1 else 0)]) + + +------------------------- +-- The @GRAN_YIELD@ macro is taken from JSM's code for Concurrent Haskell. It +-- allows to context-switch at places where @node@ is not alive (it uses the +-- @Continue@ rather than the @EnterNodeCode@ function in the RTS). We emit +-- this kind of macro at the beginning of the following kinds of basic bocks: +-- \begin{itemize} +-- \item Slow entry code where node is not alive (see @CgClosure.lhs@). Normally +-- we use @fetchAndReschedule@ at a slow entry code. +-- \item Fast entry code (see @CgClosure.lhs@). +-- \item Alternatives in case expressions (@CLabelledCode@ structures), provided +-- that they are not inlined (see @CgCases.lhs@). These alternatives will +-- be turned into separate functions. + +granYield :: [(Id,GlobalReg)] -- Live registers + -> Bool -- Node reqd? 
+ -> Code + +granYield regs node_reqd + | opt_GranMacros && node_reqd = yield liveness + | otherwise = nopC + where + liveness = mkRegLiveness regs 0 0 + +yield liveness = panic "granYield" + -- Was : absC (CMacroStmt GRAN_YIELD + -- [mkIntCLit (I# (word2Int# liveness_mask))]) + + diff --git a/ghc/compiler/codeGen/CgPrimOp.hs b/ghc/compiler/codeGen/CgPrimOp.hs new file mode 100644 index 0000000000..65ad0cc724 --- /dev/null +++ b/ghc/compiler/codeGen/CgPrimOp.hs @@ -0,0 +1,588 @@ +----------------------------------------------------------------------------- +-- +-- Code generation for PrimOps. +-- +-- (c) The University of Glasgow 2004 +-- +----------------------------------------------------------------------------- + +module CgPrimOp ( + cgPrimOp + ) where + +import StgSyn ( StgLiveVars, StgArg ) +import CgBindery ( getVolatileRegs, getArgAmodes ) +import CgMonad +import CgInfoTbls ( getConstrTag ) +import CgUtils ( cmmOffsetW, cmmOffsetB, cmmLoadIndexW ) +import Cmm +import CLabel ( mkMAP_FROZEN_infoLabel ) +import CmmUtils +import MachOp +import SMRep +import PrimOp ( PrimOp(..) ) +import SMRep ( tablesNextToCode ) +import Constants ( wORD_SIZE, wORD_SIZE_IN_BITS ) +import Outputable + +-- --------------------------------------------------------------------------- +-- Code generation for PrimOps + +cgPrimOp :: [CmmReg] -- where to put the results + -> PrimOp -- the op + -> [StgArg] -- arguments + -> StgLiveVars -- live vars, in case we need to save them + -> Code + +cgPrimOp results op args live + = do arg_exprs <- getArgAmodes args + let non_void_args = [ e | (r,e) <- arg_exprs, nonVoidArg r ] + emitPrimOp results op non_void_args live + + +emitPrimOp :: [CmmReg] -- where to put the results + -> PrimOp -- the op + -> [CmmExpr] -- arguments + -> StgLiveVars -- live vars, in case we need to save them + -> Code + +-- First we handle various awkward cases specially. The remaining +-- easy cases are then handled by translateOp, defined below. 
+ +emitPrimOp [res_r,res_c] IntAddCOp [aa,bb] live +{- + With some bit-twiddling, we can define int{Add,Sub}Czh portably in + C, and without needing any comparisons. This may not be the + fastest way to do it - if you have better code, please send it! --SDM + + Return : r = a + b, c = 0 if no overflow, 1 on overflow. + + We currently don't make use of the r value if c is != 0 (i.e. + overflow), we just convert to big integers and try again. This + could be improved by making r and c the correct values for + plugging into a new J#. + + { r = ((I_)(a)) + ((I_)(b)); \ + c = ((StgWord)(~(((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \ + >> (BITS_IN (I_) - 1); \ + } + Wading through the mass of bracketry, it seems to reduce to: + c = ( (~(a^b)) & (a^r) ) >>unsigned (BITS_IN(I_)-1) + +-} + = stmtsC [ + CmmAssign res_r (CmmMachOp mo_wordAdd [aa,bb]), + CmmAssign res_c $ + CmmMachOp mo_wordUShr [ + CmmMachOp mo_wordAnd [ + CmmMachOp mo_wordNot [CmmMachOp mo_wordXor [aa,bb]], + CmmMachOp mo_wordXor [aa, CmmReg res_r] + ], + CmmLit (mkIntCLit (wORD_SIZE_IN_BITS - 1)) + ] + ] + + +emitPrimOp [res_r,res_c] IntSubCOp [aa,bb] live +{- Similarly: + #define subIntCzh(r,c,a,b) \ + { r = ((I_)(a)) - ((I_)(b)); \ + c = ((StgWord)((((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \ + >> (BITS_IN (I_) - 1); \ + } + + c = ((a^b) & (a^r)) >>unsigned (BITS_IN(I_)-1) +-} + = stmtsC [ + CmmAssign res_r (CmmMachOp mo_wordSub [aa,bb]), + CmmAssign res_c $ + CmmMachOp mo_wordUShr [ + CmmMachOp mo_wordAnd [ + CmmMachOp mo_wordXor [aa,bb], + CmmMachOp mo_wordXor [aa, CmmReg res_r] + ], + CmmLit (mkIntCLit (wORD_SIZE_IN_BITS - 1)) + ] + ] + + +emitPrimOp [res] ParOp [arg] live + = stmtC (CmmAssign res (CmmLit (mkIntCLit 1))) + +emitPrimOp [res] ReadMutVarOp [mutv] live + = stmtC (CmmAssign res (cmmLoadIndexW mutv fixedHdrSize)) + +emitPrimOp [] WriteMutVarOp [mutv,var] live + = stmtC (CmmStore (cmmOffsetW mutv fixedHdrSize) var) + +emitPrimOp [res] ForeignObjToAddrOp [fo] live + = stmtC (CmmAssign res 
(cmmLoadIndexW fo fixedHdrSize)) + +emitPrimOp [] WriteForeignObjOp [fo,addr] live + = stmtC (CmmStore (cmmOffsetW fo fixedHdrSize) addr) + +-- #define sizzeofByteArrayzh(r,a) \ +-- r = (((StgArrWords *)(a))->words * sizeof(W_)) +emitPrimOp [res] SizeofByteArrayOp [arg] live + = stmtC $ + CmmAssign res (CmmMachOp mo_wordMul [ + cmmLoadIndexW arg fixedHdrSize, + CmmLit (mkIntCLit wORD_SIZE) + ]) + +-- #define sizzeofMutableByteArrayzh(r,a) \ +-- r = (((StgArrWords *)(a))->words * sizeof(W_)) +emitPrimOp [res] SizeofMutableByteArrayOp [arg] live + = emitPrimOp [res] SizeofByteArrayOp [arg] live + + +-- #define touchzh(o) /* nothing */ +emitPrimOp [] TouchOp [arg] live + = nopC + +-- #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a) +emitPrimOp [res] ByteArrayContents_Char [arg] live + = stmtC (CmmAssign res (cmmOffsetB arg arrWordsHdrSize)) + +-- #define stableNameToIntzh(r,s) (r = ((StgStableName *)s)->sn) +emitPrimOp [res] StableNameToIntOp [arg] live + = stmtC (CmmAssign res (cmmLoadIndexW arg fixedHdrSize)) + +-- #define eqStableNamezh(r,sn1,sn2) \ +-- (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn)) +emitPrimOp [res] EqStableNameOp [arg1,arg2] live + = stmtC (CmmAssign res (CmmMachOp mo_wordEq [ + cmmLoadIndexW arg1 fixedHdrSize, + cmmLoadIndexW arg2 fixedHdrSize + ])) + + +emitPrimOp [res] ReallyUnsafePtrEqualityOp [arg1,arg2] live + = stmtC (CmmAssign res (CmmMachOp mo_wordEq [arg1,arg2])) + +-- #define addrToHValuezh(r,a) r=(P_)a +emitPrimOp [res] AddrToHValueOp [arg] live + = stmtC (CmmAssign res arg) + +-- #define dataToTagzh(r,a) r=(GET_TAG(((StgClosure *)a)->header.info)) +emitPrimOp [res] DataToTagOp [arg] live + = stmtC (CmmAssign res (getConstrTag arg)) + +{- Freezing arrays-of-ptrs requires changing an info table, for the + benefit of the generational collector. It needs to scavenge mutable + objects, even if they are in old space. When they become immutable, + they can be removed from this scavenge list. 
-} + +-- #define unsafeFreezzeArrayzh(r,a) +-- { +-- SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN_info); +-- r = a; +-- } +emitPrimOp [res] UnsafeFreezeArrayOp [arg] live + = stmtsC [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_infoLabel)), + CmmAssign res arg ] + +-- #define unsafeFreezzeByteArrayzh(r,a) r=(a) +emitPrimOp [res] UnsafeFreezeByteArrayOp [arg] live + = stmtC (CmmAssign res arg) + +-- Reading/writing pointer arrays + +emitPrimOp [r] ReadArrayOp [obj,ix] live = doReadPtrArrayOp r obj ix +emitPrimOp [r] IndexArrayOp [obj,ix] live = doReadPtrArrayOp r obj ix +emitPrimOp [] WriteArrayOp [obj,ix,v] live = doWritePtrArrayOp obj ix v + +-- IndexXXXoffForeignObj + +emitPrimOp res IndexOffForeignObjOp_Char args live = doIndexOffForeignObjOp (Just mo_u_8ToWord) I8 res args +emitPrimOp res IndexOffForeignObjOp_WideChar args live = doIndexOffForeignObjOp (Just mo_u_32ToWord) I32 res args +emitPrimOp res IndexOffForeignObjOp_Int args live = doIndexOffForeignObjOp Nothing wordRep res args +emitPrimOp res IndexOffForeignObjOp_Word args live = doIndexOffForeignObjOp Nothing wordRep res args +emitPrimOp res IndexOffForeignObjOp_Addr args live = doIndexOffForeignObjOp Nothing wordRep res args +emitPrimOp res IndexOffForeignObjOp_Float args live = doIndexOffForeignObjOp Nothing F32 res args +emitPrimOp res IndexOffForeignObjOp_Double args live = doIndexOffForeignObjOp Nothing F64 res args +emitPrimOp res IndexOffForeignObjOp_StablePtr args live = doIndexOffForeignObjOp Nothing wordRep res args +emitPrimOp res IndexOffForeignObjOp_Int8 args live = doIndexOffForeignObjOp (Just mo_s_8ToWord) I8 res args +emitPrimOp res IndexOffForeignObjOp_Int16 args live = doIndexOffForeignObjOp (Just mo_s_16ToWord) I16 res args +emitPrimOp res IndexOffForeignObjOp_Int32 args live = doIndexOffForeignObjOp (Just mo_s_32ToWord) I32 res args +emitPrimOp res IndexOffForeignObjOp_Int64 args live = doIndexOffForeignObjOp Nothing I64 res args +emitPrimOp res IndexOffForeignObjOp_Word8 args 
live = doIndexOffForeignObjOp (Just mo_u_8ToWord) I8 res args +emitPrimOp res IndexOffForeignObjOp_Word16 args live = doIndexOffForeignObjOp (Just mo_u_16ToWord) I16 res args +emitPrimOp res IndexOffForeignObjOp_Word32 args live = doIndexOffForeignObjOp (Just mo_u_32ToWord) I32 res args +emitPrimOp res IndexOffForeignObjOp_Word64 args live = doIndexOffForeignObjOp Nothing I64 res args + +-- IndexXXXoffAddr + +emitPrimOp res IndexOffAddrOp_Char args live = doIndexOffAddrOp (Just mo_u_8ToWord) I8 res args +emitPrimOp res IndexOffAddrOp_WideChar args live = doIndexOffAddrOp (Just mo_u_32ToWord) I32 res args +emitPrimOp res IndexOffAddrOp_Int args live = doIndexOffAddrOp Nothing wordRep res args +emitPrimOp res IndexOffAddrOp_Word args live = doIndexOffAddrOp Nothing wordRep res args +emitPrimOp res IndexOffAddrOp_Addr args live = doIndexOffAddrOp Nothing wordRep res args +emitPrimOp res IndexOffAddrOp_Float args live = doIndexOffAddrOp Nothing F32 res args +emitPrimOp res IndexOffAddrOp_Double args live = doIndexOffAddrOp Nothing F64 res args +emitPrimOp res IndexOffAddrOp_StablePtr args live = doIndexOffAddrOp Nothing wordRep res args +emitPrimOp res IndexOffAddrOp_Int8 args live = doIndexOffAddrOp (Just mo_s_8ToWord) I8 res args +emitPrimOp res IndexOffAddrOp_Int16 args live = doIndexOffAddrOp (Just mo_s_16ToWord) I16 res args +emitPrimOp res IndexOffAddrOp_Int32 args live = doIndexOffAddrOp (Just mo_s_32ToWord) I32 res args +emitPrimOp res IndexOffAddrOp_Int64 args live = doIndexOffAddrOp Nothing I64 res args +emitPrimOp res IndexOffAddrOp_Word8 args live = doIndexOffAddrOp (Just mo_u_8ToWord) I8 res args +emitPrimOp res IndexOffAddrOp_Word16 args live = doIndexOffAddrOp (Just mo_u_16ToWord) I16 res args +emitPrimOp res IndexOffAddrOp_Word32 args live = doIndexOffAddrOp (Just mo_u_32ToWord) I32 res args +emitPrimOp res IndexOffAddrOp_Word64 args live = doIndexOffAddrOp Nothing I64 res args + +-- ReadXXXoffAddr, which are identical, for our purposes, to 
IndexXXXoffAddr. + +emitPrimOp res ReadOffAddrOp_Char args live = doIndexOffAddrOp (Just mo_u_8ToWord) I8 res args +emitPrimOp res ReadOffAddrOp_WideChar args live = doIndexOffAddrOp (Just mo_u_32ToWord) I32 res args +emitPrimOp res ReadOffAddrOp_Int args live = doIndexOffAddrOp Nothing wordRep res args +emitPrimOp res ReadOffAddrOp_Word args live = doIndexOffAddrOp Nothing wordRep res args +emitPrimOp res ReadOffAddrOp_Addr args live = doIndexOffAddrOp Nothing wordRep res args +emitPrimOp res ReadOffAddrOp_Float args live = doIndexOffAddrOp Nothing F32 res args +emitPrimOp res ReadOffAddrOp_Double args live = doIndexOffAddrOp Nothing F64 res args +emitPrimOp res ReadOffAddrOp_StablePtr args live = doIndexOffAddrOp Nothing wordRep res args +emitPrimOp res ReadOffAddrOp_Int8 args live = doIndexOffAddrOp (Just mo_s_8ToWord) I8 res args +emitPrimOp res ReadOffAddrOp_Int16 args live = doIndexOffAddrOp (Just mo_s_16ToWord) I16 res args +emitPrimOp res ReadOffAddrOp_Int32 args live = doIndexOffAddrOp (Just mo_s_32ToWord) I32 res args +emitPrimOp res ReadOffAddrOp_Int64 args live = doIndexOffAddrOp Nothing I64 res args +emitPrimOp res ReadOffAddrOp_Word8 args live = doIndexOffAddrOp (Just mo_u_8ToWord) I8 res args +emitPrimOp res ReadOffAddrOp_Word16 args live = doIndexOffAddrOp (Just mo_u_16ToWord) I16 res args +emitPrimOp res ReadOffAddrOp_Word32 args live = doIndexOffAddrOp (Just mo_u_32ToWord) I32 res args +emitPrimOp res ReadOffAddrOp_Word64 args live = doIndexOffAddrOp Nothing I64 res args + +-- IndexXXXArray + +emitPrimOp res IndexByteArrayOp_Char args live = doIndexByteArrayOp (Just mo_u_8ToWord) I8 res args +emitPrimOp res IndexByteArrayOp_WideChar args live = doIndexByteArrayOp (Just mo_u_32ToWord) I32 res args +emitPrimOp res IndexByteArrayOp_Int args live = doIndexByteArrayOp Nothing wordRep res args +emitPrimOp res IndexByteArrayOp_Word args live = doIndexByteArrayOp Nothing wordRep res args +emitPrimOp res IndexByteArrayOp_Addr args live = doIndexByteArrayOp 
Nothing wordRep res args +emitPrimOp res IndexByteArrayOp_Float args live = doIndexByteArrayOp Nothing F32 res args +emitPrimOp res IndexByteArrayOp_Double args live = doIndexByteArrayOp Nothing F64 res args +emitPrimOp res IndexByteArrayOp_StablePtr args live = doIndexByteArrayOp Nothing wordRep res args +emitPrimOp res IndexByteArrayOp_Int8 args live = doIndexByteArrayOp (Just mo_s_8ToWord) I8 res args +emitPrimOp res IndexByteArrayOp_Int16 args live = doIndexByteArrayOp (Just mo_s_16ToWord) I16 res args +emitPrimOp res IndexByteArrayOp_Int32 args live = doIndexByteArrayOp (Just mo_s_32ToWord) I32 res args +emitPrimOp res IndexByteArrayOp_Int64 args live = doIndexByteArrayOp Nothing I64 res args +emitPrimOp res IndexByteArrayOp_Word8 args live = doIndexByteArrayOp (Just mo_u_8ToWord) I8 res args +emitPrimOp res IndexByteArrayOp_Word16 args live = doIndexByteArrayOp (Just mo_u_16ToWord) I16 res args +emitPrimOp res IndexByteArrayOp_Word32 args live = doIndexByteArrayOp (Just mo_u_32ToWord) I32 res args +emitPrimOp res IndexByteArrayOp_Word64 args live = doIndexByteArrayOp Nothing I64 res args + +-- ReadXXXArray, identical to IndexXXXArray. 
+ +emitPrimOp res ReadByteArrayOp_Char args live = doIndexByteArrayOp (Just mo_u_8ToWord) I8 res args +emitPrimOp res ReadByteArrayOp_WideChar args live = doIndexByteArrayOp (Just mo_u_32ToWord) I32 res args +emitPrimOp res ReadByteArrayOp_Int args live = doIndexByteArrayOp Nothing wordRep res args +emitPrimOp res ReadByteArrayOp_Word args live = doIndexByteArrayOp Nothing wordRep res args +emitPrimOp res ReadByteArrayOp_Addr args live = doIndexByteArrayOp Nothing wordRep res args +emitPrimOp res ReadByteArrayOp_Float args live = doIndexByteArrayOp Nothing F32 res args +emitPrimOp res ReadByteArrayOp_Double args live = doIndexByteArrayOp Nothing F64 res args +emitPrimOp res ReadByteArrayOp_StablePtr args live = doIndexByteArrayOp Nothing wordRep res args +emitPrimOp res ReadByteArrayOp_Int8 args live = doIndexByteArrayOp (Just mo_s_8ToWord) I8 res args +emitPrimOp res ReadByteArrayOp_Int16 args live = doIndexByteArrayOp (Just mo_s_16ToWord) I16 res args +emitPrimOp res ReadByteArrayOp_Int32 args live = doIndexByteArrayOp (Just mo_s_32ToWord) I32 res args +emitPrimOp res ReadByteArrayOp_Int64 args live = doIndexByteArrayOp Nothing I64 res args +emitPrimOp res ReadByteArrayOp_Word8 args live = doIndexByteArrayOp (Just mo_u_8ToWord) I8 res args +emitPrimOp res ReadByteArrayOp_Word16 args live = doIndexByteArrayOp (Just mo_u_16ToWord) I16 res args +emitPrimOp res ReadByteArrayOp_Word32 args live = doIndexByteArrayOp (Just mo_u_32ToWord) I32 res args +emitPrimOp res ReadByteArrayOp_Word64 args live = doIndexByteArrayOp Nothing I64 res args + +-- WriteXXXoffAddr + +emitPrimOp res WriteOffAddrOp_Char args live = doWriteOffAddrOp (Just mo_WordTo8) I8 res args +emitPrimOp res WriteOffAddrOp_WideChar args live = doWriteOffAddrOp (Just mo_WordTo32) I32 res args +emitPrimOp res WriteOffAddrOp_Int args live = doWriteOffAddrOp Nothing wordRep res args +emitPrimOp res WriteOffAddrOp_Word args live = doWriteOffAddrOp Nothing wordRep res args +emitPrimOp res WriteOffAddrOp_Addr 
args live = doWriteOffAddrOp Nothing wordRep res args +emitPrimOp res WriteOffAddrOp_Float args live = doWriteOffAddrOp Nothing F32 res args +emitPrimOp res WriteOffAddrOp_ForeignObj args live = doWriteOffAddrOp Nothing wordRep res args +emitPrimOp res WriteOffAddrOp_Double args live = doWriteOffAddrOp Nothing F64 res args +emitPrimOp res WriteOffAddrOp_StablePtr args live = doWriteOffAddrOp Nothing wordRep res args +emitPrimOp res WriteOffAddrOp_Int8 args live = doWriteOffAddrOp (Just mo_WordTo8) I8 res args +emitPrimOp res WriteOffAddrOp_Int16 args live = doWriteOffAddrOp (Just mo_WordTo16) I16 res args +emitPrimOp res WriteOffAddrOp_Int32 args live = doWriteOffAddrOp (Just mo_WordTo32) I32 res args +emitPrimOp res WriteOffAddrOp_Int64 args live = doWriteOffAddrOp Nothing I64 res args +emitPrimOp res WriteOffAddrOp_Word8 args live = doWriteOffAddrOp (Just mo_WordTo8) I8 res args +emitPrimOp res WriteOffAddrOp_Word16 args live = doWriteOffAddrOp (Just mo_WordTo16) I16 res args +emitPrimOp res WriteOffAddrOp_Word32 args live = doWriteOffAddrOp (Just mo_WordTo32) I32 res args +emitPrimOp res WriteOffAddrOp_Word64 args live = doWriteOffAddrOp Nothing I64 res args + +-- WriteXXXArray + +emitPrimOp res WriteByteArrayOp_Char args live = doWriteByteArrayOp (Just mo_WordTo8) I8 res args +emitPrimOp res WriteByteArrayOp_WideChar args live = doWriteByteArrayOp (Just mo_WordTo32) I32 res args +emitPrimOp res WriteByteArrayOp_Int args live = doWriteByteArrayOp Nothing wordRep res args +emitPrimOp res WriteByteArrayOp_Word args live = doWriteByteArrayOp Nothing wordRep res args +emitPrimOp res WriteByteArrayOp_Addr args live = doWriteByteArrayOp Nothing wordRep res args +emitPrimOp res WriteByteArrayOp_Float args live = doWriteByteArrayOp Nothing F32 res args +emitPrimOp res WriteByteArrayOp_Double args live = doWriteByteArrayOp Nothing F64 res args +emitPrimOp res WriteByteArrayOp_StablePtr args live = doWriteByteArrayOp Nothing wordRep res args +emitPrimOp res 
WriteByteArrayOp_Int8 args live = doWriteByteArrayOp (Just mo_WordTo8) I8 res args +emitPrimOp res WriteByteArrayOp_Int16 args live = doWriteByteArrayOp (Just mo_WordTo16) I16 res args +emitPrimOp res WriteByteArrayOp_Int32 args live = doWriteByteArrayOp (Just mo_WordTo32) I32 res args +emitPrimOp res WriteByteArrayOp_Int64 args live = doWriteByteArrayOp Nothing I64 res args +emitPrimOp res WriteByteArrayOp_Word8 args live = doWriteByteArrayOp (Just mo_WordTo8) I8 res args +emitPrimOp res WriteByteArrayOp_Word16 args live = doWriteByteArrayOp (Just mo_WordTo16) I16 res args +emitPrimOp res WriteByteArrayOp_Word32 args live = doWriteByteArrayOp (Just mo_WordTo32) I32 res args +emitPrimOp res WriteByteArrayOp_Word64 args live = doWriteByteArrayOp Nothing I64 res args + + +-- The rest just translate straightforwardly +emitPrimOp [res] op [arg] live + | nopOp op + = stmtC (CmmAssign res arg) + + | Just (mop,rep) <- narrowOp op + = stmtC (CmmAssign res (CmmMachOp (mop rep wordRep) [ + CmmMachOp (mop wordRep rep) [arg]])) + +emitPrimOp [res] op args live + | Just prim <- callishOp op + = do vols <- getVolatileRegs live + stmtC (CmmCall (CmmPrim prim) [(res,NoHint)] + [(a,NoHint) | a<-args] (Just vols)) -- ToDo: hints? 
+ + | Just mop <- translateOp op + = let stmt = CmmAssign res (CmmMachOp mop args) in + stmtC stmt + +emitPrimOp _ op _ _ + = pprPanic "emitPrimOp: can't translate PrimOp" (ppr op) + + +-- These PrimOps are NOPs in Cmm + +nopOp Int2WordOp = True +nopOp Word2IntOp = True +nopOp Int2AddrOp = True +nopOp Addr2IntOp = True +nopOp _ = False + +-- These PrimOps turn into double casts + +narrowOp Narrow8IntOp = Just (MO_S_Conv, I8) +narrowOp Narrow16IntOp = Just (MO_S_Conv, I16) +narrowOp Narrow32IntOp = Just (MO_S_Conv, I32) +narrowOp Narrow8WordOp = Just (MO_U_Conv, I8) +narrowOp Narrow16WordOp = Just (MO_U_Conv, I16) +narrowOp Narrow32WordOp = Just (MO_U_Conv, I32) +narrowOp _ = Nothing + +-- Native word signless ops + +translateOp IntAddOp = Just mo_wordAdd +translateOp IntSubOp = Just mo_wordSub +translateOp WordAddOp = Just mo_wordAdd +translateOp WordSubOp = Just mo_wordSub +translateOp AddrAddOp = Just mo_wordAdd +translateOp AddrSubOp = Just mo_wordSub + +translateOp IntEqOp = Just mo_wordEq +translateOp IntNeOp = Just mo_wordNe +translateOp WordEqOp = Just mo_wordEq +translateOp WordNeOp = Just mo_wordNe +translateOp AddrEqOp = Just mo_wordEq +translateOp AddrNeOp = Just mo_wordNe + +translateOp AndOp = Just mo_wordAnd +translateOp OrOp = Just mo_wordOr +translateOp XorOp = Just mo_wordXor +translateOp NotOp = Just mo_wordNot +translateOp SllOp = Just mo_wordShl +translateOp SrlOp = Just mo_wordUShr + +translateOp AddrRemOp = Just mo_wordURem + +-- Native word signed ops + +translateOp IntMulOp = Just mo_wordMul +translateOp IntMulMayOfloOp = Just (MO_S_MulMayOflo wordRep) +translateOp IntQuotOp = Just mo_wordSQuot +translateOp IntRemOp = Just mo_wordSRem +translateOp IntNegOp = Just mo_wordSNeg + + +translateOp IntGeOp = Just mo_wordSGe +translateOp IntLeOp = Just mo_wordSLe +translateOp IntGtOp = Just mo_wordSGt +translateOp IntLtOp = Just mo_wordSLt + +translateOp ISllOp = Just mo_wordShl +translateOp ISraOp = Just mo_wordSShr +translateOp ISrlOp = Just 
mo_wordUShr + +-- Native word unsigned ops + +translateOp WordGeOp = Just mo_wordUGe +translateOp WordLeOp = Just mo_wordULe +translateOp WordGtOp = Just mo_wordUGt +translateOp WordLtOp = Just mo_wordULt + +translateOp WordMulOp = Just mo_wordMul +translateOp WordQuotOp = Just mo_wordUQuot +translateOp WordRemOp = Just mo_wordURem + +translateOp AddrGeOp = Just mo_wordUGe +translateOp AddrLeOp = Just mo_wordULe +translateOp AddrGtOp = Just mo_wordUGt +translateOp AddrLtOp = Just mo_wordULt + +-- 32-bit unsigned ops + +translateOp CharEqOp = Just (MO_Eq I32) +translateOp CharNeOp = Just (MO_Ne I32) +translateOp CharGeOp = Just (MO_U_Ge I32) +translateOp CharLeOp = Just (MO_U_Le I32) +translateOp CharGtOp = Just (MO_U_Gt I32) +translateOp CharLtOp = Just (MO_U_Lt I32) + +-- Double ops + +translateOp DoubleEqOp = Just (MO_Eq F64) +translateOp DoubleNeOp = Just (MO_Ne F64) +translateOp DoubleGeOp = Just (MO_S_Ge F64) +translateOp DoubleLeOp = Just (MO_S_Le F64) +translateOp DoubleGtOp = Just (MO_S_Gt F64) +translateOp DoubleLtOp = Just (MO_S_Lt F64) + +translateOp DoubleAddOp = Just (MO_Add F64) +translateOp DoubleSubOp = Just (MO_Sub F64) +translateOp DoubleMulOp = Just (MO_Mul F64) +translateOp DoubleDivOp = Just (MO_S_Quot F64) +translateOp DoubleNegOp = Just (MO_S_Neg F64) + +-- Float ops + +translateOp FloatEqOp = Just (MO_Eq F32) +translateOp FloatNeOp = Just (MO_Ne F32) +translateOp FloatGeOp = Just (MO_S_Ge F32) +translateOp FloatLeOp = Just (MO_S_Le F32) +translateOp FloatGtOp = Just (MO_S_Gt F32) +translateOp FloatLtOp = Just (MO_S_Lt F32) + +translateOp FloatAddOp = Just (MO_Add F32) +translateOp FloatSubOp = Just (MO_Sub F32) +translateOp FloatMulOp = Just (MO_Mul F32) +translateOp FloatDivOp = Just (MO_S_Quot F32) +translateOp FloatNegOp = Just (MO_S_Neg F32) + +-- Conversions + +translateOp Int2DoubleOp = Just (MO_S_Conv wordRep F64) +translateOp Double2IntOp = Just (MO_S_Conv F64 wordRep) + +translateOp Int2FloatOp = Just (MO_S_Conv wordRep F32) 
+translateOp Float2IntOp = Just (MO_S_Conv F32 wordRep) + +translateOp Float2DoubleOp = Just (MO_S_Conv F32 F64) +translateOp Double2FloatOp = Just (MO_S_Conv F64 F32) + +translateOp OrdOp = Just (MO_U_Conv I32 wordRep) +translateOp ChrOp = Just (MO_U_Conv wordRep I32) + +-- Word comparisons masquerading as more exotic things. + +translateOp SameMutVarOp = Just mo_wordEq +translateOp SameMVarOp = Just mo_wordEq +translateOp SameMutableArrayOp = Just mo_wordEq +translateOp SameMutableByteArrayOp = Just mo_wordEq +translateOp EqForeignObj = Just mo_wordEq +translateOp EqStablePtrOp = Just mo_wordEq + +translateOp _ = Nothing + +-- These primops are implemented by CallishMachOps, because they sometimes +-- turn into foreign calls depending on the backend. + +callishOp DoublePowerOp = Just MO_F64_Pwr +callishOp DoubleSinOp = Just MO_F64_Sin +callishOp DoubleCosOp = Just MO_F64_Cos +callishOp DoubleTanOp = Just MO_F64_Tan +callishOp DoubleSinhOp = Just MO_F64_Sinh +callishOp DoubleCoshOp = Just MO_F64_Cosh +callishOp DoubleTanhOp = Just MO_F64_Tanh +callishOp DoubleAsinOp = Just MO_F64_Asin +callishOp DoubleAcosOp = Just MO_F64_Acos +callishOp DoubleAtanOp = Just MO_F64_Atan +callishOp DoubleLogOp = Just MO_F64_Log +callishOp DoubleExpOp = Just MO_F64_Exp +callishOp DoubleSqrtOp = Just MO_F64_Sqrt + +callishOp FloatPowerOp = Just MO_F32_Pwr +callishOp FloatSinOp = Just MO_F32_Sin +callishOp FloatCosOp = Just MO_F32_Cos +callishOp FloatTanOp = Just MO_F32_Tan +callishOp FloatSinhOp = Just MO_F32_Sinh +callishOp FloatCoshOp = Just MO_F32_Cosh +callishOp FloatTanhOp = Just MO_F32_Tanh +callishOp FloatAsinOp = Just MO_F32_Asin +callishOp FloatAcosOp = Just MO_F32_Acos +callishOp FloatAtanOp = Just MO_F32_Atan +callishOp FloatLogOp = Just MO_F32_Log +callishOp FloatExpOp = Just MO_F32_Exp +callishOp FloatSqrtOp = Just MO_F32_Sqrt + +callishOp _ = Nothing + +------------------------------------------------------------------------------ +-- Helpers for translating various 
minor variants of array indexing. + +doIndexOffForeignObjOp maybe_post_read_cast rep [res] [addr,idx] + = mkBasicIndexedRead 0 maybe_post_read_cast rep res + (cmmLoadIndexW addr fixedHdrSize) idx +doIndexOffForeignObjOp _ _ _ _ + = panic "CgPrimOp: doIndexOffForeignObjOp" + +doIndexOffAddrOp maybe_post_read_cast rep [res] [addr,idx] + = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr idx +doIndexOffAddrOp _ _ _ _ + = panic "CgPrimOp: doIndexOffAddrOp" + +doIndexByteArrayOp maybe_post_read_cast rep [res] [addr,idx] + = mkBasicIndexedRead arrWordsHdrSize maybe_post_read_cast rep res addr idx +doIndexByteArrayOp _ _ _ _ + = panic "CgPrimOp: doIndexByteArrayOp" + +doReadPtrArrayOp res addr idx + = mkBasicIndexedRead arrPtrsHdrSize Nothing wordRep res addr idx + + +doWriteOffAddrOp maybe_pre_write_cast rep [] [addr,idx,val] + = mkBasicIndexedWrite 0 maybe_pre_write_cast rep addr idx val +doWriteOffAddrOp _ _ _ _ + = panic "CgPrimOp: doWriteOffAddrOp" + +doWriteByteArrayOp maybe_pre_write_cast rep [] [addr,idx,val] + = mkBasicIndexedWrite arrWordsHdrSize maybe_pre_write_cast rep addr idx val +doWriteByteArrayOp _ _ _ _ + = panic "CgPrimOp: doWriteByteArrayOp" + +doWritePtrArrayOp addr idx val + = mkBasicIndexedWrite arrPtrsHdrSize Nothing wordRep addr idx val + + +mkBasicIndexedRead off Nothing read_rep res base idx + = stmtC (CmmAssign res (cmmLoadIndexOffExpr off read_rep base idx)) +mkBasicIndexedRead off (Just cast) read_rep res base idx + = stmtC (CmmAssign res (CmmMachOp cast [ + cmmLoadIndexOffExpr off read_rep base idx])) + +mkBasicIndexedWrite off Nothing write_rep base idx val + = stmtC (CmmStore (cmmIndexOffExpr off write_rep base idx) val) +mkBasicIndexedWrite off (Just cast) write_rep base idx val + = stmtC (CmmStore (cmmIndexOffExpr off write_rep base idx) (CmmMachOp cast [val])) + +-- ---------------------------------------------------------------------------- +-- Misc utils + +cmmIndexOffExpr :: ByteOff -> MachRep -> CmmExpr -> CmmExpr -> CmmExpr 
+cmmIndexOffExpr off rep base idx + = cmmIndexExpr rep (cmmOffsetB base off) idx + +cmmLoadIndexOffExpr :: ByteOff -> MachRep -> CmmExpr -> CmmExpr -> CmmExpr +cmmLoadIndexOffExpr off rep base idx + = CmmLoad (cmmIndexOffExpr off rep base idx) rep + +setInfo :: CmmExpr -> CmmExpr -> CmmStmt +setInfo closure_ptr info_ptr = CmmStore closure_ptr info_ptr + diff --git a/ghc/compiler/codeGen/CgProf.hs b/ghc/compiler/codeGen/CgProf.hs new file mode 100644 index 0000000000..30f801dba3 --- /dev/null +++ b/ghc/compiler/codeGen/CgProf.hs @@ -0,0 +1,474 @@ +----------------------------------------------------------------------------- +-- +-- Code generation for profiling +-- +-- (c) The University of Glasgow 2004 +-- +----------------------------------------------------------------------------- + +module CgProf ( + mkCCostCentre, mkCCostCentreStack, + + -- Cost-centre Profiling + dynProfHdr, profDynAlloc, profAlloc, staticProfHdr, initUpdFrameProf, + enterCostCentre, enterCostCentrePAP, enterCostCentreThunk, + chooseDynCostCentres, + costCentreFrom, + curCCS, curCCSAddr, + emitCostCentreDecl, emitCostCentreStackDecl, + emitRegisterCC, emitRegisterCCS, + emitSetCCC, emitCCS, + + -- Lag/drag/void stuff + ldvEnter, ldvRecordCreate + ) where + +#include "HsVersions.h" +#include "../includes/ghcconfig.h" + -- Needed by Constants.h +#include "../includes/Constants.h" + -- For LDV_CREATE_MASK, LDV_STATE_USE + -- which are StgWords +#include "../includes/DerivedConstants.h" + -- For REP_xxx constants, which are MachReps + +import ClosureInfo ( ClosureInfo, closureSize, + closureName, isToplevClosure, closureReEntrant, ) +import CgUtils +import CgMonad +import SMRep ( StgWord, profHdrSize ) + +import Cmm +import MachOp +import CmmUtils ( zeroCLit, mkIntCLit, mkLblExpr ) +import CLabel ( mkCCLabel, mkCCSLabel, mkRtsDataLabel ) + +import Module ( moduleNameUserString ) +import Id ( Id ) +import CostCentre +import StgSyn ( GenStgExpr(..), StgExpr ) +import CmdLineOpts ( 
opt_SccProfilingOn ) +import FastString ( FastString, mkFastString, LitString ) +import Constants -- Lots of field offsets +import Outputable + +import Maybe +import Char ( ord ) +import Monad ( when ) + +----------------------------------------------------------------------------- +-- +-- Cost-centre-stack Profiling +-- +----------------------------------------------------------------------------- + +-- Expression representing the current cost centre stack +curCCS :: CmmExpr +curCCS = CmmLoad curCCSAddr wordRep + +-- Address of current CCS variable, for storing into +curCCSAddr :: CmmExpr +curCCSAddr = CmmLit (CmmLabel (mkRtsDataLabel SLIT("CCCS"))) + +mkCCostCentre :: CostCentre -> CmmLit +mkCCostCentre cc = CmmLabel (mkCCLabel cc) + +mkCCostCentreStack :: CostCentreStack -> CmmLit +mkCCostCentreStack ccs = CmmLabel (mkCCSLabel ccs) + +costCentreFrom :: CmmExpr -- A closure pointer + -> CmmExpr -- The cost centre from that closure +costCentreFrom cl = CmmLoad (cmmOffsetB cl oFFSET_StgHeader_ccs) wordRep + +staticProfHdr :: CostCentreStack -> [CmmLit] +-- The profiling header words in a static closure +-- Was SET_STATIC_PROF_HDR +staticProfHdr ccs = ifProfilingL [mkCCostCentreStack ccs, + staticLdvInit] + +dynProfHdr :: CmmExpr -> [CmmExpr] +-- Profiling header words in a dynamic closure +dynProfHdr ccs = ifProfilingL [ccs, dynLdvInit] + +initUpdFrameProf :: CmmExpr -> Code +-- Initialise the profiling field of an update frame +initUpdFrameProf frame_amode + = ifProfiling $ -- frame->header.prof.ccs = CCCS + stmtC (CmmStore (cmmOffsetB frame_amode oFFSET_StgHeader_ccs) curCCS) + -- frame->header.prof.hp.rs = NULL (or frame-header.prof.hp.ldvw = 0) + -- is unnecessary because it is not used anyhow. + +-- ----------------------------------------------------------------------------- +-- Recording allocation in a cost centre + +-- | Record the allocation of a closure. The CmmExpr is the cost +-- centre stack to which to attribute the allocation. 
+profDynAlloc :: ClosureInfo -> CmmExpr -> Code +profDynAlloc cl_info ccs + = ifProfiling $ + profAlloc (CmmLit (mkIntCLit (closureSize cl_info))) ccs + +-- | Record the allocation of a closure (size is given by a CmmExpr) +-- The size must be in words, because the allocation counter in a CCS counts +-- in words. +profAlloc :: CmmExpr -> CmmExpr -> Code +profAlloc words ccs + = ifProfiling $ + stmtC (addToMemE alloc_rep + (cmmOffsetB ccs oFFSET_CostCentreStack_mem_alloc) + (CmmMachOp (MO_U_Conv wordRep alloc_rep) $ + [CmmMachOp mo_wordSub [words, + CmmLit (mkIntCLit profHdrSize)]])) + -- subtract the "profiling overhead", which is the + -- profiling header in a closure. + where + alloc_rep = REP_CostCentreStack_mem_alloc + +-- ---------------------------------------------------------------------- +-- Setting the cost centre in a new closure + +chooseDynCostCentres :: CostCentreStack + -> [Id] -- Args + -> StgExpr -- Body + -> FCode (CmmExpr, CmmExpr) +-- Called when alllcating a closure +-- Tells which cost centre to put in the object, and which +-- to blame the cost of allocation on +chooseDynCostCentres ccs args body = do + -- Cost-centre we record in the object + use_ccs <- emitCCS ccs + + -- Cost-centre on whom we blame the allocation + let blame_ccs + | null args && isBox body = CmmLit (mkCCostCentreStack overheadCCS) + | otherwise = use_ccs + + return (use_ccs, blame_ccs) + + +-- Some CostCentreStacks are a sequence of pushes on top of CCCS. +-- These pushes must be performed before we can refer to the stack in +-- an expression. 
+emitCCS :: CostCentreStack -> FCode CmmExpr +emitCCS ccs = push_em (ccsExpr ccs') (reverse cc's) + where + (cc's, ccs') = decomposeCCS ccs + + push_em ccs [] = return ccs + push_em ccs (cc:rest) = do + tmp <- newTemp wordRep + pushCostCentre tmp ccs cc + push_em (CmmReg tmp) rest + +ccsExpr :: CostCentreStack -> CmmExpr +ccsExpr ccs + | isCurrentCCS ccs = curCCS + | otherwise = CmmLit (mkCCostCentreStack ccs) + + +isBox :: StgExpr -> Bool +-- If it's an utterly trivial RHS, then it must be +-- one introduced by boxHigherOrderArgs for profiling, +-- so we charge it to "OVERHEAD". +-- This looks like a GROSS HACK to me --SDM +isBox (StgApp fun []) = True +isBox other = False + + +-- ----------------------------------------------------------------------- +-- Setting the current cost centre on entry to a closure + +-- For lexically scoped profiling we have to load the cost centre from +-- the closure entered, if the costs are not supposed to be inherited. +-- This is done immediately on entering the fast entry point. + +-- Load current cost centre from closure, if not inherited. +-- Node is guaranteed to point to it, if profiling and not inherited. + +enterCostCentre + :: ClosureInfo + -> CostCentreStack + -> StgExpr -- The RHS of the closure + -> Code + +-- We used to have a special case for bindings of form +-- f = g True +-- where g has arity 2. The RHS is a thunk, but we don't +-- need to update it; and we want to subsume costs. +-- We don't have these sort of PAPs any more, so the special +-- case has gone away. 
+ +enterCostCentre closure_info ccs body + = ifProfiling $ + ASSERT2(not (noCCSAttached ccs), ppr (closureName closure_info) <+> ppr ccs) + enter_cost_centre closure_info ccs body + +enter_cost_centre closure_info ccs body + | isSubsumedCCS ccs + = ASSERT(isToplevClosure closure_info) + ASSERT(re_entrant) + enter_ccs_fsub + + | isDerivedFromCurrentCCS ccs + = do { + if re_entrant && not is_box + then + enter_ccs_fun node_ccs + else + stmtC (CmmStore curCCSAddr node_ccs) + + -- don't forget to bump the scc count. This closure might have been + -- of the form let x = _scc_ "x" e in ...x..., which the SCCfinal + -- pass has turned into simply let x = e in ...x... and attached + -- the _scc_ as PushCostCentre(x,CCCS) on the x closure. So that + -- we don't lose the scc counter, bump it in the entry code for x. + -- ToDo: for a multi-push we should really bump the counter for + -- each of the intervening CCSs, not just the top one. + ; when (not (isCurrentCCS ccs)) $ + stmtC (bumpSccCount curCCS) + } + + | isCafCCS ccs + = ASSERT(isToplevClosure closure_info) + ASSERT(not re_entrant) + do { -- This is just a special case of the isDerivedFromCurrentCCS + -- case above. We could delete this, but it's a micro + -- optimisation and saves a bit of code. 
+ stmtC (CmmStore curCCSAddr enc_ccs) + ; stmtC (bumpSccCount node_ccs) + } + + | otherwise + = panic "enterCostCentre" + where + enc_ccs = CmmLit (mkCCostCentreStack ccs) + re_entrant = closureReEntrant closure_info + node_ccs = costCentreFrom (CmmReg nodeReg) + is_box = isBox body + +-- set the current CCS when entering a PAP +enterCostCentrePAP :: CmmExpr -> Code +enterCostCentrePAP closure = + ifProfiling $ do + enter_ccs_fun (costCentreFrom closure) + enteringPAP 1 + +enterCostCentreThunk :: CmmExpr -> Code +enterCostCentreThunk closure = + ifProfiling $ do + stmtC $ CmmStore curCCSAddr (costCentreFrom closure) + +enter_ccs_fun stack = emitRtsCall SLIT("EnterFunCCS") [(stack,PtrHint)] + +enter_ccs_fsub = enteringPAP 0 + +-- When entering a PAP, EnterFunCCS is called by both the PAP entry +-- code and the function entry code; we don't want the function's +-- entry code to also update CCCS in the event that it was called via +-- a PAP, so we set the flag entering_PAP to indicate that we are +-- entering via a PAP. 
+enteringPAP :: Integer -> Code +enteringPAP n + = stmtC (CmmStore (CmmLit (CmmLabel (mkRtsDataLabel SLIT("entering_PAP")))) + (CmmLit (CmmInt n cIntRep))) + +ifProfiling :: Code -> Code +ifProfiling code + | opt_SccProfilingOn = code + | otherwise = nopC + +ifProfilingL :: [a] -> [a] +ifProfilingL xs + | opt_SccProfilingOn = xs + | otherwise = [] + + +-- --------------------------------------------------------------------------- +-- Initialising Cost Centres & CCSs + +emitCostCentreDecl + :: CostCentre + -> Code +emitCostCentreDecl cc = do + { label <- mkStringCLit (costCentreUserName cc) + ; modl <- mkStringCLit (moduleNameUserString (cc_mod cc)) + ; let + lits = [ zero, -- StgInt ccID, + label, -- char *label, + modl, -- char *module, + zero, -- StgWord time_ticks + zero64, -- StgWord64 mem_alloc + subsumed, -- StgInt is_caf + zero -- struct _CostCentre *link + ] + ; emitDataLits (mkCCLabel cc) lits + } + where + subsumed | isCafCC cc = mkIntCLit (ord 'c') -- 'c' == is a CAF + | otherwise = mkIntCLit (ord 'B') -- 'B' == is boring + + +emitCostCentreStackDecl + :: CostCentreStack + -> Code +emitCostCentreStackDecl ccs + | Just cc <- maybeSingletonCCS ccs = do + { let + lits = [ zero, + mkCCostCentre cc, + zero, -- struct _CostCentreStack *prevStack; + zero, -- struct _IndexTable *indexTable; + zero, -- StgWord selected; + zero64, -- StgWord64 scc_count; + zero, -- StgWord time_ticks; + zero64, -- StgWord64 mem_alloc; + zero, -- StgWord inherited_ticks; + zero64, -- StgWord64 inherited_alloc; + zero -- CostCentre *root; + ] + ; emitDataLits (mkCCSLabel ccs) lits + } + | otherwise = pprPanic "emitCostCentreStackDecl" (ppr ccs) + +zero = mkIntCLit 0 +zero64 = CmmInt 0 I64 + + +-- --------------------------------------------------------------------------- +-- Registering CCs and CCSs + +-- (cc)->link = CC_LIST; +-- CC_LIST = (cc); +-- (cc)->ccID = CC_ID++; + +emitRegisterCC :: CostCentre -> Code +emitRegisterCC cc = do + { tmp <- newTemp cIntRep + ; stmtsC [ + 
CmmStore (cmmOffsetB cc_lit oFFSET_CostCentre_link) + (CmmLoad cC_LIST wordRep), + CmmStore cC_LIST cc_lit, + CmmAssign tmp (CmmLoad cC_ID cIntRep), + CmmStore (cmmOffsetB cc_lit oFFSET_CostCentre_ccID) (CmmReg tmp), + CmmStore cC_ID (cmmRegOffB tmp 1) + ] + } + where + cc_lit = CmmLit (CmmLabel (mkCCLabel cc)) + +-- (ccs)->prevStack = CCS_LIST; +-- CCS_LIST = (ccs); +-- (ccs)->ccsID = CCS_ID++; + +emitRegisterCCS :: CostCentreStack -> Code +emitRegisterCCS ccs = do + { tmp <- newTemp cIntRep + ; stmtsC [ + CmmStore (cmmOffsetB ccs_lit oFFSET_CostCentreStack_prevStack) + (CmmLoad cCS_LIST wordRep), + CmmStore cCS_LIST ccs_lit, + CmmAssign tmp (CmmLoad cCS_ID cIntRep), + CmmStore (cmmOffsetB ccs_lit oFFSET_CostCentreStack_ccsID) (CmmReg tmp), + CmmStore cCS_ID (cmmRegOffB tmp 1) + ] + } + where + ccs_lit = CmmLit (CmmLabel (mkCCSLabel ccs)) + + +cC_LIST = CmmLit (CmmLabel (mkRtsDataLabel SLIT("CC_LIST"))) +cC_ID = CmmLit (CmmLabel (mkRtsDataLabel SLIT("CC_ID"))) + +cCS_LIST = CmmLit (CmmLabel (mkRtsDataLabel SLIT("CCS_LIST"))) +cCS_ID = CmmLit (CmmLabel (mkRtsDataLabel SLIT("CCS_ID"))) + +-- --------------------------------------------------------------------------- +-- Set the current cost centre stack + +emitSetCCC :: CostCentre -> Code +emitSetCCC cc + | not opt_SccProfilingOn = nopC + | otherwise = do + ASSERTM(sccAbleCostCentre cc) + tmp <- newTemp wordRep + pushCostCentre tmp curCCS cc + stmtC (CmmStore curCCSAddr (CmmReg tmp)) + when (isSccCountCostCentre cc) $ + stmtC (bumpSccCount curCCS) + +pushCostCentre :: CmmReg -> CmmExpr -> CostCentre -> Code +pushCostCentre result ccs cc + = emitRtsCallWithResult result PtrHint + SLIT("PushCostCentre") [(ccs,PtrHint), + (CmmLit (mkCCostCentre cc), PtrHint)] + +bumpSccCount :: CmmExpr -> CmmStmt +bumpSccCount ccs + = addToMem REP_CostCentreStack_scc_count + (cmmOffsetB ccs oFFSET_CostCentreStack_scc_count) 1 + +----------------------------------------------------------------------------- +-- +-- Lag/drag/void stuff 
+-- +----------------------------------------------------------------------------- + +-- +-- Initial value for the LDV field in a static closure +-- +staticLdvInit :: CmmLit +staticLdvInit = zeroCLit + +-- +-- Initial value of the LDV field in a dynamic closure +-- +dynLdvInit :: CmmExpr +dynLdvInit = -- (era << LDV_SHIFT) | LDV_STATE_CREATE + CmmMachOp mo_wordOr [ + CmmMachOp mo_wordShl [loadEra, CmmLit (mkIntCLit lDV_SHIFT) ], + CmmLit (mkWordCLit lDV_STATE_CREATE) + ] + +-- +-- Initialise the LDV word of a new closure +-- +ldvRecordCreate :: CmmExpr -> Code +ldvRecordCreate closure = stmtC $ CmmStore (ldvWord closure) dynLdvInit + +-- +-- Called when a closure is entered, marks the closure as having been "used". +-- The closure is not an 'inherently used' one. +-- The closure is not IND or IND_OLDGEN because neither is considered for LDV +-- profiling. +-- +ldvEnter :: CmmExpr -> Code +-- Argument is a closure pointer +ldvEnter cl_ptr + = ifProfiling $ + -- if (era > 0) { + -- LDVW((c)) = (LDVW((c)) & LDV_CREATE_MASK) | + -- era | LDV_STATE_USE } + emitIf (CmmMachOp mo_wordUGt [loadEra, CmmLit zeroCLit]) + (stmtC (CmmStore ldv_wd new_ldv_wd)) + where + ldv_wd = ldvWord cl_ptr + new_ldv_wd = cmmOrWord (cmmAndWord (CmmLoad ldv_wd wordRep) + (CmmLit (mkWordCLit lDV_CREATE_MASK))) + (cmmOrWord loadEra (CmmLit (mkWordCLit lDV_STATE_USE))) + +loadEra :: CmmExpr +loadEra = CmmLoad (mkLblExpr (mkRtsDataLabel SLIT("era"))) cIntRep + +ldvWord :: CmmExpr -> CmmExpr +-- Takes the address of a closure, and returns +-- the address of the LDV word in the closure +ldvWord closure_ptr = cmmOffsetB closure_ptr oFFSET_StgHeader_ldvw + +-- LDV constants, from ghc/includes/Constants.h +lDV_SHIFT = (LDV_SHIFT :: Int) +--lDV_STATE_MASK = (LDV_STATE_MASK :: StgWord) +lDV_CREATE_MASK = (LDV_CREATE_MASK :: StgWord) +--lDV_LAST_MASK = (LDV_LAST_MASK :: StgWord) +lDV_STATE_CREATE = (LDV_STATE_CREATE :: StgWord) +lDV_STATE_USE = (LDV_STATE_USE :: StgWord) + diff --git 
a/ghc/compiler/codeGen/CgRetConv.hi-boot b/ghc/compiler/codeGen/CgRetConv.hi-boot deleted file mode 100644 index 9b14f430ab..0000000000 --- a/ghc/compiler/codeGen/CgRetConv.hi-boot +++ /dev/null @@ -1,7 +0,0 @@ -_interface_ CgRetConv 1 -_exports_ -CgRetConv CtrlReturnConvention(VectoredReturn UnvectoredReturn) ctrlReturnConvAlg; -_declarations_ -1 data CtrlReturnConvention = VectoredReturn PrelBase.Int | UnvectoredReturn PrelBase.Int; -1 ctrlReturnConvAlg _:_ TyCon.TyCon -> CtrlReturnConvention ;; - diff --git a/ghc/compiler/codeGen/CgRetConv.lhs b/ghc/compiler/codeGen/CgRetConv.lhs deleted file mode 100644 index ecf7d52ae9..0000000000 --- a/ghc/compiler/codeGen/CgRetConv.lhs +++ /dev/null @@ -1,246 +0,0 @@ -% -% (c) The GRASP Project, Glasgow University, 1992-1998 -% -% $Id: CgRetConv.lhs,v 1.34 2003/10/09 11:58:46 simonpj Exp $ -% -\section[CgRetConv]{Return conventions for the code generator} - -The datatypes and functions here encapsulate what there is to know -about return conventions. - -\begin{code} -module CgRetConv ( - CtrlReturnConvention(..), - ctrlReturnConvAlg, - dataReturnConvPrim, - assignRegs, assignAllRegs - ) where - -#include "HsVersions.h" - -import AbsCSyn -- quite a few things -import Constants ( mAX_FAMILY_SIZE_FOR_VEC_RETURNS, - mAX_Vanilla_REG, mAX_Float_REG, - mAX_Double_REG, mAX_Long_REG, - mAX_Real_Vanilla_REG, mAX_Real_Float_REG, - mAX_Real_Double_REG, mAX_Real_Long_REG - ) -import CmdLineOpts ( opt_Unregisterised ) -import Maybes ( mapCatMaybes ) -import PrimRep ( isFloatingRep, PrimRep(..), is64BitRep ) -import TyCon ( TyCon, tyConFamilySize ) -import Util ( isn'tIn ) -import FastTypes -import Outputable -\end{code} - -%************************************************************************ -%* * -\subsection[CgRetConv-possibilities]{Data types that encode possible return conventions} -%* * -%************************************************************************ - -A @CtrlReturnConvention@ says how {\em control} is returned. 
-\begin{code} -data CtrlReturnConvention - = VectoredReturn Int -- size of the vector table (family size) - | UnvectoredReturn Int -- family size -\end{code} - -%************************************************************************ -%* * -\subsection[CgRetConv-algebraic]{Return conventions for algebraic datatypes} -%* * -%************************************************************************ - -\begin{code} -ctrlReturnConvAlg :: TyCon -> CtrlReturnConvention - -ctrlReturnConvAlg tycon - = case (tyConFamilySize tycon) of - size -> -- we're supposed to know... - if (size > (1::Int) && size <= mAX_FAMILY_SIZE_FOR_VEC_RETURNS) then - VectoredReturn size - else - UnvectoredReturn size - -- NB: unvectored returns Include size 0 (no constructors), so that - -- the following perverse code compiles (it crashed GHC in 5.02) - -- data T1 - -- data T2 = T2 !T1 Int - -- The only value of type T1 is bottom, which never returns anyway. -\end{code} - -%************************************************************************ -%* * -\subsection[CgRetConv-prim]{Return conventions for primitive datatypes} -%* * -%************************************************************************ - -\begin{code} -dataReturnConvPrim :: PrimRep -> MagicId - -dataReturnConvPrim PtrRep = VanillaReg PtrRep (_ILIT 1) -dataReturnConvPrim IntRep = VanillaReg IntRep (_ILIT 1) -dataReturnConvPrim WordRep = VanillaReg WordRep (_ILIT 1) -dataReturnConvPrim Int32Rep = VanillaReg Int32Rep (_ILIT 1) -dataReturnConvPrim Word32Rep = VanillaReg Word32Rep (_ILIT 1) -dataReturnConvPrim Int64Rep = LongReg Int64Rep (_ILIT 1) -dataReturnConvPrim Word64Rep = LongReg Word64Rep (_ILIT 1) -dataReturnConvPrim AddrRep = VanillaReg AddrRep (_ILIT 1) -dataReturnConvPrim CharRep = VanillaReg CharRep (_ILIT 1) -dataReturnConvPrim Int8Rep = VanillaReg Int8Rep (_ILIT 1) -dataReturnConvPrim FloatRep = FloatReg (_ILIT 1) -dataReturnConvPrim DoubleRep = DoubleReg (_ILIT 1) -dataReturnConvPrim StablePtrRep = VanillaReg 
StablePtrRep (_ILIT 1) -dataReturnConvPrim VoidRep = VoidReg - -#ifdef DEBUG -dataReturnConvPrim rep = pprPanic "dataReturnConvPrim:" (ppr rep) -#endif -\end{code} - -%************************************************************************ -%* * -\subsubsection[CgRetConv-regs]{Register assignment} -%* * -%************************************************************************ - -How to assign registers for - - 1) Calling a fast entry point. - 2) Returning an unboxed tuple. - 3) Invoking an out-of-line PrimOp. - -Registers are assigned in order. - -If we run out, we don't attempt to assign any further registers (even -though we might have run out of only one kind of register); we just -return immediately with the left-overs specified. - -The alternative version @assignAllRegs@ uses the complete set of -registers, including those that aren't mapped to real machine -registers. This is used for calling special RTS functions and PrimOps -which expect their arguments to always be in the same registers. - -\begin{code} -assignRegs, assignAllRegs - :: [MagicId] -- Unavailable registers - -> [PrimRep] -- Arg or result kinds to assign - -> ([MagicId], -- Register assignment in same order - -- for *initial segment of* input list - [PrimRep])-- leftover kinds - -assignRegs regs_in_use kinds - = assign_reg kinds [] (mkRegTbl regs_in_use) - -assignAllRegs regs_in_use kinds - = assign_reg kinds [] (mkRegTbl_allRegs regs_in_use) - -assign_reg - :: [PrimRep] -- arg kinds being scrutinized - -> [MagicId] -- accum. regs assigned so far (reversed) - -> AvailRegs -- regs still avail: Vanilla, Float, Double, longs - -> ([MagicId], [PrimRep]) - -assign_reg (VoidRep:ks) acc supply - = assign_reg ks (VoidReg:acc) supply - -- one VoidReg is enough for everybody! 
- -assign_reg (FloatRep:ks) acc (vanilla_rs, f:float_rs, double_rs, long_rs) - = assign_reg ks (FloatReg (iUnbox f):acc) - (vanilla_rs, float_rs, double_rs, long_rs) - -assign_reg (DoubleRep:ks) acc (vanilla_rs, float_rs, d:double_rs, long_rs) - = assign_reg ks (DoubleReg (iUnbox d):acc) - (vanilla_rs, float_rs, double_rs, long_rs) - -assign_reg (Word64Rep:ks) acc (vanilla_rs, float_rs, double_rs, u:long_rs) - = assign_reg ks (LongReg Word64Rep (iUnbox u):acc) - (vanilla_rs, float_rs, double_rs, long_rs) - -assign_reg (Int64Rep:ks) acc (vanilla_rs, float_rs, double_rs, l:long_rs) - = assign_reg ks (LongReg Int64Rep (iUnbox l):acc) - (vanilla_rs, float_rs, double_rs, long_rs) - -assign_reg (k:ks) acc (v:vanilla_rs, float_rs, double_rs, long_rs) - | not (isFloatingRep k || is64BitRep k) - = assign_reg ks (VanillaReg k (iUnbox v):acc) - (vanilla_rs, float_rs, double_rs, long_rs) - --- The catch-all. It can happen because either --- (a) we've assigned all the regs so leftover_ks is [] --- (b) we couldn't find a spare register in the appropriate supply --- or, I suppose, --- (c) we came across a Kind we couldn't handle (this one shouldn't happen) -assign_reg leftover_ks acc _ = (reverse acc, leftover_ks) - -\end{code} - -Register supplies. Vanilla registers can contain pointers, Ints, Chars. -Floats and doubles have separate register supplies. - -We take these register supplies from the *real* registers, i.e. those -that are guaranteed to map to machine registers. 
- -\begin{code} -useVanillaRegs | opt_Unregisterised = 0 - | otherwise = mAX_Real_Vanilla_REG -useFloatRegs | opt_Unregisterised = 0 - | otherwise = mAX_Real_Float_REG -useDoubleRegs | opt_Unregisterised = 0 - | otherwise = mAX_Real_Double_REG -useLongRegs | opt_Unregisterised = 0 - | otherwise = mAX_Real_Long_REG - -vanillaRegNos, floatRegNos, doubleRegNos, longRegNos :: [Int] -vanillaRegNos = regList useVanillaRegs -floatRegNos = regList useFloatRegs -doubleRegNos = regList useDoubleRegs -longRegNos = regList useLongRegs - -allVanillaRegNos, allFloatRegNos, allDoubleRegNos, allLongRegNos :: [Int] -allVanillaRegNos = regList mAX_Vanilla_REG -allFloatRegNos = regList mAX_Float_REG -allDoubleRegNos = regList mAX_Double_REG -allLongRegNos = regList mAX_Long_REG - -regList 0 = [] -regList n = [1 .. n] - -type AvailRegs = ( [Int] -- available vanilla regs. - , [Int] -- floats - , [Int] -- doubles - , [Int] -- longs (int64 and word64) - ) - -mkRegTbl :: [MagicId] -> AvailRegs -mkRegTbl regs_in_use - = mkRegTbl' regs_in_use vanillaRegNos floatRegNos doubleRegNos longRegNos - -mkRegTbl_allRegs :: [MagicId] -> AvailRegs -mkRegTbl_allRegs regs_in_use - = mkRegTbl' regs_in_use allVanillaRegNos allFloatRegNos allDoubleRegNos allLongRegNos - -mkRegTbl' regs_in_use vanillas floats doubles longs - = (ok_vanilla, ok_float, ok_double, ok_long) - where - ok_vanilla = mapCatMaybes (select (VanillaReg VoidRep)) vanillas - ok_float = mapCatMaybes (select FloatReg) floats - ok_double = mapCatMaybes (select DoubleReg) doubles - ok_long = mapCatMaybes (select (LongReg Int64Rep)) longs - -- rep isn't looked at, hence we can use any old rep. - - select :: (FastInt -> MagicId) -> Int{-cand-} -> Maybe Int - -- one we've unboxed the Int, we make a MagicId - -- and see if it is already in use; if not, return its number. 
- - select mk_reg_fun cand - = let - reg = mk_reg_fun (iUnbox cand) - in - if reg `not_elem` regs_in_use - then Just cand - else Nothing - where - not_elem = isn'tIn "mkRegTbl" -\end{code} diff --git a/ghc/compiler/codeGen/CgStackery.lhs b/ghc/compiler/codeGen/CgStackery.lhs index 4b1b414064..206dcc2153 100644 --- a/ghc/compiler/codeGen/CgStackery.lhs +++ b/ghc/compiler/codeGen/CgStackery.lhs @@ -1,7 +1,7 @@ % % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 % -% $Id: CgStackery.lhs,v 1.24 2003/11/17 14:42:47 simonmar Exp $ +% $Id: CgStackery.lhs,v 1.25 2004/08/13 13:06:12 simonmar Exp $ % \section[CgStackery]{Stack management functions} @@ -10,33 +10,92 @@ Stack-twiddling operations, which are pretty low-down and grimy. \begin{code} module CgStackery ( + spRel, getVirtSp, getRealSp, setRealSp, + setRealAndVirtualSp, getSpRelOffset, + allocPrimStack, allocStackTop, deAllocStackTop, adjustStackHW, getFinalStackHW, setStackFrame, getStackFrame, mkVirtStkOffsets, mkStkAmodes, - freeStackSlots, dataStackSlots, - updateFrameSize, - constructSlowCall, slowArgs, + freeStackSlots, + pushUpdateFrame, emitPushUpdateFrame, ) where #include "HsVersions.h" import CgMonad -import AbsCSyn -import CLabel ( mkRtsApplyInfoLabel, mkRtsApplyEntryLabel ) - -import CgUsages ( getRealSp ) -import AbsCUtils ( mkAbstractCs, getAmodeRep ) -import PrimRep -import CmdLineOpts ( opt_SccProfilingOn, opt_GranMacros ) +import CgUtils ( cmmOffsetB, cmmRegOffW ) +import CgProf ( initUpdFrameProf ) +import SMRep +import Cmm +import CmmUtils ( CmmStmts, mkLblExpr ) +import CLabel ( mkUpdInfoLabel ) import Constants import Util ( sortLt ) import FastString ( LitString ) -import Panic - -import TRACE ( trace ) +import OrdList ( toOL ) +import Outputable +\end{code} + +%************************************************************************ +%* * +\subsection[CgUsages-stackery]{Monad things for fiddling with stack usage} +%* * 
+%************************************************************************ + +spRel is a little function that abstracts the stack direction. Note that most +of the code generator is dependent on the stack direction anyway, so +changing this on its own spells certain doom. ToDo: remove? + + THIS IS DIRECTION SENSITIVE! + +Stack grows down, positive virtual offsets correspond to negative +additions to the stack pointer. + +\begin{code} +spRel :: VirtualSpOffset -- virtual offset of Sp + -> VirtualSpOffset -- virtual offset of The Thing + -> WordOff -- integer offset +spRel sp off = sp - off +\end{code} + +@setRealAndVirtualSp@ sets into the environment the offsets of the +current position of the real and virtual stack pointers in the current +stack frame. The high-water mark is set too. It generates no code. +It is used to initialise things at the beginning of a closure body. + +\begin{code} +setRealAndVirtualSp :: VirtualSpOffset -- New real Sp + -> Code + +setRealAndVirtualSp new_sp + = do { stk_usg <- getStkUsage + ; setStkUsage (stk_usg {virtSp = new_sp, + realSp = new_sp, + hwSp = new_sp}) } + +getVirtSp :: FCode VirtualSpOffset +getVirtSp + = do { stk_usg <- getStkUsage + ; return (virtSp stk_usg) } + +getRealSp :: FCode VirtualSpOffset +getRealSp + = do { stk_usg <- getStkUsage + ; return (realSp stk_usg) } + +setRealSp :: VirtualSpOffset -> Code +setRealSp new_real_sp + = do { stk_usg <- getStkUsage + ; setStkUsage (stk_usg {realSp = new_real_sp}) } + +getSpRelOffset :: VirtualSpOffset -> FCode CmmExpr +getSpRelOffset virtual_offset + = do { real_sp <- getRealSp + ; return (cmmRegOffW spReg (spRel real_sp virtual_offset)) } \end{code} + %************************************************************************ %* * \subsection[CgStackery-layout]{Laying out a stack frame} @@ -50,24 +109,22 @@ increase towards the top of stack). 
\begin{code} mkVirtStkOffsets :: VirtualSpOffset -- Offset of the last allocated thing - -> (a -> PrimRep) -- to be able to grab kinds - -> [a] -- things to make offsets for + -> [(CgRep,a)] -- things to make offsets for -> (VirtualSpOffset, -- OUTPUTS: Topmost allocated word - [(a, VirtualSpOffset)]) -- things with offsets + [(a, VirtualSpOffset)]) -- things with offsets (voids filtered out) -mkVirtStkOffsets init_Sp_offset kind_fun things +mkVirtStkOffsets init_Sp_offset things = loop init_Sp_offset [] (reverse things) where loop offset offs [] = (offset,offs) - loop offset offs (t:things) = - let - size = getPrimRepSize (kind_fun t) - thing_slot = offset + size - in - loop thing_slot ((t,thing_slot):offs) things - -- offset of thing is offset+size, because we're growing the stack - -- *downwards* as the offsets increase. - + loop offset offs ((VoidArg,t):things) = loop offset offs things + -- ignore Void arguments + loop offset offs ((rep,t):things) + = loop thing_slot ((t,thing_slot):offs) things + where + thing_slot = offset + cgRepSizeW rep + -- offset of thing is offset+size, because we're + -- growing the stack *downwards* as the offsets increase. -- | 'mkStkAmodes' is a higher-level version of -- 'mkVirtStkOffsets'. It starts from the tail-call locations. 
@@ -77,87 +134,17 @@ mkVirtStkOffsets init_Sp_offset kind_fun things mkStkAmodes :: VirtualSpOffset -- Tail call positions - -> [CAddrMode] -- things to make offsets for + -> [(CgRep,CmmExpr)] -- things to make offsets for -> FCode (VirtualSpOffset, -- OUTPUTS: Topmost allocated word - AbstractC) -- Assignments to appropriate stk slots + CmmStmts) -- Assignments to appropriate stk slots mkStkAmodes tail_Sp things - = getRealSp `thenFC` \ realSp -> - let - (last_Sp_offset, offsets) = mkVirtStkOffsets tail_Sp getAmodeRep things - - abs_cs = - [ CAssign (CVal (spRel realSp offset) (getAmodeRep thing)) thing - | (thing, offset) <- offsets - ] - in - returnFC (last_Sp_offset, mkAbstractCs abs_cs) -\end{code} - -%************************************************************************ -%* * -\subsection{Pushing the arguments for a slow call} -%* * -%************************************************************************ - -For a slow call, we must take a bunch of arguments and intersperse -some stg_ap_<pattern>_ret_info return addresses. - -\begin{code} -constructSlowCall :: [CAddrMode] -> (CAddrMode, [CAddrMode]) - -- don't forget the zero case -constructSlowCall [] = (CLbl stg_ap_0 CodePtrRep , []) -constructSlowCall amodes = - -- traceSlowCall amodes $ - (CLbl lbl CodePtrRep, these ++ slowArgs rest) - where (tag, these, rest) = matchSlowPattern amodes - lbl = mkRtsApplyEntryLabel tag - -stg_ap_0 = mkRtsApplyEntryLabel SLIT("0") - --- | 'slowArgs' takes a list of function arguments and prepares them for --- pushing on the stack for "extra" arguments to a function which requires --- fewer arguments than we currently have. 
-slowArgs :: [CAddrMode] -> [CAddrMode] -slowArgs [] = [] -slowArgs amodes = CLbl lbl RetRep : args ++ slowArgs rest - where (tag, args, rest) = matchSlowPattern amodes - lbl = mkRtsApplyInfoLabel tag - -matchSlowPattern :: [CAddrMode] -> (LitString, [CAddrMode], [CAddrMode]) -matchSlowPattern amodes = (tag, these, rest) - where reps = map getAmodeRep amodes - (tag, n) = findMatch (map primRepToArgRep reps) - (these, rest) = splitAt n amodes - --- These cases were found to cover about 99% of all slow calls: -findMatch (RepP: RepP: RepP: RepP: RepP: RepP: RepP: _) = (SLIT("ppppppp"), 7) -findMatch (RepP: RepP: RepP: RepP: RepP: RepP: _) = (SLIT("pppppp"), 6) -findMatch (RepP: RepP: RepP: RepP: RepP: _) = (SLIT("ppppp"), 5) -findMatch (RepP: RepP: RepP: RepP: _) = (SLIT("pppp"), 4) -findMatch (RepP: RepP: RepP: _) = (SLIT("ppp"), 3) -findMatch (RepP: RepP: RepV: _) = (SLIT("ppv"), 3) -findMatch (RepP: RepP: _) = (SLIT("pp"), 2) -findMatch (RepP: RepV: _) = (SLIT("pv"), 2) -findMatch (RepP: _) = (SLIT("p"), 1) -findMatch (RepV: _) = (SLIT("v"), 1) -findMatch (RepN: _) = (SLIT("n"), 1) -findMatch (RepF: _) = (SLIT("f"), 1) -findMatch (RepD: _) = (SLIT("d"), 1) -findMatch (RepL: _) = (SLIT("l"), 1) -findMatch _ = panic "CgStackery.findMatch" - -#ifdef DEBUG -primRepChar p | isFollowableRep p = 'p' -primRepChar VoidRep = 'v' -primRepChar FloatRep = 'f' -primRepChar DoubleRep = 'd' -primRepChar p | getPrimRepSize p == 1 = 'n' -primRepChar p | is64BitRep p = 'l' - -traceSlowCall amodes and_then - = trace ("call: " ++ map primRepChar (map getAmodeRep amodes)) and_then -#endif + = do { rSp <- getRealSp + ; let (last_Sp_offset, offsets) = mkVirtStkOffsets tail_Sp things + abs_cs = [ CmmStore (cmmRegOffW spReg (spRel rSp offset)) amode + | (amode, offset) <- offsets + ] + ; returnFC (last_Sp_offset, toOL abs_cs) } \end{code} %************************************************************************ @@ -169,108 +156,150 @@ traceSlowCall amodes and_then Allocate a virtual offset 
for something. \begin{code} -allocPrimStack :: Int -> FCode VirtualSpOffset -allocPrimStack size = do - ((virt_sp, frame, free_stk, real_sp, hw_sp),h_usage) <- getUsage - let push_virt_sp = virt_sp + size - let (chosen_slot, new_stk_usage) = - case find_block free_stk of - Nothing -> (push_virt_sp, - (push_virt_sp, frame, free_stk, real_sp, - hw_sp `max` push_virt_sp)) +allocPrimStack :: CgRep -> FCode VirtualSpOffset +allocPrimStack rep + = do { stk_usg <- getStkUsage + ; let free_stk = freeStk stk_usg + ; case find_block free_stk of + Nothing -> do + { let push_virt_sp = virtSp stk_usg + size + ; setStkUsage (stk_usg { virtSp = push_virt_sp, + hwSp = hwSp stk_usg `max` push_virt_sp }) -- Adjust high water mark - Just slot -> (slot, - (virt_sp, frame, - delete_block free_stk slot, - real_sp, hw_sp)) - setUsage (new_stk_usage, h_usage) - return chosen_slot - - where - -- find_block looks for a contiguous chunk of free slots - find_block :: [(VirtualSpOffset,Slot)] -> Maybe VirtualSpOffset - find_block [] = Nothing - find_block ((off,free):slots) - | take size ((off,free):slots) == - zip [off..top_slot] (repeat Free) = Just top_slot - | otherwise = find_block slots - -- The stack grows downwards, with increasing virtual offsets. - -- Therefore, the address of a multi-word object is the *highest* - -- virtual offset it occupies (top_slot below). 
- where top_slot = off+size-1 - - delete_block free_stk slot = [ (s,f) | (s,f) <- free_stk, - (s<=slot-size) || (s>slot) ] - -- Retain slots which are not in the range - -- slot-size+1..slot + ; return push_virt_sp } + Just slot -> do + { setStkUsage (stk_usg { freeStk = delete_block free_stk slot }) + ; return slot } + } + where + size :: WordOff + size = cgRepSizeW rep + + -- Find_block looks for a contiguous chunk of free slots + -- returning the offset of its topmost word + find_block :: [VirtualSpOffset] -> Maybe VirtualSpOffset + find_block [] = Nothing + find_block (slot:slots) + | take size (slot:slots) == [slot..top_slot] + = Just top_slot + | otherwise + = find_block slots + where -- The stack grows downwards, with increasing virtual offsets. + -- Therefore, the address of a multi-word object is the *highest* + -- virtual offset it occupies (top_slot below). + top_slot = slot+size-1 + + delete_block free_stk slot = [ s | s <- free_stk, + (s<=slot-size) || (s>slot) ] + -- Retain slots which are not in the range + -- slot-size+1..slot \end{code} Allocate a chunk ON TOP OF the stack. -ToDo: should really register this memory as NonPointer stuff in the -free list. - \begin{code} -allocStackTop :: Int -> FCode VirtualSpOffset -allocStackTop size = do - ((virt_sp, frame, free_stk, real_sp, hw_sp), h_usage) <- getUsage - let push_virt_sp = virt_sp + size - let new_stk_usage = (push_virt_sp, frame, free_stk, real_sp, - hw_sp `max` push_virt_sp) - setUsage (new_stk_usage, h_usage) - return push_virt_sp +allocStackTop :: WordOff -> FCode VirtualSpOffset +allocStackTop size + = do { stk_usg <- getStkUsage + ; let push_virt_sp = virtSp stk_usg + size + ; setStkUsage (stk_usg { virtSp = push_virt_sp, + hwSp = hwSp stk_usg `max` push_virt_sp }) + ; return push_virt_sp } \end{code} Pop some words from the current top of stack. This is used for de-allocating the return address in a case alternative. 
\begin{code} -deAllocStackTop :: Int -> FCode VirtualSpOffset -deAllocStackTop size = do - ((virt_sp, frame, free_stk, real_sp, hw_sp), h_usage) <- getUsage - let pop_virt_sp = virt_sp - size - let new_stk_usage = (pop_virt_sp, frame, free_stk, real_sp, hw_sp) - setUsage (new_stk_usage, h_usage) - return pop_virt_sp +deAllocStackTop :: WordOff -> FCode VirtualSpOffset +deAllocStackTop size + = do { stk_usg <- getStkUsage + ; let pop_virt_sp = virtSp stk_usg - size + ; setStkUsage (stk_usg { virtSp = pop_virt_sp }) + ; return pop_virt_sp } \end{code} \begin{code} adjustStackHW :: VirtualSpOffset -> Code -adjustStackHW offset = do - ((vSp,fTop,fSp,realSp,hwSp), h_usage) <- getUsage - setUsage ((vSp, fTop, fSp, realSp, max offset hwSp), h_usage) +adjustStackHW offset + = do { stk_usg <- getStkUsage + ; setStkUsage (stk_usg { hwSp = hwSp stk_usg `max` offset }) } \end{code} A knot-tying beast. \begin{code} getFinalStackHW :: (VirtualSpOffset -> Code) -> Code -getFinalStackHW fcode = do - fixC (\hwSp -> do - fcode hwSp - ((_,_,_,_, hwSp),_) <- getUsage - return hwSp) - return () +getFinalStackHW fcode + = do { fixC (\hw_sp -> do + { fcode hw_sp + ; stk_usg <- getStkUsage + ; return (hwSp stk_usg) }) + ; return () } \end{code} \begin{code} setStackFrame :: VirtualSpOffset -> Code -setStackFrame offset = do - ((vSp,_,fSp,realSp,hwSp), h_usage) <- getUsage - setUsage ((vSp, offset, fSp, realSp, hwSp), h_usage) +setStackFrame offset + = do { stk_usg <- getStkUsage + ; setStkUsage (stk_usg { frameSp = offset }) } getStackFrame :: FCode VirtualSpOffset -getStackFrame = do - ((vSp,frame,fSp,realSp,hwSp), h_usage) <- getUsage - return frame +getStackFrame + = do { stk_usg <- getStkUsage + ; return (frameSp stk_usg) } \end{code} + +%******************************************************** +%* * +%* Setting up update frames * +%* * +%******************************************************** + +@pushUpdateFrame@ $updatee$ pushes a general update frame which +points to $updatee$ as 
the thing to be updated. It is only used +when a thunk has just been entered, so the (real) stack pointers +are guaranteed to be nicely aligned with the top of stack. +@pushUpdateFrame@ adjusts the virtual and tail stack pointers +to reflect the frame pushed. + \begin{code} -updateFrameSize | opt_SccProfilingOn = pROF_UF_SIZE - | opt_GranMacros = trace ("updateFrameSize = " ++ (show gRAN_UF_SIZE))gRAN_UF_SIZE - | otherwise = uF_SIZE +pushUpdateFrame :: CmmExpr -> Code -> Code + +pushUpdateFrame updatee code + = do { +#ifdef DEBUG + EndOfBlockInfo _ sequel <- getEndOfBlockInfo ; + ASSERT(case sequel of { OnStack -> True; _ -> False}) +#endif + + allocStackTop (fixedHdrSize + + sIZEOF_StgUpdateFrame_NoHdr `quot` wORD_SIZE) + ; vsp <- getVirtSp + ; setStackFrame vsp + ; frame_addr <- getSpRelOffset vsp + -- The location of the lowest-address + -- word of the update frame itself + + ; setEndOfBlockInfo (EndOfBlockInfo vsp UpdateCode) $ + do { emitPushUpdateFrame frame_addr updatee + ; code } + } + +emitPushUpdateFrame :: CmmExpr -> CmmExpr -> Code +emitPushUpdateFrame frame_addr updatee = do + stmtsC [ -- Set the info word + CmmStore frame_addr (mkLblExpr mkUpdInfoLabel) + , -- And the updatee + CmmStore (cmmOffsetB frame_addr off_updatee) updatee ] + initUpdFrameProf frame_addr + +off_updatee :: ByteOff +off_updatee = fixedHdrSize*wORD_SIZE + oFFSET_StgUpdateFrame_updatee \end{code} + %************************************************************************ %* * \subsection[CgStackery-free]{Free stack slots} @@ -280,50 +309,31 @@ updateFrameSize | opt_SccProfilingOn = pROF_UF_SIZE Explicitly free some stack space. 
\begin{code} -addFreeStackSlots :: [VirtualSpOffset] -> Slot -> Code -addFreeStackSlots extra_free slot = do - ((vsp, frame,free, real, hw),heap_usage) <- getUsage - let all_free = addFreeSlots free (zip (sortLt (<) extra_free) (repeat slot)) - let (new_vsp, new_free) = trim vsp all_free - let new_usage = ((new_vsp, frame, new_free, real, hw), heap_usage) - setUsage new_usage - freeStackSlots :: [VirtualSpOffset] -> Code -freeStackSlots slots = addFreeStackSlots slots Free - -dataStackSlots :: [VirtualSpOffset] -> Code -dataStackSlots slots = addFreeStackSlots slots NonPointer - -addFreeSlots :: [(Int,Slot)] -> [(Int,Slot)] -> [(Int,Slot)] +freeStackSlots extra_free + = do { stk_usg <- getStkUsage + ; let all_free = addFreeSlots (freeStk stk_usg) (sortLt (<) extra_free) + ; let (new_vsp, new_free) = trim (virtSp stk_usg) all_free + ; setStkUsage (stk_usg { virtSp = new_vsp, freeStk = new_free }) } + +addFreeSlots :: [VirtualSpOffset] -> [VirtualSpOffset] -> [VirtualSpOffset] +-- Merge the two, assuming both are in increasing order addFreeSlots cs [] = cs addFreeSlots [] ns = ns -addFreeSlots ((c,s):cs) ((n,s'):ns) - = if c < n then - (c,s) : addFreeSlots cs ((n,s'):ns) - else if c > n then - (n,s') : addFreeSlots ((c,s):cs) ns - else if s /= s' then -- c == n - (c,s') : addFreeSlots cs ns - else - panic ("addFreeSlots: equal slots: " ++ show (c:map fst cs) - ++ show (n:map fst ns)) - -trim :: Int{-offset-} -> [(Int,Slot)] -> (Int{-offset-}, [(Int,Slot)]) -trim current_sp free_slots - = try current_sp free_slots - where - try csp [] = (csp,[]) - - try csp (slot@(off,state):slots) = - if state == Free && null slots' then - if csp' < off then - (csp', []) - else if csp' == off then - (csp'-1, []) - else - (csp',[slot]) - else - (csp', slot:slots') - where - (csp',slots') = try csp slots +addFreeSlots (c:cs) (n:ns) + | c < n = c : addFreeSlots cs (n:ns) + | otherwise = n : addFreeSlots (c:cs) ns + +trim :: VirtualSpOffset -> [VirtualSpOffset] -> (VirtualSpOffset, 
[VirtualSpOffset]) +-- Try to trim back the virtual stack pointer, where there is a +-- continuous bunch of free slots at the end of the free list +trim vsp [] = (vsp, []) +trim vsp (slot:slots) + = case trim vsp slots of + (vsp', []) + | vsp' < slot -> pprTrace "trim: strange" (ppr vsp <+> ppr (slot:slots)) + (vsp', []) + | vsp' == slot -> (vsp'-1, []) + | otherwise -> (vsp', [slot]) + (vsp', slots') -> (vsp', slot:slots') \end{code} diff --git a/ghc/compiler/codeGen/CgTailCall.lhs b/ghc/compiler/codeGen/CgTailCall.lhs index 9d5118a77d..982891b2f7 100644 --- a/ghc/compiler/codeGen/CgTailCall.lhs +++ b/ghc/compiler/codeGen/CgTailCall.lhs @@ -1,7 +1,7 @@ % % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 % -% $Id: CgTailCall.lhs,v 1.38 2003/06/02 13:27:34 simonpj Exp $ +% $Id: CgTailCall.lhs,v 1.39 2004/08/13 13:06:13 simonmar Exp $ % %******************************************************** %* * @@ -13,9 +13,9 @@ module CgTailCall ( cgTailCall, performTailCall, performReturn, performPrimReturn, - mkStaticAlgReturnCode, mkDynamicAlgReturnCode, + emitKnownConReturnCode, emitAlgReturnCode, returnUnboxedTuple, ccallReturnUnboxedTuple, - mkPrimReturnCode, + pushUnboxedTuple, tailCallPrimOp, pushReturnAddress @@ -24,31 +24,31 @@ module CgTailCall ( #include "HsVersions.h" import CgMonad -import CgBindery ( getArgAmodes, getCAddrMode, getCAddrModeAndInfo ) -import CgRetConv -import CgStackery -import CgUsages ( getSpRelOffset, adjustSpAndHp ) +import CgBindery ( getArgAmodes, getCgIdInfo, CgIdInfo, maybeLetNoEscape, + idInfoToAmode, cgIdInfoId, cgIdInfoLF, + cgIdInfoArgRep ) +import CgInfoTbls ( entryCode, emitDirectReturnInstr, dataConTagZ, + emitVectoredReturnInstr, closureInfoPtr ) +import CgCallConv +import CgStackery ( setRealSp, mkStkAmodes, adjustStackHW, + getSpRelOffset ) +import CgHeapery ( setRealHp, getHpRelOffset ) +import CgUtils ( emitSimultaneously ) +import CgTicky import ClosureInfo - -import AbsCUtils ( mkAbstractCs, getAmodeRep ) -import 
AbsCSyn -import CLabel ( mkRtsPrimOpLabel, mkSeqInfoLabel ) - -import Id ( Id, idType, idName ) -import DataCon ( DataCon, dataConTyCon, dataConTag, fIRST_TAG ) -import PrimRep ( PrimRep(..) ) -import StgSyn ( StgArg ) +import SMRep ( CgRep, isVoidArg, separateByPtrFollowness ) +import Cmm +import CmmUtils +import CLabel ( CLabel, mkRtsPrimOpLabel, mkSeqInfoLabel ) import Type ( isUnLiftedType ) -import Name ( Name ) +import Id ( Id, idName, idUnique, idType ) +import DataCon ( DataCon, dataConTyCon ) +import StgSyn ( StgArg ) import TyCon ( TyCon ) import PrimOp ( PrimOp ) -import Util ( zipWithEqual, splitAtList ) -import ListSetOps ( assocMaybe ) -import PrimRep ( isFollowableRep ) import Outputable -import Panic ( panic, assertPanic ) -import List ( partition ) +import Monad ( when ) ----------------------------------------------------------------------------- -- Tail Calls @@ -75,339 +75,205 @@ cgTailCall :: Id -> [StgArg] -> Code -- Treat unboxed locals exactly like literals (above) except use the addr -- mode for the local instead of (CLit lit) in the assignment. 
--- Case for unboxed returns first: -cgTailCall fun [] - | isUnLiftedType (idType fun) - = getCAddrMode fun `thenFC` \ amode -> - performPrimReturn (ppr fun) amode - --- The general case (@fun@ is boxed): cgTailCall fun args - = getCAddrModeAndInfo fun `thenFC` \ (fun', fun_amode, lf_info) -> - getArgAmodes args `thenFC` \ arg_amodes -> - performTailCall fun' fun_amode lf_info arg_amodes AbsCNop - + = do { fun_info <- getCgIdInfo fun + + ; if isUnLiftedType (idType fun) + then -- Primitive return + ASSERT( null args ) + do { fun_amode <- idInfoToAmode fun_info + ; performPrimReturn (cgIdInfoArgRep fun_info) fun_amode } + + else -- Normal case, fun is boxed + do { arg_amodes <- getArgAmodes args + ; performTailCall fun_info arg_amodes noStmts } + } + -- ----------------------------------------------------------------------------- -- The guts of a tail-call performTailCall - :: Id -- function - -> CAddrMode -- function amode - -> LambdaFormInfo - -> [CAddrMode] - -> AbstractC -- Pending simultaneous assignments - -- *** GUARANTEED to contain only stack assignments. + :: CgIdInfo -- The function + -> [(CgRep,CmmExpr)] -- Args + -> CmmStmts -- Pending simultaneous assignments + -- *** GUARANTEED to contain only stack assignments. 
-> Code -performTailCall fun fun_amode lf_info arg_amodes pending_assts = - nodeMustPointToIt lf_info `thenFC` \ node_points -> - let - -- assign to node if necessary - node_asst - | node_points = CAssign (CReg node) fun_amode - | otherwise = AbsCNop - in - - getEndOfBlockInfo `thenFC` \ eob@(EndOfBlockInfo args_sp sequel) -> - - let - -- set up for a let-no-escape if necessary - join_sp = case fun_amode of - CJoinPoint sp -> sp - other -> args_sp - in - - -- decide how to code the tail-call: which registers assignments to make, - -- what args to push on the stack, and how to make the jump - constructTailCall (idName fun) lf_info arg_amodes join_sp - node_points fun_amode sequel - `thenFC` \ (final_sp, arg_assts, jump_code) -> - - let sim_assts = mkAbstractCs [node_asst, - pending_assts, - arg_assts] - - is_lne = case fun_amode of { CJoinPoint _ -> True; _ -> False } - in - - doFinalJump final_sp sim_assts is_lne (const jump_code) - - --- Figure out how to do a particular tail-call. - -constructTailCall - :: Name - -> LambdaFormInfo - -> [CAddrMode] - -> VirtualSpOffset -- Sp at which to make the call - -> Bool -- node points to the fun closure? - -> CAddrMode -- addressing mode of the function - -> Sequel -- the sequel, in case we need it - -> FCode ( - VirtualSpOffset, -- Sp after pushing the args - AbstractC, -- assignments - Code -- code to do the jump - ) - -constructTailCall name lf_info arg_amodes sp node_points fun_amode sequel = - - getEntryConvention name lf_info (map getAmodeRep arg_amodes) - `thenFC` \ entry_conv -> - - case entry_conv of - EnterIt -> returnFC (sp, AbsCNop, code) - where code = profCtrC FSLIT("TICK_ENT_VIA_NODE") [] `thenC` - absC (CJump (CMacroExpr CodePtrRep ENTRY_CODE - [CVal (nodeRel 0) DataPtrRep])) - - -- A function, but we have zero arguments. It is already in WHNF, - -- so we can just return it. - ReturnIt -> returnFC (sp, asst, code) - where -- if node doesn't already point to the closure, we have to - -- load it up. 
- asst | node_points = AbsCNop - | otherwise = CAssign (CReg node) fun_amode - - code = sequelToAmode sequel `thenFC` \ dest_amode -> - absC (CReturn dest_amode DirectReturn) - - JumpToIt lbl -> returnFC (sp, AbsCNop, code) - where code = absC (CJump (CLbl lbl CodePtrRep)) - - -- a slow function call via the RTS apply routines - SlowCall -> - let (apply_fn, new_amodes) = constructSlowCall arg_amodes - - -- if node doesn't already point to the closure, - -- we have to load it up. - node_asst | node_points = AbsCNop - | otherwise = CAssign (CReg node) fun_amode - in - - -- Fill in all the arguments on the stack - mkStkAmodes sp new_amodes `thenFC` - \ (final_sp, stk_assts) -> - - returnFC - (final_sp + 1, -- add one, because the stg_ap functions - -- expect there to be a free slot on the stk - mkAbstractCs [node_asst, stk_assts], - absC (CJump apply_fn) - ) - - -- A direct function call (possibly with some left-over arguments) - DirectEntry lbl arity regs - - -- A let-no-escape is slightly different, because we +performTailCall fun_info arg_amodes pending_assts + | Just join_sp <- maybeLetNoEscape fun_info + = -- A let-no-escape is slightly different, because we -- arrange the stack arguments into pointers and non-pointers -- to make the heap check easier. The tail-call sequence -- is very similar to returning an unboxed tuple, so we -- share some code. - | is_let_no_escape -> - pushUnboxedTuple sp arg_amodes `thenFC` \ (final_sp, assts) -> - returnFC (final_sp, assts, absC (CJump (CLbl lbl CodePtrRep))) - - - -- A normal fast call - | otherwise -> - let - -- first chunk of args go in registers - (reg_arg_amodes, stk_arg_amodes) = - splitAtList regs arg_amodes - - -- the rest of this function's args go straight on the stack - (stk_args, extra_stk_args) = - splitAt (arity - length regs) stk_arg_amodes - - -- any "extra" arguments are placed in frames on the - -- stack after the other arguments. 
- slow_stk_args = slowArgs extra_stk_args - - reg_assts - = mkAbstractCs (zipWithEqual "assign_to_reg2" - assign_to_reg regs reg_arg_amodes) + do { (final_sp, arg_assts) <- pushUnboxedTuple join_sp arg_amodes + ; emitSimultaneously (pending_assts `plusStmts` arg_assts) + ; let lbl = enterReturnPtLabel (idUnique (cgIdInfoId fun_info)) + ; doFinalJump final_sp True {- Is LNE -} (jumpToLbl lbl) } + + | otherwise + = do { fun_amode <- idInfoToAmode fun_info + ; let node_asst = oneStmt (CmmAssign nodeReg fun_amode) + opt_node_asst | nodeMustPointToIt lf_info = node_asst + | otherwise = noStmts + ; EndOfBlockInfo sp _ <- getEndOfBlockInfo + + ; case (getCallMethod fun_name lf_info (length arg_amodes)) of + + -- Node must always point to things we enter + EnterIt -> do + { emitSimultaneously (node_asst `plusStmts` pending_assts) + ; let target = entryCode (closureInfoPtr (CmmReg nodeReg)) + ; doFinalJump sp False (stmtC (CmmJump target [])) } + + -- A function, but we have zero arguments. It is already in WHNF, + -- so we can just return it. + -- As with any return, Node must point to it. + ReturnIt -> do + { emitSimultaneously (node_asst `plusStmts` pending_assts) + ; doFinalJump sp False emitDirectReturnInstr } + + -- A real constructor. Don't bother entering it, + -- just do the right sort of return instead. + -- As with any return, Node must point to it. 
+ ReturnCon con -> do + { emitSimultaneously (node_asst `plusStmts` pending_assts) + ; doFinalJump sp False (emitKnownConReturnCode con) } + + JumpToIt lbl -> do + { emitSimultaneously (opt_node_asst `plusStmts` pending_assts) + ; doFinalJump sp False (jumpToLbl lbl) } + + -- A slow function call via the RTS apply routines + -- Node must definitely point to the thing + SlowCall -> do + { let (apply_lbl, new_amodes) = constructSlowCall arg_amodes - in - mkStkAmodes sp (stk_args ++ slow_stk_args) `thenFC` - \ (final_sp, stk_assts) -> + -- Fill in all the arguments on the stack + ; (final_sp,stk_assts) <- mkStkAmodes sp new_amodes + + ; emitSimultaneously (node_asst `plusStmts` stk_assts + `plusStmts` pending_assts) + + ; when (not (null arg_amodes)) $ do + { if (isKnownFun lf_info) + then tickyKnownCallTooFewArgs + else tickyUnknownCall + ; tickySlowCallPat (map fst arg_amodes) + } + + ; doFinalJump (final_sp + 1) + -- Add one, because the stg_ap functions + -- expect there to be a free slot on the stk + False (jumpToLbl apply_lbl) + } + + -- A direct function call (possibly with some left-over arguments) + DirectEntry lbl arity -> do + { let + -- The args beyond the arity go straight on the stack + (arity_args, extra_stk_args) = splitAt arity arg_amodes + + -- First chunk of args go in registers + (reg_arg_amodes, stk_args) = assignCallRegs arity_args + + -- Any "extra" arguments are placed in frames on the + -- stack after the other arguments. 
+ slow_stk_args = slowArgs extra_stk_args + + reg_assts = assignToRegs reg_arg_amodes + + ; if null slow_stk_args + then tickyKnownCallExact + else do tickyKnownCallExtraArgs + tickySlowCallPat (map fst extra_stk_args) + + ; (final_sp, stk_assts) <- mkStkAmodes sp + (stk_args ++ slow_stk_args) + + ; emitSimultaneously (opt_node_asst `plusStmts` + reg_assts `plusStmts` + stk_assts `plusStmts` + pending_assts) + + ; doFinalJump final_sp False (jumpToLbl lbl) } + } + where + fun_name = idName (cgIdInfoId fun_info) + lf_info = cgIdInfoLF fun_info - returnFC - (final_sp, - mkAbstractCs [reg_assts, stk_assts], - absC (CJump (CLbl lbl CodePtrRep)) - ) - where is_let_no_escape = case fun_amode of - CJoinPoint _ -> True - _ -> False -- ----------------------------------------------------------------------------- -- The final clean-up before we do a jump at the end of a basic block. -- This code is shared by tail-calls and returns. -doFinalJump :: VirtualSpOffset -> AbstractC -> Bool -> (Sequel -> Code) -> Code -doFinalJump final_sp sim_assts is_let_no_escape jump_code = - - -- adjust the high-water mark if necessary - adjustStackHW final_sp `thenC` +doFinalJump :: VirtualSpOffset -> Bool -> Code -> Code +doFinalJump final_sp is_let_no_escape jump_code + = do { -- Adjust the high-water mark if necessary + adjustStackHW final_sp - -- Do the simultaneous assignments, - absC (CSimultaneous sim_assts) `thenC` - - -- push a return address if necessary (after the assignments + -- Push a return address if necessary (after the assignments -- above, in case we clobber a live stack location) -- -- DONT push the return address when we're about to jump to a -- let-no-escape: the final tail call in the let-no-escape -- will do this. 
- getEndOfBlockInfo `thenFC` \ eob@(EndOfBlockInfo args_sp sequel) -> - (if is_let_no_escape then nopC - else pushReturnAddress eob) `thenC` + ; eob <- getEndOfBlockInfo + ; whenC (not is_let_no_escape) (pushReturnAddress eob) - -- Final adjustment of Sp/Hp - adjustSpAndHp final_sp `thenC` + -- Final adjustment of Sp/Hp + ; adjustSpAndHp final_sp - -- and do the jump - jump_code sequel + -- and do the jump + ; jump_code } -- ----------------------------------------------------------------------------- -- A general return (just a special case of doFinalJump, above) -performReturn :: AbstractC -- Simultaneous assignments to perform - -> (Sequel -> Code) -- The code to execute to actually do - -- the return, given an addressing mode - -- for the return address +performReturn :: Code -- The code to execute to actually do the return -> Code -performReturn sim_assts finish_code - = getEndOfBlockInfo `thenFC` \ eob@(EndOfBlockInfo args_sp sequel) -> - doFinalJump args_sp sim_assts False{-not a LNE-} finish_code +performReturn finish_code + = do { EndOfBlockInfo args_sp sequel <- getEndOfBlockInfo + ; doFinalJump args_sp False{-not a LNE-} finish_code } -- ----------------------------------------------------------------------------- -- Primitive Returns - -- Just load the return value into the right register, and return. 
-performPrimReturn :: SDoc -- Just for debugging (sigh) - -> CAddrMode -- The thing to return +performPrimReturn :: CgRep -> CmmExpr -- The thing to return -> Code - -performPrimReturn doc amode - = let - kind = getAmodeRep amode - ret_reg = dataReturnConvPrim kind - - assign_possibly = case kind of - VoidRep -> AbsCNop - kind -> (CAssign (CReg ret_reg) amode) - in - performReturn assign_possibly (mkPrimReturnCode doc) - -mkPrimReturnCode :: SDoc -- Debugging only - -> Sequel - -> Code -mkPrimReturnCode doc UpdateCode = pprPanic "mkPrimReturnCode: Upd" doc -mkPrimReturnCode doc sequel = sequelToAmode sequel `thenFC` \ dest_amode -> - absC (CReturn dest_amode DirectReturn) - -- Direct, no vectoring +performPrimReturn rep amode + = do { whenC (not (isVoidArg rep)) + (stmtC (CmmAssign ret_reg amode)) + ; performReturn emitDirectReturnInstr } + where + ret_reg = dataReturnConvPrim rep -- ----------------------------------------------------------------------------- -- Algebraic constructor returns -- Constructor is built on the heap; Node is set. --- All that remains is --- (a) to set TagReg, if necessary --- (c) to do the right sort of jump. - -mkStaticAlgReturnCode :: DataCon -- The constructor - -> Sequel -- where to return to - -> Code - -mkStaticAlgReturnCode con sequel - = -- Generate profiling code if necessary - (case return_convention of - VectoredReturn sz -> profCtrC FSLIT("TICK_VEC_RETURN") [mkIntCLit sz] - other -> nopC - ) `thenC` - - -- Set tag if necessary - -- This is done by a macro, because if we are short of registers - -- we don't set TagReg; instead the continuation gets the tag - -- by indexing off the info ptr - (case return_convention of - - UnvectoredReturn no_of_constrs - | no_of_constrs > 1 - -> absC (CMacroStmt SET_TAG [mkIntCLit zero_indexed_tag]) - - other -> nopC - ) `thenC` - - -- Generate the right jump or return - (case sequel of - CaseAlts _ (Just (alts, _)) False -> -- Ho! 
We know the constructor so - -- we can go right to the alternative - - case assocMaybe alts tag of - Just (alt_absC, join_lbl) -> - absC (CJump (CLbl join_lbl CodePtrRep)) - Nothing -> panic "mkStaticAlgReturnCode: default" - -- The Nothing case should never happen; - -- it's the subject of a wad of special-case - -- code in cgReturnCon - - other -> -- OnStack, or (CaseAlts ret_amode Nothing), - -- or UpdateCode. - sequelToAmode sequel `thenFC` \ ret_amode -> - absC (CReturn ret_amode return_info) - ) +-- All that remains is to do the right sort of jump. - where - tag = dataConTag con - tycon = dataConTyCon con - return_convention = ctrlReturnConvAlg tycon - zero_indexed_tag = tag - fIRST_TAG -- Adjust tag to be zero-indexed - -- cf AbsCUtils.mkAlgAltsCSwitch - - return_info = - case return_convention of - UnvectoredReturn _ -> DirectReturn - VectoredReturn _ -> StaticVectoredReturn zero_indexed_tag - - --- ----------------------------------------------------------------------------- --- Returning an enumerated type from a PrimOp +emitKnownConReturnCode :: DataCon -> Code +emitKnownConReturnCode con + = emitAlgReturnCode (dataConTyCon con) + (CmmLit (mkIntCLit (dataConTagZ con))) + -- emitAlgReturnCode requires zero-indexed tag --- This function is used by PrimOps that return enumerated types (i.e. +emitAlgReturnCode :: TyCon -> CmmExpr -> Code +-- emitAlgReturnCode is used both by emitKnownConReturnCode, +-- and by by PrimOps that return enumerated types (i.e. -- all the comparison operators). 
- -mkDynamicAlgReturnCode :: TyCon -> CAddrMode -> Sequel -> Code - -mkDynamicAlgReturnCode tycon dyn_tag sequel - = case ctrlReturnConvAlg tycon of - VectoredReturn sz -> - - profCtrC FSLIT("TICK_VEC_RETURN") [mkIntCLit sz] `thenC` - sequelToAmode sequel `thenFC` \ ret_addr -> - absC (CReturn ret_addr (DynamicVectoredReturn dyn_tag)) - - UnvectoredReturn no_of_constrs -> - - -- Set tag if necessary - -- This is done by a macro, because if we are short of registers - -- we don't set TagReg; instead the continuation gets the tag - -- by indexing off the info ptr - (if no_of_constrs > 1 then - absC (CMacroStmt SET_TAG [dyn_tag]) - else - nopC - ) `thenC` - - - sequelToAmode sequel `thenFC` \ ret_addr -> - -- Generate the right jump or return - absC (CReturn ret_addr DirectReturn) +emitAlgReturnCode tycon tag + = do { case ctrlReturnConvAlg tycon of + VectoredReturn fam_sz -> do { tickyVectoredReturn fam_sz + ; emitVectoredReturnInstr tag } + UnvectoredReturn _ -> emitDirectReturnInstr + } -- --------------------------------------------------------------------------- @@ -424,59 +290,37 @@ mkDynamicAlgReturnCode tycon dyn_tag sequel -- let-no-escape functions, because they also can't be partially -- applied. 
-returnUnboxedTuple :: [CAddrMode] -> Code -returnUnboxedTuple amodes = - getEndOfBlockInfo `thenFC` \ eob@(EndOfBlockInfo args_sp sequel) -> - - profCtrC FSLIT("TICK_RET_UNBOXED_TUP") [mkIntCLit (length amodes)] `thenC` - - pushUnboxedTuple args_sp amodes `thenFC` \ (final_sp, assts) -> - doFinalJump final_sp assts False{-not a LNE-} mkUnboxedTupleReturnCode - - -pushUnboxedTuple - :: VirtualSpOffset -- Sp at which to start pushing - -> [CAddrMode] -- amodes of the components - -> FCode (VirtualSpOffset, -- final Sp - AbstractC) -- assignments (regs+stack) - -pushUnboxedTuple sp amodes = - let - (arg_regs, _leftovers) = assignRegs [] (map getAmodeRep amodes) - - (reg_arg_amodes, stk_arg_amodes) = splitAtList arg_regs amodes - - -- separate the rest of the args into pointers and non-pointers - ( ptr_args, nptr_args ) = - partition (isFollowableRep . getAmodeRep) stk_arg_amodes - - reg_arg_assts - = mkAbstractCs (zipWithEqual "assign_to_reg2" - assign_to_reg arg_regs reg_arg_amodes) - in - - -- push ptrs, then nonptrs, on the stack - mkStkAmodes sp ptr_args `thenFC` \ (ptr_sp, ptr_assts) -> - mkStkAmodes ptr_sp nptr_args `thenFC` \ (final_sp, nptr_assts) -> +returnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code +returnUnboxedTuple amodes + = do { eob@(EndOfBlockInfo args_sp sequel) <- getEndOfBlockInfo + ; tickyUnboxedTupleReturn (length amodes) + ; (final_sp, assts) <- pushUnboxedTuple args_sp amodes + ; emitSimultaneously assts + ; doFinalJump final_sp False{-not a LNE-} emitDirectReturnInstr } + +pushUnboxedTuple :: VirtualSpOffset -- Sp at which to start pushing + -> [(CgRep, CmmExpr)] -- amodes of the components + -> FCode (VirtualSpOffset, -- final Sp + CmmStmts) -- assignments (regs+stack) + +pushUnboxedTuple sp [] + = return (sp, noStmts) +pushUnboxedTuple sp amodes + = do { let (reg_arg_amodes, stk_arg_amodes) = assignReturnRegs amodes + + -- separate the rest of the args into pointers and non-pointers + (ptr_args, nptr_args) = separateByPtrFollowness 
stk_arg_amodes + reg_arg_assts = assignToRegs reg_arg_amodes + + -- push ptrs, then nonptrs, on the stack + ; (ptr_sp, ptr_assts) <- mkStkAmodes sp ptr_args + ; (final_sp, nptr_assts) <- mkStkAmodes ptr_sp nptr_args - returnFC (final_sp, - mkAbstractCs [reg_arg_assts, ptr_assts, nptr_assts]) + ; returnFC (final_sp, + reg_arg_assts `plusStmts` + ptr_assts `plusStmts` nptr_assts) } - -mkUnboxedTupleReturnCode :: Sequel -> Code -mkUnboxedTupleReturnCode sequel - = case sequel of - -- can't update with an unboxed tuple! - UpdateCode -> panic "mkUnboxedTupleReturnCode" - - CaseAlts _ (Just ([(_,(alt_absC,join_lbl))], _)) False -> - absC (CJump (CLbl join_lbl CodePtrRep)) - - other -> -- OnStack, or (CaseAlts ret_amode something) - sequelToAmode sequel `thenFC` \ ret_amode -> - absC (CReturn ret_amode DirectReturn) - -- ----------------------------------------------------------------------------- -- Returning unboxed tuples. This is mainly to support _ccall_GC_, where -- we want to do things in a slightly different order to normal: @@ -494,44 +338,35 @@ mkUnboxedTupleReturnCode sequel -- (in order to avoid pushing it again), so we end up doing a needless -- indirect jump (ToDo). 
-ccallReturnUnboxedTuple :: [CAddrMode] -> Code -> Code +ccallReturnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code -> Code ccallReturnUnboxedTuple amodes before_jump - = getEndOfBlockInfo `thenFC` \ eob@(EndOfBlockInfo args_sp sequel) -> - - -- push a return address if necessary - pushReturnAddress eob `thenC` - setEndOfBlockInfo (EndOfBlockInfo args_sp (OnStack args_sp)) ( - - -- Adjust Sp/Hp - adjustSpAndHp args_sp `thenC` + = do { eob@(EndOfBlockInfo args_sp _) <- getEndOfBlockInfo - before_jump `thenC` - - returnUnboxedTuple amodes - ) + -- Push a return address if necessary + ; pushReturnAddress eob + ; setEndOfBlockInfo (EndOfBlockInfo args_sp OnStack) + (do { adjustSpAndHp args_sp + ; before_jump + ; returnUnboxedTuple amodes }) + } -- ----------------------------------------------------------------------------- -- Calling an out-of-line primop tailCallPrimOp :: PrimOp -> [StgArg] -> Code -tailCallPrimOp op args = - -- we're going to perform a normal-looking tail call, - -- except that *all* the arguments will be in registers. - getArgAmodes args `thenFC` \ arg_amodes -> - let (arg_regs, leftovers) = assignAllRegs [] (map getAmodeRep arg_amodes) +tailCallPrimOp op args + = do { -- We're going to perform a normal-looking tail call, + -- except that *all* the arguments will be in registers. 
+ -- Hence the ASSERT( null leftovers ) + arg_amodes <- getArgAmodes args + ; let (arg_regs, leftovers) = assignPrimOpCallRegs arg_amodes + jump_to_primop = jumpToLbl (mkRtsPrimOpLabel op) - reg_arg_assts - = mkAbstractCs (zipWithEqual "assign_to_reg2" - assign_to_reg arg_regs arg_amodes) + ; ASSERT(null leftovers) -- no stack-resident args + emitSimultaneously (assignToRegs arg_regs) - jump_to_primop = - absC (CJump (CLbl (mkRtsPrimOpLabel op) CodePtrRep)) - in - - ASSERT(null leftovers) -- no stack-resident args - - getEndOfBlockInfo `thenFC` \ eob@(EndOfBlockInfo args_sp sequel) -> - doFinalJump args_sp reg_arg_assts False{-not a LNE-} (const jump_to_primop) + ; EndOfBlockInfo args_sp _ <- getEndOfBlockInfo + ; doFinalJump args_sp False{-not a LNE-} jump_to_primop } -- ----------------------------------------------------------------------------- -- Return Addresses @@ -551,23 +386,72 @@ tailCallPrimOp op args = pushReturnAddress :: EndOfBlockInfo -> Code -pushReturnAddress (EndOfBlockInfo args_sp sequel@(CaseAlts amode _ False)) = - getSpRelOffset args_sp `thenFC` \ sp_rel -> - absC (CAssign (CVal sp_rel RetRep) amode) +pushReturnAddress (EndOfBlockInfo args_sp sequel@(CaseAlts lbl _ _ False)) + = do { sp_rel <- getSpRelOffset args_sp + ; stmtC (CmmStore sp_rel (mkLblExpr lbl)) } -- For a polymorphic case, we have two return addresses to push: the case -- return, and stg_seq_frame_info which turns a possible vectored return -- into a direct one. 
-pushReturnAddress (EndOfBlockInfo args_sp sequel@(CaseAlts amode _ True)) = - getSpRelOffset (args_sp-1) `thenFC` \ sp_rel -> - absC (CAssign (CVal sp_rel RetRep) amode) `thenC` - getSpRelOffset args_sp `thenFC` \ sp_rel -> - absC (CAssign (CVal sp_rel RetRep) (CLbl mkSeqInfoLabel RetRep)) +pushReturnAddress (EndOfBlockInfo args_sp sequel@(CaseAlts lbl _ _ True)) + = do { sp_rel <- getSpRelOffset (args_sp-1) + ; stmtC (CmmStore sp_rel (mkLblExpr lbl)) + ; sp_rel <- getSpRelOffset args_sp + ; stmtC (CmmStore sp_rel (CmmLit (CmmLabel mkSeqInfoLabel))) } + pushReturnAddress _ = nopC -- ----------------------------------------------------------------------------- -- Misc. -assign_to_reg reg_id amode = CAssign (CReg reg_id) amode +jumpToLbl :: CLabel -> Code +-- Passes no argument to the destination procedure +jumpToLbl lbl = stmtC (CmmJump (CmmLit (CmmLabel lbl)) [{- No args -}]) +assignToRegs :: [(CmmExpr, GlobalReg)] -> CmmStmts +assignToRegs reg_args + = mkStmts [ CmmAssign (CmmGlobal reg_id) expr + | (expr, reg_id) <- reg_args ] +\end{code} + + +%************************************************************************ +%* * +\subsection[CgStackery-adjust]{Adjusting the stack pointers} +%* * +%************************************************************************ + +This function adjusts the stack and heap pointers just before a tail +call or return. The stack pointer is adjusted to its final position +(i.e. to point to the last argument for a tail call, or the activation +record for a return). The heap pointer may be moved backwards, in +cases where we overallocated at the beginning of the basic block (see +CgCase.lhs for discussion). + +These functions {\em do not} deal with high-water-mark adjustment. +That's done by functions which allocate stack space. + +\begin{code} +adjustSpAndHp :: VirtualSpOffset -- New offset for Arg stack ptr + -> Code +adjustSpAndHp newRealSp + = do { -- Adjust stack, if necessary. 
+ -- NB: the conditional on the monad-carried realSp + -- is out of line (via codeOnly), to avoid a black hole + ; new_sp <- getSpRelOffset newRealSp + ; checkedAbsC (CmmAssign spReg new_sp) -- Will generate no code in the case + ; setRealSp newRealSp -- where realSp==newRealSp + + -- Adjust heap. The virtual heap pointer may be less than the real Hp + -- because the latter was advanced to deal with the worst-case branch + -- of the code, and we may be in a better-case branch. In that case, + -- move the real Hp *back* and retract some ticky allocation count. + ; hp_usg <- getHpUsage + ; let rHp = realHp hp_usg + vHp = virtHp hp_usg + ; new_hp <- getHpRelOffset vHp + ; checkedAbsC (CmmAssign hpReg new_hp) -- Generates nothing when vHp==rHp + ; tickyAllocHeap (vHp - rHp) -- ...ditto + ; setRealHp vHp + } \end{code} diff --git a/ghc/compiler/codeGen/CgTicky.hs b/ghc/compiler/codeGen/CgTicky.hs new file mode 100644 index 0000000000..19dbc43aac --- /dev/null +++ b/ghc/compiler/codeGen/CgTicky.hs @@ -0,0 +1,370 @@ +----------------------------------------------------------------------------- +-- +-- Code generation for ticky-ticky profiling +-- +-- (c) The University of Glasgow 2004 +-- +----------------------------------------------------------------------------- + +module CgTicky ( + emitTickyCounter, + + tickyDynAlloc, + tickyAllocHeap, + tickyAllocPrim, + tickyAllocThunk, + tickyAllocPAP, + + tickyPushUpdateFrame, + tickyUpdateFrameOmitted, + + tickyEnterDynCon, + tickyEnterStaticCon, + tickyEnterViaNode, + + tickyEnterFun, + tickyEnterThunk, + + tickyUpdateBhCaf, + tickyBlackHole, + tickyUnboxedTupleReturn, tickyVectoredReturn, + tickyReturnOldCon, tickyReturnNewCon, + + tickyKnownCallTooFewArgs, tickyKnownCallExact, tickyKnownCallExtraArgs, + tickyUnknownCall, tickySlowCallPat, + + staticTickyHdr, + ) where + +#include "HsVersions.h" +#include "../includes/DerivedConstants.h" + -- For REP_xxx constants, which are MachReps + +import ClosureInfo ( ClosureInfo, 
closureSize, slopSize, closureSMRep, + closureUpdReqd, closureName, isStaticClosure ) +import CgUtils +import CgMonad +import SMRep ( ClosureType(..), smRepClosureType, CgRep ) + +import Cmm +import MachOp +import CmmUtils ( zeroCLit, mkIntCLit, mkLblExpr, cmmIndexExpr ) +import CLabel ( CLabel, mkRtsDataLabel, mkRednCountsLabel ) + +import Name ( isInternalName ) +import Id ( Id, idType ) +import CmdLineOpts ( opt_DoTickyProfiling ) +import BasicTypes ( Arity ) +import FastString ( FastString, mkFastString, LitString ) +import Constants -- Lots of field offsets +import Outputable + +-- Turgid imports for showTypeCategory +import PrelNames +import TcType ( Type, isDictTy, tcSplitTyConApp_maybe, + tcSplitFunTy_maybe ) +import TyCon ( isPrimTyCon, isTupleTyCon, isEnumerationTyCon, + maybeTyConSingleCon ) +import Maybe + +----------------------------------------------------------------------------- +-- +-- Ticky-ticky profiling +-- +----------------------------------------------------------------------------- + +staticTickyHdr :: [CmmLit] +-- The ticky header words in a static closure +-- Was SET_STATIC_TICKY_HDR +staticTickyHdr + | not opt_DoTickyProfiling = [] + | otherwise = [zeroCLit] + +emitTickyCounter :: ClosureInfo -> [Id] -> Int -> Code +emitTickyCounter cl_info args on_stk + = ifTicky $ + do { mod_name <- moduleName + ; fun_descr_lit <- mkStringCLit (fun_descr mod_name) + ; arg_descr_lit <- mkStringCLit arg_descr + ; emitDataLits ticky_ctr_label -- Must match layout of StgEntCounter + [ CmmInt 0 I16, + CmmInt (fromIntegral (length args)) I16, -- Arity + CmmInt (fromIntegral on_stk) I16, -- Words passed on stack + CmmInt 0 I16, -- 2-byte gap + fun_descr_lit, + arg_descr_lit, + zeroCLit, -- Entry count + zeroCLit, -- Allocs + zeroCLit -- Link + ] } + where + name = closureName cl_info + ticky_ctr_label = mkRednCountsLabel name + arg_descr = map (showTypeCategory . 
idType) args + fun_descr mod_name = ppr_for_ticky_name mod_name name + +-- When printing the name of a thing in a ticky file, we want to +-- give the module name even for *local* things. We print +-- just "x (M)" rather that "M.x" to distinguish them from the global kind. +ppr_for_ticky_name mod_name name + | isInternalName name = showSDocDebug (ppr name <+> (parens (ppr mod_name))) + | otherwise = showSDocDebug (ppr name) + +-- ----------------------------------------------------------------------------- +-- Ticky stack frames + +tickyPushUpdateFrame = ifTicky $ bumpTickyCounter SLIT("UPDF_PUSHED_ctr") +tickyUpdateFrameOmitted = ifTicky $ bumpTickyCounter SLIT("UPDF_OMITTED_ctr") + +-- ----------------------------------------------------------------------------- +-- Ticky entries + +tickyEnterDynCon = ifTicky $ bumpTickyCounter SLIT("ENT_DYN_CON_ctr") +tickyEnterDynThunk = ifTicky $ bumpTickyCounter SLIT("ENT_DYN_THK_ctr") +tickyEnterStaticCon = ifTicky $ bumpTickyCounter SLIT("ENT_STATIC_CON_ctr") +tickyEnterStaticThunk = ifTicky $ bumpTickyCounter SLIT("ENT_STATIC_THK_ctr") +tickyEnterViaNode = ifTicky $ bumpTickyCounter SLIT("ENT_VIA_NODE_ctr") + +tickyEnterThunk :: ClosureInfo -> Code +tickyEnterThunk cl_info + | isStaticClosure cl_info = tickyEnterStaticThunk + | otherwise = tickyEnterDynThunk + +tickyBlackHole :: Bool{-updatable-} -> Code +tickyBlackHole updatable + = ifTicky (bumpTickyCounter ctr) + where + ctr | updatable = SLIT("UPD_BH_SINGLE_ENTRY_ctr") + | otherwise = SLIT("UPD_BH_UPDATABLE_ctr") + +tickyUpdateBhCaf cl_info + = ifTicky (bumpTickyCounter ctr) + where + ctr | closureUpdReqd cl_info = SLIT("UPD_CAF_BH_SINGLE_ENTRY_ctr") + | otherwise = SLIT("UPD_CAF_BH_UPDATABLE_ctr") + +tickyEnterFun :: ClosureInfo -> Code +tickyEnterFun cl_info + = ifTicky $ + do { bumpTickyCounter ctr + ; fun_ctr_lbl <- getTickyCtrLabel + ; registerTickyCtr fun_ctr_lbl + ; bumpTickyCounter' fun_ctr_lbl } + where + ctr | isStaticClosure cl_info = 
SLIT("TICK_ENT_STATIC_FUN_DIRECT") + | otherwise = SLIT("TICK_ENT_DYN_FUN_DIRECT") + +registerTickyCtr :: CLabel -> Code +-- Register a ticky counter +-- if ( ! f_ct.registeredp ) { +-- f_ct.link = ticky_entry_ctrs; /* hook this one onto the front of the list */ +-- ticky_entry_ctrs = & (f_ct); /* mark it as "registered" */ +-- f_ct.registeredp = 1 } +registerTickyCtr ctr_lbl + = emitIf test (stmtsC register_stmts) + where + test = CmmMachOp (MO_Not I16) + [CmmLoad (CmmLit (cmmLabelOffB ctr_lbl + oFFSET_StgEntCounter_registeredp)) I16] + register_stmts + = [ CmmStore (CmmLit (cmmLabelOffB ctr_lbl oFFSET_StgEntCounter_link)) + (CmmLoad ticky_entry_ctrs wordRep) + , CmmStore ticky_entry_ctrs (mkLblExpr ctr_lbl) + , CmmStore (CmmLit (cmmLabelOffB ctr_lbl + oFFSET_StgEntCounter_registeredp)) + (CmmLit (mkIntCLit 1)) ] + ticky_entry_ctrs = mkLblExpr (mkRtsDataLabel SLIT("ticky_entry_ctrs")) + +tickyReturnOldCon, tickyReturnNewCon :: Arity -> Code +tickyReturnOldCon arity + = ifTicky $ do { bumpTickyCounter SLIT("RET_OLD_ctr") + ; bumpHistogram SLIT("RET_OLD_hst") arity } +tickyReturnNewCon arity + | not opt_DoTickyProfiling = nopC + | otherwise + = ifTicky $ do { bumpTickyCounter SLIT("RET_NEW_ctr") + ; bumpHistogram SLIT("RET_NEW_hst") arity } + +tickyUnboxedTupleReturn :: Int -> Code +tickyUnboxedTupleReturn arity + = ifTicky $ do { bumpTickyCounter SLIT("RET_UNBOXED_TUP_ctr") + ; bumpHistogram SLIT("RET_UNBOXED_TUP_hst") arity } + +tickyVectoredReturn :: Int -> Code +tickyVectoredReturn family_size + = ifTicky $ do { bumpTickyCounter SLIT("VEC_RETURN_ctr") + ; bumpHistogram SLIT("RET_VEC_RETURN_hst") family_size } + +-- ----------------------------------------------------------------------------- +-- Ticky calls + +-- Ticks at a *call site*: +tickyKnownCallTooFewArgs = ifTicky $ bumpTickyCounter SLIT("KNOWN_CALL_TOO_FEW_ARGS_ctr") +tickyKnownCallExact = ifTicky $ bumpTickyCounter SLIT("KNOWN_CALL_ctr") +tickyKnownCallExtraArgs = ifTicky $ bumpTickyCounter 
SLIT("KNOWN_CALL_EXTRA_ctr") +tickyUnknownCall = ifTicky $ bumpTickyCounter SLIT("UNKNOWN_CALL_ctr") + +-- Tick for the call pattern at slow call site (i.e. in addition to +-- tickyUnknownCall, tickyKnownCallExtraArgs, etc.) +tickySlowCallPat :: [CgRep] -> Code +tickySlowCallPat args = return () +{- LATER: (introduces recursive module dependency now). + case callPattern args of + (str, True) -> bumpTickyCounter' (mkRtsSlowTickyCtrLabel pat) + (str, False) -> bumpTickyCounter SLIT("TICK_SLOW_CALL_OTHER") + +callPattern :: [CgRep] -> (String,Bool) +callPattern reps + | match == length reps = (chars, True) + | otherwise = (chars, False) + where (_,match) = findMatch reps + chars = map argChar reps + +argChar VoidArg = 'v' +argChar PtrArg = 'p' +argChar NonPtrArg = 'n' +argChar LongArg = 'l' +argChar FloatArg = 'f' +argChar DoubleArg = 'd' +-} + +-- ----------------------------------------------------------------------------- +-- Ticky allocation + +tickyDynAlloc :: ClosureInfo -> Code +-- Called when doing a dynamic heap allocation +tickyDynAlloc cl_info + = ifTicky $ + case smRepClosureType (closureSMRep cl_info) of + Constr -> tick_alloc_con + ConstrNoCaf -> tick_alloc_con + Fun -> tick_alloc_fun + Thunk -> tick_alloc_thk + ThunkSelector -> tick_alloc_thk + where + -- will be needed when we fill in stubs + cl_size = closureSize cl_info + slop_size = slopSize cl_info + + tick_alloc_thk + | closureUpdReqd cl_info = tick_alloc_up_thk + | otherwise = tick_alloc_se_thk + + tick_alloc_con = panic "ToDo: tick_alloc" + tick_alloc_fun = panic "ToDo: tick_alloc" + tick_alloc_up_thk = panic "ToDo: tick_alloc" + tick_alloc_se_thk = panic "ToDo: tick_alloc" + +tickyAllocPrim :: CmmExpr -> CmmExpr -> CmmExpr -> Code +tickyAllocPrim hdr goods slop = ifTicky $ panic "ToDo: tickyAllocPrim" + +tickyAllocThunk :: CmmExpr -> CmmExpr -> Code +tickyAllocThunk goods slop = ifTicky $ panic "ToDo: tickyAllocThunk" + +tickyAllocPAP :: CmmExpr -> CmmExpr -> Code +tickyAllocPAP goods slop = 
ifTicky $ panic "ToDo: tickyAllocPAP" + +tickyAllocHeap :: VirtualHpOffset -> Code +-- Called when doing a heap check [TICK_ALLOC_HEAP] +tickyAllocHeap hp + = ifTicky $ + do { ticky_ctr <- getTickyCtrLabel + ; stmtsC $ + if hp == 0 then [] -- Inside the stmtC to avoid control + else [ -- dependency on the argument + -- Bump the allcoation count in the StgEntCounter + addToMem REP_StgEntCounter_allocs + (CmmLit (cmmLabelOffB ticky_ctr + oFFSET_StgEntCounter_allocs)) hp, + -- Bump ALLOC_HEAP_ctr + addToMemLbl cLongRep (mkRtsDataLabel SLIT("ALLOC_HEAP_ctr")) 1, + -- Bump ALLOC_HEAP_tot + addToMemLbl cLongRep (mkRtsDataLabel SLIT("ALLOC_HEAP_tot")) hp] } + +-- ----------------------------------------------------------------------------- +-- Ticky utils + +ifTicky :: Code -> Code +ifTicky code + | opt_DoTickyProfiling = code + | otherwise = nopC + +addToMemLbl :: MachRep -> CLabel -> Int -> CmmStmt +addToMemLbl rep lbl n = addToMem rep (CmmLit (CmmLabel lbl)) n + +-- All the ticky-ticky counters are declared "unsigned long" in C +bumpTickyCounter :: LitString -> Code +bumpTickyCounter lbl = bumpTickyCounter' (mkRtsDataLabel lbl) + +bumpTickyCounter' :: CLabel -> Code +bumpTickyCounter' lbl = stmtC (addToMemLbl cLongRep lbl 1) + +addToMemLong = addToMem cLongRep + +bumpHistogram :: LitString -> Int -> Code +bumpHistogram lbl n + = bumpHistogramE lbl (CmmLit (CmmInt (fromIntegral n) cLongRep)) + +bumpHistogramE :: LitString -> CmmExpr -> Code +bumpHistogramE lbl n + = do t <- newTemp cLongRep + stmtC (CmmAssign t n) + emitIf (CmmMachOp (MO_U_Le cLongRep) [CmmReg t, eight]) $ + stmtC (CmmAssign t eight) + stmtC (addToMemLong (cmmIndexExpr cLongRep + (CmmLit (CmmLabel (mkRtsDataLabel lbl))) + (CmmReg t)) + 1) + where + eight = CmmLit (CmmInt 8 cLongRep) + +------------------------------------------------------------------ +-- Showing the "type category" for ticky-ticky profiling + +showTypeCategory :: Type -> Char + {- {C,I,F,D} char, int, float, double + T tuple + S other 
single-constructor type + {c,i,f,d} unboxed ditto + t *unpacked* tuple + s *unpacked" single-cons... + + v void# + a primitive array + + E enumeration type + + dictionary, unless it's a ... + L List + > function + M other (multi-constructor) data-con type + . other type + - reserved for others to mark as "uninteresting" + -} +showTypeCategory ty + = if isDictTy ty + then '+' + else + case tcSplitTyConApp_maybe ty of + Nothing -> if isJust (tcSplitFunTy_maybe ty) + then '>' + else '.' + + Just (tycon, _) -> + let utc = getUnique tycon in + if utc == charDataConKey then 'C' + else if utc == intDataConKey then 'I' + else if utc == floatDataConKey then 'F' + else if utc == doubleDataConKey then 'D' + else if utc == smallIntegerDataConKey || + utc == largeIntegerDataConKey then 'J' + else if utc == charPrimTyConKey then 'c' + else if (utc == intPrimTyConKey || utc == wordPrimTyConKey + || utc == addrPrimTyConKey) then 'i' + else if utc == floatPrimTyConKey then 'f' + else if utc == doublePrimTyConKey then 'd' + else if isPrimTyCon tycon {- array, we hope -} then 'A' -- Bogus + else if isEnumerationTyCon tycon then 'E' + else if isTupleTyCon tycon then 'T' + else if isJust (maybeTyConSingleCon tycon) then 'S' + else if utc == listTyConKey then 'L' + else 'M' -- oh, well... 
diff --git a/ghc/compiler/codeGen/CgUpdate.lhs b/ghc/compiler/codeGen/CgUpdate.lhs deleted file mode 100644 index 879dafe4f6..0000000000 --- a/ghc/compiler/codeGen/CgUpdate.lhs +++ /dev/null @@ -1,61 +0,0 @@ -% -% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 -% -\section[CgUpdate]{Manipulating update frames} - -\begin{code} -module CgUpdate ( pushUpdateFrame ) where - -#include "HsVersions.h" - -import CgMonad -import AbsCSyn - -import CgStackery ( allocStackTop, updateFrameSize, setStackFrame ) -import CgUsages ( getVirtSp ) -import Panic ( assertPanic ) -\end{code} - - -%******************************************************** -%* * -%* Setting up update frames * -%* * -%******************************************************** -\subsection[setting-update-frames]{Setting up update frames} - -@pushUpdateFrame@ $updatee$ pushes a general update frame which -points to $updatee$ as the thing to be updated. It is only used -when a thunk has just been entered, so the (real) stack pointers -are guaranteed to be nicely aligned with the top of stack. -@pushUpdateFrame@ adjusts the virtual and tail stack pointers -to reflect the frame pushed. 
- -\begin{code} -pushUpdateFrame :: CAddrMode -> Code -> Code - -pushUpdateFrame updatee code - = -#ifdef DEBUG - getEndOfBlockInfo `thenFC` \ eob_info -> - ASSERT(case eob_info of { EndOfBlockInfo _ (OnStack _) -> True; - _ -> False}) -#endif - - allocStackTop updateFrameSize `thenFC` \ _ -> - getVirtSp `thenFC` \ vsp -> - - setStackFrame vsp `thenC` - - setEndOfBlockInfo (EndOfBlockInfo vsp UpdateCode) ( - - -- Emit the push macro - absC (CMacroStmt PUSH_UPD_FRAME [ - updatee, - int_CLit0 -- we just entered a closure, so must be zero - ]) - `thenC` code - ) - -int_CLit0 = mkIntCLit 0 -- out here to avoid pushUpdateFrame CAF (sigh) -\end{code} diff --git a/ghc/compiler/codeGen/CgUsages.lhs b/ghc/compiler/codeGen/CgUsages.lhs deleted file mode 100644 index c8b98f696d..0000000000 --- a/ghc/compiler/codeGen/CgUsages.lhs +++ /dev/null @@ -1,170 +0,0 @@ -% -% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 -% -\section[CgUsages]{Accessing and modifying stacks and heap usage info} - -This module provides the functions to access (\tr{get*} functions) and -modify (\tr{set*} functions) the stacks and heap usage information. - -\begin{code} -module CgUsages ( - initHeapUsage, setVirtHp, getVirtAndRealHp, setRealHp, - setRealAndVirtualSp, - - getVirtSp, getRealSp, - - getHpRelOffset, getSpRelOffset, - - adjustSpAndHp - ) where - -#include "HsVersions.h" - -import AbsCSyn -import PrimRep ( PrimRep(..) ) -import AbsCUtils ( mkAbstractCs ) -import CgMonad -\end{code} - -%************************************************************************ -%* * -\subsection[CgUsages-heapery]{Monad things for fiddling with heap usage} -%* * -%************************************************************************ - -@initHeapUsage@ applies a function to the amount of heap that it uses. -It initialises the heap usage to zeros, and passes on an unchanged -heap usage. - -It is usually a prelude to performing a GC check, so everything must -be in a tidy and consistent state. 
- -rje: Note the slightly suble fixed point behaviour needed here -\begin{code} -initHeapUsage :: (VirtualHeapOffset -> Code) -> Code - -initHeapUsage fcode = do - (stk_usage, heap_usage) <- getUsage - setUsage (stk_usage, (0,0)) - fixC (\heap_usage2 -> do - fcode (heapHWM heap_usage2) - (_, heap_usage2) <- getUsage - return heap_usage2) - (stk_usage2, heap_usage2) <- getUsage - setUsage (stk_usage2, heap_usage {-unchanged -}) -\end{code} - -\begin{code} -setVirtHp :: VirtualHeapOffset -> Code -setVirtHp new_virtHp = do - (stk, (virtHp, realHp)) <- getUsage - setUsage (stk, (new_virtHp, realHp)) -\end{code} - -\begin{code} -getVirtAndRealHp :: FCode (VirtualHeapOffset, VirtualHeapOffset) -getVirtAndRealHp = do - (_, (virtHp, realHp)) <- getUsage - return (virtHp, realHp) -\end{code} - -\begin{code} -setRealHp :: VirtualHeapOffset -> Code -setRealHp realHp = do - (stk_usage, (vHp, _)) <- getUsage - setUsage (stk_usage, (vHp, realHp)) -\end{code} - -\begin{code} -getHpRelOffset :: VirtualHeapOffset -> FCode RegRelative -getHpRelOffset virtual_offset = do - (_,(_,realHp)) <- getUsage - return $ hpRel realHp virtual_offset -\end{code} - -The heap high water mark is the larger of virtHp and hwHp. The latter is -only records the high water marks of forked-off branches, so to find the -heap high water mark you have to take the max of virtHp and hwHp. Remember, -virtHp never retreats! - -\begin{code} -heapHWM (virtHp, realHp) = virtHp -\end{code} - -%************************************************************************ -%* * -\subsection[CgUsages-stackery]{Monad things for fiddling with stack usage} -%* * -%************************************************************************ - -@setRealAndVirtualSp@ sets into the environment the offsets of the -current position of the real and virtual stack pointers in the current -stack frame. The high-water mark is set too. It generates no code. -It is used to initialise things at the beginning of a closure body. 
- -\begin{code} -setRealAndVirtualSp :: VirtualSpOffset -- New real Sp - -> Code - -setRealAndVirtualSp sp = do - ((vsp,frame,f,realSp,hwsp), h_usage) <- getUsage - let new_usage = ((sp, frame, f, sp, sp), h_usage) - setUsage new_usage -\end{code} - -\begin{code} -getVirtSp :: FCode VirtualSpOffset -getVirtSp = do - ((virtSp,_,_,_,_), _) <- getUsage - return virtSp - -getRealSp :: FCode VirtualSpOffset -getRealSp = do - ((_,_,_,realSp,_),_) <- getUsage - return realSp -\end{code} - -\begin{code} -getSpRelOffset :: VirtualSpOffset -> FCode RegRelative -getSpRelOffset virtual_offset = do - ((_,_,_,realSp,_),_) <- getUsage - return $ spRel realSp virtual_offset -\end{code} - -%************************************************************************ -%* * -\subsection[CgStackery-adjust]{Adjusting the stack pointers} -%* * -%************************************************************************ - -This function adjusts the stack and heap pointers just before a tail -call or return. The stack pointer is adjusted to its final position -(i.e. to point to the last argument for a tail call, or the activation -record for a return). The heap pointer may be moved backwards, in -cases where we overallocated at the beginning of the basic block (see -CgCase.lhs for discussion). - -These functions {\em do not} deal with high-water-mark adjustment. -That's done by functions which allocate stack space. 
- -\begin{code} -adjustSpAndHp :: VirtualSpOffset -- New offset for Arg stack ptr - -> Code -adjustSpAndHp newRealSp = do - (MkCgInfoDown _ _ _ ticky_ctr _) <- getInfoDown - (MkCgState absC binds - ((vSp,frame,fSp,realSp,hwSp), - (vHp, rHp))) <- getState - let move_sp = if (newRealSp == realSp) then AbsCNop - else (CAssign (CReg Sp) - (CAddr (spRel realSp newRealSp))) - let move_hp = - if (rHp == vHp) then AbsCNop - else mkAbstractCs [ - CAssign (CReg Hp) (CAddr (hpRel rHp vHp)), - profCtrAbsC FSLIT("TICK_ALLOC_HEAP") - [ mkIntCLit (vHp - rHp), CLbl ticky_ctr DataPtrRep ] - ] - let new_usage = ((vSp, frame, fSp, newRealSp, hwSp), (vHp,vHp)) - setState $ MkCgState (mkAbstractCs [absC,move_sp,move_hp]) binds new_usage -\end{code} diff --git a/ghc/compiler/codeGen/CgUtils.hs b/ghc/compiler/codeGen/CgUtils.hs new file mode 100644 index 0000000000..e74bd14d1e --- /dev/null +++ b/ghc/compiler/codeGen/CgUtils.hs @@ -0,0 +1,622 @@ +----------------------------------------------------------------------------- +-- +-- Code generator utilities; mostly monadic +-- +-- (c) The University of Glasgow 2004 +-- +----------------------------------------------------------------------------- + +module CgUtils ( + addIdReps, + cgLit, + emitDataLits, emitRODataLits, emitIf, emitIfThenElse, + emitRtsCall, emitRtsCallWithVols, emitRtsCallWithResult, + assignTemp, newTemp, + emitSimultaneously, + emitSwitch, emitLitSwitch, + tagToClosure, + + cmmAndWord, cmmOrWord, cmmNegate, cmmEqWord, cmmNeWord, + cmmOffsetExprW, cmmOffsetExprB, + cmmRegOffW, cmmRegOffB, + cmmLabelOffW, cmmLabelOffB, + cmmOffsetW, cmmOffsetB, + cmmOffsetLitW, cmmOffsetLitB, + cmmLoadIndexW, + + addToMem, addToMemE, + mkWordCLit, + mkStringCLit, + packHalfWordsCLit, + blankWord + ) where + +#include "HsVersions.h" + +import CgMonad +import TyCon ( TyCon, tyConName ) +import Id ( Id ) +import Constants ( wORD_SIZE ) +import SMRep ( CgRep, StgWord, hALF_WORD_SIZE_IN_BITS, ByteOff, + WordOff, idCgRep ) +import PprCmm ( {- 
instances -} ) +import Cmm +import CLabel +import CmmUtils +import MachOp ( MachRep(..), wordRep, MachOp(..), MachHint(..), + mo_wordOr, mo_wordAnd, mo_wordNe, mo_wordEq, + mo_wordULt, machRepByteWidth ) +import ForeignCall ( CCallConv(..) ) +import Literal ( Literal(..) ) +import CLabel ( CLabel, mkAsmTempLabel ) +import Digraph ( SCC(..), stronglyConnComp ) +import ListSetOps ( assocDefault ) +import Util ( filterOut, sortLt ) +import Char ( ord ) +import FastString ( LitString, FastString, unpackFS ) +import Outputable + +import DATA_BITS + +#include "../includes/ghcconfig.h" + -- For WORDS_BIGENDIAN + +------------------------------------------------------------------------- +-- +-- Random small functions +-- +------------------------------------------------------------------------- + +addIdReps :: [Id] -> [(CgRep, Id)] +addIdReps ids = [(idCgRep id, id) | id <- ids] + +------------------------------------------------------------------------- +-- +-- Literals +-- +------------------------------------------------------------------------- + +cgLit :: Literal -> FCode CmmLit +cgLit (MachStr s) = mkStringCLit (unpackFS s) +cgLit other_lit = return (mkSimpleLit other_lit) + +mkSimpleLit :: Literal -> CmmLit +mkSimpleLit (MachChar c) = CmmInt (fromIntegral (ord c)) wordRep +mkSimpleLit MachNullAddr = zeroCLit +mkSimpleLit (MachInt i) = CmmInt i wordRep +mkSimpleLit (MachInt64 i) = CmmInt i I64 +mkSimpleLit (MachWord i) = CmmInt i wordRep +mkSimpleLit (MachWord64 i) = CmmInt i I64 +mkSimpleLit (MachFloat r) = CmmFloat r F32 +mkSimpleLit (MachDouble r) = CmmFloat r F64 +mkSimpleLit (MachLabel fs ms) = CmmLabel (mkForeignLabel fs ms is_dyn) + where + is_dyn = False -- ToDo: fix me + +mkLtOp :: Literal -> MachOp +-- On signed literals we must do a signed comparison +mkLtOp (MachInt _) = MO_S_Lt wordRep +mkLtOp (MachFloat _) = MO_S_Lt F32 +mkLtOp (MachDouble _) = MO_S_Lt F64 +mkLtOp lit = MO_U_Lt (cmmLitRep (mkSimpleLit lit)) + + 
+--------------------------------------------------- +-- +-- Cmm data type functions +-- +--------------------------------------------------- + +----------------------- +-- The "B" variants take byte offsets +cmmRegOffB :: CmmReg -> ByteOff -> CmmExpr +cmmRegOffB = cmmRegOff + +cmmOffsetB :: CmmExpr -> ByteOff -> CmmExpr +cmmOffsetB = cmmOffset + +cmmOffsetExprB :: CmmExpr -> CmmExpr -> CmmExpr +cmmOffsetExprB = cmmOffsetExpr + +cmmLabelOffB :: CLabel -> ByteOff -> CmmLit +cmmLabelOffB = cmmLabelOff + +cmmOffsetLitB :: CmmLit -> ByteOff -> CmmLit +cmmOffsetLitB = cmmOffsetLit + +----------------------- +-- The "W" variants take word offsets +cmmOffsetExprW :: CmmExpr -> CmmExpr -> CmmExpr +-- The second arg is a *word* offset; need to change it to bytes +cmmOffsetExprW e (CmmLit (CmmInt n _)) = cmmOffsetW e (fromInteger n) +cmmOffsetExprW e wd_off = cmmIndexExpr wordRep e wd_off + +cmmOffsetW :: CmmExpr -> WordOff -> CmmExpr +cmmOffsetW e n = cmmOffsetB e (wORD_SIZE * n) + +cmmRegOffW :: CmmReg -> WordOff -> CmmExpr +cmmRegOffW reg wd_off = cmmRegOffB reg (wd_off * wORD_SIZE) + +cmmOffsetLitW :: CmmLit -> WordOff -> CmmLit +cmmOffsetLitW lit wd_off = cmmOffsetLitB lit (wORD_SIZE * wd_off) + +cmmLabelOffW :: CLabel -> WordOff -> CmmLit +cmmLabelOffW lbl wd_off = cmmLabelOffB lbl (wORD_SIZE * wd_off) + +cmmLoadIndexW :: CmmExpr -> Int -> CmmExpr +cmmLoadIndexW base off + = CmmLoad (cmmOffsetW base off) wordRep + +----------------------- +cmmNeWord, cmmEqWord, cmmOrWord, cmmAndWord :: CmmExpr -> CmmExpr -> CmmExpr +cmmOrWord e1 e2 = CmmMachOp mo_wordOr [e1, e2] +cmmAndWord e1 e2 = CmmMachOp mo_wordAnd [e1, e2] +cmmNeWord e1 e2 = CmmMachOp mo_wordNe [e1, e2] +cmmEqWord e1 e2 = CmmMachOp mo_wordEq [e1, e2] +cmmULtWord e1 e2 = CmmMachOp mo_wordULt [e1, e2] + +cmmNegate :: CmmExpr -> CmmExpr +cmmNegate (CmmLit (CmmInt n rep)) = CmmLit (CmmInt (-n) rep) +cmmNegate e = CmmMachOp (MO_S_Neg (cmmExprRep e)) [e] + +blankWord :: CmmStatic +blankWord = CmmUninitialised wORD_SIZE 
+ +----------------------- +-- Making literals + +mkWordCLit :: StgWord -> CmmLit +mkWordCLit wd = CmmInt (fromIntegral wd) wordRep + +packHalfWordsCLit :: (Integral a, Integral b) => a -> b -> CmmLit +-- Make a single word literal in which the lower_half_word is +-- at the lower address, and the upper_half_word is at the +-- higher address +-- ToDo: consider using half-word lits instead +-- but be careful: that's vulnerable when reversed +packHalfWordsCLit lower_half_word upper_half_word +#ifdef WORDS_BIGENDIAN + = mkWordCLit ((fromIntegral lower_half_word `shiftL` hALF_WORD_SIZE_IN_BITS) + .|. fromIntegral upper_half_word) +#else + = mkWordCLit ((fromIntegral lower_half_word) + .|. (fromIntegral upper_half_word `shiftL` hALF_WORD_SIZE_IN_BITS)) +#endif + +-------------------------------------------------------------------------- +-- +-- Incrementing a memory location +-- +-------------------------------------------------------------------------- + +addToMem :: MachRep -- rep of the counter + -> CmmExpr -- Address + -> Int -- What to add (a word) + -> CmmStmt +addToMem rep ptr n = addToMemE rep ptr (CmmLit (CmmInt (toInteger n) rep)) + +addToMemE :: MachRep -- rep of the counter + -> CmmExpr -- Address + -> CmmExpr -- What to add (a word-typed expression) + -> CmmStmt +addToMemE rep ptr n + = CmmStore ptr (CmmMachOp (MO_Add rep) [CmmLoad ptr rep, n]) + +------------------------------------------------------------------------- +-- +-- Converting a closure tag to a closure for enumeration types +-- (this is the implementation of tagToEnum#). 
+-- +------------------------------------------------------------------------- + +tagToClosure :: TyCon -> CmmExpr -> CmmExpr +tagToClosure tycon tag + = CmmLoad (cmmOffsetExprW closure_tbl tag) wordRep + where closure_tbl = CmmLit (CmmLabel (mkClosureTblLabel (tyConName tycon))) + +------------------------------------------------------------------------- +-- +-- Conditionals and rts calls +-- +------------------------------------------------------------------------- + +emitIf :: CmmExpr -- Boolean + -> Code -- Then part + -> Code +-- Emit (if e then x) +-- ToDo: reverse the condition to avoid the extra branch instruction if possible +-- (some conditionals aren't reversible. eg. floating point comparisons cannot +-- be inverted because there exist some values for which both comparisons +-- return False, such as NaN.) +emitIf cond then_part + = do { then_id <- newLabelC + ; join_id <- newLabelC + ; stmtC (CmmCondBranch cond then_id) + ; stmtC (CmmBranch join_id) + ; labelC then_id + ; then_part + ; labelC join_id + } + +emitIfThenElse :: CmmExpr -- Boolean + -> Code -- Then part + -> Code -- Else part + -> Code +-- Emit (if e then x else y) +emitIfThenElse cond then_part else_part + = do { then_id <- newLabelC + ; else_id <- newLabelC + ; join_id <- newLabelC + ; stmtC (CmmCondBranch cond then_id) + ; else_part + ; stmtC (CmmBranch join_id) + ; labelC then_id + ; then_part + ; labelC join_id + } + +emitRtsCall :: LitString -> [(CmmExpr,MachHint)] -> Code +emitRtsCall fun args = emitRtsCall' [] fun args Nothing + -- The 'Nothing' says "save all global registers" + +emitRtsCallWithVols :: LitString -> [(CmmExpr,MachHint)] -> [GlobalReg] -> Code +emitRtsCallWithVols fun args vols + = emitRtsCall' [] fun args (Just vols) + +emitRtsCallWithResult :: CmmReg -> MachHint -> LitString + -> [(CmmExpr,MachHint)] -> Code +emitRtsCallWithResult res hint fun args + = emitRtsCall' [(res,hint)] fun args Nothing + +-- Make a call to an RTS C procedure +emitRtsCall' + :: 
[(CmmReg,MachHint)] + -> LitString + -> [(CmmExpr,MachHint)] + -> Maybe [GlobalReg] + -> Code +emitRtsCall' res fun args vols = stmtC (CmmCall target res args vols) + where + target = CmmForeignCall fun_expr CCallConv + fun_expr = mkLblExpr (mkRtsCodeLabel fun) + + +------------------------------------------------------------------------- +-- +-- Strings gnerate a top-level data block +-- +------------------------------------------------------------------------- + +emitDataLits :: CLabel -> [CmmLit] -> Code +-- Emit a data-segment data block +emitDataLits lbl lits + = emitData Data (CmmDataLabel lbl : map CmmStaticLit lits) + +emitRODataLits :: CLabel -> [CmmLit] -> Code +-- Emit a read-only data block +emitRODataLits lbl lits + = emitData ReadOnlyData (CmmDataLabel lbl : map CmmStaticLit lits) + +mkStringCLit :: String -> FCode CmmLit +-- Make a global definition for the string, +-- and return its label +mkStringCLit str + = do { uniq <- newUnique + ; let lbl = mkAsmTempLabel uniq + ; emitData ReadOnlyData [CmmDataLabel lbl, CmmString str] + ; return (CmmLabel lbl) } + +------------------------------------------------------------------------- +-- +-- Assigning expressions to temporaries +-- +------------------------------------------------------------------------- + +assignTemp :: CmmExpr -> FCode CmmExpr +-- For a non-trivial expression, e, create a local +-- variable and assign the expression to it +assignTemp e + | isTrivialCmmExpr e = return e + | otherwise = do { reg <- newTemp (cmmExprRep e) + ; stmtC (CmmAssign reg e) + ; return (CmmReg reg) } + + +newTemp :: MachRep -> FCode CmmReg +newTemp rep = do { uniq <- newUnique; return (CmmLocal (LocalReg uniq rep)) } + + +------------------------------------------------------------------------- +-- +-- Building case analysis +-- +------------------------------------------------------------------------- + +emitSwitch + :: CmmExpr -- Tag to switch on + -> [(ConTagZ, CgStmts)] -- Tagged branches + -> Maybe CgStmts -- 
Default branch (if any) + -> ConTagZ -> ConTagZ -- Min and Max possible values; behaviour + -- outside this range is undefined + -> Code + +-- ONLY A DEFAULT BRANCH: no case analysis to do +emitSwitch tag_expr [] (Just stmts) _ _ + = emitCgStmts stmts + +-- Right, off we go +emitSwitch tag_expr branches mb_deflt lo_tag hi_tag + = -- Just sort the branches before calling mk_sritch + do { mb_deflt_id <- + case mb_deflt of + Nothing -> return Nothing + Just stmts -> do id <- forkCgStmts stmts; return (Just id) + + ; stmts <- mk_switch tag_expr (sortLt lt branches) + mb_deflt_id lo_tag hi_tag + ; emitCgStmts stmts + } + where + (t1,_) `lt` (t2,_) = t1 < t2 + + +mk_switch :: CmmExpr -> [(ConTagZ, CgStmts)] + -> Maybe BlockId -> ConTagZ -> ConTagZ + -> FCode CgStmts + +-- SINGLETON TAG RANGE: no case analysis to do +mk_switch tag_expr [(tag,stmts)] _ lo_tag hi_tag + | lo_tag == hi_tag + = ASSERT( tag == lo_tag ) + return stmts + +-- SINGLETON BRANCH, NO DEFUALT: no case analysis to do +mk_switch tag_expr [(tag,stmts)] Nothing lo_tag hi_tag + = return stmts + -- The simplifier might have eliminated a case + -- so we may have e.g. case xs of + -- [] -> e + -- In that situation we can be sure the (:) case + -- can't happen, so no need to test + +-- SINGLETON BRANCH: one equality check to do +mk_switch tag_expr [(tag,stmts)] (Just deflt) lo_tag hi_tag + = return (CmmCondBranch cond deflt `consCgStmt` stmts) + where + cond = cmmNeWord tag_expr (CmmLit (mkIntCLit tag)) + -- We have lo_tag < hi_tag, but there's only one branch, + -- so there must be a default + +-- ToDo: we might want to check for the two branch case, where one of +-- the branches is the tag 0, because comparing '== 0' is likely to be +-- more efficient than other kinds of comparison. 
+ +-- DENSE TAG RANGE: use a switch statment +mk_switch tag_expr branches mb_deflt lo_tag hi_tag + | use_switch -- Use a switch + = do { deflt_id <- get_deflt_id mb_deflt + ; branch_ids <- mapM forkCgStmts (map snd branches) + ; let + tagged_blk_ids = zip (map fst branches) branch_ids + + find_branch :: BlockId -> ConTagZ -> BlockId + find_branch deflt_id i = assocDefault deflt_id tagged_blk_ids i + + arms = [ Just (find_branch deflt_id (i+lo_tag)) + | i <- [0..n_tags-1]] + + switch_stmt = CmmSwitch (cmmOffset tag_expr (- lo_tag)) arms + + ; return (oneCgStmt switch_stmt) + } + + | otherwise -- Use an if-tree + = do { (assign_tag, tag_expr') <- assignTemp' tag_expr + -- To avoid duplication + ; lo_stmts <- mk_switch tag_expr' lo_branches mb_deflt lo_tag (mid_tag-1) + ; hi_stmts <- mk_switch tag_expr' hi_branches mb_deflt mid_tag hi_tag + ; lo_id <- forkCgStmts lo_stmts + ; let cond = cmmULtWord tag_expr' (CmmLit (mkIntCLit mid_tag)) + branch_stmt = CmmCondBranch cond lo_id + ; return (assign_tag `consCgStmt` (branch_stmt `consCgStmt` hi_stmts)) + } + where + use_switch = ASSERT( n_branches > 1 && n_tags > 1 ) + n_tags > 2 && (small || dense) + -- a 2-branch switch always turns into an if. + small = n_tags <= 4 + dense = n_branches > (n_tags `div` 2) + exhaustive = n_tags == n_branches + n_tags = hi_tag - lo_tag + 1 + n_branches = length branches + + -- INVARIANT: Provided hi_tag > lo_tag (which is true) + -- lo_tag <= mid_tag < hi_tag + -- lo_branches have tags < mid_tag + -- hi_branches have tags >= mid_tag + + (mid_tag,_) = branches !! (n_branches `div` 2) + -- 2 branches => n_branches `div` 2 = 1 + -- => branches !! 
1 give the *second* tag + -- There are always at least 2 branches here + + (lo_branches, hi_branches) = span is_lo branches + is_lo (t,_) = t < mid_tag + + -- Add a default block if the case is not exhaustive + get_deflt_id (Just deflt_id) = return deflt_id + get_deflt_id Nothing + | exhaustive + = return (pprPanic "mk_deflt_blks" (ppr tag_expr)) + | otherwise + = do { stmts <- getCgStmts (stmtC jump_to_impossible) + ; id <- forkCgStmts stmts + ; return id } + + jump_to_impossible + = CmmJump (mkLblExpr mkErrorStdEntryLabel) [] + + +assignTemp' e + | isTrivialCmmExpr e = return (CmmNop, e) + | otherwise = do { reg <- newTemp (cmmExprRep e) + ; return (CmmAssign reg e, CmmReg reg) } + + +emitLitSwitch :: CmmExpr -- Tag to switch on + -> [(Literal, CgStmts)] -- Tagged branches + -> CgStmts -- Default branch (always) + -> Code -- Emit the code +-- Used for general literals, whose size might not be a word, +-- where there is always a default case, and where we don't know +-- the range of values for certain. For simplicity we always generate a tree. 
+emitLitSwitch scrut [] deflt + = emitCgStmts deflt +emitLitSwitch scrut branches deflt_blk + = do { scrut' <- assignTemp scrut + ; deflt_blk_id <- forkCgStmts deflt_blk + ; blk <- mk_lit_switch scrut' deflt_blk_id (sortLt lt branches) + ; emitCgStmts blk } + where + lt (t1,_) (t2,_) = t1 < t2 + +mk_lit_switch :: CmmExpr -> BlockId + -> [(Literal,CgStmts)] + -> FCode CgStmts +mk_lit_switch scrut deflt_blk_id [(lit,blk)] + = return (consCgStmt if_stmt blk) + where + cmm_lit = mkSimpleLit lit + rep = cmmLitRep cmm_lit + cond = CmmMachOp (MO_Ne rep) [scrut, CmmLit cmm_lit] + if_stmt = CmmCondBranch cond deflt_blk_id + +mk_lit_switch scrut deflt_blk_id branches + = do { hi_blk <- mk_lit_switch scrut deflt_blk_id hi_branches + ; lo_blk <- mk_lit_switch scrut deflt_blk_id lo_branches + ; lo_blk_id <- forkCgStmts lo_blk + ; let if_stmt = CmmCondBranch cond lo_blk_id + ; return (if_stmt `consCgStmt` hi_blk) } + where + n_branches = length branches + (mid_lit,_) = branches !! (n_branches `div` 2) + -- See notes above re mid_tag + + (lo_branches, hi_branches) = span is_lo branches + is_lo (t,_) = t < mid_lit + + cond = CmmMachOp (mkLtOp mid_lit) + [scrut, CmmLit (mkSimpleLit mid_lit)] + +------------------------------------------------------------------------- +-- +-- Simultaneous assignment +-- +------------------------------------------------------------------------- + + +emitSimultaneously :: CmmStmts -> Code +-- Emit code to perform the assignments in the +-- input simultaneously, using temporary variables when necessary. 
+-- +-- The Stmts must be: +-- CmmNop, CmmComment, CmmAssign, CmmStore +-- and nothing else + + +-- We use the strongly-connected component algorithm, in which +-- * the vertices are the statements +-- * an edge goes from s1 to s2 iff +-- s1 assigns to something s2 uses +-- that is, if s1 should *follow* s2 in the final order + +type CVertex = (Int, CmmStmt) -- Give each vertex a unique number, + -- for fast comparison + +emitSimultaneously stmts + = codeOnly $ + case filterOut isNopStmt (stmtList stmts) of + -- Remove no-ops + [] -> nopC + [stmt] -> stmtC stmt -- It's often just one stmt + stmt_list -> doSimultaneously1 (zip [(1::Int)..] stmt_list) + +doSimultaneously1 :: [CVertex] -> Code +doSimultaneously1 vertices + = let + edges = [ (vertex, key1, edges_from stmt1) + | vertex@(key1, stmt1) <- vertices + ] + edges_from stmt1 = [ key2 | (key2, stmt2) <- vertices, + stmt1 `mustFollow` stmt2 + ] + components = stronglyConnComp edges + + -- do_components deal with one strongly-connected component + -- Not cyclic, or singleton? Just do it + do_component (AcyclicSCC (n,stmt)) = stmtC stmt + do_component (CyclicSCC [(n,stmt)]) = stmtC stmt + + -- Cyclic? Then go via temporaries. Pick one to + -- break the loop and try again with the rest. 
+ do_component (CyclicSCC ((n,first_stmt) : rest)) + = do { from_temp <- go_via_temp first_stmt + ; doSimultaneously1 rest + ; stmtC from_temp } + + go_via_temp (CmmAssign dest src) + = do { tmp <- newTemp (cmmRegRep dest) + ; stmtC (CmmAssign tmp src) + ; return (CmmAssign dest (CmmReg tmp)) } + go_via_temp (CmmStore dest src) + = do { tmp <- newTemp (cmmExprRep src) + ; stmtC (CmmAssign tmp src) + ; return (CmmStore dest (CmmReg tmp)) } + in + mapCs do_component components + +mustFollow :: CmmStmt -> CmmStmt -> Bool +CmmAssign reg _ `mustFollow` stmt = anySrc (reg `regUsedIn`) stmt +CmmStore loc e `mustFollow` stmt = anySrc (locUsedIn loc (cmmExprRep e)) stmt +CmmNop `mustFollow` stmt = False +CmmComment _ `mustFollow` stmt = False + + +anySrc :: (CmmExpr -> Bool) -> CmmStmt -> Bool +-- True if the fn is true of any input of the stmt +anySrc p (CmmAssign _ e) = p e +anySrc p (CmmStore e1 e2) = p e1 || p e2 -- Might be used in either side +anySrc p (CmmComment _) = False +anySrc p CmmNop = False +anySrc p other = True -- Conservative + +regUsedIn :: CmmReg -> CmmExpr -> Bool +reg `regUsedIn` CmmLit _ = False +reg `regUsedIn` CmmLoad e _ = reg `regUsedIn` e +reg `regUsedIn` CmmReg reg' = reg == reg' +reg `regUsedIn` CmmRegOff reg' _ = reg == reg' +reg `regUsedIn` CmmMachOp _ es = any (reg `regUsedIn`) es + +locUsedIn :: CmmExpr -> MachRep -> CmmExpr -> Bool +-- (locUsedIn a r e) checks whether writing to r[a] could affect the value of +-- 'e'. Returns True if it's not sure. +locUsedIn loc rep (CmmLit _) = False +locUsedIn loc rep (CmmLoad e ld_rep) = possiblySameLoc loc rep e ld_rep +locUsedIn loc rep (CmmReg reg') = False +locUsedIn loc rep (CmmRegOff reg' _) = False +locUsedIn loc rep (CmmMachOp _ es) = any (locUsedIn loc rep) es + +possiblySameLoc :: CmmExpr -> MachRep -> CmmExpr -> MachRep -> Bool +-- Assumes that distinct registers (eg Hp, Sp) do not +-- point to the same location, nor any offset thereof. 
+possiblySameLoc (CmmReg r1) rep1 (CmmReg r2) rep2 = r1==r2 +possiblySameLoc (CmmReg r1) rep1 (CmmRegOff r2 0) rep2 = r1==r2 +possiblySameLoc (CmmRegOff r1 0) rep1 (CmmReg r2) rep2 = r1==r2 +possiblySameLoc (CmmRegOff r1 start1) rep1 (CmmRegOff r2 start2) rep2 + = r1==r2 && end1 > start2 && end2 > start1 + where + end1 = start1 + machRepByteWidth rep1 + end2 = start2 + machRepByteWidth rep2 + +possiblySameLoc l1 rep1 (CmmLit _) rep2 = False +possiblySameLoc l1 rep1 l2 rep2 = True -- Conservative diff --git a/ghc/compiler/codeGen/ClosureInfo.lhs b/ghc/compiler/codeGen/ClosureInfo.lhs index 86380ecaa6..0abf831c51 100644 --- a/ghc/compiler/codeGen/ClosureInfo.lhs +++ b/ghc/compiler/codeGen/ClosureInfo.lhs @@ -1,9 +1,11 @@ % -% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 +% (c) The Univserity of Glasgow 1992-2004 % -% $Id: ClosureInfo.lhs,v 1.62 2004/03/31 15:23:17 simonmar Exp $ -% -\section[ClosureInfo]{Data structures which describe closures} + + Data structures which describe closures, and + operations over those data structures + + Nothing monadic in here Much of the rationale for these things is in the ``details'' part of the STG paper. @@ -11,86 +13,73 @@ the STG paper. 
\begin{code} module ClosureInfo ( ClosureInfo, LambdaFormInfo, SMRep, -- all abstract - StandardFormInfo, ArgDescr(..), + StandardFormInfo, - CallingConvention(..), + ArgDescr(..), Liveness(..), + C_SRT(..), needsSRT, - mkClosureLFInfo, mkConLFInfo, mkSelectorLFInfo, + mkLFThunk, mkLFReEntrant, mkConLFInfo, mkSelectorLFInfo, mkApLFInfo, mkLFImported, mkLFArgument, mkLFLetNoEscape, + mkClosureInfo, mkConInfo, + closureSize, closureNonHdrSize, closureGoodStuffSize, closurePtrsSize, - slopSize, + slopSize, - layOutDynClosure, layOutStaticClosure, layOutStaticNoFVClosure, - layOutDynConstr, layOutStaticConstr, - mkVirtHeapOffsets, mkStaticClosure, + closureName, infoTableLabelFromCI, + closureLabelFromCI, closureSRT, + closureLFInfo, closureSMRep, closureUpdReqd, + closureSingleEntry, closureReEntrant, isConstrClosure_maybe, + closureFunInfo, isStandardFormThunk, isKnownFun, - nodeMustPointToIt, getEntryConvention, - FCode, CgInfoDownwards, CgState, + enterIdLabel, enterReturnPtLabel, + + nodeMustPointToIt, + CallMethod(..), getCallMethod, blackHoleOnEntry, staticClosureRequired, - - closureName, infoTableLabelFromCI, - closureLabelFromCI, closureSRT, - entryLabelFromCI, - closureLFInfo, closureSMRep, closureUpdReqd, - closureSingleEntry, closureReEntrant, closureSemiTag, - closureFunInfo, isStandardFormThunk, + getClosureType, isToplevClosure, - closureTypeDescr, -- profiling + closureValDescr, closureTypeDescr, -- profiling isStaticClosure, - allocProfilingMsg, cafBlackHoleClosureInfo, seCafBlackHoleClosureInfo, staticClosureNeedsLink, - - mkInfoTable, mkRetInfoTable, mkVecInfoTable, ) where -#include "../includes/config.h" #include "../includes/MachDeps.h" #include "HsVersions.h" -import AbsCSyn import StgSyn -import CgMonad +import SMRep -- all of it -import Constants ( mIN_UPD_SIZE, mIN_SIZE_NonUpdHeapObject ) -import CgRetConv ( assignRegs ) import CLabel + +import Constants ( mIN_UPD_SIZE, mIN_SIZE_NonUpdHeapObject ) import CmdLineOpts ( opt_SccProfilingOn, 
opt_OmitBlackHoling, opt_Parallel, opt_DoTickyProfiling, - opt_SMP, opt_Unregisterised ) -import Id ( Id, idType, idArity, idName, idPrimRep ) -import DataCon ( DataCon, dataConTag, fIRST_TAG, dataConTyCon, - isNullaryDataCon, dataConName - ) -import Name ( Name, nameUnique, getOccName, getName, getOccString ) + opt_SMP ) +import Id ( Id, idType, idArity, idName ) +import DataCon ( DataCon, dataConTyCon, isNullaryDataCon, dataConName ) +import Name ( Name, nameUnique, getOccName, getOccString ) import OccName ( occNameUserString ) -import PrimRep -import SMRep -- all of it import Type ( isUnLiftedType, Type, repType, splitTyConApp_maybe ) import TcType ( tcSplitSigmaTy ) import TyCon ( isFunTyCon, isAbstractTyCon ) import BasicTypes ( TopLevelFlag(..), isNotTopLevel, isTopLevel, ipNameName ) -import Util ( mapAccumL, listLengthCmp, lengthIs ) import FastString import Outputable -import Literal import Constants -import Bitmap - -import Maybe ( isJust ) -import DATA_BITS import TypeRep -- TEMP \end{code} + %************************************************************************ %* * \subsection[ClosureInfo-datatypes]{Data types for closure information} @@ -121,12 +110,22 @@ data ClosureInfo closureDescr :: !String -- closure description (for profiling) } - -- constructor closures don't have a unique info table label (they use + -- Constructor closures don't have a unique info table label (they use -- the constructor's info table), and they don't have an SRT. | ConInfo { closureCon :: !DataCon, closureSMRep :: !SMRep } + +-- C_SRT is what StgSyn.SRT gets translated to... +-- we add a label for the table, and expect only the 'offset/length' form + +data C_SRT = NoC_SRT + | C_SRT !CLabel !WordOff !StgHalfWord {-bitmap or escape-} + +needsSRT :: C_SRT -> Bool +needsSRT NoC_SRT = False +needsSRT (C_SRT _ _ _) = True \end{code} %************************************************************************ @@ -147,11 +146,11 @@ ClosureInfo contains a LambdaFormInfo. 
data LambdaFormInfo = LFReEntrant -- Reentrant closure (a function) TopLevelFlag -- True if top level - !Int -- Arity + !Int -- Arity. Invariant: always > 0 !Bool -- True <=> no fvs ArgDescr -- Argument descriptor (should reall be in ClosureInfo) - | LFCon -- Constructor + | LFCon -- A saturated constructor application DataCon -- The constructor | LFThunk -- Thunk (zero arity) @@ -179,36 +178,58 @@ data LambdaFormInfo CLabel -- Flavour (info label, eg CAF_BLACKHOLE_info). -data StandardFormInfo -- Tells whether this thunk has one of a small number - -- of standard forms +------------------------- +-- An ArgDsecr describes the argument pattern of a function - = NonStandardThunk -- No, it isn't +data ArgDescr + = ArgSpec -- Fits one of the standard patterns + !Int -- RTS type identifier ARG_P, ARG_N, ... - | SelectorThunk - Int -- 0-origin offset of ak within the "goods" of - -- constructor (Recall that the a1,...,an may be laid - -- out in the heap in a non-obvious order.) + | ArgGen -- General case + Liveness -- Details about the arguments -{- A SelectorThunk is of form - case x of - con a1,..,an -> ak +------------------------- +-- We represent liveness bitmaps as a Bitmap (whose internal +-- representation really is a bitmap). These are pinned onto case return +-- vectors to indicate the state of the stack for the garbage collector. +-- +-- In the compiled program, liveness bitmaps that fit inside a single +-- word (StgWord) are stored as a single word, while larger bitmaps are +-- stored as a pointer to an array of words. - and the constructor is from a single-constr type. --} +data Liveness + = SmallLiveness -- Liveness info that fits in one word + StgWord -- Here's the bitmap + + | BigLiveness -- Liveness info witha a multi-word bitmap + CLabel -- Label for the bitmap - | ApThunk - Int -- arity -{- An ApThunk is of form +------------------------- +-- StandardFormInfo tells whether this thunk has one of +-- a small number of standard forms - x1 ... 
xn +data StandardFormInfo + = NonStandardThunk + -- Not of of the standard forms - The code for the thunk just pushes x2..xn on the stack and enters x1. - There are a few of these (for 1 <= n <= MAX_SPEC_AP_SIZE) pre-compiled - in the RTS to save space. --} + | SelectorThunk + -- A SelectorThunk is of form + -- case x of + -- con a1,..,an -> ak + -- and the constructor is from a single-constr type. + WordOff -- 0-origin offset of ak within the "goods" of + -- constructor (Recall that the a1,...,an may be laid + -- out in the heap in a non-obvious order.) + | ApThunk + -- An ApThunk is of form + -- x1 ... xn + -- The code for the thunk just pushes x2..xn on the stack and enters x1. + -- There are a few of these (for 1 <= n <= MAX_SPEC_AP_SIZE) pre-compiled + -- in the RTS to save space. + Int -- Arity, n \end{code} %************************************************************************ @@ -217,31 +238,27 @@ data StandardFormInfo -- Tells whether this thunk has one of a small number %* * %************************************************************************ -@mkClosureLFInfo@ figures out the appropriate LFInfo for the closure. 
- \begin{code} -mkClosureLFInfo :: Id -- The binder - -> TopLevelFlag -- True of top level - -> [Id] -- Free vars - -> UpdateFlag -- Update flag - -> [Id] -- Args - -> LambdaFormInfo - -mkClosureLFInfo bndr top fvs upd_flag args@(_:_) -- Non-empty args - = LFReEntrant top (length args) (null fvs) (mkArgDescr (getName bndr) args) - -mkClosureLFInfo bndr top fvs upd_flag [] - = ASSERT( not updatable || not (isUnLiftedType id_ty) ) - LFThunk top (null fvs) updatable NonStandardThunk - (might_be_a_function id_ty) - where - updatable = isUpdatable upd_flag - id_ty = idType bndr +mkLFReEntrant :: TopLevelFlag -- True of top level + -> [Id] -- Free vars + -> [Id] -- Args + -> ArgDescr -- Argument descriptor + -> LambdaFormInfo + +mkLFReEntrant top fvs args arg_descr + = LFReEntrant top (length args) (null fvs) arg_descr + +mkLFThunk thunk_ty top fvs upd_flag + = ASSERT( not (isUpdatable upd_flag) || not (isUnLiftedType thunk_ty) ) + LFThunk top (null fvs) + (isUpdatable upd_flag) + NonStandardThunk + (might_be_a_function thunk_ty) might_be_a_function :: Type -> Bool might_be_a_function ty | Just (tc,_) <- splitTyConApp_maybe (repType ty), - not (isFunTyCon tc) && not (isAbstractTyCon tc) = False + not (isFunTyCon tc) && not (isAbstractTyCon tc) = False -- don't forget to check for abstract types, which might -- be functions too. 
| otherwise = True @@ -278,15 +295,51 @@ mkLFImported id %************************************************************************ %* * + Building ClosureInfos +%* * +%************************************************************************ + +\begin{code} +mkClosureInfo :: Bool -- Is static + -> Id + -> LambdaFormInfo + -> Int -> Int -- Total and pointer words + -> C_SRT + -> String -- String descriptor + -> ClosureInfo +mkClosureInfo is_static id lf_info tot_wds ptr_wds srt_info descr + = ClosureInfo { closureName = name, + closureLFInfo = lf_info, + closureSMRep = sm_rep, + closureSRT = srt_info, + closureType = idType id, + closureDescr = descr } + where + name = idName id + sm_rep = chooseSMRep is_static lf_info tot_wds ptr_wds + +mkConInfo :: Bool -- Is static + -> DataCon + -> Int -> Int -- Total and pointer words + -> ClosureInfo +mkConInfo is_static data_con tot_wds ptr_wds + = ConInfo { closureSMRep = sm_rep, + closureCon = data_con } + where + sm_rep = chooseSMRep is_static (mkConLFInfo data_con) tot_wds ptr_wds +\end{code} + +%************************************************************************ +%* * \subsection[ClosureInfo-sizes]{Functions about closure {\em sizes}} %* * %************************************************************************ \begin{code} -closureSize :: ClosureInfo -> HeapOffset +closureSize :: ClosureInfo -> WordOff closureSize cl_info = fixedHdrSize + closureNonHdrSize cl_info -closureNonHdrSize :: ClosureInfo -> Int +closureNonHdrSize :: ClosureInfo -> WordOff closureNonHdrSize cl_info = tot_wds + computeSlopSize tot_wds (closureSMRep cl_info) @@ -302,24 +355,24 @@ closureNeedsUpdSpace (ClosureInfo { closureLFInfo = LFThunk TopLevel _ _ _ _ }) = True closureNeedsUpdSpace cl_info = closureUpdReqd cl_info -slopSize :: ClosureInfo -> Int +slopSize :: ClosureInfo -> WordOff slopSize cl_info = computeSlopSize (closureGoodStuffSize cl_info) (closureSMRep cl_info) (closureNeedsUpdSpace cl_info) -closureGoodStuffSize :: ClosureInfo -> 
Int +closureGoodStuffSize :: ClosureInfo -> WordOff closureGoodStuffSize cl_info = let (ptrs, nonptrs) = sizes_from_SMRep (closureSMRep cl_info) in ptrs + nonptrs -closurePtrsSize :: ClosureInfo -> Int +closurePtrsSize :: ClosureInfo -> WordOff closurePtrsSize cl_info = let (ptrs, _) = sizes_from_SMRep (closureSMRep cl_info) in ptrs -- not exported: -sizes_from_SMRep :: SMRep -> (Int,Int) +sizes_from_SMRep :: SMRep -> (WordOff,WordOff) sizes_from_SMRep (GenericRep _ ptrs nonptrs _) = (ptrs, nonptrs) sizes_from_SMRep BlackHoleRep = (0, 0) \end{code} @@ -353,7 +406,7 @@ Static closures have an extra ``static link field'' at the end, but we don't bother taking that into account here. \begin{code} -computeSlopSize :: Int -> SMRep -> Bool -> Int +computeSlopSize :: WordOff -> SMRep -> Bool -> WordOff computeSlopSize tot_wds (GenericRep _ _ _ _) True -- Updatable = max 0 (mIN_UPD_SIZE - tot_wds) @@ -370,129 +423,6 @@ computeSlopSize tot_wds BlackHoleRep _ -- Updatable %************************************************************************ %* * -\subsection[layOutDynClosure]{Lay out a closure} -%* * -%************************************************************************ - -\begin{code} -layOutDynClosure, layOutStaticClosure - :: Id -- STG identifier of this closure - -> (a -> PrimRep) -- how to get a PrimRep for the fields - -> [a] -- the "things" being layed out - -> LambdaFormInfo -- what sort of closure it is - -> C_SRT -- its SRT - -> String -- closure description - -> (ClosureInfo, -- info about the closure - [(a, VirtualHeapOffset)]) -- things w/ offsets pinned on them - -layOutDynClosure = layOutClosure False -layOutStaticClosure = layOutClosure True - -layOutStaticNoFVClosure id lf_info srt_info descr - = fst (layOutClosure True id (panic "kind_fn") [] lf_info srt_info descr) - -layOutClosure - :: Bool -- True <=> static closure - -> Id -- STG identifier of this closure - -> (a -> PrimRep) -- how to get a PrimRep for the fields - -> [a] -- the "things" being 
layed out - -> LambdaFormInfo -- what sort of closure it is - -> C_SRT -- its SRT - -> String -- closure description - -> (ClosureInfo, -- info about the closure - [(a, VirtualHeapOffset)]) -- things w/ offsets pinned on them - -layOutClosure is_static id kind_fn things lf_info srt_info descr - = (ClosureInfo { closureName = name, - closureLFInfo = lf_info, - closureSMRep = sm_rep, - closureSRT = srt_info, - closureType = idType id, - closureDescr = descr }, - things_w_offsets) - where - name = idName id - (tot_wds, -- #ptr_wds + #nonptr_wds - ptr_wds, -- #ptr_wds - things_w_offsets) = mkVirtHeapOffsets kind_fn things - sm_rep = chooseSMRep is_static lf_info tot_wds ptr_wds - - -layOutDynConstr, layOutStaticConstr - :: DataCon - -> (a -> PrimRep) - -> [a] - -> (ClosureInfo, - [(a,VirtualHeapOffset)]) - -layOutDynConstr = layOutConstr False -layOutStaticConstr = layOutConstr True - -layOutConstr is_static data_con kind_fn args - = (ConInfo { closureSMRep = sm_rep, - closureCon = data_con }, - things_w_offsets) - where - (tot_wds, -- #ptr_wds + #nonptr_wds - ptr_wds, -- #ptr_wds - things_w_offsets) = mkVirtHeapOffsets kind_fn args - sm_rep = chooseSMRep is_static (mkConLFInfo data_con) tot_wds ptr_wds -\end{code} - -%************************************************************************ -%* * -\subsection[mkStaticClosure]{Make a static closure} -%* * -%************************************************************************ - -Make a static closure, adding on any extra padding needed for CAFs, -and adding a static link field if necessary. 
- -\begin{code} -mkStaticClosure lbl cl_info ccs fields cafrefs - | opt_SccProfilingOn = - CStaticClosure - lbl - cl_info - (mkCCostCentreStack ccs) - all_fields - | otherwise = - CStaticClosure - lbl - cl_info - (panic "absent cc") - all_fields - - where - all_fields = fields ++ padding_wds ++ static_link_field - - upd_reqd = closureUpdReqd cl_info - - -- for the purposes of laying out the static closure, we consider all - -- thunks to be "updatable", so that the static link field is always - -- in the same place. - padding_wds - | not upd_reqd = [] - | otherwise = replicate n (mkIntCLit 0) -- a bunch of 0s - where n = max 0 (mIN_UPD_SIZE - length fields) - - -- We always have a static link field for a thunk, it's used to - -- save the closure's info pointer when we're reverting CAFs - -- (see comment in Storage.c) - static_link_field - | upd_reqd || staticClosureNeedsLink cl_info = [static_link_value] - | otherwise = [] - - -- for a static constructor which has NoCafRefs, we set the - -- static link field to a non-zero value so the garbage - -- collector will ignore it. - static_link_value - | cafrefs = mkIntCLit 0 - | otherwise = mkIntCLit 1 -\end{code} - -%************************************************************************ -%* * \subsection[SMreps]{Choosing SM reps} %* * %************************************************************************ @@ -501,23 +431,23 @@ mkStaticClosure lbl cl_info ccs fields cafrefs chooseSMRep :: Bool -- True <=> static closure -> LambdaFormInfo - -> Int -> Int -- Tot wds, ptr wds + -> WordOff -> WordOff -- Tot wds, ptr wds -> SMRep chooseSMRep is_static lf_info tot_wds ptr_wds = let nonptr_wds = tot_wds - ptr_wds - closure_type = getClosureType is_static tot_wds ptr_wds lf_info + closure_type = getClosureType is_static ptr_wds lf_info in GenericRep is_static ptr_wds nonptr_wds closure_type --- we *do* get non-updatable top-level thunks sometimes. eg. f = g +-- We *do* get non-updatable top-level thunks sometimes. eg. 
f = g -- gets compiled to a jump to g (if g has non-zero arity), instead of -- messing around with update frames and PAPs. We set the closure type -- to FUN_STATIC in this case. -getClosureType :: Bool -> Int -> Int -> LambdaFormInfo -> ClosureType -getClosureType is_static tot_wds ptr_wds lf_info +getClosureType :: Bool -> WordOff -> LambdaFormInfo -> ClosureType +getClosureType is_static ptr_wds lf_info = case lf_info of LFCon con | is_static && ptr_wds == 0 -> ConstrNoCaf | otherwise -> Constr @@ -529,42 +459,6 @@ getClosureType is_static tot_wds ptr_wds lf_info %************************************************************************ %* * -\subsection[mkVirtHeapOffsets]{Assigning heap offsets in a closure} -%* * -%************************************************************************ - -@mkVirtHeapOffsets@ (the heap version) always returns boxed things with -smaller offsets than the unboxed things, and furthermore, the offsets in -the result list - -\begin{code} -mkVirtHeapOffsets :: - (a -> PrimRep) -- To be able to grab kinds; - -- w/ a kind, we can find boxedness - -> [a] -- Things to make offsets for - -> (Int, -- *Total* number of words allocated - Int, -- Number of words allocated for *pointers* - [(a, VirtualHeapOffset)]) - -- Things with their offsets from start of - -- object in order of increasing offset - --- First in list gets lowest offset, which is initial offset + 1. - -mkVirtHeapOffsets kind_fun things - = let (ptrs, non_ptrs) = separateByPtrFollowness kind_fun things - (wds_of_ptrs, ptrs_w_offsets) = mapAccumL computeOffset 0 ptrs - (tot_wds, non_ptrs_w_offsets) = mapAccumL computeOffset wds_of_ptrs non_ptrs - in - (tot_wds, wds_of_ptrs, ptrs_w_offsets ++ non_ptrs_w_offsets) - where - computeOffset wds_so_far thing - = (wds_so_far + (getPrimRepSize . 
kind_fun) thing, - (thing, fixedHdrSize + wds_so_far) - ) -\end{code} - -%************************************************************************ -%* * \subsection[ClosureInfo-4-questions]{Four major questions about @ClosureInfo@} %* * %************************************************************************ @@ -572,13 +466,10 @@ mkVirtHeapOffsets kind_fun things Be sure to see the stg-details notes about these... \begin{code} -nodeMustPointToIt :: LambdaFormInfo -> FCode Bool -nodeMustPointToIt lf_info - - = case lf_info of - LFReEntrant top _ no_fvs _ -> returnFC ( - not no_fvs || -- Certainly if it has fvs we need to point to it - isNotTopLevel top +nodeMustPointToIt :: LambdaFormInfo -> Bool +nodeMustPointToIt (LFReEntrant top _ no_fvs _) + = not no_fvs || -- Certainly if it has fvs we need to point to it + isNotTopLevel top -- If it is not top level we will point to it -- We can have a \r closure with no_fvs which -- is not top level as special case cgRhsClosure @@ -587,9 +478,8 @@ nodeMustPointToIt lf_info -- For lex_profiling we also access the cost centre for a -- non-inherited function i.e. not top level -- the not top case above ensures this is ok. - ) - LFCon _ -> returnFC True +nodeMustPointToIt (LFCon _) = True -- Strictly speaking, the above two don't need Node to point -- to it if the arity = 0. But this is a *really* unlikely @@ -602,9 +492,8 @@ nodeMustPointToIt lf_info -- having Node point to the result of an update. SLPJ -- 27/11/92. 
- LFThunk _ no_fvs updatable NonStandardThunk _ - -> returnFC (updatable || not no_fvs || opt_SccProfilingOn) - +nodeMustPointToIt (LFThunk _ no_fvs updatable NonStandardThunk _) + = updatable || not no_fvs || opt_SccProfilingOn -- For the non-updatable (single-entry case): -- -- True if has fvs (in which case we need access to them, and we @@ -612,15 +501,12 @@ nodeMustPointToIt lf_info -- or profiling (in which case we need to recover the cost centre -- from inside it) - LFThunk _ no_fvs updatable some_standard_form_thunk _ - -> returnFC True - -- Node must point to any standard-form thunk. +nodeMustPointToIt (LFThunk _ no_fvs updatable some_standard_form_thunk _) + = True -- Node must point to any standard-form thunk - LFUnknown _ -> returnFC True - LFBlackHole _ -> returnFC True - -- BH entry may require Node to point - - LFLetNoEscape _ -> returnFC False +nodeMustPointToIt (LFUnknown _) = True +nodeMustPointToIt (LFBlackHole _) = True -- BH entry may require Node to point +nodeMustPointToIt (LFLetNoEscape _) = False \end{code} The entry conventions depend on the type of closure being entered, @@ -652,7 +538,7 @@ When black-holing, single-entry closures could also be entered via node (rather than directly) to catch double-entry. \begin{code} -data CallingConvention +data CallMethod = EnterIt -- no args, not a function | JumpToIt CLabel -- no args, not a function, but we @@ -662,96 +548,72 @@ data CallingConvention -- zero args to apply to it, so just -- return it. + | ReturnCon DataCon -- It's a data constructor, just return it + | SlowCall -- Unknown fun, or known fun with -- too few args. 
| DirectEntry -- Jump directly, with args in regs CLabel -- The code label Int -- Its arity - [MagicId] -- Its register assignments - -- (possibly empty) - -getEntryConvention :: Name -- Function being applied - -> LambdaFormInfo -- Its info - -> [PrimRep] -- Available arguments - -> FCode CallingConvention - -getEntryConvention name lf_info arg_kinds - = nodeMustPointToIt lf_info `thenFC` \ node_points -> - returnFC ( - - -- if we're parallel, then we must always enter via node. The reason - -- is that the closure may have been fetched since we allocated it. - - if (node_points && opt_Parallel) then EnterIt else - - -- Commented out by SDM after futher thoughts: - -- - the only closure type that can be blackholed is a thunk - -- - we already enter thunks via node (unless the closure is - -- non-updatable, in which case why is it being re-entered...) - - case lf_info of - - LFReEntrant _ arity _ _ -> - if null arg_kinds then - if arity == 0 then - EnterIt -- a non-updatable thunk - else - ReturnIt -- no args at all - else if listLengthCmp arg_kinds arity == LT then - SlowCall -- not enough args - else - DirectEntry (mkEntryLabel name) arity arg_regs - where - (arg_regs, _) = assignRegs [node] (take arity arg_kinds) - -- we don't use node to pass args now (SDM) - - LFCon con - | isNullaryDataCon con - -- a real constructor. Don't bother entering it, just jump - -- to the constructor entry code directly. - -> --false:ASSERT (null arg_kinds) - -- Should have no args (meaning what?) - JumpToIt (mkStaticConEntryLabel (dataConName con)) - - | otherwise {- not nullary -} - -> --false:ASSERT (null arg_kinds) - -- Should have no args (meaning what?) 
- JumpToIt (mkConEntryLabel (dataConName con)) - - LFThunk _ _ updatable std_form_info is_fun - -- must always "call" a function-typed thing, cannot just enter it - | is_fun -> SlowCall - | updatable || opt_DoTickyProfiling -- to catch double entry - || opt_SMP -- always enter via node on SMP, since the + +getCallMethod :: Name -- Function being applied + -> LambdaFormInfo -- Its info + -> Int -- Number of available arguments + -> CallMethod + +getCallMethod name lf_info n_args + | nodeMustPointToIt lf_info && opt_Parallel + = -- If we're parallel, then we must always enter via node. + -- The reason is that the closure may have been + -- fetched since we allocated it. + EnterIt + +getCallMethod name (LFReEntrant _ arity _ _) n_args + | n_args == 0 = ASSERT( arity /= 0 ) + ReturnIt -- No args at all + | n_args < arity = SlowCall -- Not enough args + | otherwise = DirectEntry (enterIdLabel name) arity + +getCallMethod name (LFCon con) n_args + = ASSERT( n_args == 0 ) + ReturnCon con + +getCallMethod name (LFThunk _ _ updatable std_form_info is_fun) n_args + | is_fun -- Must always "call" a function-typed + = SlowCall -- thing, cannot just enter it [in eval/apply, the entry code + -- is the fast-entry code] + + | updatable || opt_DoTickyProfiling -- to catch double entry + || opt_SMP -- Always enter via node on SMP, since the -- thunk might have been blackholed in the -- meantime. 
- -> ASSERT(null arg_kinds) EnterIt - | otherwise - -> ASSERT(null arg_kinds) - JumpToIt (thunkEntryLabel name std_form_info updatable) - - LFUnknown True -> SlowCall -- might be a function - LFUnknown False -> ASSERT2 (null arg_kinds, ppr name <+> ppr arg_kinds) EnterIt -- not a function - - LFBlackHole _ -> SlowCall -- Presumably the black hole has by now - -- been updated, but we don't know with - -- what, so we slow call it - - LFLetNoEscape 0 - -> JumpToIt (mkReturnPtLabel (nameUnique name)) - - LFLetNoEscape arity - -> if (not (arg_kinds `lengthIs` arity)) then pprPanic "let-no-escape: " (ppr name <+> ppr arity) else - DirectEntry (mkReturnPtLabel (nameUnique name)) arity arg_regs - where - (arg_regs, _) = assignRegs [] arg_kinds - -- node never points to a LetNoEscape, see above --SDM - --live_regs = if node_points then [node] else [] - ) + = ASSERT( n_args == 0 ) EnterIt -blackHoleOnEntry :: ClosureInfo -> Bool + | otherwise -- Jump direct to code for single-entry thunks + = ASSERT( n_args == 0 ) + JumpToIt (thunkEntryLabel name std_form_info updatable) + +getCallMethod name (LFUnknown True) n_args + = SlowCall -- might be a function + +getCallMethod name (LFUnknown False) n_args + = ASSERT2 ( n_args == 0, ppr name <+> ppr n_args ) + EnterIt -- Not a function +getCallMethod name (LFBlackHole _) n_args + = SlowCall -- Presumably the black hole has by now + -- been updated, but we don't know with + -- what, so we slow call it + +getCallMethod name (LFLetNoEscape 0) n_args + = JumpToIt (enterReturnPtLabel (nameUnique name)) + +getCallMethod name (LFLetNoEscape arity) n_args + | n_args == arity = DirectEntry (enterReturnPtLabel (nameUnique name)) arity + | otherwise = pprPanic "let-no-escape: " (ppr name <+> ppr arity) + +blackHoleOnEntry :: ClosureInfo -> Bool -- Static closures are never themselves black-holed. 
-- Updatable ones will be overwritten with a CAFList cell, which points to a -- black hole; @@ -777,11 +639,14 @@ blackHoleOnEntry (ClosureInfo { closureLFInfo = lf_info, closureSMRep = rep }) other -> panic "blackHoleOnEntry" -- Should never happen isStandardFormThunk :: LambdaFormInfo -> Bool - isStandardFormThunk (LFThunk _ _ _ (SelectorThunk _) _) = True isStandardFormThunk (LFThunk _ _ _ (ApThunk _) _) = True isStandardFormThunk other_lf_info = False +isKnownFun :: LambdaFormInfo -> Bool +isKnownFun (LFReEntrant _ _ _ _) = True +isKnownFun (LFLetNoEscape _) = True +isKnownFun _ = False \end{code} ----------------------------------------------------------------------------- @@ -908,10 +773,9 @@ closureReEntrant :: ClosureInfo -> Bool closureReEntrant (ClosureInfo { closureLFInfo = LFReEntrant _ _ _ _ }) = True closureReEntrant other_closure = False -closureSemiTag :: ClosureInfo -> Maybe Int -closureSemiTag (ConInfo { closureCon = data_con }) - = Just (dataConTag data_con - fIRST_TAG) -closureSemiTag _ = Nothing +isConstrClosure_maybe :: ClosureInfo -> Maybe DataCon +isConstrClosure_maybe (ConInfo { closureCon = data_con }) = Just data_con +isConstrClosure_maybe _ = Nothing closureFunInfo :: ClosureInfo -> Maybe (Int, ArgDescr) closureFunInfo (ClosureInfo { closureLFInfo = LFReEntrant _ arity _ arg_desc}) @@ -948,8 +812,7 @@ infoTableLabelFromCI (ClosureInfo { closureName = name, LFThunk{} -> mkInfoTableLabel name - LFReEntrant _ _ _ (ArgGen _ _) -> mkInfoTableLabel name - LFReEntrant _ _ _ _ -> mkInfoTableLabel name + LFReEntrant _ _ _ _ -> mkInfoTableLabel name other -> panic "infoTableLabelFromCI" @@ -964,50 +827,37 @@ mkConInfoPtr con rep where name = dataConName con -mkConEntryPtr :: DataCon -> SMRep -> CLabel -mkConEntryPtr con rep - | isStaticRep rep = mkStaticConEntryLabel (dataConName con) - | otherwise = mkConEntryLabel (dataConName con) - closureLabelFromCI (ClosureInfo { closureName = nm }) = mkClosureLabel nm closureLabelFromCI _ = panic 
"closureLabelFromCI" -entryLabelFromCI :: ClosureInfo -> CLabel -entryLabelFromCI (ClosureInfo { closureName = id, - closureLFInfo = lf_info, - closureSMRep = rep }) - = case lf_info of - LFThunk _ _ upd_flag std_form_info _ -> - thunkEntryLabel id std_form_info upd_flag - other -> mkEntryLabel id - -entryLabelFromCI (ConInfo { closureCon = con, closureSMRep = rep }) - = mkConEntryPtr con rep - - -- thunkEntryLabel is a local help function, not exported. It's used from both --- entryLabelFromCI and getEntryConvention. +-- entryLabelFromCI and getCallMethod. thunkEntryLabel thunk_id (ApThunk arity) is_updatable - = mkApEntryLabel is_updatable arity + = enterApLabel is_updatable arity thunkEntryLabel thunk_id (SelectorThunk offset) upd_flag - = mkSelectorEntryLabel upd_flag offset + = enterSelectorLabel upd_flag offset thunkEntryLabel thunk_id _ is_updatable - = mkEntryLabel thunk_id -\end{code} + = enterIdLabel thunk_id -\begin{code} -allocProfilingMsg :: ClosureInfo -> FastString -allocProfilingMsg ConInfo{} = FSLIT("TICK_ALLOC_CON") -allocProfilingMsg ClosureInfo{ closureLFInfo = lf_info } - = case lf_info of - LFReEntrant _ _ _ _ -> FSLIT("TICK_ALLOC_FUN") - LFThunk _ _ True _ _ -> FSLIT("TICK_ALLOC_UP_THK") -- updatable - LFThunk _ _ False _ _ -> FSLIT("TICK_ALLOC_SE_THK") -- nonupdatable - LFBlackHole _ -> FSLIT("TICK_ALLOC_BH") - _ -> panic "allocProfilingMsg" +enterApLabel is_updatable arity + | tablesNextToCode = mkApInfoTableLabel is_updatable arity + | otherwise = mkApEntryLabel is_updatable arity + +enterSelectorLabel upd_flag offset + | tablesNextToCode = mkSelectorInfoLabel upd_flag offset + | otherwise = mkSelectorEntryLabel upd_flag offset + +enterIdLabel id + | tablesNextToCode = mkInfoTableLabel id + | otherwise = mkEntryLabel id + +enterReturnPtLabel name + | tablesNextToCode = mkReturnInfoLabel name + | otherwise = mkReturnPtLabel name \end{code} + We need a black-hole closure info to pass to @allocDynClosure@ when we want to allocate the black 
hole on entry to a CAF. These are the only ways to build an LFBlackHole, maintaining the invariant that it really @@ -1051,7 +901,12 @@ The type is determined from the type information stored with the @Id@ in the closure info using @closureTypeDescr@. \begin{code} -closureTypeDescr :: ClosureInfo -> String +closureValDescr, closureTypeDescr :: ClosureInfo -> String +closureValDescr (ClosureInfo {closureDescr = descr}) + = descr +closureValDescr (ConInfo {closureCon = con}) + = occNameUserString (getOccName con) + closureTypeDescr (ClosureInfo { closureType = ty }) = getTyDescription ty closureTypeDescr (ConInfo { closureCon = data_con }) @@ -1079,268 +934,4 @@ getPredTyDescription (ClassP cl tys) = getOccString cl getPredTyDescription (IParam ip ty) = getOccString (ipNameName ip) \end{code} -%************************************************************************ -%* * -\subsection{Making argument bitmaps} -%* * -%************************************************************************ - -\begin{code} --- bring in ARG_P, ARG_N, etc. -#include "../includes/StgFun.h" - -data ArgDescr - = ArgSpec - !Int -- ARG_P, ARG_N, ... 
- | ArgGen - CLabel -- label for a slow-entry point - Liveness -- the arg bitmap: describes pointedness of arguments - -mkArgDescr :: Name -> [Id] -> ArgDescr -mkArgDescr nm args = argDescr nm (filter nonVoidRep (map idPrimRep args)) - where nonVoidRep VoidRep = False - nonVoidRep _ = True - -argDescr nm [PtrRep] = ArgSpec ARG_P -argDescr nm [FloatRep] = ArgSpec ARG_F -argDescr nm [DoubleRep] = ArgSpec ARG_D -argDescr nm [r] | is64BitRep r = ArgSpec ARG_L -argDescr nm [r] | isNonPtrRep r = ArgSpec ARG_N - -argDescr nm [r1,r2] | isNonPtrRep r1 && isNonPtrRep r2 = ArgSpec ARG_NN -argDescr nm [r1,PtrRep] | isNonPtrRep r1 = ArgSpec ARG_NP -argDescr nm [PtrRep,r1] | isNonPtrRep r1 = ArgSpec ARG_PN -argDescr nm [PtrRep,PtrRep] = ArgSpec ARG_PP - -argDescr nm [r1,r2,r3] | isNonPtrRep r1 && isNonPtrRep r2 && isNonPtrRep r3 = ArgSpec ARG_NNN -argDescr nm [r1,r2,PtrRep] | isNonPtrRep r1 && isNonPtrRep r2 = ArgSpec ARG_NNP -argDescr nm [r1,PtrRep,r2] | isNonPtrRep r1 && isNonPtrRep r2 = ArgSpec ARG_NPN -argDescr nm [r1,PtrRep,PtrRep] | isNonPtrRep r1 = ArgSpec ARG_NPP -argDescr nm [PtrRep,r1,r2] | isNonPtrRep r1 && isNonPtrRep r2 = ArgSpec ARG_PNN -argDescr nm [PtrRep,r1,PtrRep] | isNonPtrRep r1 = ArgSpec ARG_PNP -argDescr nm [PtrRep,PtrRep,r1] | isNonPtrRep r1 = ArgSpec ARG_PPN -argDescr nm [PtrRep,PtrRep,PtrRep] = ArgSpec ARG_PPP - -argDescr nm [PtrRep,PtrRep,PtrRep,PtrRep] = ArgSpec ARG_PPPP -argDescr nm [PtrRep,PtrRep,PtrRep,PtrRep,PtrRep] = ArgSpec ARG_PPPPP -argDescr nm [PtrRep,PtrRep,PtrRep,PtrRep,PtrRep,PtrRep] = ArgSpec ARG_PPPPPP - -argDescr name reps = ArgGen (mkSlowEntryLabel name) liveness - where bitmap = argBits reps - lbl = mkBitmapLabel name - liveness = Liveness lbl (length bitmap) (mkBitmap bitmap) - -argBits [] = [] -argBits (rep : args) - | isFollowableRep rep = False : argBits args - | otherwise = take (getPrimRepSize rep) (repeat True) ++ argBits args -\end{code} - -%************************************************************************ -%* * 
-\subsection{Generating info tables} -%* * -%************************************************************************ - -Here we make a concrete info table, represented as a list of CAddrMode -(it can't be simply a list of Word, because the SRT field is -represented by a label+offset expression). - -\begin{code} -mkInfoTable :: ClosureInfo -> [CAddrMode] -mkInfoTable cl_info - | tablesNextToCode = extra_bits ++ std_info - | otherwise = std_info ++ extra_bits - where - std_info = mkStdInfoTable entry_amode - ty_descr_amode cl_descr_amode cl_type srt_len layout_amode - - entry_amode = CLbl (entryLabelFromCI cl_info) CodePtrRep - - closure_descr = - case cl_info of - ClosureInfo { closureDescr = descr } -> descr - ConInfo { closureCon = con } -> occNameUserString (getOccName con) - - ty_descr_amode = CLit (MachStr (mkFastString (closureTypeDescr cl_info))) - cl_descr_amode = CLit (MachStr (mkFastString closure_descr)) - - cl_type = getSMRepClosureTypeInt (closureSMRep cl_info) - - srt = closureSRT cl_info - needs_srt = needsSRT srt - - semi_tag = closureSemiTag cl_info - is_con = isJust semi_tag - - (srt_label,srt_len) - | Just tag <- semi_tag = (mkIntCLit 0, fromIntegral tag) -- constructor - | otherwise = - case srt of - NoC_SRT -> (mkIntCLit 0, 0) - C_SRT lbl off bitmap -> - (CAddr (CIndex (CLbl lbl DataPtrRep) (mkIntCLit off) WordRep), - bitmap) - - ptrs = closurePtrsSize cl_info - nptrs = size - ptrs - size = closureNonHdrSize cl_info - - layout_info :: StgWord -#ifdef WORDS_BIGENDIAN - layout_info = (fromIntegral ptrs `shiftL` hALF_WORD) .|. fromIntegral nptrs -#else - layout_info = (fromIntegral ptrs) .|. 
(fromIntegral nptrs `shiftL` hALF_WORD) -#endif - - layout_amode = mkWordCLit layout_info - - extra_bits - | is_fun = fun_extra_bits - | is_con = [] - | needs_srt = [srt_label] - | otherwise = [] - - maybe_fun_stuff = closureFunInfo cl_info - is_fun = isJust maybe_fun_stuff - (Just (arity, arg_descr)) = maybe_fun_stuff - - fun_extra_bits - | tablesNextToCode = reg_fun_extra_bits - | otherwise = reverse reg_fun_extra_bits - - reg_fun_extra_bits - | ArgGen slow_lbl liveness <- arg_descr - = [ - CLbl slow_lbl CodePtrRep, - livenessToAddrMode liveness, - srt_label, - fun_amode - ] - | needs_srt = [srt_label, fun_amode] - | otherwise = [fun_amode] - -#ifdef WORDS_BIGENDIAN - fun_desc = (fromIntegral fun_type `shiftL` hALF_WORD) .|. fromIntegral arity -#else - fun_desc = (fromIntegral fun_type) .|. (fromIntegral arity `shiftL` hALF_WORD) -#endif - - fun_amode = mkWordCLit fun_desc - - fun_type = case arg_descr of - ArgSpec n -> n - ArgGen _ (Liveness _ size _) - | size <= mAX_SMALL_BITMAP_SIZE -> ARG_GEN - | otherwise -> ARG_GEN_BIG - --- Return info tables come in two flavours: direct returns and --- vectored returns. 
- -mkRetInfoTable :: CLabel -> C_SRT -> Liveness -> [CAddrMode] -mkRetInfoTable entry_lbl srt liveness - = mkBitmapInfoTable (CLbl entry_lbl CodePtrRep) srt liveness [] - -mkVecInfoTable :: [CAddrMode] -> C_SRT -> Liveness -> [CAddrMode] -mkVecInfoTable vector srt liveness - = mkBitmapInfoTable zero_amode srt liveness vector - -mkBitmapInfoTable - :: CAddrMode - -> C_SRT -> Liveness - -> [CAddrMode] - -> [CAddrMode] -mkBitmapInfoTable entry_amode srt liveness vector - | tablesNextToCode = extra_bits ++ std_info - | otherwise = std_info ++ extra_bits - where - std_info = mkStdInfoTable entry_amode zero_amode zero_amode - cl_type srt_len liveness_amode - - liveness_amode = livenessToAddrMode liveness - - (srt_label,srt_len) = - case srt of - NoC_SRT -> (mkIntCLit 0, 0) - C_SRT lbl off bitmap -> - (CAddr (CIndex (CLbl lbl DataPtrRep) (mkIntCLit off) WordRep), - bitmap) - - cl_type = case (null vector, isBigLiveness liveness) of - (True, True) -> rET_BIG - (True, False) -> rET_SMALL - (False, True) -> rET_VEC_BIG - (False, False) -> rET_VEC_SMALL - - srt_bit | needsSRT srt || not (null vector) = [srt_label] - | otherwise = [] - - extra_bits | tablesNextToCode = reverse vector ++ srt_bit - | otherwise = srt_bit ++ vector - --- The standard bits of an info table. This part of the info table --- corresponds to the StgInfoTable type defined in InfoTables.h. 
- -mkStdInfoTable - :: CAddrMode -- entry label - -> CAddrMode -- closure type descr (profiling) - -> CAddrMode -- closure descr (profiling) - -> Int -- closure type - -> StgHalfWord -- SRT length - -> CAddrMode -- layout field - -> [CAddrMode] -mkStdInfoTable entry_lbl type_descr closure_descr cl_type srt_len layout_amode - = std_info - where - std_info - | tablesNextToCode = std_info' - | otherwise = entry_lbl : std_info' - - std_info' = - -- par info - prof_info ++ - -- ticky info - -- debug info - [layout_amode] ++ - CLit (MachWord (fromIntegral type_info)) : - [] - - prof_info - | opt_SccProfilingOn = [ type_descr, closure_descr ] - | otherwise = [] - - -- sigh: building up the info table is endian-dependent. - -- ToDo: do this using .byte and .word directives. - type_info :: StgWord -#ifdef WORDS_BIGENDIAN - type_info = (fromIntegral cl_type `shiftL` hALF_WORD) .|. - (fromIntegral srt_len) -#else - type_info = (fromIntegral cl_type) .|. - (fromIntegral srt_len `shiftL` hALF_WORD) -#endif - -isBigLiveness (Liveness _ size _) = size > mAX_SMALL_BITMAP_SIZE - -livenessToAddrMode :: Liveness -> CAddrMode -livenessToAddrMode (Liveness lbl size bits) - | size <= mAX_SMALL_BITMAP_SIZE = small - | otherwise = CLbl lbl DataPtrRep - where - small = mkWordCLit (fromIntegral size .|. 
(small_bits `shiftL` bITMAP_BITS_SHIFT)) - small_bits = case bits of - [] -> 0 - [b] -> fromIntegral b - _ -> panic "livenessToAddrMode" - -zero_amode = mkIntCLit 0 - --- IA64 mangler doesn't place tables next to code -tablesNextToCode :: Bool -#ifdef ia64_TARGET_ARCH -tablesNextToCode = False -#else -tablesNextToCode = not opt_Unregisterised -#endif -\end{code} diff --git a/ghc/compiler/codeGen/CodeGen.lhs b/ghc/compiler/codeGen/CodeGen.lhs index 1c817aef51..d7f2f70c43 100644 --- a/ghc/compiler/codeGen/CodeGen.lhs +++ b/ghc/compiler/codeGen/CodeGen.lhs @@ -19,39 +19,41 @@ module CodeGen ( codeGen ) where #include "HsVersions.h" +import DriverState ( v_Build_tag, v_MainModIs ) + -- Kludge (??) so that CgExpr is reached via at least one non-SOURCE -- import. Before, that wasn't the case, and CM therefore didn't -- bother to compile it. import CgExpr ( {-NOTHING!-} ) -- DO NOT DELETE THIS IMPORT +import CgProf +import CgMonad +import CgBindery ( CgIdInfo, addBindC, addBindsC, getCgIdInfo, + cgIdInfoId ) +import CgClosure ( cgTopRhsClosure ) +import CgCon ( cgTopRhsCon, cgTyCon ) +import CgUtils ( cmmRegOffW, emitRODataLits, cmmNeWord, emitRtsCall ) + +import CLabel ( mkSRTLabel, mkClosureLabel, moduleRegdLabel, + mkPlainModuleInitLabel, mkModuleInitLabel ) +import Cmm +import CmmUtils ( zeroCLit, mkIntCLit, mkLblExpr ) +import PprCmm ( pprCmms ) +import MachOp ( wordRep, MachHint(..) 
) -import DriverState ( v_Build_tag, v_MainModIs ) import StgSyn -import CgMonad -import AbsCSyn import PrelNames ( gHC_PRIM, rOOT_MAIN, mAIN_Name, pREL_TOP_HANDLER ) -import CLabel ( mkSRTLabel, mkClosureLabel, - mkPlainModuleInitLabel, mkModuleInitLabel ) -import PprAbsC ( dumpRealC ) -import AbsCUtils ( mkAbstractCs, flattenAbsC ) -import CgBindery ( CgIdInfo, addBindC, addBindsC, getCAddrModeAndInfo ) -import CgClosure ( cgTopRhsClosure ) -import CgCon ( cgTopRhsCon ) -import CgConTbls ( genStaticConBits ) -import ClosureInfo ( mkClosureLFInfo ) -import CmdLineOpts ( DynFlags, DynFlag(..), - opt_SccProfilingOn, opt_EnsureSplittableC ) +import CmdLineOpts ( DynFlags, DynFlag(..), opt_EnsureSplittableC, + opt_SccProfilingOn ) + import HscTypes ( ForeignStubs(..), TypeEnv, typeEnvTyCons ) import CostCentre ( CollectedCCs ) import Id ( Id, idName, setIdName ) import Name ( nameSrcLoc, nameOccName, nameUnique, isInternalName, mkExternalName ) import OccName ( mkLocalOcc ) -import PrimRep ( PrimRep(..) ) import TyCon ( isDataTyCon ) import Module ( Module, mkModuleName ) -import BasicTypes ( TopLevelFlag(..) ) -import UniqSupply ( mkSplitUniqSupply ) import ErrUtils ( dumpIfSet_dyn, showPass ) -import Panic ( assertPanic ) +import Panic ( assertPanic, trace ) import qualified Module ( moduleName ) #ifdef DEBUG @@ -69,44 +71,37 @@ codeGen :: DynFlags -> [Module] -- directly-imported modules -> CollectedCCs -- (Local/global) cost-centres needing declaring/registering. 
-> [(StgBinding,[(Id,[Id])])] -- Bindings to convert, with SRTs - -> IO AbstractC -- Output + -> IO [Cmm] -- Output codeGen dflags this_mod type_env foreign_stubs imported_mods cost_centre_info stg_binds = do - showPass dflags "CodeGen" - fl_uniqs <- mkSplitUniqSupply 'f' - way <- readIORef v_Build_tag - mb_main_mod <- readIORef v_MainModIs - - let - tycons = typeEnvTyCons type_env - data_tycons = filter isDataTyCon tycons - - mapM_ (\x -> seq x (return ())) data_tycons - - let - - cinfo = MkCompInfo this_mod - - datatype_stuff = genStaticConBits cinfo data_tycons - code_stuff = initC cinfo (mapCs cgTopBinding stg_binds) - init_stuff = mkModuleInit way cost_centre_info - this_mod mb_main_mod - foreign_stubs imported_mods - - abstractC = mkAbstractCs [ maybeSplitCode, - init_stuff, - code_stuff, - datatype_stuff] + { showPass dflags "CodeGen" + ; way <- readIORef v_Build_tag + ; mb_main_mod <- readIORef v_MainModIs + + ; let tycons = typeEnvTyCons type_env + data_tycons = filter isDataTyCon tycons + +-- Why? +-- ; mapM_ (\x -> seq x (return ())) data_tycons + + ; code_stuff <- initC this_mod $ do + { cmm_binds <- mapM (getCmm . cgTopBinding) stg_binds + ; cmm_tycons <- mapM cgTyCon data_tycons + ; cmm_init <- getCmm (mkModuleInit way cost_centre_info + this_mod mb_main_mod + foreign_stubs imported_mods) + ; return (cmm_binds ++ concat cmm_tycons ++ [cmm_init]) + } -- Put datatype_stuff after code_stuff, because the -- datatype closure table (for enumeration types) to -- (say) PrelBase_True_closure, which is defined in -- code_stuff - dumpIfSet_dyn dflags Opt_D_dump_absC "Abstract C" (dumpRealC abstractC) + ; dumpIfSet_dyn dflags Opt_D_dump_cmm "Cmm" (pprCmms code_stuff) - return $! 
flattenAbsC fl_uniqs abstractC + ; return code_stuff } \end{code} %************************************************************************ @@ -115,6 +110,43 @@ codeGen dflags this_mod type_env foreign_stubs imported_mods %* * %************************************************************************ +/* ----------------------------------------------------------------------------- + Module initialisation + + The module initialisation code looks like this, roughly: + + FN(__stginit_Foo) { + JMP_(__stginit_Foo_1_p) + } + + FN(__stginit_Foo_1_p) { + ... + } + + We have one version of the init code with a module version and the + 'way' attached to it. The version number helps to catch cases + where modules are not compiled in dependency order before being + linked: if a module has been compiled since any modules which depend on + it, then the latter modules will refer to a different version in their + init blocks and a link error will ensue. + + The 'way' suffix helps to catch cases where modules compiled in different + ways are linked together (eg. profiled and non-profiled). + + We provide a plain, unadorned, version of the module init code + which just jumps to the version with the label and way attached. The + reason for this is that when using foreign exports, the caller of + startupHaskell() must supply the name of the init function for the "top" + module in the program, and we don't want to require that this name + has the version and way info appended to it. 
+ -------------------------------------------------------------------------- */ + +We initialise the module tree by keeping a work-stack, + * pointed to by Sp + * that grows downward + * Sp points to the last occupied slot + + \begin{code} mkModuleInit :: String -- the "way" @@ -123,61 +155,95 @@ mkModuleInit -> Maybe String -- Just m ==> we have flag: -main-is Foo.baz -> ForeignStubs -> [Module] - -> AbstractC + -> Code mkModuleInit way cost_centre_info this_mod mb_main_mod foreign_stubs imported_mods - = let - (cc_decls, cc_regs) = mkCostCentreStuff cost_centre_info + = do { - register_foreign_exports - = case foreign_stubs of - NoStubs -> [] - ForeignStubs _ _ _ fe_bndrs -> map mk_export_register fe_bndrs + -- Allocate the static boolean that records if this + -- module has been registered already + ; emitData Data [CmmDataLabel moduleRegdLabel, + CmmStaticLit zeroCLit] - mk_export_register bndr - = CMacroStmt REGISTER_FOREIGN_EXPORT [lbl] - where - lbl = CLbl (mkClosureLabel (idName bndr)) PtrRep - -- we don't want/need to init GHC.Prim, so filter it out + ; emitSimpleProc real_init_lbl $ do + { -- The return-code pops the work stack by + -- incrementing Sp, and then jumpd to the popped item + ret_blk <- forkLabelledCode $ stmtsC + [ CmmAssign spReg (cmmRegOffW spReg 1) + , CmmJump (CmmLoad (cmmRegOffW spReg (-1)) wordRep) [] ] - mk_import_register mod - | mod == gHC_PRIM = AbsCNop - | otherwise = CMacroStmt REGISTER_IMPORT [ - CLbl (mkModuleInitLabel mod way) AddrRep - ] + ; init_blk <- forkLabelledCode $ do + { mod_init_code; stmtC (CmmBranch ret_blk) } + + ; stmtC (CmmCondBranch (cmmNeWord (CmmLit zeroCLit) mod_reg_val) + ret_blk) + ; stmtC (CmmBranch init_blk) + } - extra_imported_mods - | Module.moduleName this_mod == main_mod_name = [ pREL_TOP_HANDLER ] - | otherwise = [ ] - register_mod_imports = - map mk_import_register (imported_mods ++ extra_imported_mods) + -- Make the "plain" procedure jump to the "real" init procedure + ; emitSimpleProc 
plain_init_lbl jump_to_init -- When compiling the module in which the 'main' function lives, + -- (that is, Module.moduleName this_mod == main_mod_name) -- we inject an extra stg_init procedure for stg_init_ZCMain, for the -- RTS to invoke. We must consult the -main-is flag in case the -- user specified a different function to Main.main - main_mod_name = case mb_main_mod of - Just mod_name -> mkModuleName mod_name - Nothing -> mAIN_Name - main_init_block - | Module.moduleName this_mod /= main_mod_name - = AbsCNop -- The normal case - | otherwise -- this_mod contains the main function - = CCodeBlock (mkPlainModuleInitLabel rOOT_MAIN) - (CJump (CLbl (mkPlainModuleInitLabel this_mod) CodePtrRep)) - - in - mkAbstractCs [ - cc_decls, - CModuleInitBlock (mkPlainModuleInitLabel this_mod) - (mkModuleInitLabel this_mod way) - (mkAbstractCs (register_foreign_exports ++ - cc_regs : - register_mod_imports)), - main_init_block - ] + ; whenC (Module.moduleName this_mod == main_mod_name) + (emitSimpleProc plain_main_init_lbl jump_to_init) + } + where + plain_init_lbl = mkPlainModuleInitLabel this_mod + real_init_lbl = mkModuleInitLabel this_mod way + plain_main_init_lbl = mkPlainModuleInitLabel rOOT_MAIN + + jump_to_init = stmtC (CmmJump (mkLblExpr real_init_lbl) []) + + mod_reg_val = CmmLoad (mkLblExpr moduleRegdLabel) wordRep + + main_mod_name = case mb_main_mod of + Just mod_name -> mkModuleName mod_name + Nothing -> mAIN_Name + + -- Main refers to GHC.TopHandler.runIO, so make sure we call the + -- init function for GHC.TopHandler. 
+ extra_imported_mods + | Module.moduleName this_mod == main_mod_name = [pREL_TOP_HANDLER] + | otherwise = [] + + mod_init_code = do + { -- Set mod_reg to 1 to record that we've been here + stmtC (CmmStore (mkLblExpr moduleRegdLabel) (CmmLit (mkIntCLit 1))) + + -- Now do local stuff + ; registerForeignExports foreign_stubs + ; initCostCentres cost_centre_info + ; mapCs (registerModuleImport way) (imported_mods++extra_imported_mods) + } + + +----------------------- +registerModuleImport :: String -> Module -> Code +registerModuleImport way mod + | mod == gHC_PRIM + = nopC + | otherwise -- Push the init procedure onto the work stack + = stmtsC [ CmmAssign spReg (cmmRegOffW spReg (-1)) + , CmmStore (CmmReg spReg) (mkLblExpr (mkModuleInitLabel mod way)) ] + +----------------------- +registerForeignExports :: ForeignStubs -> Code +registerForeignExports NoStubs + = nopC +registerForeignExports (ForeignStubs _ _ _ fe_bndrs) + = mapM_ mk_export_register fe_bndrs + where + mk_export_register bndr + = emitRtsCall SLIT("getStablePtr") + [ (CmmLit (CmmLabel (mkClosureLabel (idName bndr))), PtrHint) ] \end{code} + + Cost-centre profiling: Besides the usual stuff, we must produce declarations for the cost-centres defined in this module; @@ -185,28 +251,16 @@ declarations for the cost-centres defined in this module; code-generator.) 
\begin{code} -mkCostCentreStuff (local_CCs, extern_CCs, singleton_CCSs) - | not opt_SccProfilingOn = (AbsCNop, AbsCNop) - | otherwise = - ( mkAbstractCs ( - map (CCostCentreDecl True) local_CCs ++ - map (CCostCentreDecl False) extern_CCs ++ - map CCostCentreStackDecl singleton_CCSs), - mkAbstractCs (mkCcRegister local_CCs singleton_CCSs) - ) - where - mkCcRegister ccs cc_stacks - = let - register_ccs = mkAbstractCs (map mk_register ccs) - register_cc_stacks = mkAbstractCs (map mk_register_ccs cc_stacks) - in - [ register_ccs, register_cc_stacks ] - where - mk_register cc - = CCallProfCCMacro FSLIT("REGISTER_CC") [mkCCostCentre cc] - - mk_register_ccs ccs - = CCallProfCCMacro FSLIT("REGISTER_CCS") [mkCCostCentreStack ccs] +initCostCentres :: CollectedCCs -> Code +-- Emit the declarations, and return code to register them +initCostCentres (local_CCs, ___extern_CCs, singleton_CCSs) + | not opt_SccProfilingOn = nopC + | otherwise + = do { mapM_ emitCostCentreDecl local_CCs + ; mapM_ emitCostCentreStackDecl singleton_CCSs + ; mapM_ emitRegisterCC local_CCs + ; mapM_ emitRegisterCCS singleton_CCSs + } \end{code} %************************************************************************ @@ -228,44 +282,37 @@ variable. 
\begin{code} cgTopBinding :: (StgBinding,[(Id,[Id])]) -> Code cgTopBinding (StgNonRec id rhs, srts) - = absC maybeSplitCode `thenC` - maybeExternaliseId id `thenFC` \ id' -> - mapM_ (mkSRT [id']) srts `thenC` - cgTopRhs id' rhs `thenFC` \ (id, info) -> - addBindC id info `thenC` - -- Add the un-externalised Id to the envt, so we - -- find it when we look up occurrences - nopC + = do { id' <- maybeExternaliseId id + ; mapM_ (mkSRT [id']) srts + ; (id,info) <- cgTopRhs id' rhs + ; addBindC id info -- Add the *un-externalised* Id to the envt, + -- so we find it when we look up occurrences + } cgTopBinding (StgRec pairs, srts) - = absC maybeSplitCode `thenC` - let - (bndrs, rhss) = unzip pairs - in - mapFCs maybeExternaliseId bndrs `thenFC` \ bndrs' -> - let - pairs' = zip bndrs' rhss - in - mapM_ (mkSRT bndrs') srts `thenC` - fixC (\ new_binds -> - addBindsC new_binds `thenC` - mapFCs ( \ (b,e) -> cgTopRhs b e ) pairs' - ) `thenFC` \ new_binds -> - nopC + = do { let (bndrs, rhss) = unzip pairs + ; bndrs' <- mapFCs maybeExternaliseId bndrs + ; let pairs' = zip bndrs' rhss + ; mapM_ (mkSRT bndrs') srts + ; new_binds <- fixC (\ new_binds -> do + { addBindsC new_binds + ; mapFCs ( \ (b,e) -> cgTopRhs b e ) pairs' }) + ; nopC } mkSRT :: [Id] -> (Id,[Id]) -> Code mkSRT these (id,[]) = nopC mkSRT these (id,ids) - = mapFCs remap ids `thenFC` \ ids -> - remap id `thenFC` \ id -> - absC (CSRT (mkSRTLabel (idName id)) (map (mkClosureLabel . idName) ids)) + = do { ids <- mapFCs remap ids + ; id <- remap id + ; emitRODataLits (mkSRTLabel (idName id)) + (map (CmmLabel . mkClosureLabel . idName) ids) + } where - -- sigh, better map all the ids against the environment in case they've - -- been externalised (see maybeExternaliseId below). + -- Sigh, better map all the ids against the environment in + -- case they've been externalised (see maybeExternaliseId below). 
remap id = case filter (==id) these of - [] -> getCAddrModeAndInfo id - `thenFC` \ (id, _, _) -> returnFC id (id':_) -> returnFC id' + [] -> do { info <- getCgIdInfo id; return (cgIdInfoId info) } -- Urgh! I tried moving the forkStatics call from the rhss of cgTopRhs -- to enclose the listFCs in cgTopBinding, but that tickled the @@ -280,12 +327,8 @@ cgTopRhs bndr (StgRhsCon cc con args) cgTopRhs bndr (StgRhsClosure cc bi fvs upd_flag srt args body) = ASSERT(null fvs) -- There should be no free variables - let - srt_label = mkSRTLabel (idName bndr) - lf_info = mkClosureLFInfo bndr TopLevel [{-no fvs-}] upd_flag args - in - setSRTLabel srt_label $ - forkStatics (cgTopRhsClosure bndr cc bi srt args body lf_info) + setSRTLabel (mkSRTLabel (idName bndr)) $ + forkStatics (cgTopRhsClosure bndr cc bi srt upd_flag args body) \end{code} @@ -303,21 +346,17 @@ which refers to this name). maybeExternaliseId :: Id -> FCode Id maybeExternaliseId id | opt_EnsureSplittableC, -- Externalise the name for -split-objs - isInternalName name - = moduleName `thenFC` \ mod -> - returnFC (setIdName id (mkExternalName uniq mod new_occ Nothing (nameSrcLoc name))) - | otherwise - = returnFC id + isInternalName name = do { mod <- moduleName + ; returnFC (setIdName id (externalise mod)) } + | otherwise = returnFC id where - name = idName id - uniq = nameUnique name - new_occ = mkLocalOcc uniq (nameOccName name) + externalise mod = mkExternalName uniq mod new_occ Nothing loc + name = idName id + uniq = nameUnique name + new_occ = mkLocalOcc uniq (nameOccName name) + loc = nameSrcLoc name -- We want to conjure up a name that can't clash with any -- existing name. So we generate -- Mod_$L243foo -- where 243 is the unique. 
- -maybeSplitCode - | opt_EnsureSplittableC = CSplitMarker - | otherwise = AbsCNop \end{code} diff --git a/ghc/compiler/codeGen/SMRep.lhs b/ghc/compiler/codeGen/SMRep.lhs index 4f53f4bfee..92b9513d56 100644 --- a/ghc/compiler/codeGen/SMRep.lhs +++ b/ghc/compiler/codeGen/SMRep.lhs @@ -8,27 +8,236 @@ Other modules should access this info through ClosureInfo. \begin{code} module SMRep ( + -- Words and bytes + StgWord, StgHalfWord, + hALF_WORD_SIZE, hALF_WORD_SIZE_IN_BITS, + WordOff, ByteOff, + + -- Argument/return representations + CgRep(..), nonVoidArg, + argMachRep, primRepToCgRep, primRepHint, + isFollowableArg, isVoidArg, + isFloatingArg, isNonPtrArg, is64BitArg, + separateByPtrFollowness, + cgRepSizeW, cgRepSizeB, + retAddrSizeW, + + typeCgRep, idCgRep, tyConCgRep, typeHint, + + -- Closure repesentation SMRep(..), ClosureType(..), isStaticRep, fixedHdrSize, arrWordsHdrSize, arrPtrsHdrSize, - stdItblSize, retItblSize, - getSMRepClosureTypeInt, + profHdrSize, + tablesNextToCode, + smRepClosureType, smRepClosureTypeInt, - rET_SMALL, rET_VEC_SMALL, rET_BIG, rET_VEC_BIG, - - StgWord, StgHalfWord, hALF_WORD, + rET_SMALL, rET_VEC_SMALL, rET_BIG, rET_VEC_BIG ) where #include "HsVersions.h" #include "../includes/MachDeps.h" -import CmdLineOpts +import Id ( Id, idType ) +import Type ( Type, typePrimRep, PrimRep(..) ) +import TyCon ( TyCon, tyConPrimRep ) +import MachOp ( MachRep(..), MachHint(..), wordRep ) +import CmdLineOpts ( opt_SccProfilingOn, opt_GranMacros, opt_Unregisterised ) import Constants import Outputable import DATA_WORD \end{code} + +%************************************************************************ +%* * + Words and bytes +%* * +%************************************************************************ + +\begin{code} +type WordOff = Int -- Word offset, or word count +type ByteOff = Int -- Byte offset, or byte count +\end{code} + +StgWord is a type representing an StgWord on the target platform. 
+ +\begin{code} +#if SIZEOF_HSWORD == 4 +type StgWord = Word32 +type StgHalfWord = Word16 +hALF_WORD_SIZE = 2 :: ByteOff +hALF_WORD_SIZE_IN_BITS = 16 :: Int +#elif SIZEOF_HSWORD == 8 +type StgWord = Word64 +type StgHalfWord = Word32 +hALF_WORD_SIZE = 4 :: ByteOff +hALF_WORD_SIZE_IN_BITS = 32 :: Int +#else +#error unknown SIZEOF_HSWORD +#endif +\end{code} + + +%************************************************************************ +%* * + CgRep +%* * +%************************************************************************ + +An CgRep is an abstraction of a Type which tells the code generator +all it needs to know about the calling convention for arguments (and +results) of that type. In particular, the ArgReps of a function's +arguments are used to decide which of the RTS's generic apply +functions to call when applying an unknown function. + +It contains more information than the back-end data type MachRep, +so one can easily convert from CgRep -> MachRep. (Except that +there's no MachRep for a VoidRep.) + +It distinguishes + pointers from non-pointers (we sort the pointers together + when building closures) + + void from other types: a void argument is different from no argument + +All 64-bit types map to the same CgRep, because they're passed in the +same register, but a PtrArg is still different from an NonPtrArg +because the function's entry convention has to take into account the +pointer-hood of arguments for the purposes of describing the stack on +entry to the garbage collector. 
+ +\begin{code} +data CgRep + = VoidArg -- Void + | PtrArg -- Word-sized Ptr + | NonPtrArg -- Word-sized non-pointer + | LongArg -- 64-bit non-pointer + | FloatArg -- 32-bit float + | DoubleArg -- 64-bit float + deriving Eq + +instance Outputable CgRep where + ppr VoidArg = ptext SLIT("V_") + ppr PtrArg = ptext SLIT("P_") + ppr NonPtrArg = ptext SLIT("I_") + ppr LongArg = ptext SLIT("L_") + ppr FloatArg = ptext SLIT("F_") + ppr DoubleArg = ptext SLIT("D_") + +argMachRep :: CgRep -> MachRep +argMachRep PtrArg = wordRep +argMachRep NonPtrArg = wordRep +argMachRep LongArg = I64 +argMachRep FloatArg = F32 +argMachRep DoubleArg = F64 +argMachRep VoidArg = panic "argMachRep:VoidRep" + +primRepToCgRep :: PrimRep -> CgRep +primRepToCgRep VoidRep = VoidArg +primRepToCgRep PtrRep = PtrArg +primRepToCgRep IntRep = NonPtrArg +primRepToCgRep WordRep = NonPtrArg +primRepToCgRep Int64Rep = LongArg +primRepToCgRep Word64Rep = LongArg +primRepToCgRep AddrRep = NonPtrArg +primRepToCgRep FloatRep = FloatArg +primRepToCgRep DoubleRep = DoubleArg + +primRepHint :: PrimRep -> MachHint +primRepHint VoidRep = panic "primRepHint:VoidRep" +primRepHint PtrRep = PtrHint +primRepHint IntRep = SignedHint +primRepHint WordRep = NoHint +primRepHint Int64Rep = SignedHint +primRepHint Word64Rep = NoHint +primRepHint AddrRep = PtrHint -- NB! PtrHint, but NonPtrArg +primRepHint FloatRep = FloatHint +primRepHint DoubleRep = FloatHint + +idCgRep :: Id -> CgRep +idCgRep = typeCgRep . idType + +tyConCgRep :: TyCon -> CgRep +tyConCgRep = primRepToCgRep . tyConPrimRep + +typeCgRep :: Type -> CgRep +typeCgRep = primRepToCgRep . typePrimRep + +typeHint :: Type -> MachHint +typeHint = primRepHint . typePrimRep +\end{code} + +Whether or not the thing is a pointer that the garbage-collector +should follow. Or, to put it another (less confusing) way, whether +the object in question is a heap object. 
+ +Depending on the outcome, this predicate determines what stack +the pointer/object possibly will have to be saved onto, and the +computation of GC liveness info. + +\begin{code} +isFollowableArg :: CgRep -> Bool -- True <=> points to a heap object +isFollowableArg PtrArg = True +isFollowableArg other = False + +isVoidArg :: CgRep -> Bool +isVoidArg VoidArg = True +isVoidArg other = False + +nonVoidArg :: CgRep -> Bool +nonVoidArg VoidArg = False +nonVoidArg other = True + +-- isFloatingArg is used to distinguish @Double@ and @Float@ which +-- cause inadvertent numeric conversions if you aren't jolly careful. +-- See codeGen/CgCon:cgTopRhsCon. + +isFloatingArg :: CgRep -> Bool +isFloatingArg DoubleArg = True +isFloatingArg FloatArg = True +isFloatingArg _ = False + +isNonPtrArg :: CgRep -> Bool +-- Identify anything which is one word large and not a pointer. +isNonPtrArg NonPtrArg = True +isNonPtrArg other = False + +is64BitArg :: CgRep -> Bool +is64BitArg LongArg = True +is64BitArg _ = False +\end{code} + +\begin{code} +separateByPtrFollowness :: [(CgRep,a)] -> ([(CgRep,a)], [(CgRep,a)]) +-- Returns (ptrs, non-ptrs) +separateByPtrFollowness things + = sep_things things [] [] + -- accumulating params for follow-able and don't-follow things... 
+ where + sep_things [] bs us = (reverse bs, reverse us) + sep_things ((PtrArg,a):ts) bs us = sep_things ts ((PtrArg,a):bs) us + sep_things (t :ts) bs us = sep_things ts bs (t:us) +\end{code} + +\begin{code} +cgRepSizeB :: CgRep -> ByteOff +cgRepSizeB DoubleArg = dOUBLE_SIZE +cgRepSizeB LongArg = wORD64_SIZE +cgRepSizeB VoidArg = 0 +cgRepSizeB _ = wORD_SIZE + +cgRepSizeW :: CgRep -> ByteOff +cgRepSizeW DoubleArg = dOUBLE_SIZE `quot` wORD_SIZE +cgRepSizeW LongArg = wORD64_SIZE `quot` wORD_SIZE +cgRepSizeW VoidArg = 0 +cgRepSizeW _ = 1 + +retAddrSizeW :: WordOff +retAddrSizeW = 1 -- One word +\end{code} + %************************************************************************ %* * \subsubsection[SMRep-datatype]{@SMRep@---storage manager representation} @@ -59,44 +268,32 @@ data ClosureType -- Corresponds 1-1 with the varieties of closures Size of a closure header. \begin{code} -fixedHdrSize :: Int{-words-} +fixedHdrSize :: WordOff fixedHdrSize = sTD_HDR_SIZE + profHdrSize + granHdrSize -profHdrSize :: Int{-words-} +profHdrSize :: WordOff profHdrSize | opt_SccProfilingOn = pROF_HDR_SIZE | otherwise = 0 -granHdrSize :: Int{-words-} +granHdrSize :: WordOff granHdrSize | opt_GranMacros = gRAN_HDR_SIZE | otherwise = 0 -arrWordsHdrSize :: Int{-words-} -arrWordsHdrSize = fixedHdrSize + aRR_WORDS_HDR_SIZE +arrWordsHdrSize :: ByteOff +arrWordsHdrSize = fixedHdrSize*wORD_SIZE + sIZEOF_StgArrWords_NoHdr -arrPtrsHdrSize :: Int{-words-} -arrPtrsHdrSize = fixedHdrSize + aRR_PTRS_HDR_SIZE +arrPtrsHdrSize :: ByteOff +arrPtrsHdrSize = fixedHdrSize*wORD_SIZE + sIZEOF_StgMutArrPtrs_NoHdr \end{code} -Size of an info table. 
- \begin{code} -stdItblSize :: Int{-words-} -stdItblSize = sTD_ITBL_SIZE + profItblSize + granItblSize + tickyItblSize - -retItblSize :: Int{-words-} -retItblSize = stdItblSize + rET_ITBL_SIZE - -profItblSize :: Int{-words-} -profItblSize | opt_SccProfilingOn = pROF_ITBL_SIZE - | otherwise = 0 - -granItblSize :: Int{-words-} -granItblSize | opt_GranMacros = gRAN_ITBL_SIZE - | otherwise = 0 - -tickyItblSize :: Int{-words-} -tickyItblSize | opt_DoTickyProfiling = tICKY_ITBL_SIZE - | otherwise = 0 +-- IA64 mangler doesn't place tables next to code +tablesNextToCode :: Bool +#ifdef ia64_TARGET_ARCH +tablesNextToCode = False +#else +tablesNextToCode = not opt_Unregisterised +#endif \end{code} \begin{code} @@ -109,38 +306,43 @@ isStaticRep BlackHoleRep = False #include "../includes/ClosureTypes.h" -- Defines CONSTR, CONSTR_1_0 etc -getSMRepClosureTypeInt :: SMRep -> Int -getSMRepClosureTypeInt (GenericRep False 1 0 Constr) = CONSTR_1_0 -getSMRepClosureTypeInt (GenericRep False 0 1 Constr) = CONSTR_0_1 -getSMRepClosureTypeInt (GenericRep False 2 0 Constr) = CONSTR_2_0 -getSMRepClosureTypeInt (GenericRep False 1 1 Constr) = CONSTR_1_1 -getSMRepClosureTypeInt (GenericRep False 0 2 Constr) = CONSTR_0_2 -getSMRepClosureTypeInt (GenericRep False _ _ Constr) = CONSTR -getSMRepClosureTypeInt (GenericRep False 1 0 Fun) = FUN_1_0 -getSMRepClosureTypeInt (GenericRep False 0 1 Fun) = FUN_0_1 -getSMRepClosureTypeInt (GenericRep False 2 0 Fun) = FUN_2_0 -getSMRepClosureTypeInt (GenericRep False 1 1 Fun) = FUN_1_1 -getSMRepClosureTypeInt (GenericRep False 0 2 Fun) = FUN_0_2 -getSMRepClosureTypeInt (GenericRep False _ _ Fun) = FUN +smRepClosureType :: SMRep -> ClosureType +smRepClosureType (GenericRep _ _ _ ty) = ty +smRepClosureType BlackHoleRep = panic "smRepClosureType: black hole" + +smRepClosureTypeInt :: SMRep -> Int +smRepClosureTypeInt (GenericRep False 1 0 Constr) = CONSTR_1_0 +smRepClosureTypeInt (GenericRep False 0 1 Constr) = CONSTR_0_1 +smRepClosureTypeInt (GenericRep False 
2 0 Constr) = CONSTR_2_0 +smRepClosureTypeInt (GenericRep False 1 1 Constr) = CONSTR_1_1 +smRepClosureTypeInt (GenericRep False 0 2 Constr) = CONSTR_0_2 +smRepClosureTypeInt (GenericRep False _ _ Constr) = CONSTR + +smRepClosureTypeInt (GenericRep False 1 0 Fun) = FUN_1_0 +smRepClosureTypeInt (GenericRep False 0 1 Fun) = FUN_0_1 +smRepClosureTypeInt (GenericRep False 2 0 Fun) = FUN_2_0 +smRepClosureTypeInt (GenericRep False 1 1 Fun) = FUN_1_1 +smRepClosureTypeInt (GenericRep False 0 2 Fun) = FUN_0_2 +smRepClosureTypeInt (GenericRep False _ _ Fun) = FUN -getSMRepClosureTypeInt (GenericRep False 1 0 Thunk) = THUNK_1_0 -getSMRepClosureTypeInt (GenericRep False 0 1 Thunk) = THUNK_0_1 -getSMRepClosureTypeInt (GenericRep False 2 0 Thunk) = THUNK_2_0 -getSMRepClosureTypeInt (GenericRep False 1 1 Thunk) = THUNK_1_1 -getSMRepClosureTypeInt (GenericRep False 0 2 Thunk) = THUNK_0_2 -getSMRepClosureTypeInt (GenericRep False _ _ Thunk) = THUNK +smRepClosureTypeInt (GenericRep False 1 0 Thunk) = THUNK_1_0 +smRepClosureTypeInt (GenericRep False 0 1 Thunk) = THUNK_0_1 +smRepClosureTypeInt (GenericRep False 2 0 Thunk) = THUNK_2_0 +smRepClosureTypeInt (GenericRep False 1 1 Thunk) = THUNK_1_1 +smRepClosureTypeInt (GenericRep False 0 2 Thunk) = THUNK_0_2 +smRepClosureTypeInt (GenericRep False _ _ Thunk) = THUNK -getSMRepClosureTypeInt (GenericRep False _ _ ThunkSelector) = THUNK_SELECTOR +smRepClosureTypeInt (GenericRep False _ _ ThunkSelector) = THUNK_SELECTOR -getSMRepClosureTypeInt (GenericRep True _ _ Constr) = CONSTR_STATIC -getSMRepClosureTypeInt (GenericRep True _ _ ConstrNoCaf) = CONSTR_NOCAF_STATIC -getSMRepClosureTypeInt (GenericRep True _ _ Fun) = FUN_STATIC -getSMRepClosureTypeInt (GenericRep True _ _ Thunk) = THUNK_STATIC +smRepClosureTypeInt (GenericRep True _ _ Constr) = CONSTR_STATIC +smRepClosureTypeInt (GenericRep True _ _ ConstrNoCaf) = CONSTR_NOCAF_STATIC +smRepClosureTypeInt (GenericRep True _ _ Fun) = FUN_STATIC +smRepClosureTypeInt (GenericRep True _ _ Thunk) = 
THUNK_STATIC -getSMRepClosureTypeInt BlackHoleRep = BLACKHOLE +smRepClosureTypeInt BlackHoleRep = BLACKHOLE -getSMRepClosureTypeInt rep = panic "getSMRepClosureTypeInt" +smRepClosureTypeInt rep = panic "smRepClosuretypeint" -- We export these ones @@ -150,18 +352,3 @@ rET_BIG = (RET_BIG :: Int) rET_VEC_BIG = (RET_VEC_BIG :: Int) \end{code} -A type representing an StgWord on the target platform. - -\begin{code} -#if SIZEOF_HSWORD == 4 -type StgWord = Word32 -type StgHalfWord = Word16 -hALF_WORD = 16 :: Int -#elif SIZEOF_HSWORD == 8 -type StgWord = Word64 -type StgHalfWord = Word32 -hALF_WORD = 32 :: Int -#else -#error unknown SIZEOF_HSWORD -#endif -\end{code} |