diff options
Diffstat (limited to 'compiler/codeGen')
30 files changed, 1072 insertions, 985 deletions
diff --git a/compiler/codeGen/CgBindery.lhs b/compiler/codeGen/CgBindery.lhs index 198e192f5c..12fae4888b 100644 --- a/compiler/codeGen/CgBindery.lhs +++ b/compiler/codeGen/CgBindery.lhs @@ -7,14 +7,16 @@ \begin{code} module CgBindery ( - CgBindings, CgIdInfo, + CgBindings, CgIdInfo, CgIdElemInfo, StableLoc, VolatileLoc, - cgIdInfoId, cgIdInfoArgRep, cgIdInfoLF, + cgIdInfoId, cgIdInfoElems, cgIdElemInfoArgRep, cgIdElemInfoLF, + cgIdInfoSingleElem, stableIdInfo, heapIdInfo, taggedStableIdInfo, taggedHeapIdInfo, - letNoEscapeIdInfo, idInfoToAmode, + letNoEscapeIdInfo, + idInfoToAmodes, idElemInfoToAmode, addBindC, addBindsC, @@ -23,15 +25,17 @@ module CgBindery ( getLiveStackSlots, getLiveStackBindings, - bindArgsToStack, rebindToStack, - bindNewToNode, bindNewToUntagNode, bindNewToReg, bindArgsToRegs, - bindNewToTemp, - getArgAmode, getArgAmodes, + rebindToStack, bindArgsToRegOrStack, + bindNewToNode, bindNewToUntagNode, bindNewToReg, + bindNewToTemp, bindToRegs, + getArgAmodes, getCgIdInfo, - getCAddrModeIfVolatile, getVolatileRegs, + getVolatilesCAddrModes, getVolatileRegs, maybeLetNoEscape, ) where +#include "HsVersions.h" + import CgMonad import CgHeapery import CgStackery @@ -55,6 +59,11 @@ import Unique import UniqSet import Outputable import FastString +import Util +import UniqSupply + +import Control.Monad +import Data.List \end{code} @@ -80,36 +89,32 @@ data CgIdInfo { cg_id :: Id -- Id that this is the info for -- Can differ from the Id at occurrence sites by -- virtue of being externalised, for splittable C - , cg_rep :: CgRep + , cg_elems :: [CgIdElemInfo] + } + +data CgIdElemInfo + = CgIdElemInfo + { cg_rep :: CgRep , cg_vol :: VolatileLoc , cg_stb :: StableLoc , cg_lf :: LambdaFormInfo , cg_tag :: {-# UNPACK #-} !Int -- tag to be added in idInfoToAmode } +-- Used only for Id with a guaranteed-unary CgRep mkCgIdInfo :: Id -> VolatileLoc -> StableLoc -> LambdaFormInfo -> CgIdInfo mkCgIdInfo id vol stb lf - = CgIdInfo { cg_id = id, cg_vol = vol, cg_stb = stb, - cg_lf = lf, cg_rep = idCgRep id, cg_tag = tag } + = CgIdInfo { cg_id = id + , cg_elems = [mkCgIdElemInfo rep vol stb lf] + } where - tag - | Just con <- isDataConWorkId_maybe id, - {- Is this an identifier for a static constructor closure? -} - isNullaryRepDataCon con - {- If yes, is this a nullary constructor? - If yes, we assume that the constructor is evaluated and can - be tagged. - -} - = tagForCon con + rep = case idCgRep id of [rep] -> rep; _ -> panic "mkCgIdInfo" - | otherwise - = funTagLFInfo lf - -voidIdInfo :: Id -> CgIdInfo -voidIdInfo id = CgIdInfo { cg_id = id, cg_vol = NoVolatileLoc - , cg_stb = VoidLoc, cg_lf = mkLFArgument id - , cg_rep = VoidArg, cg_tag = 0 } - -- Used just for VoidRep things +mkCgIdElemInfo :: CgRep -> VolatileLoc -> StableLoc -> LambdaFormInfo -> CgIdElemInfo +mkCgIdElemInfo rep vol stb lf + = CgIdElemInfo { cg_vol = vol, cg_stb = stb + , cg_lf = lf, cg_rep = rep, cg_tag = funTagLFInfo lf } + where data VolatileLoc -- These locations die across a call = NoVolatileLoc @@ -120,11 +125,13 @@ data VolatileLoc -- These locations die across a call -- NB. Byte offset, because we subtract R1's -- tag from the offset. -mkTaggedCgIdInfo :: Id -> VolatileLoc -> StableLoc -> LambdaFormInfo -> DataCon - -> CgIdInfo -mkTaggedCgIdInfo id vol stb lf con - = CgIdInfo { cg_id = id, cg_vol = vol, cg_stb = stb, - cg_lf = lf, cg_rep = idCgRep id, cg_tag = tagForCon con } +-- Used only for Id with a guaranteed-unary CgRep +mkTaggedCgIdElemInfo :: Id -> VolatileLoc -> StableLoc -> LambdaFormInfo -> DataCon + -> CgIdElemInfo +mkTaggedCgIdElemInfo id vol stb lf con + = CgIdElemInfo { cg_rep = rep, cg_vol = vol, cg_stb = stb + , cg_lf = lf, cg_tag = tagForCon con } + where rep = case idCgRep id of [rep] -> rep; _ -> panic "mkTaggedCgIdElemInfo" \end{code} @StableLoc@ encodes where an Id can be found, used by @@ -142,14 +149,15 @@ data StableLoc -- (as opposed to the contents of the slot) | StableLoc CmmExpr - | VoidLoc -- Used only for VoidRep variables. They never need to - -- be saved, so it makes sense to treat treat them as - -- having a stable location instance PlatformOutputable CgIdInfo where - pprPlatform platform (CgIdInfo id _ vol stb _ _) + pprPlatform platform (CgIdInfo id elems) + = ppr id <+> ptext (sLit "-->") <+> vcat (map (pprPlatform platform) elems) + +instance PlatformOutputable CgIdElemInfo where + pprPlatform platform (CgIdElemInfo _ vol stb _ _) -- TODO, pretty pring the tag info - = ppr id <+> ptext (sLit "-->") <+> vcat [ppr vol, pprPlatform platform stb] + = vcat [ppr vol, pprPlatform platform stb] instance Outputable VolatileLoc where ppr NoVolatileLoc = empty @@ -159,7 +167,6 @@ instance Outputable VolatileLoc where instance PlatformOutputable StableLoc where pprPlatform _ NoStableLoc = empty - pprPlatform _ VoidLoc = ptext (sLit "void") pprPlatform _ (VirStkLoc v) = ptext (sLit "vs") <+> ppr v pprPlatform _ (VirStkLNE v) = ptext (sLit "lne") <+> ppr v pprPlatform platform (StableLoc a) = ptext (sLit "amode") <+> pprPlatform platform a @@ -181,31 +188,33 @@ heapIdInfo id offset lf_info = mkCgIdInfo id (VirHpLoc offset) NoStableLoc lf letNoEscapeIdInfo :: Id -> VirtualSpOffset -> LambdaFormInfo -> CgIdInfo letNoEscapeIdInfo id sp lf_info = mkCgIdInfo id NoVolatileLoc (VirStkLNE sp) lf_info -stackIdInfo :: Id -> VirtualSpOffset -> LambdaFormInfo -> CgIdInfo -stackIdInfo id sp lf_info = mkCgIdInfo id NoVolatileLoc (VirStkLoc sp) lf_info +stackIdElemInfo :: CgRep -> VirtualSpOffset -> LambdaFormInfo -> CgIdElemInfo +stackIdElemInfo rep sp lf_info = mkCgIdElemInfo rep NoVolatileLoc (VirStkLoc sp) lf_info + +nodeIdElemInfo :: CgRep -> VirtualHpOffset -> LambdaFormInfo -> CgIdElemInfo +nodeIdElemInfo rep offset lf_info = mkCgIdElemInfo rep (VirNodeLoc (wORD_SIZE*offset)) NoStableLoc lf_info -nodeIdInfo :: Id -> Int -> LambdaFormInfo -> CgIdInfo -nodeIdInfo id offset lf_info = mkCgIdInfo id (VirNodeLoc (wORD_SIZE*offset)) NoStableLoc lf_info +untagNodeIdElemInfo :: CgRep -> VirtualHpOffset -> LambdaFormInfo -> Int -> CgIdElemInfo +untagNodeIdElemInfo rep offset lf_info tag + = mkCgIdElemInfo rep (VirNodeLoc (wORD_SIZE*offset - tag)) NoStableLoc lf_info -regIdInfo :: Id -> CmmReg -> LambdaFormInfo -> CgIdInfo -regIdInfo id reg lf_info = mkCgIdInfo id (RegLoc reg) NoStableLoc lf_info +regIdElemInfo :: CgRep -> CmmReg -> LambdaFormInfo -> CgIdElemInfo +regIdElemInfo rep reg lf_info = mkCgIdElemInfo rep (RegLoc reg) NoStableLoc lf_info taggedStableIdInfo :: Id -> CmmExpr -> LambdaFormInfo -> DataCon -> CgIdInfo taggedStableIdInfo id amode lf_info con - = mkTaggedCgIdInfo id NoVolatileLoc (StableLoc amode) lf_info con + = CgIdInfo id [mkTaggedCgIdElemInfo id NoVolatileLoc (StableLoc amode) lf_info con] -taggedHeapIdInfo :: Id -> VirtualHpOffset -> LambdaFormInfo -> DataCon - -> CgIdInfo +taggedHeapIdInfo :: Id -> VirtualHpOffset -> LambdaFormInfo -> DataCon -> CgIdInfo taggedHeapIdInfo id offset lf_info con - = mkTaggedCgIdInfo id (VirHpLoc offset) NoStableLoc lf_info con + = CgIdInfo id [mkTaggedCgIdElemInfo id (VirHpLoc offset) NoStableLoc lf_info con] -untagNodeIdInfo :: Id -> Int -> LambdaFormInfo -> Int -> CgIdInfo -untagNodeIdInfo id offset lf_info tag - = mkCgIdInfo id (VirNodeLoc (wORD_SIZE*offset - tag)) NoStableLoc lf_info +idInfoToAmodes :: CgIdInfo -> FCode [CmmExpr] +idInfoToAmodes = mapM idElemInfoToAmode . cg_elems -idInfoToAmode :: CgIdInfo -> FCode CmmExpr -idInfoToAmode info +idElemInfoToAmode :: CgIdElemInfo -> FCode CmmExpr +idElemInfoToAmode info = case cg_vol info of { RegLoc reg -> returnFC (CmmReg reg) ; VirNodeLoc nd_off -> returnFC (CmmLoad (cmmOffsetB (CmmReg nodeReg) nd_off) @@ -221,12 +230,7 @@ idInfoToAmode info VirStkLNE sp_off -> getSpRelOffset sp_off - VoidLoc -> return $ pprPanic "idInfoToAmode: void" (ppr (cg_id info)) - -- We return a 'bottom' amode, rather than panicing now - -- In this way getArgAmode returns a pair of (VoidArg, bottom) - -- and that's exactly what we want - - NoStableLoc -> pprPanic "idInfoToAmode: no loc" (ppr (cg_id info)) + NoStableLoc -> panic "idInfoToAmode: no loc" } where mach_rep = argMachRep (cg_rep info) @@ -239,15 +243,22 @@ idInfoToAmode info cgIdInfoId :: CgIdInfo -> Id cgIdInfoId = cg_id -cgIdInfoLF :: CgIdInfo -> LambdaFormInfo -cgIdInfoLF = cg_lf +cgIdInfoElems :: CgIdInfo -> [CgIdElemInfo] +cgIdInfoElems = cg_elems + +cgIdInfoSingleElem :: String -> CgIdInfo -> CgIdElemInfo +cgIdInfoSingleElem _ (CgIdInfo { cg_elems = [elem] }) = elem +cgIdInfoSingleElem msg _ = panic $ "cgIdInfoSingleElem: " ++ msg -cgIdInfoArgRep :: CgIdInfo -> CgRep -cgIdInfoArgRep = cg_rep +cgIdElemInfoLF :: CgIdElemInfo -> LambdaFormInfo +cgIdElemInfoLF = cg_lf -maybeLetNoEscape :: CgIdInfo -> Maybe VirtualSpOffset -maybeLetNoEscape (CgIdInfo { cg_stb = VirStkLNE sp_off }) = Just sp_off -maybeLetNoEscape _ = Nothing +cgIdElemInfoArgRep :: CgIdElemInfo -> CgRep +cgIdElemInfoArgRep = cg_rep + +maybeLetNoEscape :: CgIdElemInfo -> Maybe VirtualSpOffset +maybeLetNoEscape (CgIdElemInfo { cg_stb = VirStkLNE sp_off }) = Just sp_off +maybeLetNoEscape _ = Nothing \end{code} %************************************************************************ @@ -262,6 +273,17 @@ There are three basic routines, for adding (@addBindC@), modifying A @Id@ is bound to a @(VolatileLoc, StableLoc)@ triple. The name should not already be bound. (nice ASSERT, eh?) +Note [CgIdInfo knot] +~~~~~~~~~~~~~~~~~~~~ + +We can't be too strict in the CgIdInfo, because in e.g. letrecs the CgIdInfo +is knot-tied. A loop I build in practice was + cgExpr LetRec -> cgRhs StgRhsCon -> buildDynCon' +from code like (let xs = (:) y xs in xs) because we fixpoint the CgIdInfo for +xs and buildDynCon' is strict in the length of the CgIdElemInfo list. + +To work around this we try to be yield the length of the CgIdInfo element list +lazily by lazily zipping it with the idCgReps. \begin{code} addBindC :: Id -> CgIdInfo -> Code addBindC name stuff_to_bind = do @@ -281,9 +303,17 @@ modifyBindC name mangle_fn = do binds <- getBinds setBinds $ modifyVarEnv mangle_fn binds name +-- See: Note [CgIdInfo knot] +etaCgIdInfo :: Id -> CgIdInfo -> CgIdInfo +etaCgIdInfo id ~(CgIdInfo { cg_id = lazy_id, cg_elems = elems }) + = CgIdInfo { cg_id = lazy_id + , cg_elems = zipLazyWith (showPpr (id, idCgRep id, length elems)) (\_ elem -> elem) (idCgRep id) elems } + +-- Note eta-expansion of CgIdInfo: getCgIdInfo :: Id -> FCode CgIdInfo getCgIdInfo id - = do { -- Try local bindings first + = liftM (etaCgIdInfo id) $ + do { -- Try local bindings first ; local_binds <- getBinds ; case lookupVarEnv local_binds id of { Just info -> return info ; @@ -301,11 +331,9 @@ getCgIdInfo id in if isExternalName name then do let ext_lbl = CmmLit (CmmLabel (mkClosureLabel name $ idCafInfo id)) - return (stableIdInfo id ext_lbl (mkLFImported id)) - else - if isVoidArg (idCgRep id) then - -- Void things are never in the environment - return (voidIdInfo id) + return $ case mkLFImported id of + Nothing -> CgIdInfo id [] + Just lf_info -> stableIdInfo id ext_lbl lf_info else -- Bug cgLookupPanic id @@ -339,11 +367,17 @@ we don't leave any (NoVolatile, NoStable) binds around... \begin{code} nukeVolatileBinds :: CgBindings -> CgBindings nukeVolatileBinds binds - = mkVarEnv (foldr keep_if_stable [] (varEnvElts binds)) + = mkVarEnv (foldr (\info acc -> case keep_if_stable (cg_elems info) of Just infos -> (cg_id info, info { cg_elems = infos }) : acc; Nothing -> acc) [] (varEnvElts binds)) where - keep_if_stable (CgIdInfo { cg_stb = NoStableLoc }) acc = acc - keep_if_stable info acc - = (cg_id info, info { cg_vol = NoVolatileLoc }) : acc + has_no_stable_loc (CgIdElemInfo { cg_stb = NoStableLoc }) = True + has_no_stable_loc _ = False + + keep_if_stable infos + | any has_no_stable_loc infos + = ASSERT(all has_no_stable_loc infos) + Nothing + | otherwise + = Just (map (\info -> info { cg_vol = NoVolatileLoc }) infos) \end{code} @@ -354,14 +388,13 @@ nukeVolatileBinds binds %************************************************************************ \begin{code} -getCAddrModeIfVolatile :: Id -> FCode (Maybe CmmExpr) -getCAddrModeIfVolatile id +getVolatilesCAddrModes :: Id -> FCode [Maybe (CgRep, CmmExpr)] +getVolatilesCAddrModes id = do { info <- getCgIdInfo id - ; case cg_stb info of - NoStableLoc -> do -- Aha! So it is volatile! - amode <- idInfoToAmode info - return $ Just amode - _ -> return Nothing } + ; forM (cg_elems info) $ \elem_info -> case cg_stb elem_info of + NoStableLoc -> liftM (\expr -> Just (cg_rep elem_info, expr)) + (idElemInfoToAmode elem_info) + _ -> return Nothing } \end{code} @getVolatileRegs@ gets a set of live variables, and returns a list of @@ -375,51 +408,39 @@ forget the volatile one. getVolatileRegs :: StgLiveVars -> FCode [GlobalReg] getVolatileRegs vars = do do { stuff <- mapFCs snaffle_it (varSetElems vars) - ; returnFC $ catMaybes stuff } + ; returnFC $ concat stuff } where snaffle_it var = do { info <- getCgIdInfo var - ; let - -- commoned-up code... - consider_reg reg - = -- We assume that all regs can die across C calls - -- We leave it to the save-macros to decide which - -- regs *really* need to be saved. - case cg_stb info of - NoStableLoc -> returnFC (Just reg) -- got one! - _ -> do - { -- has both volatile & stable locations; - -- force it to rely on the stable location - modifyBindC var nuke_vol_bind - ; return Nothing } - - ; case cg_vol info of - RegLoc (CmmGlobal reg) -> consider_reg reg - VirNodeLoc _ -> consider_reg node - _ -> returnFC Nothing -- Local registers + ; let (vol_regs, elems') = unzip $ flip map (cg_elems info) $ \elem_info -> + let -- commoned-up code... + consider_reg reg + = -- We assume that all regs can die across C calls + -- We leave it to the save-macros to decide which + -- regs *really* need to be saved. + case cg_stb elem_info of + NoStableLoc -> (Just reg, elem_info) -- got one! + -- has both volatile & stable locations; + -- force it to rely on the stable location + _ -> (Nothing, elem_info { cg_vol = NoVolatileLoc }) + in case cg_vol elem_info of + RegLoc (CmmGlobal reg) -> consider_reg reg + VirNodeLoc _ -> consider_reg node + _ -> (Nothing, elem_info) -- Local registers + ; modifyBindC var (const info { cg_elems = elems' }) + ; return (catMaybes vol_regs) } - nuke_vol_bind info = info { cg_vol = NoVolatileLoc } - -getArgAmode :: StgArg -> FCode (CgRep, CmmExpr) -getArgAmode (StgVarArg var) +getArgAmodes :: StgArg -> FCode [(CgRep, CmmExpr)] +getArgAmodes (StgVarArg var) = do { info <- getCgIdInfo var - ; amode <- idInfoToAmode info - ; return (cgIdInfoArgRep info, amode ) } - -getArgAmode (StgLitArg lit) + ; forM (cg_elems info) $ \elem_info -> do + amode <- idElemInfoToAmode elem_info + return (cg_rep elem_info, amode) } +getArgAmodes (StgLitArg lit) = do { cmm_lit <- cgLit lit - ; return (typeCgRep (literalType lit), CmmLit cmm_lit) } - -getArgAmode (StgTypeArg _) = panic "getArgAmode: type arg" - -getArgAmodes :: [StgArg] -> FCode [(CgRep, CmmExpr)] -getArgAmodes [] = returnFC [] -getArgAmodes (atom:atoms) - | isStgTypeArg atom = getArgAmodes atoms - | otherwise = do { amode <- getArgAmode atom - ; amodes <- getArgAmodes atoms - ; return ( amode : amodes ) } + ; return $ zipEqual "getArgAmodes" (typeCgRep (literalType lit)) [CmmLit cmm_lit] } +getArgAmodes (StgTypeArg _) = return [] \end{code} %************************************************************************ @@ -429,50 +450,60 @@ getArgAmodes (atom:atoms) %************************************************************************ \begin{code} -bindArgsToStack :: [(Id, VirtualSpOffset)] -> Code -bindArgsToStack args - = mapCs bind args - where - bind(id, offset) = addBindC id (stackIdInfo id offset (mkLFArgument id)) - -bindArgsToRegs :: [(Id, GlobalReg)] -> Code -bindArgsToRegs args - = mapCs bind args +bindArgsToRegOrStack :: [(Id, [Either GlobalReg VirtualSpOffset])] -> Code +bindArgsToRegOrStack = mapCs bind where - bind (arg, reg) = bindNewToReg arg (CmmGlobal reg) (mkLFArgument arg) - -bindNewToNode :: Id -> VirtualHpOffset -> LambdaFormInfo -> Code -bindNewToNode id offset lf_info - = addBindC id (nodeIdInfo id offset lf_info) - -bindNewToUntagNode :: Id -> VirtualHpOffset -> LambdaFormInfo -> Int -> Code -bindNewToUntagNode id offset lf_info tag - = addBindC id (untagNodeIdInfo id offset lf_info tag) + bind (id, ei_reg_offs) = addBindC id $ CgIdInfo id $ + zipWith3Equal "bindArgsToRegOrStack" + (\rep lf_info ei_reg_off -> case ei_reg_off of + Left reg -> regIdElemInfo rep (CmmGlobal reg) lf_info + Right off -> stackIdElemInfo rep off lf_info) + (idCgRep id) (mkLFArgument (idType id)) ei_reg_offs + +bindNewToNode :: Id -> [(VirtualHpOffset, LambdaFormInfo)] -> Code +bindNewToNode id offset_lf_infos + = addBindC id (CgIdInfo id $ zipWithEqual "bindNewToNode" (\rep (offset, lf_info) -> nodeIdElemInfo rep offset lf_info) (idCgRep id) offset_lf_infos) + +-- NB: the tag is for the *node*, not the thing we load from it, so it is shared amongst elements +bindNewToUntagNode :: Id -> [(VirtualHpOffset, LambdaFormInfo)] -> Int -> Code +bindNewToUntagNode id offset_lf_infos tag + = addBindC id (CgIdInfo id $ zipWithEqual "bindNewToUntagNode" (\rep (offset, lf_info) -> untagNodeIdElemInfo rep offset lf_info tag) (idCgRep id) offset_lf_infos) + +idRegs :: Id -> FCode [LocalReg] +idRegs id = do + us <- newUniqSupply + let cg_reps = idCgRep id + temp_regs = zipWith LocalReg (getUnique id : uniqsFromSupply us) (map argMachRep cg_reps) + return temp_regs -- Create a new temporary whose unique is that in the id, -- bind the id to it, and return the addressing mode for the -- temporary. -bindNewToTemp :: Id -> FCode LocalReg +bindNewToTemp :: Id -> FCode [LocalReg] bindNewToTemp id - = do addBindC id (regIdInfo id (CmmLocal temp_reg) lf_info) - return temp_reg + = do temp_regs <- idRegs id + bindToRegs id temp_regs + return temp_regs + +bindToRegs :: Id -> [LocalReg] -> FCode () +bindToRegs id temp_regs + = addBindC id $ CgIdInfo id $ zipWith3Equal "bindNewToTemp" (\rep temp_reg lf_info -> regIdElemInfo rep (CmmLocal temp_reg) lf_info) (idCgRep id) temp_regs lf_infos where - uniq = getUnique id - temp_reg = LocalReg uniq (argMachRep (idCgRep id)) - lf_info = mkLFArgument id -- Always used of things we - -- know nothing about - -bindNewToReg :: Id -> CmmReg -> LambdaFormInfo -> Code -bindNewToReg name reg lf_info - = addBindC name info + lf_infos = mkLFArgument (idType id) -- Always used of things we + -- know nothing about + +bindNewToReg :: Id -> [(CmmReg, LambdaFormInfo)] -> Code +bindNewToReg name regs_lf_infos + = addBindC name (CgIdInfo name elem_infos) where - info = mkCgIdInfo name (RegLoc reg) NoStableLoc lf_info + elem_infos = zipWithEqual "bindNewToReg" (\rep (reg, lf_info) -> regIdElemInfo rep reg lf_info) + (idCgRep name) regs_lf_infos -rebindToStack :: Id -> VirtualSpOffset -> Code -rebindToStack name offset +rebindToStack :: Id -> [Maybe VirtualSpOffset] -> Code +rebindToStack name offsets = modifyBindC name replace_stable_fn where - replace_stable_fn info = info { cg_stb = VirStkLoc offset } + replace_stable_fn info = info { cg_elems = zipWithEqual "rebindToStack" (\elem_info mb_offset -> case mb_offset of Just offset -> elem_info { cg_stb = VirStkLoc offset }; Nothing -> elem_info) (cg_elems info) offsets } \end{code} %************************************************************************ @@ -503,7 +534,7 @@ nukeDeadBindings live_vars = do binds <- getBinds let (dead_stk_slots, bs') = dead_slots live_vars - [] [] + [] [] [] [ (cg_id b, b) | b <- varEnvElts binds ] setBinds $ mkVarEnv bs' freeStackSlots dead_stk_slots @@ -511,50 +542,56 @@ nukeDeadBindings live_vars = do Several boring auxiliary functions to do the dirty work. +Note that some stack slots can be mentioned in *more than one* CgIdInfo. +This commonly happens where the stack slots for the case binders of an +unboxed tuple case are a subset of the stack slots for the unboxed tuple case binder. + \begin{code} dead_slots :: StgLiveVars -> [(Id,CgIdInfo)] -> [VirtualSpOffset] + -> [VirtualSpOffset] -> [(Id,CgIdInfo)] -> ([VirtualSpOffset], [(Id,CgIdInfo)]) -- dead_slots carries accumulating parameters for --- filtered bindings, dead slots -dead_slots _ fbs ds [] - = (ds, reverse fbs) -- Finished; rm the dups, if any +-- filtered bindings, possibly-dead slots, live slots +dead_slots _ fbs ds ls [] + = (ds \\ ls, reverse fbs) -- Finished; rm the dups, if any -dead_slots live_vars fbs ds ((v,i):bs) +dead_slots live_vars fbs ds ls ((v,i):bs) | v `elementOfUniqSet` live_vars - = dead_slots live_vars ((v,i):fbs) ds bs + = dead_slots live_vars ((v,i):fbs) ds (infoLiveSlots i ++ ls) bs -- Live, so don't record it in dead slots -- Instead keep it in the filtered bindings | otherwise - = case cg_stb i of - VirStkLoc offset - | size > 0 - -> dead_slots live_vars fbs ([offset-size+1 .. offset] ++ ds) bs + = dead_slots live_vars fbs (infoLiveSlots i ++ ds) ls bs - _ -> dead_slots live_vars fbs ds bs - where - size :: WordOff - size = cgRepSizeW (cg_rep i) +infoLiveSlots :: CgIdInfo -> [WordOff] +infoLiveSlots i = [free | elem_i <- cg_elems i + , VirStkLoc offset <- [cg_stb elem_i] + , let size = cgRepSizeW (cg_rep elem_i) :: WordOff + , size > 0 + , free <- [offset-size+1 .. offset]] getLiveStackSlots :: FCode [VirtualSpOffset] -- Return the offsets of slots in stack containig live pointers getLiveStackSlots = do { binds <- getBinds - ; return [off | CgIdInfo { cg_stb = VirStkLoc off, - cg_rep = rep } <- varEnvElts binds, - isFollowableArg rep] } + ; return [off | info <- varEnvElts binds + , CgIdElemInfo { cg_stb = VirStkLoc off + , cg_rep = rep } <- cg_elems info + , isFollowableArg rep] } -getLiveStackBindings :: FCode [(VirtualSpOffset, CgIdInfo)] +getLiveStackBindings :: FCode [(VirtualSpOffset, CgRep)] getLiveStackBindings = do { binds <- getBinds - ; return [(off, bind) | - bind <- varEnvElts binds, - CgIdInfo { cg_stb = VirStkLoc off, - cg_rep = rep} <- [bind], + ; return [(off, rep) | + info <- varEnvElts binds, + elem_info <- cg_elems info, + CgIdElemInfo { cg_stb = VirStkLoc off, + cg_rep = rep} <- [elem_info], isFollowableArg rep] } \end{code} diff --git a/compiler/codeGen/CgCallConv.hs b/compiler/codeGen/CgCallConv.hs index c65194b62f..958f65ea09 100644 --- a/compiler/codeGen/CgCallConv.hs +++ b/compiler/codeGen/CgCallConv.hs @@ -36,7 +36,7 @@ import CLabel import Constants import CgStackery -import ClosureInfo( CgRep(..), nonVoidArg, idCgRep, cgRepSizeW, isFollowableArg ) +import ClosureInfo( CgRep(..), idCgRep, cgRepSizeW, isFollowableArg ) import OldCmmUtils import Maybes import Id @@ -45,7 +45,6 @@ import Util import StaticFlags import Module import FastString -import Outputable import Data.Bits ------------------------------------------------------------------------- @@ -71,8 +70,7 @@ mkArgDescr _nm args Nothing -> return (ArgGen arg_bits) where arg_bits = argBits arg_reps - arg_reps = filter nonVoidArg (map idCgRep args) - -- Getting rid of voids eases matching of standard patterns + arg_reps = concatMap idCgRep args argBits :: [CgRep] -> [Bool] -- True for non-ptr, False for ptr argBits [] = [] @@ -118,7 +116,7 @@ stdPattern _ = Nothing -- GET_NON_PTRS(), GET_PTRS(), GET_LIVENESS(). ------------------------------------------------------------------------- -mkRegLiveness :: [(Id, GlobalReg)] -> Int -> Int -> StgWord +mkRegLiveness :: [(CgRep, GlobalReg)] -> Int -> Int -> StgWord mkRegLiveness regs ptrs nptrs = (fromIntegral nptrs `shiftL` 16) .|. (fromIntegral ptrs `shiftL` 24) .|. @@ -127,7 +125,7 @@ mkRegLiveness regs ptrs nptrs all_non_ptrs = 0xff reg_bits [] = 0 - reg_bits ((id, VanillaReg i _) : regs) | isFollowableArg (idCgRep id) + reg_bits ((cgrep, VanillaReg i _) : regs) | isFollowableArg cgrep = (1 `shiftL` (i - 1)) .|. reg_bits regs reg_bits (_ : regs) = reg_bits regs @@ -141,10 +139,10 @@ mkRegLiveness regs ptrs nptrs -- For a slow call, we must take a bunch of arguments and intersperse -- some stg_ap_<pattern>_ret_info return addresses. constructSlowCall - :: [(CgRep,CmmExpr)] + :: [[(CgRep,CmmExpr)]] -> (CLabel, -- RTS entry point for call [(CgRep,CmmExpr)], -- args to pass to the entry point - [(CgRep,CmmExpr)]) -- stuff to save on the stack + [[(CgRep,CmmExpr)]]) -- stuff to save on the stack -- don't forget the zero case constructSlowCall [] @@ -159,7 +157,7 @@ constructSlowCall amodes -- | 'slowArgs' takes a list of function arguments and prepares them for -- pushing on the stack for "extra" arguments to a function which requires -- fewer arguments than we currently have. -slowArgs :: [(CgRep,CmmExpr)] -> [(CgRep,CmmExpr)] +slowArgs :: [[(CgRep,CmmExpr)]] -> [(CgRep,CmmExpr)] slowArgs [] = [] slowArgs amodes | opt_SccProfilingOn = save_cccs ++ this_pat ++ slowArgs rest @@ -171,29 +169,30 @@ slowArgs amodes save_cccs = [(NonPtrArg, mkLblExpr save_cccs_lbl), (NonPtrArg, curCCS)] save_cccs_lbl = mkCmmRetInfoLabel rtsPackageId (fsLit "stg_restore_cccs") -matchSlowPattern :: [(CgRep,CmmExpr)] - -> (FastString, [(CgRep,CmmExpr)], [(CgRep,CmmExpr)]) -matchSlowPattern amodes = (arg_pat, these, rest) - where (arg_pat, n) = slowCallPattern (map fst amodes) +matchSlowPattern :: [[(CgRep,CmmExpr)]] + -> (FastString, [(CgRep,CmmExpr)], [[(CgRep,CmmExpr)]]) +matchSlowPattern amodes = (arg_pat, concat these, rest) + where (arg_pat, n) = slowCallPattern (map (map fst) amodes) (these, rest) = splitAt n amodes -- These cases were found to cover about 99% of all slow calls: -slowCallPattern :: [CgRep] -> (FastString, Int) -slowCallPattern (PtrArg: PtrArg: PtrArg: PtrArg: PtrArg: PtrArg: _) = (fsLit "stg_ap_pppppp", 6) -slowCallPattern (PtrArg: PtrArg: PtrArg: PtrArg: PtrArg: _) = (fsLit "stg_ap_ppppp", 5) -slowCallPattern (PtrArg: PtrArg: PtrArg: PtrArg: _) = (fsLit "stg_ap_pppp", 4) -slowCallPattern (PtrArg: PtrArg: PtrArg: VoidArg: _) = (fsLit "stg_ap_pppv", 4) -slowCallPattern (PtrArg: PtrArg: PtrArg: _) = (fsLit "stg_ap_ppp", 3) -slowCallPattern (PtrArg: PtrArg: VoidArg: _) = (fsLit "stg_ap_ppv", 3) -slowCallPattern (PtrArg: PtrArg: _) = (fsLit "stg_ap_pp", 2) -slowCallPattern (PtrArg: VoidArg: _) = (fsLit "stg_ap_pv", 2) -slowCallPattern (PtrArg: _) = (fsLit "stg_ap_p", 1) -slowCallPattern (VoidArg: _) = (fsLit "stg_ap_v", 1) -slowCallPattern (NonPtrArg: _) = (fsLit "stg_ap_n", 1) -slowCallPattern (FloatArg: _) = (fsLit "stg_ap_f", 1) -slowCallPattern (DoubleArg: _) = (fsLit "stg_ap_d", 1) -slowCallPattern (LongArg: _) = (fsLit "stg_ap_l", 1) -slowCallPattern _ = panic "CgStackery.slowCallPattern" +slowCallPattern :: [[CgRep]] -> (FastString, Int) +slowCallPattern ([PtrArg]: [PtrArg]: [PtrArg]: [PtrArg]: [PtrArg]: [PtrArg]: _) = (fsLit "stg_ap_pppppp", 6) +slowCallPattern ([PtrArg]: [PtrArg]: [PtrArg]: [PtrArg]: [PtrArg]: _) = (fsLit "stg_ap_ppppp", 5) +slowCallPattern ([PtrArg]: [PtrArg]: [PtrArg]: [PtrArg]: _) = (fsLit "stg_ap_pppp", 4) +slowCallPattern ([PtrArg]: [PtrArg]: [PtrArg]: []: _) = (fsLit "stg_ap_pppv", 4) +slowCallPattern ([PtrArg]: [PtrArg]: [PtrArg]: _) = (fsLit "stg_ap_ppp", 3) +slowCallPattern ([PtrArg]: [PtrArg]: []: _) = (fsLit "stg_ap_ppv", 3) +slowCallPattern ([PtrArg]: [PtrArg]: _) = (fsLit "stg_ap_pp", 2) +slowCallPattern ([PtrArg]: []: _) = (fsLit "stg_ap_pv", 2) +slowCallPattern ([PtrArg]: _) = (fsLit "stg_ap_p", 1) +slowCallPattern ([NonPtrArg]: _) = (fsLit "stg_ap_n", 1) +slowCallPattern ([FloatArg]: _) = (fsLit "stg_ap_f", 1) +slowCallPattern ([DoubleArg]: _) = (fsLit "stg_ap_d", 1) +slowCallPattern ([LongArg]: _) = (fsLit "stg_ap_l", 1) +slowCallPattern ([]: _) = (fsLit "stg_ap_v", 1) +slowCallPattern (rs: _) = (error "FIXME" rs, 1) +slowCallPattern [] = (fsLit "stg_ap_0", 0) ------------------------------------------------------------------------- -- @@ -207,7 +206,6 @@ dataReturnConvPrim NonPtrArg = CmmGlobal (VanillaReg 1 VNonGcPtr) dataReturnConvPrim LongArg = CmmGlobal (LongReg 1) dataReturnConvPrim FloatArg = CmmGlobal (FloatReg 1) dataReturnConvPrim DoubleArg = CmmGlobal (DoubleReg 1) -dataReturnConvPrim VoidArg = panic "dataReturnConvPrim: void" -- getSequelAmode returns an amode which refers to an info table. The info @@ -281,14 +279,12 @@ assignReturnRegs args -- Also, the bytecode compiler assumes this when compiling -- case expressions and ccalls, so it only needs to know one set of -- return conventions. - | [(rep,arg)] <- non_void_args, CmmGlobal r <- dataReturnConvPrim rep + | [(rep,arg)] <- args, CmmGlobal r <- dataReturnConvPrim rep = ([(arg, r)], []) | otherwise = assign_regs args (mkRegTbl []) -- For returning unboxed tuples etc, - -- we use all regs - where - non_void_args = filter ((/= VoidArg).fst) args + -- we use all r assign_regs :: [(CgRep,a)] -- Arg or result values to assign -> AvailRegs -- Regs still avail: Vanilla, Float, Double, Longs @@ -297,8 +293,6 @@ assign_regs args supply = go args [] supply where go [] acc _ = (acc, []) -- Return the results reversed (doesn't matter) - go ((VoidArg,_) : args) acc supply -- Skip void arguments; they aren't passed, and - = go args acc supply -- there's nothing to bind them to go ((rep,arg) : args) acc supply = case assign_reg rep supply of Just (reg, supply') -> go args ((arg,reg):acc) supply' diff --git a/compiler/codeGen/CgCase.lhs b/compiler/codeGen/CgCase.lhs index dd607de1fc..9d81cf900b 100644 --- a/compiler/codeGen/CgCase.lhs +++ b/compiler/codeGen/CgCase.lhs @@ -42,10 +42,12 @@ import PrimOp import Type import TyCon import Util +import UniqSupply +import MonadUtils import Outputable import FastString -import Control.Monad (when) +import Control.Monad \end{code} \begin{code} @@ -110,10 +112,10 @@ Special case #1: case of literal. \begin{code} cgCase (StgLit lit) _live_in_whole_case _live_in_alts bndr alt_type@(PrimAlt _) alts - = do { tmp_reg <- bindNewToTemp bndr + = do { [tmp_reg] <- bindNewToTemp bndr ; cm_lit <- cgLit lit ; stmtC (CmmAssign (CmmLocal tmp_reg) (CmmLit cm_lit)) - ; cgPrimAlts NoGC alt_type (CmmLocal tmp_reg) alts } + ; cgPrimAlts NoGC alt_type [CmmLocal tmp_reg] alts } \end{code} Special case #2: scrutinising a primitive-typed variable. No @@ -124,15 +126,9 @@ allocating more heap than strictly necessary, but it will sometimes eliminate a heap check altogether. \begin{code} -cgCase (StgApp _v []) _live_in_whole_case _live_in_alts bndr - (PrimAlt _) [(DEFAULT,bndrs,_,rhs)] - | isVoidArg (idCgRep bndr) - = ASSERT( null bndrs ) - WARN( True, ptext (sLit "Case of void constant; missing optimisation somewhere") <+> ppr bndr) - cgExpr rhs - cgCase (StgApp v []) _live_in_whole_case _live_in_alts bndr - alt_type@(PrimAlt _) alts + alt_type alts + | case alt_type of PrimAlt _ -> True; UbxTupAlt _ -> True; _ -> False -- Note [ticket #3132]: we might be looking at a case of a lifted Id -- that was cast to an unlifted type. The Id will always be bottom, -- but we don't want the code generator to fall over here. If we @@ -140,7 +136,7 @@ cgCase (StgApp v []) _live_in_whole_case _live_in_alts bndr -- type-incorrect Cmm. Hence we check that the types match, and if -- they don't we'll fall through and emit the usual enter/return -- code. Test case: codeGen/should_compile/3132.hs - | isUnLiftedType (idType v) + , isUnLiftedType (idType v) -- However, we also want to allow an assignment to be generated -- in the case when the types are compatible, because this allows @@ -151,40 +147,45 @@ cgCase (StgApp v []) _live_in_whole_case _live_in_alts bndr -- the HValue really is a MutVar#. The types are compatible though, -- so we can just generate an assignment. || reps_compatible - = do { when (not reps_compatible) $ + = WARN( null (idCgRep v), ptext (sLit "Case of void constant; missing optimisation somewhere") <+> ppr bndr) + do { when (not reps_compatible) $ panic "cgCase: reps do not match, perhaps a dodgy unsafeCoerce?" - -- Careful! we can't just bind the default binder to the same thing - -- as the scrutinee, since it might be a stack location, and having - -- two bindings pointing at the same stack locn doesn't work (it - -- confuses nukeDeadBindings). Hence, use a new temp. - ; v_info <- getCgIdInfo v - ; amode <- idInfoToAmode v_info - ; tmp_reg <- bindNewToTemp bndr - ; stmtC (CmmAssign (CmmLocal tmp_reg) amode) + -- TODO: could just bind the default binder to the same thing as the scrutinee, + -- rather than allocating these temporaries. + -- Having two Ids share locations doesn't confuse nukeDeadBindings any longer. + ; (tmp_regs, do_rhs) <- case alt_type of + PrimAlt _ -> do + tmp_regs <- bindNewToTemp bndr + return (tmp_regs, cgPrimAlts NoGC alt_type (map CmmLocal tmp_regs) alts) + UbxTupAlt _ + | [(DEFAULT, [], _, rhs)] <- alts -> do + tmp_regs <- bindNewToTemp bndr + return (tmp_regs, cgExpr rhs) + | [(DataAlt _, args, _, rhs)] <- alts -> do + tmp_regss <- mapM bindNewToTemp args + bindToRegs bndr (concat tmp_regss) + return (concat tmp_regss, cgExpr rhs) + _ -> panic "cgCase: weird UbxTupAlt?" - ; cgPrimAlts NoGC alt_type (CmmLocal tmp_reg) alts } + ; v_info <- getCgIdInfo v + ; amodes <- idInfoToAmodes v_info + ; forM_ (zipEqual "cgCase" tmp_regs amodes) $ \(tmp_reg, amode) -> stmtC (CmmAssign (CmmLocal tmp_reg) amode) + ; do_rhs } where reps_compatible = idCgRep v == idCgRep bndr \end{code} Special case #2.5; seq# - case seq# a s of v - (# s', a' #) -> e - - ==> - - case a of v - (# s', a' #) -> e - (taking advantage of the fact that the return convention for (# State#, a #) is the same as the return convention for just 'a') \begin{code} cgCase (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _) - live_in_whole_case live_in_alts bndr alt_type alts - = cgCase (StgApp a []) live_in_whole_case live_in_alts bndr alt_type alts + _live_in_whole_case live_in_alts bndr alt_type alts + = do { fun_info <- getCgIdInfo a + ; cgCaseOfApp fun_info [] live_in_alts bndr alt_type alts } \end{code} Special case #3: inline PrimOps and foreign calls. @@ -211,13 +212,12 @@ cgCase (StgOpApp (StgFCallOp fcall _) args _) = ASSERT( isSingleton alts ) do -- *must* be an unboxed tuple alt. -- exactly like the cgInlinePrimOp case for unboxed tuple alts.. - { res_tmps <- mapFCs bindNewToTemp non_void_res_ids - ; let res_hints = map (typeForeignHint.idType) non_void_res_ids - ; cgForeignCall (zipWith CmmHinted res_tmps res_hints) fcall args live_in_alts + { res_tmps <- concatMapM bindNewToTemp res_ids + ; let res_hints = concatMap (typeForeignHint.idType) res_ids + ; cgForeignCall (zipWithEqual "cgCase" CmmHinted res_tmps res_hints) fcall args live_in_alts ; cgExpr rhs } where (_, res_ids, _, rhs) = head alts - non_void_res_ids = filter (nonVoidArg . idCgRep) res_ids unsafe_foreign_call = case fcall of @@ -232,26 +232,8 @@ we can reuse/trim the stack slot holding the variable (if it is in one). cgCase (StgApp fun args) _live_in_whole_case live_in_alts bndr alt_type alts = do { fun_info <- getCgIdInfo fun - ; arg_amodes <- getArgAmodes args - - -- Nuking dead bindings *before* calculating the saves is the - -- value-add here. We might end up freeing up some slots currently - -- occupied by variables only required for the call. - -- NOTE: we need to look up the variables used in the call before - -- doing this, because some of them may not be in the environment - -- afterward. - ; nukeDeadBindings live_in_alts - ; (save_assts, alts_eob_info, maybe_cc_slot) - <- saveVolatileVarsAndRegs live_in_alts - - ; scrut_eob_info - <- forkEval alts_eob_info - (allocStackTop retAddrSizeW >> nopC) - (do { deAllocStackTop retAddrSizeW - ; cgEvalAlts maybe_cc_slot bndr alt_type alts }) - - ; setEndOfBlockInfo scrut_eob_info - (performTailCall fun_info arg_amodes save_assts) } + ; arg_amodes <- mapM getArgAmodes args + ; cgCaseOfApp fun_info arg_amodes live_in_alts bndr alt_type alts } \end{code} Note about return addresses: we *always* push a return address, even @@ -291,6 +273,35 @@ cgCase expr live_in_whole_case live_in_alts bndr alt_type alts } \end{code} +\begin{code} +cgCaseOfApp :: CgIdInfo + -> [[(CgRep, CmmExpr)]] + -> StgLiveVars + -> Id + -> AltType + -> [StgAlt] + -> Code +cgCaseOfApp fun_info arg_amodes live_in_alts bndr alt_type alts + = do { -- Nuking dead bindings *before* calculating the saves is the + -- value-add here. We might end up freeing up some slots currently + -- occupied by variables only required for the call. + -- NOTE: we need to look up the variables used in the call before + -- doing this, because some of them may not be in the environment + -- afterward. + ; nukeDeadBindings live_in_alts + ; (save_assts, alts_eob_info, maybe_cc_slot) + <- saveVolatileVarsAndRegs live_in_alts + + ; scrut_eob_info + <- forkEval alts_eob_info + (allocStackTop retAddrSizeW >> nopC) + (do { deAllocStackTop retAddrSizeW + ; cgEvalAlts maybe_cc_slot bndr alt_type alts }) + + ; setEndOfBlockInfo scrut_eob_info + (performTailCall fun_info arg_amodes save_assts) } +\end{code} + There's a lot of machinery going on behind the scenes to manage the stack pointer here. forkEval takes the virtual Sp and free list from the first argument, and turns that into the *real* Sp for the second @@ -327,36 +338,28 @@ anywhere within the record). cgInlinePrimOp :: PrimOp -> [StgArg] -> Id -> AltType -> StgLiveVars -> [(AltCon, [Id], [Bool], StgExpr)] -> Code -cgInlinePrimOp primop args bndr (PrimAlt _) live_in_alts alts - | isVoidArg (idCgRep bndr) - = ASSERT( con == DEFAULT && isSingleton alts && null bs ) - do { -- VOID RESULT; just sequencing, - -- so get in there and do it - -- The bndr should not occur, so no need to bind it - cgPrimOp [] primop args live_in_alts - ; cgExpr rhs } - where - (con,bs,_,rhs) = head alts - cgInlinePrimOp primop args bndr (PrimAlt tycon) live_in_alts alts - = do { -- PRIMITIVE ALTS, with non-void result - tmp_reg <- bindNewToTemp bndr - ; cgPrimOp [tmp_reg] primop args live_in_alts - ; cgPrimAlts NoGC (PrimAlt tycon) (CmmLocal tmp_reg) alts } + = do { -- PRIMITIVE ALTS, with void OR non-void result + tmp_regs <- bindNewToTemp bndr + ; cgPrimOp tmp_regs primop args live_in_alts + ; cgPrimAlts NoGC (PrimAlt tycon) (map CmmLocal tmp_regs) alts } -cgInlinePrimOp primop args _ (UbxTupAlt _) live_in_alts alts - = ASSERT( isSingleton alts ) - do { -- UNBOXED TUPLE ALTS +cgInlinePrimOp primop args bndr (UbxTupAlt _) live_in_alts alts + = do { -- UNBOXED TUPLE ALTS -- No heap check, no yield, just get in there and do it. - -- NB: the case binder isn't bound to anything; - -- it has a unboxed tuple type - res_tmps <- mapFCs bindNewToTemp non_void_res_ids + ; (res_tmps, rhs) <- case alts of + [(DEFAULT, [], _, rhs)] | Just (_, tys) <- splitTyConApp_maybe (idType bndr) -> do + us <- newUniqSupply + let res_tmps = zipWith LocalReg (uniqsFromSupply us) (concatMap (map (argMachRep . primRepToCgRep) . typePrimRep) tys) + return (res_tmps, rhs) + [(DataAlt _, res_ids, _, rhs)] -> do + res_tmps <- concatMapM bindNewToTemp res_ids + return (res_tmps, rhs) + _ -> panic "cgInlinePrimOp" + ; bindToRegs bndr res_tmps ; cgPrimOp res_tmps primop args live_in_alts ; cgExpr rhs } - where - (_, res_ids, _, rhs) = head alts - non_void_res_ids = filter (nonVoidArg . idCgRep) res_ids cgInlinePrimOp primop args bndr (AlgAlt tycon) live_in_alts alts = do { -- ENUMERATION TYPE RETURN @@ -370,7 +373,7 @@ cgInlinePrimOp primop args bndr (AlgAlt tycon) live_in_alts alts -- (avoiding it avoids the assignment) -- The deadness info is set by StgVarInfo ; whenC (not (isDeadBinder bndr)) - (do { tmp_reg <- bindNewToTemp bndr + (do { [tmp_reg] <- bindNewToTemp bndr ; stmtC (CmmAssign (CmmLocal tmp_reg) (tagToClosure tycon tag_amode)) }) @@ -387,7 +390,7 @@ cgInlinePrimOp primop args bndr (AlgAlt tycon) live_in_alts alts do_enum_primop :: PrimOp -> FCode CmmExpr -- Returns amode for result do_enum_primop TagToEnumOp -- No code! | [arg] <- args = do - (_,e) <- getArgAmode arg + [(_,e)] <- getArgAmodes arg return e do_enum_primop primop = do tmp <- newTemp bWord @@ -418,32 +421,36 @@ cgEvalAlts :: Maybe VirtualSpOffset -- Offset of cost-centre to be restored, -- without risk of duplicating code cgEvalAlts cc_slot bndr alt_type@(PrimAlt tycon) alts - = do { let rep = tyConCgRep tycon - reg = dataReturnConvPrim rep -- Bottom for voidRep + = do { let reps = tyConCgRep tycon + regs = case reps of [] -> [] + [rep] -> [dataReturnConvPrim rep] + _ -> panic "cgEvalAlts" ; abs_c <- forkProc $ do - { -- Bind the case binder, except if it's void - -- (reg is bottom in that case) - whenC (nonVoidArg rep) $ - bindNewToReg bndr reg (mkLFArgument bndr) + { -- Bind the case binder + bindNewToReg bndr (zipEqual "cgEvalAlts" regs (mkLFArgument (idType bndr))) ; restoreCurrentCostCentre cc_slot True - ; cgPrimAlts GCMayHappen alt_type reg alts } + ; cgPrimAlts GCMayHappen alt_type regs alts } ; lbl <- emitReturnTarget (idName bndr) abs_c ; returnFC (CaseAlts lbl Nothing bndr) } cgEvalAlts cc_slot bndr (UbxTupAlt _) [(con,args,_,rhs)] - = -- Unboxed tuple case - -- By now, the simplifier should have have turned it - -- into case e of (# a,b #) -> e - -- There shouldn't be a - -- case e of DEFAULT -> e - ASSERT2( case con of { DataAlt _ -> True; _ -> False }, - text "cgEvalAlts: dodgy case of unboxed tuple type" ) - do { -- forkAbsC for the RHS, so that the envt is + = do { -- forkAbsC for the RHS, so that the envt is -- not changed for the emitReturn call abs_c <- forkProc $ do - { (live_regs, ptrs, nptrs, _) <- bindUnboxedTupleComponents args + { (flat_arg_locs, live_regs, ptrs, nptrs) <- case con of + DEFAULT + | Just (_, tys) <- splitTyConApp_maybe (idType bndr) + , [] <- args -> do + (arg_locs, live_regs, ptrs, nptrs, _) <- bindUnboxedTupleComponents [((), typeCgRep ty) | ty <- tys] + return (concatMap snd arg_locs, live_regs, ptrs, nptrs) + DataAlt _ -> do + (arg_locs, live_regs, ptrs, nptrs, _) <- bindUnboxedTupleComponents [(arg, idCgRep arg) | arg <- args] + bindArgsToRegOrStack arg_locs + return (concatMap snd arg_locs, live_regs, ptrs, nptrs) + _ -> panic "cgEvalAlts" + ; bindArgsToRegOrStack [(bndr, flat_arg_locs)] -- Restore the CC *after* binding the tuple components, -- so that we get the stack offset of the saved CC right. ; restoreCurrentCostCentre cc_slot True @@ -457,7 +464,7 @@ cgEvalAlts cc_slot bndr (UbxTupAlt _) [(con,args,_,rhs)] cgEvalAlts cc_slot bndr alt_type alts = -- Algebraic and polymorphic case do { -- Bind the default binder - bindNewToReg bndr nodeReg (mkLFArgument bndr) + bindNewToReg bndr [(nodeReg, only (mkLFArgument (idType bndr)))] -- Generate sequel info for use downstream -- At the moment, we only do it if the type is vector-returnable. @@ -559,7 +566,7 @@ As usual, no binders in the alternatives are yet bound. \begin{code} cgPrimAlts :: GCFlag -> AltType -- Always PrimAlt, but passed to maybeAltHeapCheck - -> CmmReg -- Scrutinee + -> [CmmReg] -- Scrutinee registers: either unary or nullary (if void) -> [StgAlt] -- Alternatives -> Code -- NB: cgPrimAlts emits code that does the case analysis. @@ -568,11 +575,14 @@ cgPrimAlts :: GCFlag -- different to cgAlgAlts -- -- INVARIANT: the default binder is already bound -cgPrimAlts gc_flag alt_type scrutinee alts +cgPrimAlts gc_flag alt_type scrutinees alts = do { tagged_absCs <- forkAlts (map (cgPrimAlt gc_flag alt_type) alts) ; let ((DEFAULT, deflt_absC) : others) = tagged_absCs -- There is always a default alt_absCs = [(lit,rhs) | (LitAlt lit, rhs) <- others] - ; emitLitSwitch (CmmReg scrutinee) alt_absCs deflt_absC } + ; case scrutinees of + [] -> emitCgStmts deflt_absC + [scrut] -> emitLitSwitch (CmmReg scrut) alt_absCs deflt_absC + _ -> panic "cgPrimAlts: unboxed tuple scrutinee" } cgPrimAlt :: GCFlag -> AltType @@ -621,21 +631,19 @@ saveVolatileVars :: StgLiveVars -- Vars which should be made safe -> FCode CmmStmts -- Assignments to to the saves saveVolatileVars vars - = do { stmts_s <- mapFCs save_it (varSetElems vars) + = do { stmts_s <- concatMapM save_it (varSetElems vars) ; return (foldr plusStmts noStmts stmts_s) } where save_it var - = do { v <- getCAddrModeIfVolatile var - ; case v of - Nothing -> return noStmts -- Non-volatile - Just vol_amode -> save_var var vol_amode -- Aha! It's volatile - } - - save_var var vol_amode - = do { slot <- allocPrimStack (idCgRep var) - ; rebindToStack var slot - ; sp_rel <- getSpRelOffset slot - ; returnFC (oneStmt (CmmStore sp_rel vol_amode)) } + = do { vol_amodes <- getVolatilesCAddrModes var -- If non-volatile, empty list + ; (stmts, slots) <- liftM unzip $ forM vol_amodes $ \mb_vol_amode -> case mb_vol_amode of + Nothing -> return (noStmts, Nothing) + Just (rep, vol_amode) -> do + slot <- allocPrimStack rep + sp_rel <- getSpRelOffset slot + returnFC (oneStmt (CmmStore sp_rel vol_amode), Just slot) + ; rebindToStack var slots + ; return stmts } \end{code} --------------------------------------------------------------------------- diff --git a/compiler/codeGen/CgClosure.lhs b/compiler/codeGen/CgClosure.lhs index 4d1ce50099..78308854b0 100644 --- a/compiler/codeGen/CgClosure.lhs +++ b/compiler/codeGen/CgClosure.lhs @@ -53,7 +53,10 @@ import StaticFlags import DynFlags import Outputable import FastString +import MonadUtils +import Control.Arrow (second) +import Control.Monad import Data.List \end{code} @@ -118,7 +121,7 @@ cgStdRhsClosure cgStdRhsClosure bndr _cc _bndr_info _fvs _args _body lf_info payload = do -- AHA! A STANDARD-FORM THUNK { -- LAY OUT THE OBJECT - amodes <- getArgAmodes payload + amodes <- concatMapM getArgAmodes payload ; mod_name <- getModuleName ; let (tot_wds, ptr_wds, amodes_w_offsets) = mkVirtHeapOffsets (isLFThunk lf_info) amodes @@ -169,11 +172,20 @@ cgRhsClosure bndr cc bndr_info fvs upd_flag args body = do ; fv_infos <- mapFCs getCgIdInfo reduced_fvs ; srt_info <- getSRTInfo ; mod_name <- getModuleName - ; let bind_details :: [(CgIdInfo, VirtualHpOffset)] - (tot_wds, ptr_wds, bind_details) - = mkVirtHeapOffsets (isLFThunk lf_info) (map add_rep fv_infos) - - add_rep info = (cgIdInfoArgRep info, info) + ; let flat_bind_details :: [((Id, CgIdElemInfo), VirtualHpOffset)] + (tot_wds, ptr_wds, flat_bind_details) + = mkVirtHeapOffsets (isLFThunk lf_info) + [(cgIdElemInfoArgRep elem_info, + (cgIdInfoId info, elem_info)) + | info <- fv_infos + , elem_info <- cgIdInfoElems info] + + bind_details :: [(Id, [(VirtualHpOffset, CgIdElemInfo)])] + bind_details = [(info_id, [ (offset, elem_info) + | ((id, elem_info), offset) <- flat_bind_details + , id == info_id ]) + | info <- fv_infos + , let info_id = cgIdInfoId info] descr = closureDescription mod_name name closure_info = mkClosureInfo False -- Not static @@ -187,25 +199,26 @@ cgRhsClosure bndr cc bndr_info fvs upd_flag args body = do -- A function closure pointer may be tagged, so we -- must take it into account when accessing the free variables. mbtag = tagForArity (length args) - bind_fv (info, offset) + bind_fv (id, offset_infos) | Just tag <- mbtag - = bindNewToUntagNode (cgIdInfoId info) offset (cgIdInfoLF info) tag + = bindNewToUntagNode id (map (second cgIdElemInfoLF) offset_infos) tag | otherwise - = bindNewToNode (cgIdInfoId info) offset (cgIdInfoLF info) + = bindNewToNode id (map (second cgIdElemInfoLF) offset_infos) ; mapCs bind_fv bind_details -- Bind the binder itself, if it is a free var - ; whenC bndr_is_a_fv (bindNewToReg bndr nodeReg lf_info) + ; whenC bndr_is_a_fv (bindNewToReg bndr [(nodeReg, lf_info)]) -- Compile the body ; closureCodeBody bndr_info closure_info cc args body }) -- BUILD THE OBJECT - ; let - to_amode (info, offset) = do { amode <- idInfoToAmode info - ; return (amode, offset) } + ; let -- info_offsets :: [(CgIdElemInfo, LambdaFormInfo)] + to_amode (_id, offset_infos) = forM offset_infos $ \(offset, info) -> do + amode <- idElemInfoToAmode info + return (amode, offset) -- ; (use_cc, blame_cc) <- chooseDynCostCentres cc args body - ; amodes_w_offsets <- mapFCs to_amode bind_details + ; amodes_w_offsets <- concatMapM to_amode bind_details ; heap_offset <- allocDynClosure closure_info curCCS curCCS amodes_w_offsets -- RETURN @@ -274,7 +287,8 @@ closureCodeBody _binder_info cl_info cc args body do { -- Get the current virtual Sp (it might not be zero, -- eg. if we're compiling a let-no-escape). vSp <- getVirtSp - ; let (reg_args, other_args) = assignCallRegs (addIdReps args) + ; let args_with_reps = addIdReps args + (reg_args, other_args) = assignCallRegs args_with_reps (sp_top, stk_args) = mkVirtStkOffsets vSp other_args -- Allocate the global ticky counter @@ -286,34 +300,35 @@ closureCodeBody _binder_info cl_info cc args body ; setTickyCtrLabel ticky_ctr_lbl $ do -- Emit the slow-entry code - { reg_save_code <- mkSlowEntryCode cl_info reg_args + { reg_save_code <- mkSlowEntryCode cl_info [(idCgRep arg !! i , reg) | ((arg, i), reg) <- reg_args] -- Emit the main entry code ; blks <- forkProc $ - mkFunEntryCode cl_info cc reg_args stk_args + mkFunEntryCode cl_info cc (lookupArgLocs reg_args stk_args args) sp_top reg_save_code body ; emitClosureCodeAndInfoTable cl_info [] blks }} - mkFunEntryCode :: ClosureInfo -> CostCentreStack - -> [(Id,GlobalReg)] -- Args in regs - -> [(Id,VirtualSpOffset)] -- Args on stack + -> [(Id,[Either GlobalReg VirtualSpOffset])] -- Args in regs/stack -> VirtualSpOffset -- Last allocated word on stack -> CmmStmts -- Register-save code in case of GC -> StgExpr -> Code -- The main entry code for the closure -mkFunEntryCode cl_info cc reg_args stk_args sp_top reg_save_code body = do +mkFunEntryCode cl_info cc args sp_top reg_save_code body = do { -- Bind args to regs/stack as appropriate, -- and record expected position of sps - ; bindArgsToRegs reg_args - ; bindArgsToStack stk_args + ; bindArgsToRegOrStack args ; setRealAndVirtualSp sp_top -- Do the business + ; let reg_args :: [(CgRep, GlobalReg)] + reg_args = [ (rep, reg) + | (id, ei_reg_offs) <- args + , (rep, Left reg) <- zipEqual "mkFunEntryCode" (idCgRep id) ei_reg_offs ] ; funWrapper cl_info reg_args reg_save_code $ do { tickyEnterFun cl_info ; enterCostCentreFun cc @@ -337,7 +352,7 @@ The slow entry point is used in two places: (b) returning from a heap-check failure \begin{code} -mkSlowEntryCode :: ClosureInfo -> [(Id,GlobalReg)] -> FCode CmmStmts +mkSlowEntryCode :: ClosureInfo -> [(CgRep,GlobalReg)] -> FCode CmmStmts -- If this function doesn't have a specialised ArgDescr, we need -- to generate the function's arg bitmap, slow-entry code, and -- register-save code for the heap-check failure @@ -357,7 +372,7 @@ mkSlowEntryCode cl_info reg_args save_stmts = oneStmt stk_adj_push `plusStmts` mkStmts save_assts reps_w_regs :: [(CgRep,GlobalReg)] - reps_w_regs = [(idCgRep id, reg) | (id,reg) <- reverse reg_args] + reps_w_regs = reverse $ reg_args (final_stk_offset, stk_offsets) = mapAccumL (\off (rep,_) -> (off + cgRepSizeW rep, off)) 0 reps_w_regs @@ -407,10 +422,10 @@ thunkWrapper closure_info thunk_code = do -- setupUpdate *encloses* the thunk_code } -funWrapper :: ClosureInfo -- Closure whose code body this is - -> [(Id,GlobalReg)] -- List of argument registers (if any) - -> CmmStmts -- reg saves for the heap check failure - -> Code -- Body of function being compiled +funWrapper :: ClosureInfo -- Closure whose code body this is + -> [(CgRep,GlobalReg)] -- List of argument registers (if any) + -> CmmStmts -- reg saves for the heap check failure + -> Code -- Body of function being compiled -> Code funWrapper closure_info arg_regs reg_save_code fun_body = do { let node_points = nodeMustPointToIt (closureLFInfo closure_info) diff --git a/compiler/codeGen/CgCon.lhs b/compiler/codeGen/CgCon.lhs index 9049504dca..22a7c792c7 100644 --- a/compiler/codeGen/CgCon.lhs +++ b/compiler/codeGen/CgCon.lhs @@ -41,7 +41,6 @@ import TyCon import DataCon import Id import IdInfo -import Type import PrelInfo import Outputable import ListSetOps @@ -51,6 +50,7 @@ import DynFlags import FastString import Platform import StaticFlags +import MonadUtils import Control.Monad \end{code} @@ -75,7 +75,7 @@ cgTopRhsCon id con args ; ASSERT( args `lengthIs` dataConRepArity con ) return () -- LAY IT OUT - ; amodes <- getArgAmodes args + ; amodes <- concatMapM getArgAmodes args ; let platform = targetPlatform dflags @@ -250,11 +250,13 @@ bindConArgs con args let -- The binding below forces the masking out of the tag bits -- when accessing the constructor field. - bind_arg (arg, offset) = bindNewToUntagNode arg offset (mkLFArgument arg) (tagForCon con) (_, args_w_offsets) = layOutDynConstr con (addIdReps args) -- ASSERT(not (isUnboxedTupleCon con)) return () - mapCs bind_arg args_w_offsets + forM_ args $ \arg -> do + let offset_lf_infos = zipWith (\i lf_info -> (assoc "bindConArgs" args_w_offsets (arg, i), lf_info)) + [0..] (mkLFArgument (idType arg)) + bindNewToUntagNode arg offset_lf_infos (tagForCon con) \end{code} Unboxed tuples are handled slightly differently - the object is @@ -262,20 +264,21 @@ returned in registers and on the stack instead of the heap. \begin{code} bindUnboxedTupleComponents - :: [Id] -- Args - -> FCode ([(Id,GlobalReg)], -- Regs assigned + :: [(a, [CgRep])] -- Arg reps + -> FCode ([(a, [Either GlobalReg VirtualSpOffset])], -- Argument locations + [(CgRep,GlobalReg)], -- Regs assigned WordOff, -- Number of pointer stack slots WordOff, -- Number of non-pointer stack slots VirtualSpOffset) -- Offset of return address slot -- (= realSP on entry) -bindUnboxedTupleComponents args +bindUnboxedTupleComponents repss = do { vsp <- getVirtSp ; rsp <- getRealSp -- Assign as many components as possible to registers - ; let (reg_args, stk_args) = assignReturnRegs (addIdReps args) + ; let (reg_args, stk_args) = assignReturnRegs $ addIdReps' (map snd repss) -- Separate the rest of the args into pointers and non-pointers (ptr_args, nptr_args) = separateByPtrFollowness stk_args @@ -299,11 +302,9 @@ bindUnboxedTupleComponents args -- (trimming back the virtual SP), but the real SP still points to that slot ; freeStackSlots [vsp+1,vsp+2 .. rsp] - ; bindArgsToRegs reg_args - ; bindArgsToStack ptr_offsets - ; bindArgsToStack nptr_offsets + ; let arg_locs = lookupArgLocs' reg_args (ptr_offsets ++ nptr_offsets) repss - ; returnFC (reg_args, ptrs, nptrs, rsp) } + ; returnFC (arg_locs, [((snd (repss !! n)) !! i, reg) | ((n, i), reg) <- reg_args], ptrs, nptrs, rsp) } \end{code} %************************************************************************ @@ -324,7 +325,8 @@ cgReturnDataCon con amodes -- for it to be marked as "used" for LDV profiling. | opt_SccProfilingOn = build_it_then enter_it | otherwise - = ASSERT( amodes `lengthIs` dataConRepArity con ) + = -- NB: this assert is not true because some elements may be void/unboxed tuples + -- ASSERT( length amodes == dataConArity con ) do { EndOfBlockInfo _ sequel <- getEndOfBlockInfo ; case sequel of CaseAlts _ (Just (alts, deflt_lbl)) bndr @@ -369,7 +371,7 @@ cgReturnDataCon con amodes -- out as '54' :-) tickyReturnNewCon (length amodes) ; idinfo <- buildDynCon (dataConWorkId con) currentCCS con amodes - ; amode <- idInfoToAmode idinfo + ; amode <- idElemInfoToAmode (cgIdInfoSingleElem "cgReturnDataCon" idinfo) ; checkedAbsC (CmmAssign nodeReg amode) ; performReturn return_code } \end{code} @@ -466,8 +468,8 @@ cgDataCon data_con ; ldvEnter (CmmReg nodeReg) ; body_code } - arg_reps :: [(CgRep, Type)] - arg_reps = [(typeCgRep ty, ty) | ty <- dataConRepArgTys data_con] + arg_reps :: [(CgRep, ())] + arg_reps = [(rep, ()) | ty <- dataConRepArgTys data_con, rep <- typeCgRep ty] body_code = do { -- NB: We don't set CC when entering data (WDP 94/06) diff --git a/compiler/codeGen/CgExpr.lhs b/compiler/codeGen/CgExpr.lhs index cb3a86ef7f..41d713bde4 100644 --- a/compiler/codeGen/CgExpr.lhs +++ b/compiler/codeGen/CgExpr.lhs @@ -48,6 +48,7 @@ import Maybes import ListSetOps import BasicTypes import Util +import MonadUtils import Outputable import StaticFlags \end{code} @@ -83,7 +84,7 @@ cgExpr (StgApp fun args) = cgTailCall fun args \begin{code} cgExpr (StgConApp con args) - = do { amodes <- getArgAmodes args + = do { amodes <- concatMapM getArgAmodes args ; cgReturnDataCon con amodes } \end{code} @@ -94,9 +95,9 @@ top of the stack. \begin{code} cgExpr (StgLit lit) = do { cmm_lit <- cgLit lit - ; performPrimReturn rep (CmmLit cmm_lit) } + ; performPrimReturn [(rep, CmmLit cmm_lit)] } where - rep = (typeCgRep) (literalType lit) + [rep] = typeCgRep (literalType lit) \end{code} @@ -122,16 +123,15 @@ cgExpr (StgOpApp (StgFCallOp fcall _) stg_args res_ty) = do a return address right before doing the call, so the args must be out of the way. -} - reps_n_amodes <- getArgAmodes stg_args + reps_n_amodes <- mapM getArgAmodes stg_args let -- Get the *non-void* args, and jiggle them with shimForeignCall - arg_exprs = [ (shimForeignCallArg stg_arg expr, stg_arg) - | (stg_arg, (rep,expr)) <- stg_args `zip` reps_n_amodes, - nonVoidArg rep] + arg_exprs = [ expr + | (stg_arg, rep_exprs) <- stg_args `zip` reps_n_amodes + , expr <- shimForeignCallArg stg_arg (map snd rep_exprs) ] - arg_tmps <- sequence [ assignTemp arg - | (arg, _) <- arg_exprs] - let arg_hints = zipWith CmmHinted arg_tmps (map (typeForeignHint.stgArgType) stg_args) + arg_tmps <- mapM assignTemp arg_exprs + let arg_hints = zipWith CmmHinted arg_tmps (concatMap (typeForeignHint.stgArgType) stg_args) {- Now, allocate some result regs. -} @@ -145,7 +145,7 @@ cgExpr (StgOpApp (StgFCallOp fcall _) stg_args res_ty) = do cgExpr (StgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty) = ASSERT(isEnumerationTyCon tycon) - do { (_rep,amode) <- getArgAmode arg + do { [(_rep,amode)] <- getArgAmodes arg ; amode' <- assignTemp amode -- We're going to use it twice, -- so save in a temp if non-trivial ; stmtC (CmmAssign nodeReg (tagToClosure tycon amode')) @@ -170,15 +170,17 @@ cgExpr (StgOpApp (StgPrimOp primop) args res_ty) | primOpOutOfLine primop = tailCallPrimOp primop args - | ReturnsPrim VoidRep <- result_info + | ReturnsPrim [] <- result_info = do cgPrimOp [] primop args emptyVarSet -- ToDo: STG Live -- worried about this performReturn $ emitReturnInstr (Just []) - | ReturnsPrim rep <- result_info - = do res <- newTemp (typeCmmType res_ty) - cgPrimOp [res] primop args emptyVarSet - performPrimReturn (primRepToCgRep rep) (CmmReg (CmmLocal res)) + | ReturnsPrim reps <- result_info + = do ress <- mapM newTemp (typeCmmType res_ty) + cgPrimOp ress primop args emptyVarSet + performPrimReturn $ zipWithEqual "cgExpr" + (\rep res -> (primRepToCgRep rep, CmmReg (CmmLocal res))) + reps ress | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon = do (reps, regs, _hints) <- newUnboxedTupleRegs res_ty @@ -305,7 +307,7 @@ cgRhs :: Id -> StgRhs -> FCode (Id, CgIdInfo) -- the Id is passed along so a binding can be set up cgRhs name (StgRhsCon maybe_cc con args) - = do { amodes <- getArgAmodes args + = do { amodes <- concatMapM getArgAmodes args ; idinfo <- buildDynCon name maybe_cc con amodes ; returnFC (name, idinfo) } @@ -345,9 +347,10 @@ mkRhsClosure bndr cc bi (AlgAlt _) [(DataAlt con, params, _use_mask, (StgApp selectee [{-no args-}]))]) - | the_fv == scrutinee -- Scrutinee is the only free variable - && maybeToBool maybe_offset -- Selectee is a component of the tuple - && offset_into_int <= mAX_SPEC_SELECTEE_SIZE -- Offset is small enough + | the_fv == scrutinee -- Scrutinee is the only free variable + , [_] <- idCgRep selectee -- Selectee is unary (so guaranteed contiguous layout) + , maybeToBool maybe_offset -- Selectee is a component of the tuple + , offset_into_int <= mAX_SPEC_SELECTEE_SIZE -- Offset is small enough = -- NOT TRUE: ASSERT(is_single_constructor) -- The simplifier may have statically determined that the single alternative -- is the only possible case and eliminated the others, even if there are @@ -360,7 +363,7 @@ mkRhsClosure bndr cc bi (isUpdatable upd_flag) (_, params_w_offsets) = layOutDynConstr con (addIdReps params) -- Just want the layout - maybe_offset = assocMaybe params_w_offsets selectee + maybe_offset = assocMaybe params_w_offsets (selectee, 0) Just the_offset = maybe_offset offset_into_int = the_offset - fixedHdrSize \end{code} @@ -389,7 +392,8 @@ mkRhsClosure bndr cc bi body@(StgApp fun_id args) | args `lengthIs` (arity-1) - && all isFollowableArg (map idCgRep fvs) + && all (\fv -> case idCgRep fv of [rep] | isFollowableArg rep -> True; _ -> False) + fvs && isUpdatable upd_flag && arity <= mAX_SPEC_AP_SIZE && not opt_SccProfilingOn -- not when profiling: we don't want to @@ -481,9 +485,9 @@ newUnboxedTupleRegs :: Type -> FCode ([CgRep], [LocalReg], [ForeignHint]) newUnboxedTupleRegs res_ty = let ty_args = tyConAppArgs (repType res_ty) - (reps,hints) = unzip [ (rep, typeForeignHint ty) | ty <- ty_args, - let rep = typeCgRep ty, - nonVoidArg rep ] + (reps,hints) = unzip [ res + | ty <- ty_args + , res <- zipEqual "newUnboxedTupleRegs" (typeCgRep ty) (typeForeignHint ty) ] make_new_temp rep = newTemp (argMachRep rep) in do regs <- mapM make_new_temp reps diff --git a/compiler/codeGen/CgForeignCall.hs b/compiler/codeGen/CgForeignCall.hs index 16e77eca35..4b714d552b 100644 --- a/compiler/codeGen/CgForeignCall.hs +++ b/compiler/codeGen/CgForeignCall.hs @@ -24,7 +24,6 @@ import CgMonad import CgUtils import Type import TysPrim -import ClosureInfo( nonVoidArg ) import CLabel import OldCmm import OldCmmUtils @@ -36,6 +35,7 @@ import Outputable import Module import FastString import BasicTypes +import Util import Control.Monad @@ -50,15 +50,14 @@ cgForeignCall -> Code cgForeignCall results fcall stg_args live = do - reps_n_amodes <- getArgAmodes stg_args + reps_n_amodess <- mapM getArgAmodes stg_args let - -- Get the *non-void* args, and jiggle them with shimForeignCall - arg_exprs = [ shimForeignCallArg stg_arg expr - | (stg_arg, (rep,expr)) <- stg_args `zip` reps_n_amodes, - nonVoidArg rep] - - arg_hints = zipWith CmmHinted - arg_exprs (map (typeForeignHint.stgArgType) stg_args) + -- Get the args, and jiggle them with shimForeignCall + arg_hints = [ CmmHinted shimmed_expr hint + | (stg_arg, reps_n_amodes) <- zipEqual "cgForeignCall" stg_args reps_n_amodess + , let exprs = map snd reps_n_amodes + , (shimmed_expr, hint) <- zipEqual "cgForeignCall" (shimForeignCallArg stg_arg exprs) + (typeForeignHint (stgArgType stg_arg)) ] -- in emitForeignCall results fcall arg_hints live @@ -300,15 +299,14 @@ hpAlloc = CmmGlobal HpAlloc -- value passed to the call. For ByteArray#/Array# we pass the -- address of the actual array, not the address of the heap object. -shimForeignCallArg :: StgArg -> CmmExpr -> CmmExpr -shimForeignCallArg arg expr +shimForeignCallArg :: StgArg -> [CmmExpr] -> [CmmExpr] +shimForeignCallArg arg [expr] | tycon == arrayPrimTyCon || tycon == mutableArrayPrimTyCon - = cmmOffsetB expr arrPtrsHdrSize + = [cmmOffsetB expr arrPtrsHdrSize] | tycon == byteArrayPrimTyCon || tycon == mutableByteArrayPrimTyCon - = cmmOffsetB expr arrWordsHdrSize - - | otherwise = expr + = [cmmOffsetB expr arrWordsHdrSize] where -- should be a tycon app, since this is a foreign call tycon = tyConAppTyCon (repType (stgArgType arg)) +shimForeignCallArg _ exprs = exprs diff --git a/compiler/codeGen/CgHeapery.lhs b/compiler/codeGen/CgHeapery.lhs index dfe146dfc8..4571fe0a24 100644 --- a/compiler/codeGen/CgHeapery.lhs +++ b/compiler/codeGen/CgHeapery.lhs @@ -43,11 +43,9 @@ import SMRep import OldCmm import OldCmmUtils -import Id import DataCon import TyCon import CostCentre -import Util import Module import Constants import Outputable @@ -158,8 +156,7 @@ mkVirtHeapOffsets -- First in list gets lowest offset, which is initial offset + 1. mkVirtHeapOffsets is_thunk things - = let non_void_things = filterOut (isVoidArg . fst) things - (ptrs, non_ptrs) = separateByPtrFollowness non_void_things + = let (ptrs, non_ptrs) = separateByPtrFollowness things (wds_of_ptrs, ptrs_w_offsets) = mapAccumL computeOffset 0 ptrs (tot_wds, non_ptrs_w_offsets) = mapAccumL computeOffset wds_of_ptrs non_ptrs in @@ -374,17 +371,18 @@ altHeapCheck alt_type code -- Enter R1 after the heap check; it's a pointer gc_info (PrimAlt tc) - = case primRepToCgRep (tyConPrimRep tc) of - VoidArg -> (mkL "stg_gc_noregs", Just []) - FloatArg -> (mkL "stg_gc_f1", Just [FloatReg 1]) - DoubleArg -> (mkL "stg_gc_d1", Just [DoubleReg 1]) - LongArg -> (mkL "stg_gc_l1", Just [LongReg 1]) + = case map primRepToCgRep (tyConPrimRep tc) of + [] -> (mkL "stg_gc_noregs", Just []) + [FloatArg] -> (mkL "stg_gc_f1", Just [FloatReg 1]) + [DoubleArg] -> (mkL "stg_gc_d1", Just [DoubleReg 1]) + [LongArg] -> (mkL "stg_gc_l1", Just [LongReg 1]) -- R1 is boxed but unlifted: - PtrArg -> (mkL "stg_gc_unpt_r1", Just [node]) + [PtrArg] -> (mkL "stg_gc_unpt_r1", Just [node]) -- R1 is unboxed: - NonPtrArg -> (mkL "stg_gc_unbx_r1", Just [node]) + [NonPtrArg] -> (mkL "stg_gc_unbx_r1", Just [node]) + _ -> panic "altHeapCheck: n-ary type bound in PrimAlt" - gc_info (UbxTupAlt _) = panic "altHeapCheck" + gc_info (UbxTupAlt _) = panic "altHeapCheck: unboxed tuple" \end{code} @@ -397,7 +395,7 @@ non-pointers, and pass the number of each to the heap check code. \begin{code} unbxTupleHeapCheck - :: [(Id, GlobalReg)] -- Live registers + :: [(CgRep, GlobalReg)] -- Live registers -> WordOff -- no. of stack slots containing ptrs -> WordOff -- no. of stack slots containing nonptrs -> CmmStmts -- code to insert in the failure path diff --git a/compiler/codeGen/CgInfoTbls.hs b/compiler/codeGen/CgInfoTbls.hs index 1e80616887..a9bac49d20 100644 --- a/compiler/codeGen/CgInfoTbls.hs +++ b/compiler/codeGen/CgInfoTbls.hs @@ -42,6 +42,7 @@ import OldCmm import CLabel import Name import Unique +import UniqSupply import StaticFlags import Constants @@ -178,24 +179,27 @@ mkStackLayout = do [(offset - frame_sp - retAddrSizeW, b) | (offset, b) <- binds] + us <- newUniqSupply WARN( not (all (\bind -> fst bind >= 0) rel_binds), - pprPlatform platform binds $$ pprPlatform platform rel_binds $$ + pprPlatform platform (map fst binds) $$ pprPlatform platform (map fst rel_binds) $$ ppr frame_size $$ ppr real_sp $$ ppr frame_sp ) - return $ stack_layout rel_binds frame_size + return $ stack_layout us rel_binds frame_size -stack_layout :: [(VirtualSpOffset, CgIdInfo)] +stack_layout :: UniqSupply + -> [(VirtualSpOffset, CgRep)] -> WordOff -> [Maybe LocalReg] -stack_layout [] sizeW = replicate sizeW Nothing -stack_layout ((off, bind):binds) sizeW | off == sizeW - 1 = - (Just stack_bind) : (stack_layout binds (sizeW - rep_size)) +stack_layout _ [] sizeW = replicate sizeW Nothing +stack_layout us ((off, rep):binds) sizeW | off == sizeW - 1 = + (Just stack_bind) : (stack_layout us' binds (sizeW - rep_size)) where - rep_size = cgRepSizeW (cgIdInfoArgRep bind) + rep_size = cgRepSizeW rep stack_bind = LocalReg unique machRep - unique = getUnique (cgIdInfoId bind) - machRep = argMachRep (cgIdInfoArgRep bind) -stack_layout binds@(_:_) sizeW | otherwise = - Nothing : (stack_layout binds (sizeW - 1)) + (unique, us') = takeUniqFromSupply us + machRep = argMachRep rep +stack_layout us binds@(_:_) sizeW + | sizeW < 0 = panic "stack_layout: infinite loop?" + | otherwise = Nothing : (stack_layout us binds (sizeW - 1)) {- Another way to write the function that might be less error prone (untested) stack_layout offsets sizeW = result diff --git a/compiler/codeGen/CgLetNoEscape.lhs b/compiler/codeGen/CgLetNoEscape.lhs index 2fb603baed..8f13918279 100644 --- a/compiler/codeGen/CgLetNoEscape.lhs +++ b/compiler/codeGen/CgLetNoEscape.lhs @@ -188,7 +188,8 @@ cgLetNoEscapeBody :: Id -- Name of the joint point -> Code cgLetNoEscapeBody bndr _ cc_slot all_args body = do - { (arg_regs, ptrs, nptrs, ret_slot) <- bindUnboxedTupleComponents all_args + { (arg_locs, arg_regs, ptrs, nptrs, ret_slot) <- bindUnboxedTupleComponents [(arg, idCgRep arg) | arg <- all_args] + ; bindArgsToRegOrStack arg_locs -- restore the saved cost centre. BUT: we must not free the stack slot -- containing the cost centre, because it might be needed for a diff --git a/compiler/codeGen/CgParallel.hs b/compiler/codeGen/CgParallel.hs index 2804104708..af4c094de7 100644 --- a/compiler/codeGen/CgParallel.hs +++ b/compiler/codeGen/CgParallel.hs @@ -21,9 +21,9 @@ module CgParallel( doGranAllocate ) where +import ClosureInfo (CgRep) import CgMonad import CgCallConv -import Id import OldCmm import StaticFlags import Outputable @@ -50,7 +50,7 @@ doGranAllocate _hp ------------------------- -granFetchAndReschedule :: [(Id,GlobalReg)] -- Live registers +granFetchAndReschedule :: [(CgRep,GlobalReg)] -- Live registers -> Bool -- Node reqd? -> Code -- Emit code for simulating a fetch and then reschedule. @@ -89,7 +89,7 @@ reschedule _liveness _node_reqd = panic "granReschedule" -- that they are not inlined (see @CgCases.lhs@). These alternatives will -- be turned into separate functions. -granYield :: [(Id,GlobalReg)] -- Live registers +granYield :: [(CgRep,GlobalReg)] -- Live registers -> Bool -- Node reqd? -> Code diff --git a/compiler/codeGen/CgPrimOp.hs b/compiler/codeGen/CgPrimOp.hs index 3f1187f6be..17f508b666 100644 --- a/compiler/codeGen/CgPrimOp.hs +++ b/compiler/codeGen/CgPrimOp.hs @@ -32,6 +32,7 @@ import Constants import Outputable import FastString import StaticFlags +import MonadUtils import Control.Monad @@ -45,9 +46,8 @@ cgPrimOp :: [CmmFormal] -- where to put the results -> Code cgPrimOp results op args live - = do arg_exprs <- getArgAmodes args - let non_void_args = [ e | (r,e) <- arg_exprs, nonVoidArg r ] - emitPrimOp results op non_void_args live + = do arg_exprs <- concatMapM getArgAmodes args + emitPrimOp results op (map snd arg_exprs) live emitPrimOp :: [CmmFormal] -- where to put the results diff --git a/compiler/codeGen/CgStackery.lhs b/compiler/codeGen/CgStackery.lhs index 2628760183..5053150bb9 100644 --- a/compiler/codeGen/CgStackery.lhs +++ b/compiler/codeGen/CgStackery.lhs @@ -119,14 +119,12 @@ mkVirtStkOffsets :: VirtualSpOffset -- Offset of the last allocated thing -> [(CgRep,a)] -- things to make offsets for -> (VirtualSpOffset, -- OUTPUTS: Topmost allocated word - [(a, VirtualSpOffset)]) -- things with offsets (voids filtered out) + [(a, VirtualSpOffset)]) -- things with offsets mkVirtStkOffsets init_Sp_offset things = loop init_Sp_offset [] (reverse things) where loop offset offs [] = (offset,offs) - loop offset offs ((VoidArg,_):things) = loop offset offs things - -- ignore Void arguments loop offset offs ((rep,t):things) = loop thing_slot ((t,thing_slot):offs) things where diff --git a/compiler/codeGen/CgTailCall.lhs b/compiler/codeGen/CgTailCall.lhs index 499529d841..c0d63b4e14 100644 --- a/compiler/codeGen/CgTailCall.lhs +++ b/compiler/codeGen/CgTailCall.lhs @@ -43,9 +43,11 @@ import StgSyn import PrimOp import Outputable import StaticFlags +import Util +import Maybes +import MonadUtils import Control.Monad -import Data.Maybe ----------------------------------------------------------------------------- -- Tail Calls @@ -78,11 +80,11 @@ cgTailCall fun args ; if isUnLiftedType (idType fun) then -- Primitive return ASSERT( null args ) - do { fun_amode <- idInfoToAmode fun_info - ; performPrimReturn (cgIdInfoArgRep fun_info) fun_amode } + do { fun_amodes <- idInfoToAmodes fun_info + ; performPrimReturn (zipEqual "cgTail" (map cgIdElemInfoArgRep (cgIdInfoElems fun_info)) fun_amodes) } else -- Normal case, fun is boxed - do { arg_amodes <- getArgAmodes args + do { arg_amodes <- mapM getArgAmodes args ; performTailCall fun_info arg_amodes noStmts } } @@ -91,26 +93,28 @@ cgTailCall fun args -- The guts of a tail-call performTailCall - :: CgIdInfo -- The function - -> [(CgRep,CmmExpr)] -- Args - -> CmmStmts -- Pending simultaneous assignments - -- *** GUARANTEED to contain only stack assignments. + :: CgIdInfo -- The function + -> [[(CgRep,CmmExpr)]] -- Args + -> CmmStmts -- Pending simultaneous assignments + -- *** GUARANTEED to contain only stack assignments. -> Code performTailCall fun_info arg_amodes pending_assts - | Just join_sp <- maybeLetNoEscape fun_info + | Just join_sp <- maybeLetNoEscape fun_elem_info = -- A let-no-escape is slightly different, because we -- arrange the stack arguments into pointers and non-pointers -- to make the heap check easier. The tail-call sequence -- is very similar to returning an unboxed tuple, so we -- share some code. - do { (final_sp, arg_assts, live) <- pushUnboxedTuple join_sp arg_amodes + -- + -- NB: let-no-escapes calls are always saturated or better! + do { (final_sp, arg_assts, live) <- pushUnboxedTuple join_sp (concat arg_amodes) ; emitSimultaneously (pending_assts `plusStmts` arg_assts) ; let lbl = enterReturnPtLabel (idUnique (cgIdInfoId fun_info)) ; doFinalJump final_sp True $ jumpToLbl lbl (Just live) } | otherwise - = do { fun_amode <- idInfoToAmode fun_info + = do { fun_amode <- idElemInfoToAmode fun_elem_info ; let assignSt = CmmAssign nodeReg fun_amode node_asst = oneStmt assignSt node_live = Just [node] @@ -160,7 +164,7 @@ performTailCall fun_info arg_amodes pending_assts { if (isKnownFun lf_info) then tickyKnownCallTooFewArgs else tickyUnknownCall - ; tickySlowCallPat (map fst arg_amodes) + ; tickySlowCallPat (concatMap (map fst) arg_amodes) } ; let (apply_lbl, args, extra_args) @@ -173,24 +177,25 @@ performTailCall fun_info arg_amodes pending_assts -- A direct function call (possibly with some left-over arguments) DirectEntry lbl arity -> do - { if arity == length arg_amodes - then tickyKnownCallExact - else do tickyKnownCallExtraArgs - tickySlowCallPat (map fst (drop arity arg_amodes)) + { if length arg_amodes == arity + then tickyKnownCallExact + else do tickyKnownCallExtraArgs + tickySlowCallPat (concatMap (map fst) (drop arity arg_amodes)) ; let -- The args beyond the arity go straight on the stack (arity_args, extra_args) = splitAt arity arg_amodes - ; directCall sp lbl arity_args extra_args opt_node_live + ; directCall sp lbl (concat arity_args) extra_args opt_node_live (opt_node_asst `plusStmts` pending_assts) } } where fun_id = cgIdInfoId fun_info fun_name = idName fun_id - lf_info = cgIdInfoLF fun_info - fun_has_cafs = idCafInfo fun_id + fun_elem_info = cgIdInfoSingleElem ("performTailCall: " ++ showPpr fun_id) fun_info + lf_info = cgIdElemInfoLF fun_elem_info + fun_has_cafs = idCafInfo fun_id untag_node = CmmAssign nodeReg (cmmUntag (CmmReg nodeReg)) -- Test if closure is a constructor maybeSwitchOnCons enterClosure eob @@ -247,7 +252,7 @@ performTailCall fun_info arg_amodes pending_assts -} directCall :: VirtualSpOffset -> CLabel -> [(CgRep, CmmExpr)] - -> [(CgRep, CmmExpr)] -> Maybe [GlobalReg] -> CmmStmts + -> [[(CgRep, CmmExpr)]] -> Maybe [GlobalReg] -> CmmStmts -> Code directCall sp lbl args extra_args live_node assts = do let @@ -302,22 +307,17 @@ performReturn finish_code -- ---------------------------------------------------------------------------- -- Primitive Returns --- Just load the return value into the right register, and return. +-- Just load the return values into the right registers, and return. -performPrimReturn :: CgRep -> CmmExpr -> Code +performPrimReturn :: [(CgRep, CmmExpr)] -> Code --- non-void return value -performPrimReturn rep amode | not (isVoidArg rep) - = do { stmtC (CmmAssign ret_reg amode) - ; performReturn $ emitReturnInstr live_regs } - where - -- careful here as 'dataReturnConvPrim' will panic if given a Void rep - ret_reg@(CmmGlobal r) = dataReturnConvPrim rep - live_regs = Just [r] - --- void return value -performPrimReturn _ _ - = performReturn $ emitReturnInstr (Just []) +-- works for both void, non-void and unboxed-tuple Id return values +performPrimReturn [] = performReturn $ emitReturnInstr (Just []) +performPrimReturn [(rep, amode)] + = do { let ret_reg@(CmmGlobal r) = dataReturnConvPrim rep + ; stmtC (CmmAssign ret_reg amode) + ; performReturn $ emitReturnInstr (Just [r]) } +performPrimReturn rep_amodes = returnUnboxedTuple rep_amodes -- --------------------------------------------------------------------------- @@ -412,7 +412,7 @@ tailCallPrim lbl args = do { -- We're going to perform a normal-looking tail call, -- except that *all* the arguments will be in registers. -- Hence the ASSERT( null leftovers ) - arg_amodes <- getArgAmodes args + arg_amodes <- concatMapM getArgAmodes args ; let (arg_regs, leftovers) = assignPrimOpCallRegs arg_amodes live_regs = Just $ map snd arg_regs jump_to_primop = jumpToLbl lbl live_regs diff --git a/compiler/codeGen/CgTicky.hs b/compiler/codeGen/CgTicky.hs index 0ff440e6bf..2b7ed902d5 100644 --- a/compiler/codeGen/CgTicky.hs +++ b/compiler/codeGen/CgTicky.hs @@ -34,7 +34,7 @@ module CgTicky ( tickyUpdateBhCaf, tickyBlackHole, - tickyUnboxedTupleReturn, tickyVectoredReturn, + tickyUnboxedTupleReturn, tickyReturnOldCon, tickyReturnNewCon, tickyKnownCallTooFewArgs, tickyKnownCallExact, tickyKnownCallExtraArgs, @@ -62,6 +62,7 @@ import FastString import Constants import Outputable import Module +import Maybes -- Turgid imports for showTypeCategory import PrelNames @@ -71,8 +72,6 @@ import TyCon import DynFlags -import Data.Maybe - ----------------------------------------------------------------------------- -- -- Ticky-ticky profiling @@ -200,16 +199,11 @@ tickyReturnNewCon arity = ifTicky $ do { bumpTickyCounter (fsLit "RET_NEW_ctr") ; bumpHistogram (fsLit "RET_NEW_hst") arity } -tickyUnboxedTupleReturn :: Int -> Code +tickyUnboxedTupleReturn :: Arity -> Code tickyUnboxedTupleReturn arity = ifTicky $ do { bumpTickyCounter (fsLit "RET_UNBOXED_TUP_ctr") ; bumpHistogram (fsLit "RET_UNBOXED_TUP_hst") arity } -tickyVectoredReturn :: Int -> Code -tickyVectoredReturn family_size - = ifTicky $ do { bumpTickyCounter (fsLit "VEC_RETURN_ctr") - ; bumpHistogram (fsLit "RET_VEC_RETURN_hst") family_size } - -- ----------------------------------------------------------------------------- -- Ticky calls diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs index f971a0500a..0092cad5b7 100644 --- a/compiler/codeGen/CgUtils.hs +++ b/compiler/codeGen/CgUtils.hs @@ -7,7 +7,8 @@ ----------------------------------------------------------------------------- module CgUtils ( - addIdReps, + addIdReps, lookupArgLocs, + addIdReps', lookupArgLocs', cgLit, emitDataLits, mkDataLits, emitRODataLits, mkRODataLits, @@ -80,8 +81,43 @@ import Data.Maybe -- ------------------------------------------------------------------------- -addIdReps :: [Id] -> [(CgRep, Id)] -addIdReps ids = [(idCgRep id, id) | id <- ids] +-- FIXME: perhaps nicer to just use the primed versions everywhere? + +addIdReps :: [Id] -> [(CgRep, (Id, Int))] +addIdReps ids = [(rep, (id, i)) + | id <- ids + , (i, rep) <- [0..] `zip` idCgRep id] + +addIdReps' :: [[CgRep]] -> [(CgRep, (Int, Int))] +addIdReps' repss = [(rep, (n, i)) + | (n, reps) <- [0..] `zip` repss + , (i, rep) <- [0..] `zip` reps] + +lookupArgLocs :: [((Id, Int), GlobalReg)] + -> [((Id, Int), VirtualSpOffset)] + -> [Id] + -> [(Id, [Either GlobalReg VirtualSpOffset])] +lookupArgLocs reg_args stk_args args + = [(arg, [case lookup (arg, i) reg_args of + Just reg -> Left reg + Nothing -> case lookup (arg, i) stk_args of + Just off -> Right off + _ -> pprPanic "lookupArgLocs" (ppr (arg, i)) + | (i, _rep) <- [0..] `zip` idCgRep arg]) + | arg <- args] + +lookupArgLocs' :: [((Int, Int), GlobalReg)] + -> [((Int, Int), VirtualSpOffset)] + -> [(a, [CgRep])] + -> [(a, [Either GlobalReg VirtualSpOffset])] +lookupArgLocs' reg_args stk_args repss + = [(x, [case lookup (n, i) reg_args of + Just reg -> Left reg + Nothing -> case lookup (n, i) stk_args of + Just off -> Right off + _ -> pprPanic "lookupArgLocs'" (ppr (n, i)) + | (i, _rep) <- [0..] `zip` reps]) + | (n, (x, reps)) <- [0..] `zip` repss] ------------------------------------------------------------------------- -- diff --git a/compiler/codeGen/ClosureInfo.lhs b/compiler/codeGen/ClosureInfo.lhs index 34746984c2..de23091973 100644 --- a/compiler/codeGen/ClosureInfo.lhs +++ b/compiler/codeGen/ClosureInfo.lhs @@ -61,10 +61,9 @@ module ClosureInfo ( staticClosureNeedsLink, -- CgRep and its functions - CgRep(..), nonVoidArg, + CgRep(..), argMachRep, primRepToCgRep, - isFollowableArg, isVoidArg, - isFloatingArg, is64BitArg, + isFollowableArg, isFloatingArg, is64BitArg, separateByPtrFollowness, cgRepSizeW, cgRepSizeB, retAddrSizeW, @@ -156,7 +155,7 @@ ClosureInfo contains a LambdaFormInfo. data LambdaFormInfo = LFReEntrant -- Reentrant closure (a function) TopLevelFlag -- True if top level - !Int -- Arity. Invariant: always > 0 + !Arity -- Arity. INVARIANT: > 0 !Bool -- True <=> no fvs ArgDescr -- Argument descriptor (should reall be in ClosureInfo) @@ -180,7 +179,7 @@ data LambdaFormInfo | LFLetNoEscape -- See LetNoEscape module for precise description of -- these "lets". - !Int -- arity; + !Arity -- arity; | LFBlackHole -- Used for the closures allocated to hold the result -- of a CAF. We want the target of the update frame to @@ -211,7 +210,7 @@ data StandardFormInfo -- The code for the thunk just pushes x2..xn on the stack and enters x1. -- There are a few of these (for 1 <= n <= MAX_SPEC_AP_SIZE) pre-compiled -- in the RTS to save space. - Int -- Arity, n + Arity -- Arity, n \end{code} @@ -228,14 +227,10 @@ arguments are used to decide which of the RTS's generic apply functions to call when applying an unknown function. It contains more information than the back-end data type MachRep, -so one can easily convert from CgRep -> MachRep. (Except that -there's no MachRep for a VoidRep.) +so one can easily convert from CgRep -> MachRep. -It distinguishes - pointers from non-pointers (we sort the pointers together - when building closures) - - void from other types: a void argument is different from no argument +It distinguishes pointers from non-pointers (we sort the pointers +together when building closures) All 64-bit types map to the same CgRep, because they're passed in the same register, but a PtrArg is still different from an NonPtrArg @@ -245,8 +240,7 @@ entry to the garbage collector. \begin{code} data CgRep - = VoidArg -- Void - | PtrArg -- Word-sized heap pointer, followed + = PtrArg -- Word-sized heap pointer, followed -- by the garbage collector | NonPtrArg -- Word-sized non-pointer -- (including addresses not followed by GC) @@ -256,7 +250,6 @@ data CgRep deriving Eq instance Outputable CgRep where - ppr VoidArg = ptext (sLit "V_") ppr PtrArg = ptext (sLit "P_") ppr NonPtrArg = ptext (sLit "I_") ppr LongArg = ptext (sLit "L_") @@ -269,10 +262,8 @@ argMachRep NonPtrArg = bWord argMachRep LongArg = b64 argMachRep FloatArg = f32 argMachRep DoubleArg = f64 -argMachRep VoidArg = panic "argMachRep:VoidRep" primRepToCgRep :: PrimRep -> CgRep -primRepToCgRep VoidRep = VoidArg primRepToCgRep PtrRep = PtrArg primRepToCgRep IntRep = NonPtrArg primRepToCgRep WordRep = NonPtrArg @@ -282,14 +273,14 @@ primRepToCgRep AddrRep = NonPtrArg primRepToCgRep FloatRep = FloatArg primRepToCgRep DoubleRep = DoubleArg -idCgRep :: Id -> CgRep +idCgRep :: Id -> [CgRep] idCgRep x = typeCgRep . idType $ x -tyConCgRep :: TyCon -> CgRep -tyConCgRep = primRepToCgRep . tyConPrimRep +tyConCgRep :: TyCon -> [CgRep] +tyConCgRep = map primRepToCgRep . tyConPrimRep -typeCgRep :: Type -> CgRep -typeCgRep = primRepToCgRep . typePrimRep +typeCgRep :: Type -> [CgRep] +typeCgRep = map primRepToCgRep . typePrimRep \end{code} Whether or not the thing is a pointer that the garbage-collector @@ -305,14 +296,6 @@ isFollowableArg :: CgRep -> Bool -- True <=> points to a heap object isFollowableArg PtrArg = True isFollowableArg _ = False -isVoidArg :: CgRep -> Bool -isVoidArg VoidArg = True -isVoidArg _ = False - -nonVoidArg :: CgRep -> Bool -nonVoidArg VoidArg = False -nonVoidArg _ = True - -- isFloatingArg is used to distinguish @Double@ and @Float@ which -- cause inadvertent numeric conversions if you aren't jolly careful. -- See codeGen/CgCon:cgTopRhsCon. @@ -343,13 +326,11 @@ separateByPtrFollowness things cgRepSizeB :: CgRep -> ByteOff cgRepSizeB DoubleArg = dOUBLE_SIZE cgRepSizeB LongArg = wORD64_SIZE -cgRepSizeB VoidArg = 0 cgRepSizeB _ = wORD_SIZE cgRepSizeW :: CgRep -> ByteOff cgRepSizeW DoubleArg = dOUBLE_SIZE `quot` wORD_SIZE cgRepSizeW LongArg = wORD64_SIZE `quot` wORD_SIZE -cgRepSizeW VoidArg = 0 cgRepSizeW _ = 1 retAddrSizeW :: WordOff @@ -404,7 +385,7 @@ mkSelectorLFInfo id offset updatable = LFThunk NotTopLevel False updatable (SelectorThunk offset) (might_be_a_function (idType id)) -mkApLFInfo :: Id -> UpdateFlag -> Int -> LambdaFormInfo +mkApLFInfo :: Id -> UpdateFlag -> Arity -> LambdaFormInfo mkApLFInfo id upd_flag arity = LFThunk NotTopLevel (arity == 0) (isUpdatable upd_flag) (ApThunk arity) (might_be_a_function (idType id)) @@ -413,17 +394,36 @@ mkApLFInfo id upd_flag arity Miscellaneous LF-infos. \begin{code} -mkLFArgument :: Id -> LambdaFormInfo -mkLFArgument id = LFUnknown (might_be_a_function (idType id)) +mkLFArgument :: Type -> [LambdaFormInfo] +mkLFArgument ty + | [] <- typePrimRep ty + = [] + | Just (tc, tys) <- splitTyConApp_maybe ty + , isUnboxedTupleTyCon tc + = concatMap mkLFArgument tys + | otherwise + = [LFUnknown (might_be_a_function ty)] mkLFLetNoEscape :: Int -> LambdaFormInfo mkLFLetNoEscape = LFLetNoEscape -mkLFImported :: Id -> LambdaFormInfo +-- Returns Nothing if the imported Id has void representation +mkLFImported :: Id -> Maybe LambdaFormInfo mkLFImported id - = case idArity id of - n | n > 0 -> LFReEntrant TopLevel n True (panic "arg_descr") -- n > 0 - _ -> mkLFArgument id -- Not sure of exact arity + | Just con <- isDataConWorkId_maybe id + , isNullaryRepDataCon con + = Just $ LFCon con -- An imported nullary constructor + -- We assume that the constructor is evaluated so that + -- the id really does point directly to the constructor + + | idArity id > 0 + = Just $ LFReEntrant TopLevel (idArity id) True (panic "arg_descr") -- n > 0 + + | otherwise + = case mkLFArgument (idType id) of + [] -> Nothing + [lf] -> Just lf -- Not sure of exact arity + _ -> pprPanic "mkLFImported: unboxed tuple import?" (ppr id) \end{code} \begin{code} @@ -634,13 +634,13 @@ data CallMethod | DirectEntry -- Jump directly, with args in regs CLabel -- The code label - Int -- Its arity + Arity -- Its arity getCallMethod :: DynFlags - -> Name -- Function being applied - -> CafInfo -- Can it refer to CAF's? - -> LambdaFormInfo -- Its info - -> Int -- Number of available arguments + -> Name -- Function being applied + -> CafInfo -- Can it refer to CAF's? + -> LambdaFormInfo -- Its info + -> Arity -- Number of available arguments, Nothing if thunk use (i.e. no StgArgs at all, not even a void one) -> CallMethod getCallMethod _ _ _ lf_info _ @@ -651,10 +651,13 @@ getCallMethod _ _ _ lf_info _ EnterIt getCallMethod _ name caf (LFReEntrant _ arity _ _) n_args - | n_args == 0 = ASSERT( arity /= 0 ) - ReturnIt -- No args at all - | n_args < arity = SlowCall -- Not enough args - | otherwise = DirectEntry (enterIdLabel name caf) arity + | n_args == 0 + = ASSERT( arity /= 0 ) + ReturnIt -- No args at all + | n_args < arity + = SlowCall -- Not enough args + | otherwise + = DirectEntry (enterIdLabel name caf) arity getCallMethod _ _ _ (LFCon con) n_args | opt_SccProfilingOn -- when profiling, we must always enter @@ -695,7 +698,7 @@ getCallMethod _ _ _ (LFUnknown True) _ = SlowCall -- Might be a function getCallMethod _ name _ (LFUnknown False) n_args - | n_args > 0 + | n_args > 0 = WARN( True, ppr name <+> ppr n_args ) SlowCall -- Note [Unsafe coerce complications] @@ -711,8 +714,10 @@ getCallMethod _ name _ (LFLetNoEscape 0) _ = JumpToIt (enterReturnPtLabel (nameUnique name)) getCallMethod _ name _ (LFLetNoEscape arity) n_args - | n_args == arity = DirectEntry (enterReturnPtLabel (nameUnique name)) arity - | otherwise = pprPanic "let-no-escape: " (ppr name <+> ppr arity) + | n_args == arity + = DirectEntry (enterReturnPtLabel (nameUnique name)) arity + | otherwise + = pprPanic "let-no-escape: " (ppr name <+> ppr arity) blackHoleOnEntry :: ClosureInfo -> Bool @@ -911,11 +916,11 @@ isConstrClosure_maybe :: ClosureInfo -> Maybe DataCon isConstrClosure_maybe (ConInfo { closureCon = data_con }) = Just data_con isConstrClosure_maybe _ = Nothing -closureFunInfo :: ClosureInfo -> Maybe (Int, ArgDescr) +closureFunInfo :: ClosureInfo -> Maybe (Arity, ArgDescr) closureFunInfo (ClosureInfo { closureLFInfo = lf_info }) = lfFunInfo lf_info closureFunInfo _ = Nothing -lfFunInfo :: LambdaFormInfo -> Maybe (Int, ArgDescr) +lfFunInfo :: LambdaFormInfo -> Maybe (Arity, ArgDescr) lfFunInfo (LFReEntrant _ arity _ arg_desc) = Just (arity, arg_desc) lfFunInfo _ = Nothing @@ -935,7 +940,7 @@ funTagLFInfo lf | otherwise = 0 -tagForArity :: Int -> Maybe Int +tagForArity :: Arity -> Maybe Int tagForArity i | i <= mAX_PTR_TAG = Just i | otherwise = Nothing diff --git a/compiler/codeGen/StgCmm.hs b/compiler/codeGen/StgCmm.hs index 7aa159844b..79e5c5d8af 100644 --- a/compiler/codeGen/StgCmm.hs +++ b/compiler/codeGen/StgCmm.hs @@ -253,7 +253,7 @@ cgDataCon data_con = do { let (tot_wds, -- #ptr_wds + #nonptr_wds ptr_wds, -- #ptr_wds - arg_things) = mkVirtConstrOffsets arg_reps + _) = mkVirtConstrOffsets arg_reps nonptr_wds = tot_wds - ptr_wds @@ -268,13 +268,13 @@ cgDataCon data_con = -- NB: We don't set CC when entering data (WDP 94/06) do { _ <- ticky_code ; ldvEnter (CmmReg nodeReg) - ; tickyReturnOldCon (length arg_things) + ; tickyReturnOldCon (length arg_reps) ; emitReturn [cmmOffsetB (CmmReg nodeReg) (tagForCon data_con)] } -- The case continuation code expects a tagged pointer - arg_reps :: [(PrimRep, Type)] - arg_reps = [(typePrimRep ty, ty) | ty <- dataConRepArgTys data_con] + arg_reps :: [(PrimRep, ())] + arg_reps = [(rep, ()) | ty <- dataConRepArgTys data_con, rep <- typePrimRep ty] -- Dynamic closure code for non-nullary constructors only ; whenC (not (isNullaryRepDataCon data_con)) diff --git a/compiler/codeGen/StgCmmBind.hs b/compiler/codeGen/StgCmmBind.hs index 9bf57b1cb4..11e8d9e712 100644 --- a/compiler/codeGen/StgCmmBind.hs +++ b/compiler/codeGen/StgCmmBind.hs @@ -44,6 +44,8 @@ import CLabel import StgSyn import CostCentre import Id +import Type ( PrimRep ) +import Control.Arrow ( second ) import Control.Monad import Name import Module @@ -53,7 +55,7 @@ import BasicTypes import Constants import Outputable import FastString -import Maybes +import MonadUtils ( concatMapM ) import DynFlags import StaticFlags @@ -89,12 +91,8 @@ cgTopRhsClosure id ccs _ upd_flag srt args body = do -- BUILD THE OBJECT, AND GENERATE INFO TABLE (IF NECESSARY) ; emitDataLits closure_label closure_rep - ; let fv_details :: [(NonVoid Id, VirtualHpOffset)] - (_, _, fv_details) = mkVirtHeapOffsets (isLFThunk lf_info) - (addIdReps []) - -- Don't drop the non-void args until the closure info has been made ; forkClosureBody (closureCodeBody True id closure_info ccs - (nonVoidIds args) (length args) body fv_details) + args body ([], [])) ; returnFC cg_id_info } @@ -162,14 +160,14 @@ cgRhs name (StgRhsCon cc con args) = buildDynCon name cc con args cgRhs name (StgRhsClosure cc bi fvs upd_flag srt args body) - = mkRhsClosure name cc bi (nonVoidIds fvs) upd_flag srt args body + = mkRhsClosure name cc bi fvs upd_flag srt args body ------------------------------------------------------------------------ -- Non-constructor right hand sides ------------------------------------------------------------------------ mkRhsClosure :: Id -> CostCentreStack -> StgBinderInfo - -> [NonVoid Id] -- Free vars + -> [Id] -- Free vars -> UpdateFlag -> SRT -> [Id] -- Args -> StgExpr @@ -212,7 +210,7 @@ for semi-obvious reasons. ---------- Note [Selectors] ------------------ mkRhsClosure bndr cc bi - [NonVoid the_fv] -- Just one free var + [the_fv] -- Just one free var upd_flag -- Updatable thunk _srt [] -- A thunk @@ -221,9 +219,11 @@ mkRhsClosure bndr cc bi (AlgAlt _) [(DataAlt _, params, _use_mask, (StgApp selectee [{-no args-}]))]) - | the_fv == scrutinee -- Scrutinee is the only free variable - && maybeToBool maybe_offset -- Selectee is a component of the tuple - && offset_into_int <= mAX_SPEC_SELECTEE_SIZE -- Offset is small enough + | the_fv == scrutinee -- Scrutinee is the only free variable + , [_] <- idPrimRep selectee -- Selectee is unary (so guaranteed contiguous layout) + , Just the_offset <- maybe_offset -- Selectee is a component of the tuple + , let offset_into_int = the_offset - fixedHdrSize + , offset_into_int <= mAX_SPEC_SELECTEE_SIZE -- Offset is small enough = -- NOT TRUE: ASSERT(is_single_constructor) -- The simplifier may have statically determined that the single alternative -- is the only possible case and eliminated the others, even if there are @@ -232,15 +232,12 @@ mkRhsClosure bndr cc bi -- will evaluate to. -- -- srt is discarded; it must be empty - cgStdThunk bndr cc bi body lf_info [StgVarArg the_fv] + cgStdThunk bndr cc bi body (mkSelectorLFInfo bndr offset_into_int (isUpdatable upd_flag)) + [StgVarArg the_fv] where - lf_info = mkSelectorLFInfo bndr offset_into_int - (isUpdatable upd_flag) - (_, _, params_w_offsets) = mkVirtConstrOffsets (addIdReps params) - -- Just want the layout - maybe_offset = assocMaybe params_w_offsets (NonVoid selectee) - Just the_offset = maybe_offset - offset_into_int = the_offset - fixedHdrSize + (_, _, params_w_offsets) = mkVirtConstrOffsets [(rep, param) | param <- params, rep <- idPrimRep param] + -- Just want the offset of the first and only PrimRep belonging to this Id + maybe_offset = assocMaybe params_w_offsets selectee ---------- Note [Ap thunks] ------------------ mkRhsClosure bndr cc bi @@ -251,7 +248,7 @@ mkRhsClosure bndr cc bi body@(StgApp fun_id args) | args `lengthIs` (arity-1) - && all (isGcPtrRep . idPrimRep . stripNV) fvs + && all (\fv -> case idPrimRep fv of [rep] -> isGcPtrRep rep; _ -> False) fvs && isUpdatable upd_flag && arity <= mAX_SPEC_AP_SIZE && not opt_SccProfilingOn -- not when profiling: we don't want to @@ -279,8 +276,8 @@ mkRhsClosure bndr cc _ fvs upd_flag srt args body -- Node points to it... ; let is_elem = isIn "cgRhsClosure" - bndr_is_a_fv = (NonVoid bndr) `is_elem` fvs - reduced_fvs | bndr_is_a_fv = fvs `minusList` [NonVoid bndr] + bndr_is_a_fv = bndr `is_elem` fvs + reduced_fvs | bndr_is_a_fv = filter (/= bndr) fvs | otherwise = fvs @@ -288,12 +285,13 @@ mkRhsClosure bndr cc _ fvs upd_flag srt args body ; lf_info <- mkClosureLFInfo bndr NotTopLevel fvs upd_flag args ; mod_name <- getModuleName ; c_srt <- getSRTInfo srt + ; fvs_regss <- idsToRegs reduced_fvs ; let name = idName bndr descr = closureDescription mod_name name - fv_details :: [(NonVoid Id, VirtualHpOffset)] - (tot_wds, ptr_wds, fv_details) + regs_offsets :: [(LocalReg, VirtualHpOffset)] + (tot_wds, ptr_wds, regs_offsets) = mkVirtHeapOffsets (isLFThunk lf_info) - (addIdReps (map stripNV reduced_fvs)) + (concatMap snd fvs_regss) closure_info = mkClosureInfo False -- Not static bndr lf_info tot_wds ptr_wds c_srt descr @@ -303,24 +301,24 @@ mkRhsClosure bndr cc _ fvs upd_flag srt args body -- forkClosureBody: (a) ensure that bindings in here are not seen elsewhere -- (b) ignore Sequel from context; use empty Sequel -- And compile the body - closureCodeBody False bndr closure_info cc (nonVoidIds args) - (length args) body fv_details + closureCodeBody False bndr closure_info cc args body (map (second (map snd)) fvs_regss, regs_offsets) -- BUILD THE OBJECT -- ; (use_cc, blame_cc) <- chooseDynCostCentres cc args body ; let use_cc = curCCS; blame_cc = curCCS ; emit (mkComment $ mkFastString "calling allocDynClosure") - ; let toVarArg (NonVoid a, off) = (NonVoid (StgVarArg a), off) ; let info_tbl = mkCmmInfo closure_info - ; (tmp, init) <- allocDynClosure info_tbl lf_info use_cc blame_cc - (map toVarArg fv_details) + ; fvs_exprs <- concatMapM (liftM idInfoToAmodes . getCgIdInfo) reduced_fvs + ; (tmp, init) <- allocDynClosureCmm info_tbl lf_info use_cc blame_cc + (zipWithEqual "mkRhsClosure" (\expr (_, offset) -> (expr, offset)) fvs_exprs regs_offsets) -- RETURN ; regIdInfo bndr lf_info tmp init } --- Use with care; if used inappropriately, it could break invariants. -stripNV :: NonVoid a -> a -stripNV (NonVoid a) = a +idsToRegs :: [Id] -> FCode [(Id, [(PrimRep, LocalReg)])] +idsToRegs ids = forM ids $ \id -> do + regs <- idToReg id + return (id, zipEqual "idsToRegs" (idPrimRep id) regs) ------------------------- cgStdThunk @@ -336,8 +334,9 @@ cgStdThunk bndr _cc _bndr_info _body lf_info payload = do -- AHA! A STANDARD-FORM THUNK { -- LAY OUT THE OBJECT mod_name <- getModuleName + ; payload_reps <- concatMapM addArgReps payload ; let (tot_wds, ptr_wds, payload_w_offsets) - = mkVirtHeapOffsets (isLFThunk lf_info) (addArgReps payload) + = mkVirtHeapOffsets (isLFThunk lf_info) payload_reps descr = closureDescription mod_name (idName bndr) closure_info = mkClosureInfo False -- Not static @@ -350,22 +349,22 @@ cgStdThunk bndr _cc _bndr_info _body lf_info payload -- BUILD THE OBJECT ; let info_tbl = mkCmmInfo closure_info - ; (tmp, init) <- allocDynClosure info_tbl lf_info - use_cc blame_cc payload_w_offsets + ; (tmp, init) <- allocDynClosureCmm info_tbl lf_info + use_cc blame_cc payload_w_offsets -- RETURN ; regIdInfo bndr lf_info tmp init } mkClosureLFInfo :: Id -- The binder -> TopLevelFlag -- True of top level - -> [NonVoid Id] -- Free vars + -> [Id] -- Free vars -> UpdateFlag -- Update flag -> [Id] -- Args -> FCode LambdaFormInfo mkClosureLFInfo bndr top fvs upd_flag args - | null args = return (mkLFThunk (idType bndr) top (map stripNV fvs) upd_flag) + | null args = return (mkLFThunk (idType bndr) top fvs upd_flag) | otherwise = do { arg_descr <- mkArgDescr (idName bndr) args - ; return (mkLFReEntrant top (map stripNV fvs) args arg_descr) } + ; return (mkLFReEntrant top fvs args arg_descr) } ------------------------------------------------------------------------ @@ -376,10 +375,9 @@ closureCodeBody :: Bool -- whether this is a top-level binding -> Id -- the closure's name -> ClosureInfo -- Lots of information about this closure -> CostCentreStack -- Optional cost centre attached to closure - -> [NonVoid Id] -- incoming args to the closure - -> Int -- arity, including void args + -> [Id] -- incoming args to the closure -> StgExpr - -> [(NonVoid Id, VirtualHpOffset)] -- the closure's free vars + -> ([(Id, [LocalReg])], [(LocalReg, VirtualHpOffset)]) -- the closure's free vars -> FCode () {- There are two main cases for the code for closures. @@ -395,15 +393,15 @@ closureCodeBody :: Bool -- whether this is a top-level binding argSatisfactionCheck (by calling fetchAndReschedule). There info if Node points to closure is available. -- HWL -} -closureCodeBody top_lvl bndr cl_info cc args arity body fv_details - | length args == 0 -- No args i.e. thunk +closureCodeBody top_lvl bndr cl_info cc args body fv_details + | null args -- No args i.e. thunk = emitClosureProcAndInfoTable top_lvl bndr lf_info info_tbl [] $ - \(_, node, _) -> thunkCode cl_info fv_details cc node arity body + \(_, node, _) -> thunkCode cl_info fv_details cc node body where lf_info = closureLFInfo cl_info info_tbl = mkCmmInfo cl_info -closureCodeBody top_lvl bndr cl_info _cc args arity body fv_details +closureCodeBody top_lvl bndr cl_info _cc args body (fv_regs, regs_offsets) = ASSERT( length args > 0 ) do { -- Allocate the global ticky counter, -- and establish the ticky-counter @@ -411,7 +409,7 @@ closureCodeBody top_lvl bndr cl_info _cc args arity body fv_details ; dflags <- getDynFlags ; let platform = targetPlatform dflags ticky_ctr_lbl = closureRednCountsLabel platform cl_info - ; emitTickyCounter cl_info (map stripNV args) + ; emitTickyCounter cl_info args ; setTickyCtrLabel ticky_ctr_lbl $ do ; let @@ -432,20 +430,17 @@ closureCodeBody top_lvl bndr cl_info _cc args arity body fv_details ; granYield arg_regs node_points -- Main payload - ; entryHeapCheck cl_info offset node' arity arg_regs $ do - { fv_bindings <- mapM bind_fv fv_details + ; entryHeapCheck cl_info offset node' False{- not a thunk -} arg_regs $ do + { -- A function closure pointer may be tagged, so we + -- must take it into account when accessing the free variables. + ; mapM_ (uncurry rebindToReg) fv_regs -- Load free vars out of closure *after* -- heap check, to reduce live vars over check - ; if node_points then load_fvs node lf_info fv_bindings + ; if node_points then load_fvs node lf_info regs_offsets else return () ; cgExpr body }} } --- A function closure pointer may be tagged, so we --- must take it into account when accessing the free variables. -bind_fv :: (NonVoid Id, VirtualHpOffset) -> FCode (LocalReg, WordOff) -bind_fv (id, off) = do { reg <- rebindToReg id; return (reg, off) } - load_fvs :: LocalReg -> LambdaFormInfo -> [(LocalReg, WordOff)] -> FCode () load_fvs node lf_info = mapCs (\ (reg, off) -> emit $ mkTaggedObjectLoad reg node off tag) @@ -479,9 +474,9 @@ mkSlowEntryCode cl_info arg_regs -- function closure is already in `Node' | otherwise = return () ----------------------------------------- -thunkCode :: ClosureInfo -> [(NonVoid Id, VirtualHpOffset)] -> CostCentreStack - -> LocalReg -> Int -> StgExpr -> FCode () -thunkCode cl_info fv_details _cc node arity body +thunkCode :: ClosureInfo -> ([(Id, [LocalReg])], [(LocalReg, VirtualHpOffset)]) -> CostCentreStack + -> LocalReg -> StgExpr -> FCode () +thunkCode cl_info (fv_regs, regs_offsets) _cc node body = do { let node_points = nodeMustPointToIt (closureLFInfo cl_info) node' = if node_points then Just node else Nothing ; tickyEnterThunk cl_info @@ -489,7 +484,7 @@ thunkCode cl_info fv_details _cc node arity body ; granThunk node_points -- Heap overflow check - ; entryHeapCheck cl_info 0 node' arity [] $ do + ; entryHeapCheck cl_info 0 node' True{- Is a thunk -} [] $ do { -- Overwrite with black hole if necessary -- but *after* the heap-overflow check ; whenC (blackHoleOnEntry cl_info && node_points) @@ -503,8 +498,8 @@ thunkCode cl_info fv_details _cc node arity body -- subsumed by this enclosing cc do { enterCostCentreThunk (CmmReg nodeReg) ; let lf_info = closureLFInfo cl_info - ; fv_bindings <- mapM bind_fv fv_details - ; load_fvs node lf_info fv_bindings + ; mapM_ (uncurry rebindToReg) fv_regs + ; load_fvs node lf_info regs_offsets ; cgExpr body }}} diff --git a/compiler/codeGen/StgCmmClosure.hs b/compiler/codeGen/StgCmmClosure.hs index 5c0741a65e..f15b5a60fe 100644 --- a/compiler/codeGen/StgCmmClosure.hs +++ b/compiler/codeGen/StgCmmClosure.hs @@ -21,7 +21,7 @@ module StgCmmClosure ( DynTag, tagForCon, isSmallFamily, ConTagZ, dataConTagZ, - isVoidRep, isGcPtrRep, addIdReps, addArgReps, + isGcPtrRep, addIdReps, argPrimRep, -- * LambdaFormInfo @@ -97,19 +97,12 @@ import DynFlags -- Why are these here? -addIdReps :: [Id] -> [(PrimRep, Id)] +addIdReps :: [Id] -> [([PrimRep], Id)] addIdReps ids = [(idPrimRep id, id) | id <- ids] -addArgReps :: [StgArg] -> [(PrimRep, StgArg)] -addArgReps args = [(argPrimRep arg, arg) | arg <- args] - -argPrimRep :: StgArg -> PrimRep +argPrimRep :: StgArg -> [PrimRep] argPrimRep arg = typePrimRep (stgArgType arg) -isVoidRep :: PrimRep -> Bool -isVoidRep VoidRep = True -isVoidRep _other = False - isGcPtrRep :: PrimRep -> Bool isGcPtrRep PtrRep = True isGcPtrRep _ = False @@ -127,7 +120,7 @@ isGcPtrRep _ = False data LambdaFormInfo = LFReEntrant -- Reentrant closure (a function) TopLevelFlag -- True if top level - !Int -- Arity. Invariant: always > 0 + !Arity -- Arity. INVARIANT: > 0 !Bool -- True <=> no fvs ArgDescr -- Argument descriptor (should really be in ClosureInfo) @@ -188,20 +181,20 @@ data StandardFormInfo -- The code for the thunk just pushes x2..xn on the stack and enters x1. -- There are a few of these (for 1 <= n <= MAX_SPEC_AP_SIZE) pre-compiled -- in the RTS to save space. - Int -- Arity, n + Arity -- Arity, n ------------------------------------------------------ -- Building LambdaFormInfo ------------------------------------------------------ -mkLFArgument :: Id -> LambdaFormInfo -mkLFArgument id - | isUnLiftedType ty = LFUnLifted - | might_be_a_function ty = LFUnknown True - | otherwise = LFUnknown False - where - ty = idType id +mkLFArgument :: Type -> [LambdaFormInfo] +mkLFArgument ty + | [] <- typePrimRep ty = [] + | Just (tc, tys) <- splitTyConApp_maybe ty + , isUnboxedTupleTyCon tc = concatMap mkLFArgument tys + | isUnLiftedType ty = [LFUnLifted] + | otherwise = [LFUnknown (might_be_a_function ty)] ------------- mkLFLetNoEscape :: LambdaFormInfo @@ -252,21 +245,24 @@ mkApLFInfo id upd_flag arity (might_be_a_function (idType id)) ------------- -mkLFImported :: Id -> LambdaFormInfo + +-- Returns Nothing info for an Id with Void representation +mkLFImported :: Id -> Maybe LambdaFormInfo mkLFImported id | Just con <- isDataConWorkId_maybe id , isNullaryRepDataCon con - = LFCon con -- An imported nullary constructor + = Just $ LFCon con -- An imported nullary constructor -- We assume that the constructor is evaluated so that -- the id really does point directly to the constructor - | arity > 0 - = LFReEntrant TopLevel arity True (panic "arg_descr") + | idArity id > 0 + = Just $ LFReEntrant TopLevel (idArity id) True (panic "arg_descr") | otherwise - = mkLFArgument id -- Not sure of exact arity - where - arity = idArity id + = case mkLFArgument (idType id) of + [] -> Nothing + [lf] -> Just lf -- Not sure of exact arity + _ -> pprPanic "mkLFImported: unboxed-tuple import?" (ppr id) ------------ mkLFBlackHole :: LambdaFormInfo @@ -309,7 +305,7 @@ tagForCon con con_tag = dataConTagZ con fam_size = tyConFamilySize (dataConTyCon con) -tagForArity :: Int -> DynTag +tagForArity :: Arity -> DynTag tagForArity arity | isSmallFamily arity = arity | otherwise = 0 @@ -458,13 +454,13 @@ data CallMethod | DirectEntry -- Jump directly, with args in regs CLabel -- The code label - Int -- Its arity + Arity -- Its arity getCallMethod :: DynFlags - -> Name -- Function being applied - -> CafInfo -- Can it refer to CAF's? - -> LambdaFormInfo -- Its info - -> Int -- Number of available arguments + -> Name -- Function being applied + -> CafInfo -- Can it refer to CAF's? + -> LambdaFormInfo -- Its info + -> Arity -- Number of available arguments -> CallMethod getCallMethod _ _name _ lf_info _n_args @@ -475,10 +471,12 @@ getCallMethod _ _name _ lf_info _n_args EnterIt getCallMethod _ name caf (LFReEntrant _ arity _ _) n_args - | n_args == 0 = ASSERT( arity /= 0 ) - ReturnIt -- No args at all - | n_args < arity = SlowCall -- Not enough args - | otherwise = DirectEntry (enterIdLabel name caf) arity + | n_args == 0 + = ReturnIt -- No args at all + | n_args < arity + = SlowCall -- Not enough args + | otherwise + = DirectEntry (enterIdLabel name caf) arity getCallMethod _ _name _ LFUnLifted n_args = ASSERT( n_args == 0 ) ReturnIt @@ -513,8 +511,8 @@ getCallMethod dflags name caf (LFThunk _ _ updatable std_form_info is_fun) n_arg getCallMethod _ _name _ (LFUnknown True) _n_args = SlowCall -- might be a function -getCallMethod _ name _ (LFUnknown False) n_args - = ASSERT2 ( n_args == 0, ppr name <+> ppr n_args ) +getCallMethod _ name _ (LFUnknown False) _n_args + = ASSERT2 ( _n_args == 0, ppr name <+> ppr _n_args ) EnterIt -- Not a function getCallMethod _ _name _ LFBlackHole _n_args @@ -744,10 +742,10 @@ closureReEntrant :: ClosureInfo -> Bool closureReEntrant (ClosureInfo { closureLFInfo = LFReEntrant _ _ _ _ }) = True closureReEntrant _ = False -closureFunInfo :: ClosureInfo -> Maybe (Int, ArgDescr) +closureFunInfo :: ClosureInfo -> Maybe (Arity, ArgDescr) closureFunInfo (ClosureInfo { closureLFInfo = lf_info }) = lfFunInfo lf_info -lfFunInfo :: LambdaFormInfo -> Maybe (Int, ArgDescr) +lfFunInfo :: LambdaFormInfo -> Maybe (Arity, ArgDescr) lfFunInfo (LFReEntrant _ arity _ arg_desc) = Just (arity, arg_desc) lfFunInfo _ = Nothing diff --git a/compiler/codeGen/StgCmmCon.hs b/compiler/codeGen/StgCmmCon.hs index e17ac4fd32..25b9b4c975 100644 --- a/compiler/codeGen/StgCmmCon.hs +++ b/compiler/codeGen/StgCmmCon.hs @@ -41,7 +41,8 @@ import PrelInfo import Outputable import Platform import StaticFlags -import Util ( lengthIs ) +import MonadUtils +import Util ( lengthIs, zipEqual ) import Control.Monad import Data.Char @@ -65,6 +66,7 @@ cgTopRhsCon id con args ; ASSERT( args `lengthIs` dataConRepArity con ) return () -- LAY IT OUT + ; args_reps <- concatMapM addArgReps args ; let name = idName id caffy = idCafInfo id -- any stgArgHasCafRefs args @@ -72,7 +74,7 @@ cgTopRhsCon id con args (tot_wds, -- #ptr_wds + #nonptr_wds ptr_wds, -- #ptr_wds - nv_args_w_offsets) = mkVirtConstrOffsets (addArgReps args) + nv_args_w_offsets) = mkVirtConstrOffsets args_reps nonptr_wds = tot_wds - ptr_wds @@ -81,14 +83,13 @@ cgTopRhsCon id con args -- needs to poke around inside it. info_tbl = mkDataConInfoTable con True ptr_wds nonptr_wds - get_lit (arg, _offset) = do { CmmLit lit <- getArgAmode arg - ; return lit } - - ; payload <- mapM get_lit nv_args_w_offsets + payload = flip map nv_args_w_offsets $ \(cmm, _offset) -> case cmm of + CmmLit lit -> lit + _ -> panic "cgTopRhsCon" -- NB1: nv_args_w_offsets is sorted into ptrs then non-ptrs -- NB2: all the amodes should be Lits! - ; let closure_rep = mkStaticClosureFields + closure_rep = mkStaticClosureFields info_tbl dontCareCCS -- Because it's static data caffy -- Has CAF refs @@ -204,13 +205,14 @@ buildDynCon' platform binder _cc con [arg] -------- buildDynCon': the general case ----------- buildDynCon' _ binder ccs con args - = do { let (tot_wds, ptr_wds, args_w_offsets) - = mkVirtConstrOffsets (addArgReps args) + = do { args_reps <- concatMapM addArgReps args + ; let (tot_wds, ptr_wds, args_w_offsets) + = mkVirtConstrOffsets args_reps -- No void args in args_w_offsets nonptr_wds = tot_wds - ptr_wds info_tbl = mkDataConInfoTable con False ptr_wds nonptr_wds - ; (tmp, init) <- allocDynClosure info_tbl lf_info - use_cc blame_cc args_w_offsets + ; (tmp, init) <- allocDynClosureCmm info_tbl lf_info + use_cc blame_cc args_w_offsets ; regIdInfo binder lf_info tmp init } where lf_info = mkConLFInfo con @@ -233,18 +235,19 @@ bindConArgs :: AltCon -> LocalReg -> [Id] -> FCode [LocalReg] -- found a con bindConArgs (DataAlt con) base args = ASSERT(not (isUnboxedTupleCon con)) - mapM bind_arg args_w_offsets + do { args_regs <- mapM (\id -> liftM ((,) id) $ idToReg id) args + ; let (_, _, regs_w_offsets) = mkVirtConstrOffsets [it | (arg, regs) <- args_regs, it <- zipEqual "bindConArgs" (idPrimRep arg) regs] + ; mapM_ initialise_reg regs_w_offsets + -- The binding below forces the masking out of the tag bits + -- when accessing the constructor field. + ; mapM_ (uncurry bindArgToReg) args_regs + ; return (concatMap snd args_regs) } where - (_, _, args_w_offsets) = mkVirtConstrOffsets (addIdReps args) - tag = tagForCon con - -- The binding below forces the masking out of the tag bits - -- when accessing the constructor field. - bind_arg :: (NonVoid Id, VirtualHpOffset) -> FCode LocalReg - bind_arg (arg, offset) - = do { emit $ mkTaggedObjectLoad (idToReg arg) base offset tag - ; bindArgToReg arg } + initialise_reg :: (LocalReg, VirtualHpOffset) -> FCode () + initialise_reg (reg, offset) + = emit $ mkTaggedObjectLoad reg base offset tag bindConArgs _other_con _base args = ASSERT( null args ) return [] diff --git a/compiler/codeGen/StgCmmEnv.hs b/compiler/codeGen/StgCmmEnv.hs index d8a7061eec..5a159c4a35 100644 --- a/compiler/codeGen/StgCmmEnv.hs +++ b/compiler/codeGen/StgCmmEnv.hs @@ -16,25 +16,23 @@ module StgCmmEnv ( CgIdInfo, - cgIdInfoId, cgIdInfoLF, + cgIdInfoId, cgIdInfoElems, cgIdInfoSingleElem, + cgIdElemInfoLF, litIdInfo, lneIdInfo, regIdInfo, - idInfoToAmode, - - NonVoid(..), isVoidId, nonVoidIds, + idInfoToAmodes, idElemInfoToAmode, addBindC, addBindsC, bindArgsToRegs, bindToReg, rebindToReg, bindArgToReg, idToReg, - getArgAmode, getNonVoidArgAmodes, + addArgReps, getArgAmodes, getCgIdInfo, maybeLetNoEscape, ) where #include "HsVersions.h" -import TyCon import StgCmmMonad import StgCmmUtils import StgCmmClosure @@ -45,7 +43,9 @@ import BlockId import CmmExpr import CmmUtils import MkGraph (CmmAGraph, mkAssign, (<*>)) +import UniqSupply (uniqsFromSupply) import FastString +import Type (PrimRep) import Id import VarEnv import Control.Monad @@ -53,48 +53,43 @@ import Name import StgSyn import DynFlags import Outputable - -------------------------------------- --- Non-void types -------------------------------------- --- We frequently need the invariant that an Id or a an argument --- is of a non-void type. This type is a witness to the invariant. - -newtype NonVoid a = NonVoid a - deriving (Eq, Show) - -instance (Outputable a) => Outputable (NonVoid a) where - ppr (NonVoid a) = ppr a - -isVoidId :: Id -> Bool -isVoidId = isVoidRep . idPrimRep - -nonVoidIds :: [Id] -> [NonVoid Id] -nonVoidIds ids = [NonVoid id | id <- ids, not (isVoidRep (idPrimRep id))] +import Util ------------------------------------- -- Manipulating CgIdInfo ------------------------------------- +mkCgIdElemInfo :: LambdaFormInfo -> CmmExpr -> CgIdElemInfo +mkCgIdElemInfo lf expr + = CgIdElemInfo { cg_lf = lf + , cg_loc = CmmLoc expr, + cg_tag = lfDynTag lf } + mkCgIdInfo :: Id -> LambdaFormInfo -> CmmExpr -> CgIdInfo mkCgIdInfo id lf expr - = CgIdInfo { cg_id = id, cg_lf = lf - , cg_loc = CmmLoc expr, - cg_tag = lfDynTag lf } + = CgIdInfo { cg_id = id + , cg_elems = [mkCgIdElemInfo lf expr] + } +-- Used for building info for external names (which are always lifted) +-- and closures/constructors (which are always represented as a single pointer) litIdInfo :: Id -> LambdaFormInfo -> CmmLit -> CgIdInfo litIdInfo id lf lit - = CgIdInfo { cg_id = id, cg_lf = lf - , cg_loc = CmmLoc (addDynTag (CmmLit lit) tag) - , cg_tag = tag } + = CgIdInfo { cg_id = id + , cg_elems = [CgIdElemInfo { cg_lf = lf + , cg_loc = CmmLoc (addDynTag (CmmLit lit) tag) + , cg_tag = tag }] + } where tag = lfDynTag lf lneIdInfo :: Id -> [LocalReg] -> CgIdInfo lneIdInfo id regs - = CgIdInfo { cg_id = id, cg_lf = lf - , cg_loc = LneLoc blk_id regs - , cg_tag = lfDynTag lf } + = CgIdInfo { cg_id = id + , cg_elems = [CgIdElemInfo { cg_lf = lf + , cg_loc = LneLoc blk_id regs + , cg_tag = lfDynTag lf }] + } where lf = mkLFLetNoEscape blk_id = mkBlockId (idUnique id) @@ -105,18 +100,21 @@ lneIdInfo id regs -- a new register in order to keep single-assignment and help out the -- inliner. -- EZY regIdInfo :: Id -> LambdaFormInfo -> LocalReg -> CmmAGraph -> FCode (CgIdInfo, CmmAGraph) -regIdInfo id lf_info reg init +regIdInfo id lf_info reg init = do { reg' <- newTemp (localRegType reg) ; let init' = init <*> mkAssign (CmmLocal reg') (addDynTag (CmmReg (CmmLocal reg)) (lfDynTag lf_info)) ; return (mkCgIdInfo id lf_info (CmmReg (CmmLocal reg')), init') } -idInfoToAmode :: CgIdInfo -> CmmExpr +idElemInfoToAmode :: CgIdElemInfo -> CmmExpr -- Returns a CmmExpr for the *tagged* pointer -idInfoToAmode (CgIdInfo { cg_loc = CmmLoc e }) = e -idInfoToAmode cg_info - = pprPanic "idInfoToAmode" (ppr (cg_id cg_info)) -- LneLoc +idElemInfoToAmode (CgIdElemInfo { cg_loc = CmmLoc e }) = e +idElemInfoToAmode _cg_info + = panic "idElemInfoToAmode: LneLoc" + +idInfoToAmodes :: CgIdInfo -> [CmmExpr] +idInfoToAmodes = map idElemInfoToAmode . cg_elems addDynTag :: CmmExpr -> DynTag -> CmmExpr -- A tag adds a byte offset to the pointer @@ -125,12 +123,21 @@ addDynTag expr tag = cmmOffsetB expr tag cgIdInfoId :: CgIdInfo -> Id cgIdInfoId = cg_id -cgIdInfoLF :: CgIdInfo -> LambdaFormInfo -cgIdInfoLF = cg_lf +cgIdInfoElems :: CgIdInfo -> [CgIdElemInfo] +cgIdInfoElems = cg_elems -maybeLetNoEscape :: CgIdInfo -> Maybe (BlockId, [LocalReg]) -maybeLetNoEscape (CgIdInfo { cg_loc = LneLoc blk_id args}) = Just (blk_id, args) -maybeLetNoEscape _other = Nothing +-- Used for where the caller knows there will only be one alternative (commonly +-- because it knows the info is for a thunk, closure or some data) +cgIdInfoSingleElem :: CgIdInfo -> CgIdElemInfo +cgIdInfoSingleElem (CgIdInfo { cg_elems = [elem_info] }) = elem_info +cgIdInfoSingleElem _ = panic "cgIdInfoSingleElem" + +cgIdElemInfoLF :: CgIdElemInfo -> LambdaFormInfo +cgIdElemInfoLF = cg_lf + +maybeLetNoEscape :: CgIdElemInfo -> Maybe (BlockId, [LocalReg]) +maybeLetNoEscape (CgIdElemInfo { cg_loc = LneLoc blk_id args}) = Just (blk_id, args) +maybeLetNoEscape _other = Nothing @@ -141,6 +148,18 @@ maybeLetNoEscape _other = Nothing -- modifying(modifyBindC) and looking up (getCgIdInfo) bindings. --------------------------------------------------------- +-- Note [CgIdInfo knot] +-- ~~~~~~~~~~~~~~~~~~~~ +-- +-- We can't be too strict in the CgIdInfo, because in e.g. letrecs the CgIdInfo +-- is knot-tied. A loop I build in practice was +-- cgExpr LetRec -> cgRhs StgRhsCon -> buildDynCon' +-- from code like (let xs = (:) y xs in xs) because we fixpoint the CgIdInfo for +-- xs and buildDynCon' is strict in the length of the CgIdElemInfo list. +-- +-- To work around this we try to be yield the length of the CgIdInfo element list +-- lazily by lazily zipping it with the idCgReps. + addBindC :: Id -> CgIdInfo -> FCode () addBindC name stuff_to_bind = do binds <- getBinds @@ -154,9 +173,16 @@ addBindsC new_bindings = do new_bindings setBinds new_binds +-- See: Note [CgIdInfo knot] +etaCgIdInfo :: Id -> CgIdInfo -> CgIdInfo +etaCgIdInfo id ~(CgIdInfo { cg_id = lazy_id, cg_elems = elems }) + = CgIdInfo { cg_id = lazy_id + , cg_elems = zipLazyWith (showPpr (id, idPrimRep id, length elems)) (\_ elem -> elem) (idPrimRep id) elems } + getCgIdInfo :: Id -> FCode CgIdInfo getCgIdInfo id - = do { -- Try local bindings first + = liftM (etaCgIdInfo id) $ + do { -- Try local bindings first ; local_binds <- getBinds ; case lookupVarEnv local_binds id of { Just info -> return info ; @@ -173,8 +199,11 @@ getCgIdInfo id name = idName id in if isExternalName name then do - let ext_lbl = CmmLabel (mkClosureLabel name $ idCafInfo id) - return (litIdInfo id (mkLFImported id) ext_lbl) + { let ext_lbl = CmmLabel (mkClosureLabel name $ idCafInfo id) + ; return $ case mkLFImported id of + Just lf_info -> litIdInfo id lf_info ext_lbl + Nothing -> CgIdInfo id [] } + else -- Bug cgLookupPanic id @@ -197,48 +226,41 @@ cgLookupPanic id -------------------- -getArgAmode :: NonVoid StgArg -> FCode CmmExpr -getArgAmode (NonVoid (StgVarArg var)) = - do { info <- getCgIdInfo var; return (idInfoToAmode info) } -getArgAmode (NonVoid (StgLitArg lit)) = liftM CmmLit $ cgLit lit -getArgAmode (NonVoid (StgTypeArg _)) = panic "getArgAmode: type arg" - -getNonVoidArgAmodes :: [StgArg] -> FCode [CmmExpr] --- NB: Filters out void args, --- so the result list may be shorter than the argument list -getNonVoidArgAmodes [] = return [] -getNonVoidArgAmodes (arg:args) - | isVoidRep (argPrimRep arg) = getNonVoidArgAmodes args - | otherwise = do { amode <- getArgAmode (NonVoid arg) - ; amodes <- getNonVoidArgAmodes args - ; return ( amode : amodes ) } +getArgAmodes :: StgArg -> FCode [CmmExpr] +getArgAmodes (StgVarArg var) = + do { info <- getCgIdInfo var; return (idInfoToAmodes info) } +getArgAmodes (StgLitArg lit) = liftM (return . CmmLit) $ cgLit lit +getArgAmodes (StgTypeArg _) = return [] +addArgReps :: StgArg -> FCode [(PrimRep, CmmExpr)] +addArgReps arg = do + exprs <- getArgAmodes arg + return (zipEqual "addArgReps" (argPrimRep arg) exprs) ------------------------------------------------------------------------ -- Interface functions for binding and re-binding names ------------------------------------------------------------------------ -bindToReg :: NonVoid Id -> LambdaFormInfo -> FCode LocalReg +bindToReg :: Id -> [(LocalReg, LambdaFormInfo)] -> FCode () -- Bind an Id to a fresh LocalReg -bindToReg nvid@(NonVoid id) lf_info - = do { let reg = idToReg nvid - ; addBindC id (mkCgIdInfo id lf_info (CmmReg (CmmLocal reg))) - ; return reg } +bindToReg id regs_lf_infos + = do { addBindC id (CgIdInfo { cg_id = id + , cg_elems = map (\(reg, lf_info) -> mkCgIdElemInfo lf_info (CmmReg (CmmLocal reg))) regs_lf_infos }) } -rebindToReg :: NonVoid Id -> FCode LocalReg +rebindToReg :: Id -> [LocalReg] -> FCode () -- Like bindToReg, but the Id is already in scope, so -- get its LF info from the envt -rebindToReg nvid@(NonVoid id) +rebindToReg id regs = do { info <- getCgIdInfo id - ; bindToReg nvid (cgIdInfoLF info) } + ; bindToReg id (zipEqual "rebindToReg" regs (map cgIdElemInfoLF (cg_elems info))) } -bindArgToReg :: NonVoid Id -> FCode LocalReg -bindArgToReg nvid@(NonVoid id) = bindToReg nvid (mkLFArgument id) +bindArgToReg :: Id -> [LocalReg] -> FCode () +bindArgToReg id regs = bindToReg id (zipEqual "bindArgToReg" regs (mkLFArgument (idType id))) -bindArgsToRegs :: [NonVoid Id] -> FCode [LocalReg] -bindArgsToRegs args = mapM bindArgToReg args +bindArgsToRegs :: [(Id, [LocalReg])] -> FCode () +bindArgsToRegs args = mapM_ (uncurry bindArgToReg) args -idToReg :: NonVoid Id -> LocalReg +idToReg :: Id -> FCode [LocalReg] -- Make a register from an Id, typically a function argument, -- free variable, or case binder -- @@ -246,8 +268,6 @@ idToReg :: NonVoid Id -> LocalReg -- -- By now the Ids should be uniquely named; else one would worry -- about accidental collision -idToReg (NonVoid id) = LocalReg (idUnique id) - (case idPrimRep id of VoidRep -> pprPanic "idToReg" (ppr id) - _ -> primRepCmmType (idPrimRep id)) - - +idToReg id = do + us <- newUniqSupply + return $ zipWith LocalReg (idUnique id : uniqsFromSupply us) (map primRepCmmType (idPrimRep id)) diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs index 5ea935984d..0490182509 100644 --- a/compiler/codeGen/StgCmmExpr.hs +++ b/compiler/codeGen/StgCmmExpr.hs @@ -45,8 +45,9 @@ import PrimOp import TyCon import Type import CostCentre ( CostCentreStack, currentCCS ) -import Control.Monad (when) +import Control.Monad (when, zipWithM_) import Maybes +import MonadUtils (concatMapM) import Util import FastString import Outputable @@ -129,7 +130,7 @@ cgLetNoEscapeRhs cgLetNoEscapeRhs join_id local_cc bndr rhs = do { (info, rhs_body) <- getCodeR $ cgLetNoEscapeRhsBody local_cc bndr rhs - ; let (bid, _) = expectJust "cgLetNoEscapeRhs" $ maybeLetNoEscape info + ; let (bid, _) = expectJust "cgLetNoEscapeRhs" $ maybeLetNoEscape (cgIdInfoSingleElem info) ; emit (outOfLine $ mkLabel bid <*> rhs_body <*> mkBranch join_id) ; return info } @@ -140,7 +141,7 @@ cgLetNoEscapeRhsBody -> StgRhs -> FCode CgIdInfo cgLetNoEscapeRhsBody local_cc bndr (StgRhsClosure cc _bi _ _upd _ args body) - = cgLetNoEscapeClosure bndr local_cc cc (nonVoidIds args) body + = cgLetNoEscapeClosure bndr local_cc cc args body cgLetNoEscapeRhsBody local_cc bndr (StgRhsCon cc con args) = cgLetNoEscapeClosure bndr local_cc cc [] (StgConApp con args) -- For a constructor RHS we want to generate a single chunk of @@ -153,19 +154,19 @@ cgLetNoEscapeClosure :: Id -- binder -> Maybe LocalReg -- Slot for saved current cost centre -> CostCentreStack -- XXX: *** NOT USED *** why not? - -> [NonVoid Id] -- Args (as in \ args -> body) + -> [Id] -- Args (as in \ args -> body) -> StgExpr -- Body (as in above) -> FCode CgIdInfo -cgLetNoEscapeClosure bndr cc_slot _unused_cc args body - = do { arg_regs <- forkProc $ do - { restoreCurrentCostCentre cc_slot - ; arg_regs <- bindArgsToRegs args - ; altHeapCheck arg_regs (cgExpr body) - -- Using altHeapCheck just reduces - -- instructions to save on stack - ; return arg_regs } - ; return $ lneIdInfo bndr arg_regs} +cgLetNoEscapeClosure bndr cc_slot _unused_cc args body = forkProc $ + do { restoreCurrentCostCentre cc_slot + ; arg_regss <- mapM idToReg args + ; bindArgsToRegs (zipEqual "cgLetNoEscapeClosure" args arg_regss) + ; let arg_regs = concat arg_regss + ; altHeapCheck arg_regs (cgExpr body) + -- Using altHeapCheck just reduces + -- instructions to save on stack + ; return (lneIdInfo bndr arg_regs) } ------------------------------------------------------------------------ @@ -312,27 +313,27 @@ cgCase (OpApp ) bndr srt AlgAlt [(DataAlt flase, a2] -- code that enters the HValue, then we'll get a runtime panic, because -- the HValue really is a MutVar#. The types are compatible though, -- so we can just generate an assignment. -cgCase (StgApp v []) bndr _ alt_type@(PrimAlt _) alts - | isUnLiftedType (idType v) - || reps_compatible - = -- assignment suffices for unlifted types - do { when (not reps_compatible) $ - panic "cgCase: reps do not match, perhaps a dodgy unsafeCoerce?" - ; v_info <- getCgIdInfo v - ; emit (mkAssign (CmmLocal (idToReg (NonVoid bndr))) (idInfoToAmode v_info)) - ; _ <- bindArgsToRegs [NonVoid bndr] - ; cgAlts NoGcInAlts (NonVoid bndr) alt_type alts } +cgCase (StgApp v []) bndr _ alt_type alts + | case alt_type of PrimAlt _ -> True; UbxTupAlt _ -> True; _ -> False + = if isUnLiftedType (idType v) || reps_compatible + then -- assignment suffices for unlifted types + do { when (not reps_compatible) $ + panic "cgCase: reps do not match, perhaps a dodgy unsafeCoerce?" + ; v_info <- getCgIdInfo v + ; regs <- idToReg bndr + ; zipWithM_ (\reg expr -> emit (mkAssign (CmmLocal reg) expr)) regs (idInfoToAmodes v_info) + ; bindArgToReg bndr regs + ; cgAlts NoGcInAlts regs bndr alt_type alts } + else -- fail at run-time, not compile-time + do { mb_cc <- maybeSaveCostCentre True + ; regs <- idToReg v + ; withSequel (AssignTo regs False) (cgExpr (StgApp v [])) + ; restoreCurrentCostCentre mb_cc + ; emit $ mkComment $ mkFastString "should be unreachable code" + ; emit $ withFreshLabel "l" (\l -> mkLabel l <*> mkBranch l)} where reps_compatible = idPrimRep v == idPrimRep bndr -cgCase scrut@(StgApp v []) _ _ (PrimAlt _) _ - = -- fail at run-time, not compile-time - do { mb_cc <- maybeSaveCostCentre True - ; withSequel (AssignTo [idToReg (NonVoid v)] False) (cgExpr scrut) - ; restoreCurrentCostCentre mb_cc - ; emit $ mkComment $ mkFastString "should be unreachable code" - ; emit $ withFreshLabel "l" (\l -> mkLabel l <*> mkBranch l)} - {- case seq# a s of v (# s', a' #) -> e @@ -345,16 +346,16 @@ case a of v (taking advantage of the fact that the return convention for (# State#, a #) is the same as the return convention for just 'a') -} -cgCase (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _) bndr srt alt_type alts - = -- handle seq#, same return convention as vanilla 'a'. - cgCase (StgApp a []) bndr srt alt_type alts - cgCase scrut bndr srt alt_type alts = -- the general case do { up_hp_usg <- getVirtHp -- Upstream heap usage - ; let ret_bndrs = chooseReturnBndrs bndr alt_type alts - alt_regs = map idToReg ret_bndrs - simple_scrut = isSimpleScrut scrut alt_type + -- handle seq#, same return convention as vanilla 'a'. + ; let scrut' = case scrut of (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _) -> StgApp a [] + _ -> scrut + ret_bndrs = chooseReturnBndrs bndr alt_type alts + ; alts_regss <- mapM idToReg ret_bndrs + ; let alt_regs = concat alts_regss + simple_scrut = isSimpleScrut scrut' alt_type gcInAlts | not simple_scrut = True | isSingleton alts = False | up_hp_usg > 0 = False @@ -362,12 +363,12 @@ cgCase scrut bndr srt alt_type alts gc_plan = if gcInAlts then GcInAlts alt_regs srt else NoGcInAlts ; mb_cc <- maybeSaveCostCentre simple_scrut - ; withSequel (AssignTo alt_regs gcInAlts) (cgExpr scrut) + ; withSequel (AssignTo alt_regs gcInAlts) (cgExpr scrut') ; restoreCurrentCostCentre mb_cc -- JD: We need Note: [Better Alt Heap Checks] - ; _ <- bindArgsToRegs ret_bndrs - ; cgAlts gc_plan (NonVoid bndr) alt_type alts } + ; bindArgsToRegs (zipEqual "cgCase" ret_bndrs alts_regss) + ; cgAlts gc_plan alt_regs bndr alt_type alts } ----------------- maybeSaveCostCentre :: Bool -> FCode (Maybe LocalReg) @@ -382,10 +383,11 @@ isSimpleScrut :: StgExpr -> AltType -> Bool -- heap usage from alternatives into the stuff before the case -- NB: if you get this wrong, and claim that the expression doesn't allocate -- when it does, you'll deeply mess up allocation -isSimpleScrut (StgOpApp op _ _) _ = isSimpleOp op -isSimpleScrut (StgLit _) _ = True -- case 1# of { 0# -> ..; ... } -isSimpleScrut (StgApp _ []) (PrimAlt _) = True -- case x# of { 0# -> ..; ... } -isSimpleScrut _ _ = False +isSimpleScrut (StgOpApp op _ _) _ = isSimpleOp op +isSimpleScrut (StgLit _) _ = True -- case 1# of { 0# -> ..; ... } +isSimpleScrut (StgApp _ []) (PrimAlt _) = True -- case x# of { 0# -> ..; ... } +isSimpleScrut (StgApp _ []) (UbxTupAlt _) = True -- case x of { (# a, b #) -> .. } +isSimpleScrut _ _ = False isSimpleOp :: StgOp -> Bool -- True iff the op cannot block or allocate @@ -394,39 +396,40 @@ isSimpleOp (StgPrimOp op) = not (primOpOutOfLine op) isSimpleOp (StgPrimCallOp _) = False ----------------- -chooseReturnBndrs :: Id -> AltType -> [StgAlt] -> [NonVoid Id] +chooseReturnBndrs :: Id -> AltType -> [StgAlt] -> [Id] -- These are the binders of a case that are assigned -- by the evaluation of the scrutinee --- Only non-void ones come back chooseReturnBndrs bndr (PrimAlt _) _alts - = nonVoidIds [bndr] + = [bndr] chooseReturnBndrs _bndr (UbxTupAlt _) [(_, ids, _, _)] - = nonVoidIds ids -- 'bndr' is not assigned! + = ids -- 'bndr' will be assigned by cgAlts chooseReturnBndrs bndr (AlgAlt _) _alts - = nonVoidIds [bndr] -- Only 'bndr' is assigned + = [bndr] -- Only 'bndr' is assigned chooseReturnBndrs bndr PolyAlt _alts - = nonVoidIds [bndr] -- Only 'bndr' is assigned + = [bndr] -- Only 'bndr' is assigned chooseReturnBndrs _ _ _ = panic "chooseReturnBndrs" -- UbxTupALt has only one alternative ------------------------------------- -cgAlts :: GcPlan -> NonVoid Id -> AltType -> [StgAlt] -> FCode () +cgAlts :: GcPlan -> [LocalReg] -> Id -> AltType -> [StgAlt] -> FCode () -- At this point the result of the case are in the binders -cgAlts gc_plan _bndr PolyAlt [(_, _, _, rhs)] +cgAlts gc_plan _alt_regs _bndr PolyAlt [(_, _, _, rhs)] = maybeAltHeapCheck gc_plan (cgExpr rhs) -cgAlts gc_plan _bndr (UbxTupAlt _) [(_, _, _, rhs)] - = maybeAltHeapCheck gc_plan (cgExpr rhs) - -- Here bndrs are *already* in scope, so don't rebind them +cgAlts gc_plan alt_regs bndr (UbxTupAlt _) [(_, _, _, rhs)] + = do { bindArgToReg bndr alt_regs + ; maybeAltHeapCheck gc_plan (cgExpr rhs) } + -- Here alt bndrs are *already* in scope, so don't rebind them, + -- but we do need to set up bndr to expand to the scrutinee result -cgAlts gc_plan bndr (PrimAlt _) alts - = do { tagged_cmms <- cgAltRhss gc_plan bndr alts +cgAlts gc_plan [alt_reg] _bndr (PrimAlt _) alts + = do { tagged_cmms <- cgAltRhss gc_plan alt_reg alts - ; let bndr_reg = CmmLocal (idToReg bndr) + ; let bndr_reg = CmmLocal alt_reg (DEFAULT,deflt) = head tagged_cmms -- PrimAlts always have a DEFAULT case -- and it always comes first @@ -435,11 +438,11 @@ cgAlts gc_plan bndr (PrimAlt _) alts | (LitAlt lit, code) <- tagged_cmms] ; emit (mkCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' deflt) } -cgAlts gc_plan bndr (AlgAlt tycon) alts - = do { tagged_cmms <- cgAltRhss gc_plan bndr alts +cgAlts gc_plan [alt_reg] _bndr (AlgAlt tycon) alts + = do { tagged_cmms <- cgAltRhss gc_plan alt_reg alts ; let fam_sz = tyConFamilySize tycon - bndr_reg = CmmLocal (idToReg bndr) + bndr_reg = CmmLocal alt_reg mb_deflt = case tagged_cmms of ((DEFAULT,rhs) : _) -> Just rhs _other -> Nothing @@ -464,15 +467,14 @@ cgAlts gc_plan bndr (AlgAlt tycon) alts in emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1) } -cgAlts _ _ _ _ = panic "cgAlts" +cgAlts _ _ _ _ _ = panic "cgAlts" -- UbxTupAlt and PolyAlt have only one alternative ------------------- -cgAltRhss :: GcPlan -> NonVoid Id -> [StgAlt] -> FCode [(AltCon, CmmAGraph)] -cgAltRhss gc_plan bndr alts +cgAltRhss :: GcPlan -> LocalReg -> [StgAlt] -> FCode [(AltCon, CmmAGraph)] +cgAltRhss gc_plan base_reg alts = forkAlts (map cg_alt alts) where - base_reg = idToReg bndr cg_alt :: StgAlt -> FCode (AltCon, CmmAGraph) cg_alt (con, bndrs, _uses, rhs) = getCodeR $ @@ -492,7 +494,7 @@ maybeAltHeapCheck (GcInAlts regs _) code = altHeapCheck regs code cgConApp :: DataCon -> [StgArg] -> FCode () cgConApp con stg_args | isUnboxedTupleCon con -- Unboxed tuple: assign and return - = do { arg_exprs <- getNonVoidArgAmodes stg_args + = do { arg_exprs <- concatMapM getArgAmodes stg_args ; tickyUnboxedTupleReturn (length arg_exprs) ; emitReturn arg_exprs } @@ -503,24 +505,32 @@ cgConApp con stg_args -- is "con", which is a bit of a fudge, but it only affects profiling ; emit init - ; emitReturn [idInfoToAmode idinfo] } + ; emitReturn (idInfoToAmodes idinfo) } cgIdApp :: Id -> [StgArg] -> FCode () -cgIdApp fun_id [] | isVoidId fun_id = emitReturn [] cgIdApp fun_id args = do { fun_info <- getCgIdInfo fun_id - ; case maybeLetNoEscape fun_info of - Just (blk_id, lne_regs) -> cgLneJump blk_id lne_regs args - Nothing -> cgTailCall fun_id fun_info args } + ; case cg_elems fun_info of + -- If we mention an Id with a void representation, return nothing immediately + [] -> ASSERT( null args ) + emitReturn [] + -- Similarly for unboxed tuples, return the components immediately + elem_infos | isUnboxedTupleType (idType fun_id) -> ASSERT( null args ) + emitReturn (map idElemInfoToAmode elem_infos) + -- For standard function application, just try let-no-escape and then tailcall + [fun_info] -> case maybeLetNoEscape fun_info of + Just (blk_id, lne_regs) -> cgLneJump blk_id lne_regs args + Nothing -> cgTailCall fun_id fun_info args + _ -> panic "cgIdApp" } cgLneJump :: BlockId -> [LocalReg] -> [StgArg] -> FCode () cgLneJump blk_id lne_regs args -- Join point; discard sequel - = do { cmm_args <- getNonVoidArgAmodes args + = do { cmm_args <- concatMapM getArgAmodes args ; emit (mkMultiAssign lne_regs cmm_args <*> mkBranch blk_id) } -cgTailCall :: Id -> CgIdInfo -> [StgArg] -> FCode () +cgTailCall :: Id -> CgIdElemInfo -> [StgArg] -> FCode () cgTailCall fun_id fun_info args = do dflags <- getDynFlags case (getCallMethod dflags fun_name (idCafInfo fun_id) lf_info (length args)) of @@ -555,10 +565,10 @@ cgTailCall fun_id fun_info args = do JumpToIt {} -> panic "cgTailCall" -- ??? where - fun_name = idName fun_id - fun = idInfoToAmode fun_info - lf_info = cgIdInfoLF fun_info - node_points = nodeMustPointToIt lf_info + fun_name = idName fun_id + fun = idElemInfoToAmode fun_info + lf_info = cgIdElemInfoLF fun_info + node_points = nodeMustPointToIt lf_info {- Note [case on Bool] diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs index c41832a0ab..d64a2a7640 100644 --- a/compiler/codeGen/StgCmmForeign.hs +++ b/compiler/codeGen/StgCmmForeign.hs @@ -21,7 +21,6 @@ import StgCmmProf import StgCmmEnv import StgCmmMonad import StgCmmUtils -import StgCmmClosure import BlockId import Cmm @@ -35,9 +34,9 @@ import SMRep import ForeignCall import Constants import StaticFlags -import Maybes import Outputable import BasicTypes +import MonadUtils ( concatMapM ) import Control.Monad @@ -278,20 +277,14 @@ currentNursery = CmmGlobal CurrentNursery getFCallArgs :: [StgArg] -> FCode [(CmmExpr, ForeignHint)] -- (a) Drop void args -- (b) Add foreign-call shim code --- It's (b) that makes this differ from getNonVoidArgAmodes +-- It's (b) that makes this differ from getArgsAmodes -getFCallArgs args - = do { mb_cmms <- mapM get args - ; return (catMaybes mb_cmms) } +getFCallArgs args = concatMapM get args where - get arg | isVoidRep arg_rep - = return Nothing - | otherwise - = do { cmm <- getArgAmode (NonVoid arg) - ; return (Just (add_shim arg_ty cmm, hint)) } + get arg = do { cmm <- getArgAmodes arg + ; return (map (add_shim arg_ty) cmm `zip` hint) } where arg_ty = stgArgType arg - arg_rep = typePrimRep arg_ty hint = typeForeignHint arg_ty add_shim :: Type -> CmmExpr -> CmmExpr diff --git a/compiler/codeGen/StgCmmHeap.hs b/compiler/codeGen/StgCmmHeap.hs index 690b0a9622..68a1658ac1 100644 --- a/compiler/codeGen/StgCmmHeap.hs +++ b/compiler/codeGen/StgCmmHeap.hs @@ -15,13 +15,12 @@ module StgCmmHeap ( mkVirtHeapOffsets, mkVirtConstrOffsets, mkStaticClosureFields, mkStaticClosure, - allocDynClosure, allocDynClosureCmm, emitSetDynHdr + allocDynClosureCmm, emitSetDynHdr ) where #include "HsVersions.h" import CmmType -import StgSyn import CLabel import StgCmmLayout import StgCmmUtils @@ -30,7 +29,6 @@ import StgCmmProf import StgCmmTicky import StgCmmGran import StgCmmClosure -import StgCmmEnv import MkGraph @@ -49,24 +47,12 @@ import DynFlags -- Initialise dynamic heap objects ----------------------------------------------------------- -allocDynClosure - :: CmmInfoTable - -> LambdaFormInfo - -> CmmExpr -- Cost Centre to stick in the object - -> CmmExpr -- Cost Centre to blame for this alloc - -- (usually the same; sometimes "OVERHEAD") - - -> [(NonVoid StgArg, VirtualHpOffset)] -- Offsets from start of object - -- ie Info ptr has offset zero. - -- No void args in here - -> FCode (LocalReg, CmmAGraph) - allocDynClosureCmm :: CmmInfoTable -> LambdaFormInfo -> CmmExpr -> CmmExpr -> [(CmmExpr, VirtualHpOffset)] -> FCode (LocalReg, CmmAGraph) --- allocDynClosure allocates the thing in the heap, +-- allocDynClosureCmm allocates the thing in the heap, -- and modifies the virtual Hp to account for this. -- The second return value is the graph that sets the value of the -- returned LocalReg, which should point to the closure after executing @@ -74,7 +60,7 @@ allocDynClosureCmm -- Note [Return a LocalReg] -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- allocDynClosure returns a LocalReg, not a (Hp+8) CmmExpr. +-- allocDynClosureCmm returns a LocalReg, not a (Hp+8) CmmExpr. -- Reason: -- ...allocate object... -- obj = Hp + 8 @@ -83,13 +69,6 @@ allocDynClosureCmm -- but Hp+8 means something quite different... -allocDynClosure info_tbl lf_info use_cc _blame_cc args_w_offsets - = do { let (args, offsets) = unzip args_w_offsets - ; cmm_args <- mapM getArgAmode args -- No void args - ; allocDynClosureCmm info_tbl lf_info - use_cc _blame_cc (zip cmm_args offsets) - } - allocDynClosureCmm info_tbl lf_info use_cc _blame_cc amodes_w_offsets = do { virt_hp <- getVirtHp @@ -322,17 +301,16 @@ These are used in the following circumstances entryHeapCheck :: ClosureInfo -> Int -- Arg Offset -> Maybe LocalReg -- Function (closure environment) - -> Int -- Arity -- not same as len args b/c of voids + -> Bool -- Heap check for a *thunk*? -> [LocalReg] -- Non-void args (empty for thunk) -> FCode () -> FCode () -entryHeapCheck cl_info offset nodeSet arity args code +entryHeapCheck cl_info offset nodeSet is_thunk args code = do dflags <- getDynFlags let platform = targetPlatform dflags - is_thunk = arity == 0 is_fastf = case closureFunInfo cl_info of Just (_, ArgGen _) -> False _otherwise -> True diff --git a/compiler/codeGen/StgCmmLayout.hs b/compiler/codeGen/StgCmmLayout.hs index 9afcd029a4..aa7b65d298 100644 --- a/compiler/codeGen/StgCmmLayout.hs +++ b/compiler/codeGen/StgCmmLayout.hs @@ -49,13 +49,14 @@ import CLabel import StgSyn import Id import Name +import BasicTypes ( Arity ) import TyCon ( PrimRep(..) ) -import BasicTypes ( Arity ) import DynFlags import StaticFlags import Constants import Util +import Control.Monad import Data.List import Outputable import FastString ( mkFastString, FastString, fsLit ) @@ -133,76 +134,75 @@ directCall :: CLabel -> Arity -> [StgArg] -> FCode () -- calls f(arg1, ..., argn), and applies the result to the remaining args -- The function f has arity n, and there are guaranteed at least n args -- Both arity and args include void args +-- +-- NB: f is guaranteed to be a function, not a thunk directCall lbl arity stg_args - = do { cmm_args <- getNonVoidArgAmodes stg_args - ; direct_call "directCall" lbl arity cmm_args (argsReps stg_args) } + = do { cmm_args <- mapM addArgReps stg_args + ; direct_call "directCall" lbl arity cmm_args } slowCall :: CmmExpr -> [StgArg] -> FCode () -- (slowCall fun args) applies fun to args, returning the results to Sequel slowCall fun stg_args - = do { cmm_args <- getNonVoidArgAmodes stg_args - ; slow_call fun cmm_args (argsReps stg_args) } + = do { cmm_args <- mapM addArgReps stg_args + ; slow_call fun cmm_args } -------------- -direct_call :: String -> CLabel -> Arity -> [CmmExpr] -> [ArgRep] -> FCode () --- NB1: (length args) may be less than (length reps), because --- the args exclude the void ones +direct_call :: String -> CLabel -> Arity -> [[(PrimRep, CmmExpr)]] -> FCode () -- NB2: 'arity' refers to the *reps* -direct_call caller lbl arity args reps - | debugIsOn && arity > length reps -- Too few args +direct_call caller lbl arity arg_reps + | debugIsOn && arity > length arg_reps -- Too few args = do -- Caller should ensure that there enough args! dflags <- getDynFlags let platform = targetPlatform dflags pprPanic "direct_call" (text caller <+> ppr arity - <+> pprPlatform platform lbl <+> ppr (length reps) - <+> pprPlatform platform args <+> ppr reps ) + <+> pprPlatform platform lbl <+> ppr (length arg_reps) + <+> pprPlatform platform (map (map snd) arg_reps) <+> ppr (map (map fst) arg_reps) ) - | null rest_reps -- Precisely the right number of arguments - = emitCall (NativeDirectCall, NativeReturn) target args + | null rest_arg_reps -- Precisely the right number of arguments + = emitCall (NativeDirectCall, NativeReturn) target (concatMap (map snd) arg_reps) | otherwise -- Over-saturated call - = ASSERT( arity == length initial_reps ) + = ASSERT( arity == length fast_arg_reps ) do { pap_id <- newTemp gcWord ; withSequel (AssignTo [pap_id] True) - (emitCall (NativeDirectCall, NativeReturn) target fast_args) + (emitCall (NativeDirectCall, NativeReturn) target (concatMap (map snd) fast_arg_reps)) ; slow_call (CmmReg (CmmLocal pap_id)) - rest_args rest_reps } + rest_arg_reps } where target = CmmLit (CmmLabel lbl) - (initial_reps, rest_reps) = splitAt arity reps - arg_arity = count isNonV initial_reps - (fast_args, rest_args) = splitAt arg_arity args + (fast_arg_reps, rest_arg_reps) = splitAt arity arg_reps -------------- -slow_call :: CmmExpr -> [CmmExpr] -> [ArgRep] -> FCode () -slow_call fun args reps +slow_call :: CmmExpr -> [[(PrimRep, CmmExpr)]] -> FCode () +slow_call fun arg_reps = do dflags <- getDynFlags let platform = targetPlatform dflags - call <- getCode $ direct_call "slow_call" (mkRtsApFastLabel rts_fun) arity args reps + call <- getCode $ direct_call "slow_call" (mkRtsApFastLabel rts_fun) arity arg_reps emit $ mkComment $ mkFastString ("slow_call for " ++ showSDoc (pprPlatform platform fun) ++ " with pat " ++ showSDoc (ftext rts_fun)) emit (mkAssign nodeReg fun <*> call) where - (rts_fun, arity) = slowCallPattern reps + (rts_fun, arity) = slowCallPattern (map (map (toArgRep . fst)) arg_reps) -- These cases were found to cover about 99% of all slow calls: -slowCallPattern :: [ArgRep] -> (FastString, Arity) +slowCallPattern :: [[ArgRep]] -> (FastString, Arity) -- Returns the generic apply function and arity -slowCallPattern (P: P: P: P: P: P: _) = (fsLit "stg_ap_pppppp", 6) -slowCallPattern (P: P: P: P: P: _) = (fsLit "stg_ap_ppppp", 5) -slowCallPattern (P: P: P: P: _) = (fsLit "stg_ap_pppp", 4) -slowCallPattern (P: P: P: V: _) = (fsLit "stg_ap_pppv", 4) -slowCallPattern (P: P: P: _) = (fsLit "stg_ap_ppp", 3) -slowCallPattern (P: P: V: _) = (fsLit "stg_ap_ppv", 3) -slowCallPattern (P: P: _) = (fsLit "stg_ap_pp", 2) -slowCallPattern (P: V: _) = (fsLit "stg_ap_pv", 2) -slowCallPattern (P: _) = (fsLit "stg_ap_p", 1) -slowCallPattern (V: _) = (fsLit "stg_ap_v", 1) -slowCallPattern (N: _) = (fsLit "stg_ap_n", 1) -slowCallPattern (F: _) = (fsLit "stg_ap_f", 1) -slowCallPattern (D: _) = (fsLit "stg_ap_d", 1) -slowCallPattern (L: _) = (fsLit "stg_ap_l", 1) -slowCallPattern [] = (fsLit "stg_ap_0", 0) +slowCallPattern ([P]: [P]: [P]: [P]: [P]: [P]: _) = (fsLit "stg_ap_pppppp", 6) +slowCallPattern ([P]: [P]: [P]: [P]: [P]: _) = (fsLit "stg_ap_ppppp", 5) +slowCallPattern ([P]: [P]: [P]: [P]: _) = (fsLit "stg_ap_pppp", 4) +slowCallPattern ([P]: [P]: [P]: []: _) = (fsLit "stg_ap_pppv", 4) +slowCallPattern ([P]: [P]: [P]: _) = (fsLit "stg_ap_ppp", 3) +slowCallPattern ([P]: [P]: []: _) = (fsLit "stg_ap_ppv", 3) +slowCallPattern ([P]: [P]: _) = (fsLit "stg_ap_pp", 2) +slowCallPattern ([P]: []: _) = (fsLit "stg_ap_pv", 2) +slowCallPattern ([P]: _) = (fsLit "stg_ap_p", 1) +slowCallPattern ([N]: _) = (fsLit "stg_ap_n", 1) +slowCallPattern ([F]: _) = (fsLit "stg_ap_f", 1) +slowCallPattern ([D]: _) = (fsLit "stg_ap_d", 1) +slowCallPattern ([L]: _) = (fsLit "stg_ap_l", 1) +slowCallPattern ([]: _) = (fsLit "stg_ap_v", 1) +slowCallPattern (rs: _) = (error "FIXME" rs, 1) +slowCallPattern [] = (fsLit "stg_ap_0", 0) ------------------------------------------------------------------------- @@ -215,19 +215,16 @@ slowCallPattern [] = (fsLit "stg_ap_0", 0) data ArgRep = P -- GC Ptr | N -- One-word non-ptr | L -- Two-word non-ptr (long) - | V -- Void | F -- Float | D -- Double instance Outputable ArgRep where ppr P = text "P" ppr N = text "N" ppr L = text "L" - ppr V = text "V" ppr F = text "F" ppr D = text "D" toArgRep :: PrimRep -> ArgRep -toArgRep VoidRep = V toArgRep PtrRep = P toArgRep IntRep = N toArgRep WordRep = N @@ -237,23 +234,15 @@ toArgRep Word64Rep = L toArgRep FloatRep = F toArgRep DoubleRep = D -isNonV :: ArgRep -> Bool -isNonV V = False -isNonV _ = True - -argsReps :: [StgArg] -> [ArgRep] -argsReps = map (toArgRep . argPrimRep) - argRepSizeW :: ArgRep -> WordOff -- Size in words argRepSizeW N = 1 argRepSizeW P = 1 argRepSizeW F = 1 argRepSizeW L = wORD64_SIZE `quot` wORD_SIZE argRepSizeW D = dOUBLE_SIZE `quot` wORD_SIZE -argRepSizeW V = 0 -idArgRep :: Id -> ArgRep -idArgRep = toArgRep . idPrimRep +idArgRep :: Id -> [ArgRep] +idArgRep = map toArgRep . idPrimRep ------------------------------------------------------------------------- ---- Laying out objects on the heap and stack @@ -275,7 +264,7 @@ mkVirtHeapOffsets -> [(PrimRep,a)] -- Things to make offsets for -> (WordOff, -- _Total_ number of words allocated WordOff, -- Number of words allocated for *pointers* - [(NonVoid a, VirtualHpOffset)]) + [(a, VirtualHpOffset)]) -- Things with their offsets from start of object in order of -- increasing offset; BUT THIS MAY BE DIFFERENT TO INPUT ORDER @@ -288,8 +277,7 @@ mkVirtHeapOffsets -- than the unboxed things mkVirtHeapOffsets is_thunk things - = let non_void_things = filterOut (isVoidRep . fst) things - (ptrs, non_ptrs) = partition (isGcPtrRep . fst) non_void_things + = let (ptrs, non_ptrs) = partition (isGcPtrRep . fst) things (wds_of_ptrs, ptrs_w_offsets) = mapAccumL computeOffset 0 ptrs (tot_wds, non_ptrs_w_offsets) = mapAccumL computeOffset wds_of_ptrs non_ptrs in @@ -300,9 +288,9 @@ mkVirtHeapOffsets is_thunk things computeOffset wds_so_far (rep, thing) = (wds_so_far + argRepSizeW (toArgRep rep), - (NonVoid thing, hdr_size + wds_so_far)) + (thing, hdr_size + wds_so_far)) -mkVirtConstrOffsets :: [(PrimRep,a)] -> (WordOff, WordOff, [(NonVoid a, VirtualHpOffset)]) +mkVirtConstrOffsets :: [(PrimRep,a)] -> (WordOff, WordOff, [(a, VirtualHpOffset)]) -- Just like mkVirtHeapOffsets, but for constructors mkVirtConstrOffsets = mkVirtHeapOffsets False @@ -329,7 +317,7 @@ mkArgDescr _nm args Nothing -> return (ArgGen arg_bits) where arg_bits = argBits arg_reps - arg_reps = filter isNonV (map idArgRep args) + arg_reps = concatMap idArgRep args -- Getting rid of voids eases matching of standard patterns argBits :: [ArgRep] -> [Bool] -- True for non-ptr, False for ptr @@ -384,19 +372,20 @@ emitClosureProcAndInfoTable :: Bool -- top-level? -> Id -- name of the closure -> LambdaFormInfo -> CmmInfoTable - -> [NonVoid Id] -- incoming arguments + -> [Id] -- incoming arguments -> ((Int, LocalReg, [LocalReg]) -> FCode ()) -- function body -> FCode () emitClosureProcAndInfoTable top_lvl bndr lf_info info_tbl args body - = do { + = do { [node] <- idToReg bndr -- Bind the binder itself, but only if it's not a top-level -- binding. We need non-top let-bindings to refer to the -- top-level binding, which this binding would incorrectly shadow. - ; node <- if top_lvl then return $ idToReg (NonVoid bndr) - else bindToReg (NonVoid bndr) lf_info + ; unless top_lvl $ bindToReg bndr [(node, lf_info)] ; let node_points = nodeMustPointToIt lf_info - ; arg_regs <- bindArgsToRegs args - ; let args' = if node_points then (node : arg_regs) else arg_regs + ; args_regs <- mapM idToReg args + ; bindArgsToRegs (args `zip` args_regs) + ; let arg_regs = concat args_regs + args' = if node_points then (node : arg_regs) else arg_regs conv = if nodeMustPointToIt lf_info then NativeNodeCall else NativeDirectCall (offset, _) = mkCallEntry conv args' diff --git a/compiler/codeGen/StgCmmMonad.hs b/compiler/codeGen/StgCmmMonad.hs index 71457c530c..80200f16bf 100644 --- a/compiler/codeGen/StgCmmMonad.hs +++ b/compiler/codeGen/StgCmmMonad.hs @@ -47,7 +47,7 @@ module StgCmmMonad ( getState, setState, getInfoDown, getDynFlags, getThisPackage, -- more localised access to monad state - CgIdInfo(..), CgLoc(..), + CgIdInfo(..), CgIdElemInfo(..), CgLoc(..), getBinds, setBinds, getStaticBinds, -- out of general friendliness, we also export ... @@ -178,11 +178,25 @@ data CgInfoDownwards -- information only passed *downwards* by the monad type CgBindings = IdEnv CgIdInfo data CgIdInfo - = CgIdInfo - { cg_id :: Id -- Id that this is the info for - -- Can differ from the Id at occurrence sites by - -- virtue of being externalised, for splittable C - , cg_lf :: LambdaFormInfo + = CgIdInfo + { cg_id :: Id -- Id that this is the info for + -- Can differ from the Id at occurrence sites by + -- virtue of being externalised, for splittable C + -- + -- This is only really meaningful for cases where the + -- IdInfo is a singleton, because only top-level names + -- get externalised and all top-level names are lifted. + -- However, we keep it around even in the other cases + -- as it is useful for debugging purposes. + , cg_elems :: [CgIdElemInfo] -- Info for each of the things the Id expands to during + -- code generation. Most Ids expand to a single thing, + -- but ones of void representation expand to nothing + -- and unboxed tuples expand to an arbitrary number. + } + +data CgIdElemInfo + = CgIdElemInfo + { cg_lf :: LambdaFormInfo , cg_loc :: CgLoc -- CmmExpr for the *tagged* value , cg_tag :: {-# UNPACK #-} !DynTag -- Cache for (lfDynTag cg_lf) } @@ -198,8 +212,12 @@ data CgLoc -- and branch to the block id instance PlatformOutputable CgIdInfo where - pprPlatform platform (CgIdInfo { cg_id = id, cg_loc = loc }) - = ppr id <+> ptext (sLit "-->") <+> pprPlatform platform loc + pprPlatform platform (CgIdInfo { cg_id = id, cg_elems = elems }) + = ppr id <+> ptext (sLit "-->") <+> hsep (map (pprPlatform platform) elems) + +instance PlatformOutputable CgIdElemInfo where + pprPlatform platform (CgIdElemInfo { cg_loc = loc }) + = pprPlatform platform loc instance PlatformOutputable CgLoc where pprPlatform platform (CmmLoc e) = ptext (sLit "cmm") <+> pprPlatform platform e diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs index 6518c5b5b0..b81479caa8 100644 --- a/compiler/codeGen/StgCmmPrim.hs +++ b/compiler/codeGen/StgCmmPrim.hs @@ -36,6 +36,7 @@ import Type ( Type, tyConAppTyCon ) import TyCon import CLabel import CmmUtils +import MonadUtils import PrimOp import SMRep import Constants @@ -61,7 +62,7 @@ might be a Haskell closure pointer, we don't want to evaluate it. -} ---------------------------------- cgOpApp :: StgOp -- The op -> [StgArg] -- Arguments - -> Type -- Result type (always an unboxed tuple) + -> Type -- Result type -> FCode () -- Foreign calls @@ -79,7 +80,7 @@ cgOpApp (StgFCallOp fcall _) stg_args res_ty cgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty = ASSERT(isEnumerationTyCon tycon) - do { args' <- getNonVoidArgAmodes [arg] + do { args' <- getArgAmodes arg ; let amode = case args' of [amode] -> amode _ -> panic "TagToEnumOp had void arg" ; emitReturn [tagToClosure tycon amode] } @@ -91,25 +92,16 @@ cgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty -- That won't work. tycon = tyConAppTyCon res_ty -cgOpApp (StgPrimOp primop) args res_ty +cgOpApp (StgPrimOp primop) args _res_ty | primOpOutOfLine primop - = do { cmm_args <- getNonVoidArgAmodes args + = do { cmm_args <- concatMapM getArgAmodes args ; let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop)) ; emitCall (PrimOpCall, PrimOpReturn) fun cmm_args } - | ReturnsPrim VoidRep <- result_info - = do cgPrimOp [] primop args - emitReturn [] - | ReturnsPrim rep <- result_info - = do res <- newTemp (primRepCmmType rep) - cgPrimOp [res] primop args - emitReturn [CmmReg (CmmLocal res)] - - | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon - = do (regs, _hints) <- newUnboxedTupleRegs res_ty - cgPrimOp regs primop args - emitReturn (map (CmmReg . CmmLocal) regs) + = do (res, _hints) <- newSequelRegs rep + cgPrimOp res primop args + emitReturn (map (CmmReg . CmmLocal) res) | ReturnsAlg tycon <- result_info , isEnumerationTyCon tycon @@ -124,7 +116,7 @@ cgOpApp (StgPrimOp primop) args res_ty result_info = getPrimOpResultInfo primop cgOpApp (StgPrimCallOp primcall) args _res_ty - = do { cmm_args <- getNonVoidArgAmodes args + = do { cmm_args <- concatMapM getArgAmodes args ; let fun = CmmLit (CmmLabel (mkPrimCallLabel primcall)) ; emitCall (PrimOpCall, PrimOpReturn) fun cmm_args } @@ -135,7 +127,7 @@ cgPrimOp :: [LocalReg] -- where to put the results -> FCode () cgPrimOp results op args - = do arg_exprs <- getNonVoidArgAmodes args + = do arg_exprs <- concatMapM getArgAmodes args emitPrimOp results op arg_exprs diff --git a/compiler/codeGen/StgCmmTicky.hs b/compiler/codeGen/StgCmmTicky.hs index a6c592cfd8..0aa949b7a8 100644 --- a/compiler/codeGen/StgCmmTicky.hs +++ b/compiler/codeGen/StgCmmTicky.hs @@ -36,7 +36,7 @@ module StgCmmTicky ( tickyUpdateBhCaf, tickyBlackHole, - tickyUnboxedTupleReturn, tickyVectoredReturn, + tickyUnboxedTupleReturn, tickyReturnOldCon, tickyReturnNewCon, tickyKnownCallTooFewArgs, tickyKnownCallExact, tickyKnownCallExtraArgs, @@ -67,6 +67,7 @@ import BasicTypes import FastString import Constants import Outputable +import Maybes import DynFlags @@ -76,8 +77,6 @@ import TcType import Type import TyCon -import Data.Maybe - ----------------------------------------------------------------------------- -- -- Ticky-ticky profiling @@ -205,16 +204,11 @@ tickyReturnNewCon arity = ifTicky $ do { bumpTickyCounter (fsLit "RET_NEW_ctr") ; bumpHistogram (fsLit "RET_NEW_hst") arity } -tickyUnboxedTupleReturn :: Int -> FCode () +tickyUnboxedTupleReturn :: Arity -> FCode () tickyUnboxedTupleReturn arity = ifTicky $ do { bumpTickyCounter (fsLit "RET_UNBOXED_TUP_ctr") ; bumpHistogram (fsLit "RET_UNBOXED_TUP_hst") arity } -tickyVectoredReturn :: Int -> FCode () -tickyVectoredReturn family_size - = ifTicky $ do { bumpTickyCounter (fsLit "VEC_RETURN_ctr") - ; bumpHistogram (fsLit "RET_VEC_RETURN_hst") family_size } - -- ----------------------------------------------------------------------------- -- Ticky calls @@ -223,7 +217,7 @@ tickyDirectCall :: Arity -> [StgArg] -> FCode () tickyDirectCall arity args | arity == length args = tickyKnownCallExact | otherwise = do tickyKnownCallExtraArgs - tickySlowCallPat (map argPrimRep (drop arity args)) + tickySlowCallPat (concatMap argPrimRep (drop arity args)) tickyKnownCallTooFewArgs :: FCode () tickyKnownCallTooFewArgs = ifTicky $ bumpTickyCounter (fsLit "KNOWN_CALL_TOO_FEW_ARGS_ctr") @@ -246,7 +240,7 @@ tickySlowCall lf_info args = do { if (isKnownFun lf_info) then tickyKnownCallTooFewArgs else tickyUnknownCall - ; tickySlowCallPat (map argPrimRep args) } + ; tickySlowCallPat (concatMap argPrimRep args) } tickySlowCallPat :: [PrimRep] -> FCode () tickySlowCallPat _args = return () diff --git a/compiler/codeGen/StgCmmUtils.hs b/compiler/codeGen/StgCmmUtils.hs index c3327138b3..9a2e82daf5 100644 --- a/compiler/codeGen/StgCmmUtils.hs +++ b/compiler/codeGen/StgCmmUtils.hs @@ -20,7 +20,7 @@ module StgCmmUtils ( emitRtsCall, emitRtsCallWithVols, emitRtsCallWithResult, emitRtsCallGen, assignTemp, newTemp, withTemp, - newUnboxedTupleRegs, + newUnboxedTupleRegs, newSequelRegs, mkMultiAssign, mkCmmSwitch, mkCmmLitSwitch, emitSwitch, @@ -447,25 +447,28 @@ newTemp rep = do { uniq <- newUnique ; return (LocalReg uniq rep) } newUnboxedTupleRegs :: Type -> FCode ([LocalReg], [ForeignHint]) --- Choose suitable local regs to use for the components --- of an unboxed tuple that we are about to return to --- the Sequel. If the Sequel is a join point, using the --- regs it wants will save later assignments. newUnboxedTupleRegs res_ty = ASSERT( isUnboxedTupleType res_ty ) - do { sequel <- getSequel - ; regs <- choose_regs sequel - ; ASSERT( regs `equalLength` reps ) - return (regs, map primRepForeignHint reps) } + newSequelRegs reps where ty_args = tyConAppArgs (repType res_ty) reps = [ rep | ty <- ty_args - , let rep = typePrimRep ty - , not (isVoidRep rep) ] - choose_regs (AssignTo regs _) = return regs - choose_regs _other = mapM (newTemp . primRepCmmType) reps + , rep <- typePrimRep ty ] +newSequelRegs :: [PrimRep] -> FCode ([LocalReg], [ForeignHint]) +-- Choose suitable local regs to use for the components +-- of e.g. an unboxed tuple that we are about to return to +-- the Sequel. If the Sequel is a join point, using the +-- regs it wants will save later assignments. +newSequelRegs reps + = do { sequel <- getSequel + ; regs <- choose_regs sequel + ; ASSERT( regs `equalLength` reps ) + return (regs, map primRepForeignHint reps) } + where + choose_regs (AssignTo regs _) = return regs + choose_regs _other = mapM (newTemp . primRepCmmType) reps ------------------------------------------------------------------------- |