diff options
Diffstat (limited to 'compiler/codeGen')
30 files changed, 1014 insertions, 931 deletions
diff --git a/compiler/codeGen/CgBindery.lhs b/compiler/codeGen/CgBindery.lhs index 198e192f5c..12fae4888b 100644 --- a/compiler/codeGen/CgBindery.lhs +++ b/compiler/codeGen/CgBindery.lhs @@ -7,14 +7,16 @@ \begin{code} module CgBindery ( - CgBindings, CgIdInfo, + CgBindings, CgIdInfo, CgIdElemInfo, StableLoc, VolatileLoc, - cgIdInfoId, cgIdInfoArgRep, cgIdInfoLF, + cgIdInfoId, cgIdInfoElems, cgIdElemInfoArgRep, cgIdElemInfoLF, + cgIdInfoSingleElem, stableIdInfo, heapIdInfo, taggedStableIdInfo, taggedHeapIdInfo, - letNoEscapeIdInfo, idInfoToAmode, + letNoEscapeIdInfo, + idInfoToAmodes, idElemInfoToAmode, addBindC, addBindsC, @@ -23,15 +25,17 @@ module CgBindery ( getLiveStackSlots, getLiveStackBindings, - bindArgsToStack, rebindToStack, - bindNewToNode, bindNewToUntagNode, bindNewToReg, bindArgsToRegs, - bindNewToTemp, - getArgAmode, getArgAmodes, + rebindToStack, bindArgsToRegOrStack, + bindNewToNode, bindNewToUntagNode, bindNewToReg, + bindNewToTemp, bindToRegs, + getArgAmodes, getCgIdInfo, - getCAddrModeIfVolatile, getVolatileRegs, + getVolatilesCAddrModes, getVolatileRegs, maybeLetNoEscape, ) where +#include "HsVersions.h" + import CgMonad import CgHeapery import CgStackery @@ -55,6 +59,11 @@ import Unique import UniqSet import Outputable import FastString +import Util +import UniqSupply + +import Control.Monad +import Data.List \end{code} @@ -80,36 +89,32 @@ data CgIdInfo { cg_id :: Id -- Id that this is the info for -- Can differ from the Id at occurrence sites by -- virtue of being externalised, for splittable C - , cg_rep :: CgRep + , cg_elems :: [CgIdElemInfo] + } + +data CgIdElemInfo + = CgIdElemInfo + { cg_rep :: CgRep , cg_vol :: VolatileLoc , cg_stb :: StableLoc , cg_lf :: LambdaFormInfo , cg_tag :: {-# UNPACK #-} !Int -- tag to be added in idInfoToAmode } +-- Used only for Id with a guaranteed-unary CgRep mkCgIdInfo :: Id -> VolatileLoc -> StableLoc -> LambdaFormInfo -> CgIdInfo mkCgIdInfo id vol stb lf - = CgIdInfo { cg_id = id, cg_vol = vol, cg_stb = stb, - cg_lf = lf, cg_rep = idCgRep id, cg_tag = tag } + = CgIdInfo { cg_id = id + , cg_elems = [mkCgIdElemInfo rep vol stb lf] + } where - tag - | Just con <- isDataConWorkId_maybe id, - {- Is this an identifier for a static constructor closure? -} - isNullaryRepDataCon con - {- If yes, is this a nullary constructor? - If yes, we assume that the constructor is evaluated and can - be tagged. - -} - = tagForCon con + rep = case idCgRep id of [rep] -> rep; _ -> panic "mkCgIdInfo" - | otherwise - = funTagLFInfo lf - -voidIdInfo :: Id -> CgIdInfo -voidIdInfo id = CgIdInfo { cg_id = id, cg_vol = NoVolatileLoc - , cg_stb = VoidLoc, cg_lf = mkLFArgument id - , cg_rep = VoidArg, cg_tag = 0 } - -- Used just for VoidRep things +mkCgIdElemInfo :: CgRep -> VolatileLoc -> StableLoc -> LambdaFormInfo -> CgIdElemInfo +mkCgIdElemInfo rep vol stb lf + = CgIdElemInfo { cg_vol = vol, cg_stb = stb + , cg_lf = lf, cg_rep = rep, cg_tag = funTagLFInfo lf } + where data VolatileLoc -- These locations die across a call = NoVolatileLoc @@ -120,11 +125,13 @@ data VolatileLoc -- These locations die across a call -- NB. Byte offset, because we subtract R1's -- tag from the offset. -mkTaggedCgIdInfo :: Id -> VolatileLoc -> StableLoc -> LambdaFormInfo -> DataCon - -> CgIdInfo -mkTaggedCgIdInfo id vol stb lf con - = CgIdInfo { cg_id = id, cg_vol = vol, cg_stb = stb, - cg_lf = lf, cg_rep = idCgRep id, cg_tag = tagForCon con } +-- Used only for Id with a guaranteed-unary CgRep +mkTaggedCgIdElemInfo :: Id -> VolatileLoc -> StableLoc -> LambdaFormInfo -> DataCon + -> CgIdElemInfo +mkTaggedCgIdElemInfo id vol stb lf con + = CgIdElemInfo { cg_rep = rep, cg_vol = vol, cg_stb = stb + , cg_lf = lf, cg_tag = tagForCon con } + where rep = case idCgRep id of [rep] -> rep; _ -> panic "mkTaggedCgIdElemInfo" \end{code} @StableLoc@ encodes where an Id can be found, used by @@ -142,14 +149,15 @@ data StableLoc -- (as opposed to the contents of the slot) | StableLoc CmmExpr - | VoidLoc -- Used only for VoidRep variables. They never need to - -- be saved, so it makes sense to treat treat them as - -- having a stable location instance PlatformOutputable CgIdInfo where - pprPlatform platform (CgIdInfo id _ vol stb _ _) + pprPlatform platform (CgIdInfo id elems) + = ppr id <+> ptext (sLit "-->") <+> vcat (map (pprPlatform platform) elems) + +instance PlatformOutputable CgIdElemInfo where + pprPlatform platform (CgIdElemInfo _ vol stb _ _) -- TODO, pretty pring the tag info - = ppr id <+> ptext (sLit "-->") <+> vcat [ppr vol, pprPlatform platform stb] + = vcat [ppr vol, pprPlatform platform stb] instance Outputable VolatileLoc where ppr NoVolatileLoc = empty @@ -159,7 +167,6 @@ instance Outputable VolatileLoc where instance PlatformOutputable StableLoc where pprPlatform _ NoStableLoc = empty - pprPlatform _ VoidLoc = ptext (sLit "void") pprPlatform _ (VirStkLoc v) = ptext (sLit "vs") <+> ppr v pprPlatform _ (VirStkLNE v) = ptext (sLit "lne") <+> ppr v pprPlatform platform (StableLoc a) = ptext (sLit "amode") <+> pprPlatform platform a @@ -181,31 +188,33 @@ heapIdInfo id offset lf_info = mkCgIdInfo id (VirHpLoc offset) NoStableLoc lf letNoEscapeIdInfo :: Id -> VirtualSpOffset -> LambdaFormInfo -> CgIdInfo letNoEscapeIdInfo id sp lf_info = mkCgIdInfo id NoVolatileLoc (VirStkLNE sp) lf_info -stackIdInfo :: Id -> VirtualSpOffset -> LambdaFormInfo -> CgIdInfo -stackIdInfo id sp lf_info = mkCgIdInfo id NoVolatileLoc (VirStkLoc sp) lf_info +stackIdElemInfo :: CgRep -> VirtualSpOffset -> LambdaFormInfo -> CgIdElemInfo +stackIdElemInfo rep sp lf_info = mkCgIdElemInfo rep NoVolatileLoc (VirStkLoc sp) lf_info + +nodeIdElemInfo :: CgRep -> VirtualHpOffset -> LambdaFormInfo -> CgIdElemInfo +nodeIdElemInfo rep offset lf_info = mkCgIdElemInfo rep (VirNodeLoc (wORD_SIZE*offset)) NoStableLoc lf_info -nodeIdInfo :: Id -> Int -> LambdaFormInfo -> CgIdInfo -nodeIdInfo id offset lf_info = mkCgIdInfo id (VirNodeLoc (wORD_SIZE*offset)) NoStableLoc lf_info +untagNodeIdElemInfo :: CgRep -> VirtualHpOffset -> LambdaFormInfo -> Int -> CgIdElemInfo +untagNodeIdElemInfo rep offset lf_info tag + = mkCgIdElemInfo rep (VirNodeLoc (wORD_SIZE*offset - tag)) NoStableLoc lf_info -regIdInfo :: Id -> CmmReg -> LambdaFormInfo -> CgIdInfo -regIdInfo id reg lf_info = mkCgIdInfo id (RegLoc reg) NoStableLoc lf_info +regIdElemInfo :: CgRep -> CmmReg -> LambdaFormInfo -> CgIdElemInfo +regIdElemInfo rep reg lf_info = mkCgIdElemInfo rep (RegLoc reg) NoStableLoc lf_info taggedStableIdInfo :: Id -> CmmExpr -> LambdaFormInfo -> DataCon -> CgIdInfo taggedStableIdInfo id amode lf_info con - = mkTaggedCgIdInfo id NoVolatileLoc (StableLoc amode) lf_info con + = CgIdInfo id [mkTaggedCgIdElemInfo id NoVolatileLoc (StableLoc amode) lf_info con] -taggedHeapIdInfo :: Id -> VirtualHpOffset -> LambdaFormInfo -> DataCon - -> CgIdInfo +taggedHeapIdInfo :: Id -> VirtualHpOffset -> LambdaFormInfo -> DataCon -> CgIdInfo taggedHeapIdInfo id offset lf_info con - = mkTaggedCgIdInfo id (VirHpLoc offset) NoStableLoc lf_info con + = CgIdInfo id [mkTaggedCgIdElemInfo id (VirHpLoc offset) NoStableLoc lf_info con] -untagNodeIdInfo :: Id -> Int -> LambdaFormInfo -> Int -> CgIdInfo -untagNodeIdInfo id offset lf_info tag - = mkCgIdInfo id (VirNodeLoc (wORD_SIZE*offset - tag)) NoStableLoc lf_info +idInfoToAmodes :: CgIdInfo -> FCode [CmmExpr] +idInfoToAmodes = mapM idElemInfoToAmode . cg_elems -idInfoToAmode :: CgIdInfo -> FCode CmmExpr -idInfoToAmode info +idElemInfoToAmode :: CgIdElemInfo -> FCode CmmExpr +idElemInfoToAmode info = case cg_vol info of { RegLoc reg -> returnFC (CmmReg reg) ; VirNodeLoc nd_off -> returnFC (CmmLoad (cmmOffsetB (CmmReg nodeReg) nd_off) @@ -221,12 +230,7 @@ idInfoToAmode info VirStkLNE sp_off -> getSpRelOffset sp_off - VoidLoc -> return $ pprPanic "idInfoToAmode: void" (ppr (cg_id info)) - -- We return a 'bottom' amode, rather than panicing now - -- In this way getArgAmode returns a pair of (VoidArg, bottom) - -- and that's exactly what we want - - NoStableLoc -> pprPanic "idInfoToAmode: no loc" (ppr (cg_id info)) + NoStableLoc -> panic "idInfoToAmode: no loc" } where mach_rep = argMachRep (cg_rep info) @@ -239,15 +243,22 @@ idInfoToAmode info cgIdInfoId :: CgIdInfo -> Id cgIdInfoId = cg_id -cgIdInfoLF :: CgIdInfo -> LambdaFormInfo -cgIdInfoLF = cg_lf +cgIdInfoElems :: CgIdInfo -> [CgIdElemInfo] +cgIdInfoElems = cg_elems + +cgIdInfoSingleElem :: String -> CgIdInfo -> CgIdElemInfo +cgIdInfoSingleElem _ (CgIdInfo { cg_elems = [elem] }) = elem +cgIdInfoSingleElem msg _ = panic $ "cgIdInfoSingleElem: " ++ msg -cgIdInfoArgRep :: CgIdInfo -> CgRep -cgIdInfoArgRep = cg_rep +cgIdElemInfoLF :: CgIdElemInfo -> LambdaFormInfo +cgIdElemInfoLF = cg_lf -maybeLetNoEscape :: CgIdInfo -> Maybe VirtualSpOffset -maybeLetNoEscape (CgIdInfo { cg_stb = VirStkLNE sp_off }) = Just sp_off -maybeLetNoEscape _ = Nothing +cgIdElemInfoArgRep :: CgIdElemInfo -> CgRep +cgIdElemInfoArgRep = cg_rep + +maybeLetNoEscape :: CgIdElemInfo -> Maybe VirtualSpOffset +maybeLetNoEscape (CgIdElemInfo { cg_stb = VirStkLNE sp_off }) = Just sp_off +maybeLetNoEscape _ = Nothing \end{code} %************************************************************************ @@ -262,6 +273,17 @@ There are three basic routines, for adding (@addBindC@), modifying A @Id@ is bound to a @(VolatileLoc, StableLoc)@ triple. The name should not already be bound. (nice ASSERT, eh?) +Note [CgIdInfo knot] +~~~~~~~~~~~~~~~~~~~~ + +We can't be too strict in the CgIdInfo, because in e.g. letrecs the CgIdInfo +is knot-tied. A loop I build in practice was + cgExpr LetRec -> cgRhs StgRhsCon -> buildDynCon' +from code like (let xs = (:) y xs in xs) because we fixpoint the CgIdInfo for +xs and buildDynCon' is strict in the length of the CgIdElemInfo list. + +To work around this we try to be yield the length of the CgIdInfo element list +lazily by lazily zipping it with the idCgReps. \begin{code} addBindC :: Id -> CgIdInfo -> Code addBindC name stuff_to_bind = do @@ -281,9 +303,17 @@ modifyBindC name mangle_fn = do binds <- getBinds setBinds $ modifyVarEnv mangle_fn binds name +-- See: Note [CgIdInfo knot] +etaCgIdInfo :: Id -> CgIdInfo -> CgIdInfo +etaCgIdInfo id ~(CgIdInfo { cg_id = lazy_id, cg_elems = elems }) + = CgIdInfo { cg_id = lazy_id + , cg_elems = zipLazyWith (showPpr (id, idCgRep id, length elems)) (\_ elem -> elem) (idCgRep id) elems } + +-- Note eta-expansion of CgIdInfo: getCgIdInfo :: Id -> FCode CgIdInfo getCgIdInfo id - = do { -- Try local bindings first + = liftM (etaCgIdInfo id) $ + do { -- Try local bindings first ; local_binds <- getBinds ; case lookupVarEnv local_binds id of { Just info -> return info ; @@ -301,11 +331,9 @@ getCgIdInfo id in if isExternalName name then do let ext_lbl = CmmLit (CmmLabel (mkClosureLabel name $ idCafInfo id)) - return (stableIdInfo id ext_lbl (mkLFImported id)) - else - if isVoidArg (idCgRep id) then - -- Void things are never in the environment - return (voidIdInfo id) + return $ case mkLFImported id of + Nothing -> CgIdInfo id [] + Just lf_info -> stableIdInfo id ext_lbl lf_info else -- Bug cgLookupPanic id @@ -339,11 +367,17 @@ we don't leave any (NoVolatile, NoStable) binds around... \begin{code} nukeVolatileBinds :: CgBindings -> CgBindings nukeVolatileBinds binds - = mkVarEnv (foldr keep_if_stable [] (varEnvElts binds)) + = mkVarEnv (foldr (\info acc -> case keep_if_stable (cg_elems info) of Just infos -> (cg_id info, info { cg_elems = infos }) : acc; Nothing -> acc) [] (varEnvElts binds)) where - keep_if_stable (CgIdInfo { cg_stb = NoStableLoc }) acc = acc - keep_if_stable info acc - = (cg_id info, info { cg_vol = NoVolatileLoc }) : acc + has_no_stable_loc (CgIdElemInfo { cg_stb = NoStableLoc }) = True + has_no_stable_loc _ = False + + keep_if_stable infos + | any has_no_stable_loc infos + = ASSERT(all has_no_stable_loc infos) + Nothing + | otherwise + = Just (map (\info -> info { cg_vol = NoVolatileLoc }) infos) \end{code} @@ -354,14 +388,13 @@ nukeVolatileBinds binds %************************************************************************ \begin{code} -getCAddrModeIfVolatile :: Id -> FCode (Maybe CmmExpr) -getCAddrModeIfVolatile id +getVolatilesCAddrModes :: Id -> FCode [Maybe (CgRep, CmmExpr)] +getVolatilesCAddrModes id = do { info <- getCgIdInfo id - ; case cg_stb info of - NoStableLoc -> do -- Aha! So it is volatile! - amode <- idInfoToAmode info - return $ Just amode - _ -> return Nothing } + ; forM (cg_elems info) $ \elem_info -> case cg_stb elem_info of + NoStableLoc -> liftM (\expr -> Just (cg_rep elem_info, expr)) + (idElemInfoToAmode elem_info) + _ -> return Nothing } \end{code} @getVolatileRegs@ gets a set of live variables, and returns a list of @@ -375,51 +408,39 @@ forget the volatile one. getVolatileRegs :: StgLiveVars -> FCode [GlobalReg] getVolatileRegs vars = do do { stuff <- mapFCs snaffle_it (varSetElems vars) - ; returnFC $ catMaybes stuff } + ; returnFC $ concat stuff } where snaffle_it var = do { info <- getCgIdInfo var - ; let - -- commoned-up code... - consider_reg reg - = -- We assume that all regs can die across C calls - -- We leave it to the save-macros to decide which - -- regs *really* need to be saved. - case cg_stb info of - NoStableLoc -> returnFC (Just reg) -- got one! - _ -> do - { -- has both volatile & stable locations; - -- force it to rely on the stable location - modifyBindC var nuke_vol_bind - ; return Nothing } - - ; case cg_vol info of - RegLoc (CmmGlobal reg) -> consider_reg reg - VirNodeLoc _ -> consider_reg node - _ -> returnFC Nothing -- Local registers + ; let (vol_regs, elems') = unzip $ flip map (cg_elems info) $ \elem_info -> + let -- commoned-up code... + consider_reg reg + = -- We assume that all regs can die across C calls + -- We leave it to the save-macros to decide which + -- regs *really* need to be saved. + case cg_stb elem_info of + NoStableLoc -> (Just reg, elem_info) -- got one! + -- has both volatile & stable locations; + -- force it to rely on the stable location + _ -> (Nothing, elem_info { cg_vol = NoVolatileLoc }) + in case cg_vol elem_info of + RegLoc (CmmGlobal reg) -> consider_reg reg + VirNodeLoc _ -> consider_reg node + _ -> (Nothing, elem_info) -- Local registers + ; modifyBindC var (const info { cg_elems = elems' }) + ; return (catMaybes vol_regs) } - nuke_vol_bind info = info { cg_vol = NoVolatileLoc } - -getArgAmode :: StgArg -> FCode (CgRep, CmmExpr) -getArgAmode (StgVarArg var) +getArgAmodes :: StgArg -> FCode [(CgRep, CmmExpr)] +getArgAmodes (StgVarArg var) = do { info <- getCgIdInfo var - ; amode <- idInfoToAmode info - ; return (cgIdInfoArgRep info, amode ) } - -getArgAmode (StgLitArg lit) + ; forM (cg_elems info) $ \elem_info -> do + amode <- idElemInfoToAmode elem_info + return (cg_rep elem_info, amode) } +getArgAmodes (StgLitArg lit) = do { cmm_lit <- cgLit lit - ; return (typeCgRep (literalType lit), CmmLit cmm_lit) } - -getArgAmode (StgTypeArg _) = panic "getArgAmode: type arg" - -getArgAmodes :: [StgArg] -> FCode [(CgRep, CmmExpr)] -getArgAmodes [] = returnFC [] -getArgAmodes (atom:atoms) - | isStgTypeArg atom = getArgAmodes atoms - | otherwise = do { amode <- getArgAmode atom - ; amodes <- getArgAmodes atoms - ; return ( amode : amodes ) } + ; return $ zipEqual "getArgAmodes" (typeCgRep (literalType lit)) [CmmLit cmm_lit] } +getArgAmodes (StgTypeArg _) = return [] \end{code} %************************************************************************ @@ -429,50 +450,60 @@ getArgAmodes (atom:atoms) %************************************************************************ \begin{code} -bindArgsToStack :: [(Id, VirtualSpOffset)] -> Code -bindArgsToStack args - = mapCs bind args - where - bind(id, offset) = addBindC id (stackIdInfo id offset (mkLFArgument id)) - -bindArgsToRegs :: [(Id, GlobalReg)] -> Code -bindArgsToRegs args - = mapCs bind args +bindArgsToRegOrStack :: [(Id, [Either GlobalReg VirtualSpOffset])] -> Code +bindArgsToRegOrStack = mapCs bind where - bind (arg, reg) = bindNewToReg arg (CmmGlobal reg) (mkLFArgument arg) - -bindNewToNode :: Id -> VirtualHpOffset -> LambdaFormInfo -> Code -bindNewToNode id offset lf_info - = addBindC id (nodeIdInfo id offset lf_info) - -bindNewToUntagNode :: Id -> VirtualHpOffset -> LambdaFormInfo -> Int -> Code -bindNewToUntagNode id offset lf_info tag - = addBindC id (untagNodeIdInfo id offset lf_info tag) + bind (id, ei_reg_offs) = addBindC id $ CgIdInfo id $ + zipWith3Equal "bindArgsToRegOrStack" + (\rep lf_info ei_reg_off -> case ei_reg_off of + Left reg -> regIdElemInfo rep (CmmGlobal reg) lf_info + Right off -> stackIdElemInfo rep off lf_info) + (idCgRep id) (mkLFArgument (idType id)) ei_reg_offs + +bindNewToNode :: Id -> [(VirtualHpOffset, LambdaFormInfo)] -> Code +bindNewToNode id offset_lf_infos + = addBindC id (CgIdInfo id $ zipWithEqual "bindNewToNode" (\rep (offset, lf_info) -> nodeIdElemInfo rep offset lf_info) (idCgRep id) offset_lf_infos) + +-- NB: the tag is for the *node*, not the thing we load from it, so it is shared amongst elements +bindNewToUntagNode :: Id -> [(VirtualHpOffset, LambdaFormInfo)] -> Int -> Code +bindNewToUntagNode id offset_lf_infos tag + = addBindC id (CgIdInfo id $ zipWithEqual "bindNewToUntagNode" (\rep (offset, lf_info) -> untagNodeIdElemInfo rep offset lf_info tag) (idCgRep id) offset_lf_infos) + +idRegs :: Id -> FCode [LocalReg] +idRegs id = do + us <- newUniqSupply + let cg_reps = idCgRep id + temp_regs = zipWith LocalReg (getUnique id : uniqsFromSupply us) (map argMachRep cg_reps) + return temp_regs -- Create a new temporary whose unique is that in the id, -- bind the id to it, and return the addressing mode for the -- temporary. -bindNewToTemp :: Id -> FCode LocalReg +bindNewToTemp :: Id -> FCode [LocalReg] bindNewToTemp id - = do addBindC id (regIdInfo id (CmmLocal temp_reg) lf_info) - return temp_reg + = do temp_regs <- idRegs id + bindToRegs id temp_regs + return temp_regs + +bindToRegs :: Id -> [LocalReg] -> FCode () +bindToRegs id temp_regs + = addBindC id $ CgIdInfo id $ zipWith3Equal "bindNewToTemp" (\rep temp_reg lf_info -> regIdElemInfo rep (CmmLocal temp_reg) lf_info) (idCgRep id) temp_regs lf_infos where - uniq = getUnique id - temp_reg = LocalReg uniq (argMachRep (idCgRep id)) - lf_info = mkLFArgument id -- Always used of things we - -- know nothing about - -bindNewToReg :: Id -> CmmReg -> LambdaFormInfo -> Code -bindNewToReg name reg lf_info - = addBindC name info + lf_infos = mkLFArgument (idType id) -- Always used of things we + -- know nothing about + +bindNewToReg :: Id -> [(CmmReg, LambdaFormInfo)] -> Code +bindNewToReg name regs_lf_infos + = addBindC name (CgIdInfo name elem_infos) where - info = mkCgIdInfo name (RegLoc reg) NoStableLoc lf_info + elem_infos = zipWithEqual "bindNewToReg" (\rep (reg, lf_info) -> regIdElemInfo rep reg lf_info) + (idCgRep name) regs_lf_infos -rebindToStack :: Id -> VirtualSpOffset -> Code -rebindToStack name offset +rebindToStack :: Id -> [Maybe VirtualSpOffset] -> Code +rebindToStack name offsets = modifyBindC name replace_stable_fn where - replace_stable_fn info = info { cg_stb = VirStkLoc offset } + replace_stable_fn info = info { cg_elems = zipWithEqual "rebindToStack" (\elem_info mb_offset -> case mb_offset of Just offset -> elem_info { cg_stb = VirStkLoc offset }; Nothing -> elem_info) (cg_elems info) offsets } \end{code} %************************************************************************ @@ -503,7 +534,7 @@ nukeDeadBindings live_vars = do binds <- getBinds let (dead_stk_slots, bs') = dead_slots live_vars - [] [] + [] [] [] [ (cg_id b, b) | b <- varEnvElts binds ] setBinds $ mkVarEnv bs' freeStackSlots dead_stk_slots @@ -511,50 +542,56 @@ nukeDeadBindings live_vars = do Several boring auxiliary functions to do the dirty work. +Note that some stack slots can be mentioned in *more than one* CgIdInfo. +This commonly happens where the stack slots for the case binders of an +unboxed tuple case are a subset of the stack slots for the unboxed tuple case binder. + \begin{code} dead_slots :: StgLiveVars -> [(Id,CgIdInfo)] -> [VirtualSpOffset] + -> [VirtualSpOffset] -> [(Id,CgIdInfo)] -> ([VirtualSpOffset], [(Id,CgIdInfo)]) -- dead_slots carries accumulating parameters for --- filtered bindings, dead slots -dead_slots _ fbs ds [] - = (ds, reverse fbs) -- Finished; rm the dups, if any +-- filtered bindings, possibly-dead slots, live slots +dead_slots _ fbs ds ls [] + = (ds \\ ls, reverse fbs) -- Finished; rm the dups, if any -dead_slots live_vars fbs ds ((v,i):bs) +dead_slots live_vars fbs ds ls ((v,i):bs) | v `elementOfUniqSet` live_vars - = dead_slots live_vars ((v,i):fbs) ds bs + = dead_slots live_vars ((v,i):fbs) ds (infoLiveSlots i ++ ls) bs -- Live, so don't record it in dead slots -- Instead keep it in the filtered bindings | otherwise - = case cg_stb i of - VirStkLoc offset - | size > 0 - -> dead_slots live_vars fbs ([offset-size+1 .. offset] ++ ds) bs + = dead_slots live_vars fbs (infoLiveSlots i ++ ds) ls bs - _ -> dead_slots live_vars fbs ds bs - where - size :: WordOff - size = cgRepSizeW (cg_rep i) +infoLiveSlots :: CgIdInfo -> [WordOff] +infoLiveSlots i = [free | elem_i <- cg_elems i + , VirStkLoc offset <- [cg_stb elem_i] + , let size = cgRepSizeW (cg_rep elem_i) :: WordOff + , size > 0 + , free <- [offset-size+1 .. offset]] getLiveStackSlots :: FCode [VirtualSpOffset] -- Return the offsets of slots in stack containig live pointers getLiveStackSlots = do { binds <- getBinds - ; return [off | CgIdInfo { cg_stb = VirStkLoc off, - cg_rep = rep } <- varEnvElts binds, - isFollowableArg rep] } + ; return [off | info <- varEnvElts binds + , CgIdElemInfo { cg_stb = VirStkLoc off + , cg_rep = rep } <- cg_elems info + , isFollowableArg rep] } -getLiveStackBindings :: FCode [(VirtualSpOffset, CgIdInfo)] +getLiveStackBindings :: FCode [(VirtualSpOffset, CgRep)] getLiveStackBindings = do { binds <- getBinds - ; return [(off, bind) | - bind <- varEnvElts binds, - CgIdInfo { cg_stb = VirStkLoc off, - cg_rep = rep} <- [bind], + ; return [(off, rep) | + info <- varEnvElts binds, + elem_info <- cg_elems info, + CgIdElemInfo { cg_stb = VirStkLoc off, + cg_rep = rep} <- [elem_info], isFollowableArg rep] } \end{code} diff --git a/compiler/codeGen/CgCallConv.hs b/compiler/codeGen/CgCallConv.hs index c65194b62f..958f65ea09 100644 --- a/compiler/codeGen/CgCallConv.hs +++ b/compiler/codeGen/CgCallConv.hs @@ -36,7 +36,7 @@ import CLabel import Constants import CgStackery -import ClosureInfo( CgRep(..), nonVoidArg, idCgRep, cgRepSizeW, isFollowableArg ) +import ClosureInfo( CgRep(..), idCgRep, cgRepSizeW, isFollowableArg ) import OldCmmUtils import Maybes import Id @@ -45,7 +45,6 @@ import Util import StaticFlags import Module import FastString -import Outputable import Data.Bits ------------------------------------------------------------------------- @@ -71,8 +70,7 @@ mkArgDescr _nm args Nothing -> return (ArgGen arg_bits) where arg_bits = argBits arg_reps - arg_reps = filter nonVoidArg (map idCgRep args) - -- Getting rid of voids eases matching of standard patterns + arg_reps = concatMap idCgRep args argBits :: [CgRep] -> [Bool] -- True for non-ptr, False for ptr argBits [] = [] @@ -118,7 +116,7 @@ stdPattern _ = Nothing -- GET_NON_PTRS(), GET_PTRS(), GET_LIVENESS(). ------------------------------------------------------------------------- -mkRegLiveness :: [(Id, GlobalReg)] -> Int -> Int -> StgWord +mkRegLiveness :: [(CgRep, GlobalReg)] -> Int -> Int -> StgWord mkRegLiveness regs ptrs nptrs = (fromIntegral nptrs `shiftL` 16) .|. (fromIntegral ptrs `shiftL` 24) .|. @@ -127,7 +125,7 @@ mkRegLiveness regs ptrs nptrs all_non_ptrs = 0xff reg_bits [] = 0 - reg_bits ((id, VanillaReg i _) : regs) | isFollowableArg (idCgRep id) + reg_bits ((cgrep, VanillaReg i _) : regs) | isFollowableArg cgrep = (1 `shiftL` (i - 1)) .|. reg_bits regs reg_bits (_ : regs) = reg_bits regs @@ -141,10 +139,10 @@ mkRegLiveness regs ptrs nptrs -- For a slow call, we must take a bunch of arguments and intersperse -- some stg_ap_<pattern>_ret_info return addresses. constructSlowCall - :: [(CgRep,CmmExpr)] + :: [[(CgRep,CmmExpr)]] -> (CLabel, -- RTS entry point for call [(CgRep,CmmExpr)], -- args to pass to the entry point - [(CgRep,CmmExpr)]) -- stuff to save on the stack + [[(CgRep,CmmExpr)]]) -- stuff to save on the stack -- don't forget the zero case constructSlowCall [] @@ -159,7 +157,7 @@ constructSlowCall amodes -- | 'slowArgs' takes a list of function arguments and prepares them for -- pushing on the stack for "extra" arguments to a function which requires -- fewer arguments than we currently have. -slowArgs :: [(CgRep,CmmExpr)] -> [(CgRep,CmmExpr)] +slowArgs :: [[(CgRep,CmmExpr)]] -> [(CgRep,CmmExpr)] slowArgs [] = [] slowArgs amodes | opt_SccProfilingOn = save_cccs ++ this_pat ++ slowArgs rest @@ -171,29 +169,30 @@ slowArgs amodes save_cccs = [(NonPtrArg, mkLblExpr save_cccs_lbl), (NonPtrArg, curCCS)] save_cccs_lbl = mkCmmRetInfoLabel rtsPackageId (fsLit "stg_restore_cccs") -matchSlowPattern :: [(CgRep,CmmExpr)] - -> (FastString, [(CgRep,CmmExpr)], [(CgRep,CmmExpr)]) -matchSlowPattern amodes = (arg_pat, these, rest) - where (arg_pat, n) = slowCallPattern (map fst amodes) +matchSlowPattern :: [[(CgRep,CmmExpr)]] + -> (FastString, [(CgRep,CmmExpr)], [[(CgRep,CmmExpr)]]) +matchSlowPattern amodes = (arg_pat, concat these, rest) + where (arg_pat, n) = slowCallPattern (map (map fst) amodes) (these, rest) = splitAt n amodes -- These cases were found to cover about 99% of all slow calls: -slowCallPattern :: [CgRep] -> (FastString, Int) -slowCallPattern (PtrArg: PtrArg: PtrArg: PtrArg: PtrArg: PtrArg: _) = (fsLit "stg_ap_pppppp", 6) -slowCallPattern (PtrArg: PtrArg: PtrArg: PtrArg: PtrArg: _) = (fsLit "stg_ap_ppppp", 5) -slowCallPattern (PtrArg: PtrArg: PtrArg: PtrArg: _) = (fsLit "stg_ap_pppp", 4) -slowCallPattern (PtrArg: PtrArg: PtrArg: VoidArg: _) = (fsLit "stg_ap_pppv", 4) -slowCallPattern (PtrArg: PtrArg: PtrArg: _) = (fsLit "stg_ap_ppp", 3) -slowCallPattern (PtrArg: PtrArg: VoidArg: _) = (fsLit "stg_ap_ppv", 3) -slowCallPattern (PtrArg: PtrArg: _) = (fsLit "stg_ap_pp", 2) -slowCallPattern (PtrArg: VoidArg: _) = (fsLit "stg_ap_pv", 2) -slowCallPattern (PtrArg: _) = (fsLit "stg_ap_p", 1) -slowCallPattern (VoidArg: _) = (fsLit "stg_ap_v", 1) -slowCallPattern (NonPtrArg: _) = (fsLit "stg_ap_n", 1) -slowCallPattern (FloatArg: _) = (fsLit "stg_ap_f", 1) -slowCallPattern (DoubleArg: _) = (fsLit "stg_ap_d", 1) -slowCallPattern (LongArg: _) = (fsLit "stg_ap_l", 1) -slowCallPattern _ = panic "CgStackery.slowCallPattern" +slowCallPattern :: [[CgRep]] -> (FastString, Int) +slowCallPattern ([PtrArg]: [PtrArg]: [PtrArg]: [PtrArg]: [PtrArg]: [PtrArg]: _) = (fsLit "stg_ap_pppppp", 6) +slowCallPattern ([PtrArg]: [PtrArg]: [PtrArg]: [PtrArg]: [PtrArg]: _) = (fsLit "stg_ap_ppppp", 5) +slowCallPattern ([PtrArg]: [PtrArg]: [PtrArg]: [PtrArg]: _) = (fsLit "stg_ap_pppp", 4) +slowCallPattern ([PtrArg]: [PtrArg]: [PtrArg]: []: _) = (fsLit "stg_ap_pppv", 4) +slowCallPattern ([PtrArg]: [PtrArg]: [PtrArg]: _) = (fsLit "stg_ap_ppp", 3) +slowCallPattern ([PtrArg]: [PtrArg]: []: _) = (fsLit "stg_ap_ppv", 3) +slowCallPattern ([PtrArg]: [PtrArg]: _) = (fsLit "stg_ap_pp", 2) +slowCallPattern ([PtrArg]: []: _) = (fsLit "stg_ap_pv", 2) +slowCallPattern ([PtrArg]: _) = (fsLit "stg_ap_p", 1) +slowCallPattern ([NonPtrArg]: _) = (fsLit "stg_ap_n", 1) +slowCallPattern ([FloatArg]: _) = (fsLit "stg_ap_f", 1) +slowCallPattern ([DoubleArg]: _) = (fsLit "stg_ap_d", 1) +slowCallPattern ([LongArg]: _) = (fsLit "stg_ap_l", 1) +slowCallPattern ([]: _) = (fsLit "stg_ap_v", 1) +slowCallPattern (rs: _) = (error "FIXME" rs, 1) +slowCallPattern [] = (fsLit "stg_ap_0", 0) ------------------------------------------------------------------------- -- @@ -207,7 +206,6 @@ dataReturnConvPrim NonPtrArg = CmmGlobal (VanillaReg 1 VNonGcPtr) dataReturnConvPrim LongArg = CmmGlobal (LongReg 1) dataReturnConvPrim FloatArg = CmmGlobal (FloatReg 1) dataReturnConvPrim DoubleArg = CmmGlobal (DoubleReg 1) -dataReturnConvPrim VoidArg = panic "dataReturnConvPrim: void" -- getSequelAmode returns an amode which refers to an info table. The info @@ -281,14 +279,12 @@ assignReturnRegs args -- Also, the bytecode compiler assumes this when compiling -- case expressions and ccalls, so it only needs to know one set of -- return conventions. - | [(rep,arg)] <- non_void_args, CmmGlobal r <- dataReturnConvPrim rep + | [(rep,arg)] <- args, CmmGlobal r <- dataReturnConvPrim rep = ([(arg, r)], []) | otherwise = assign_regs args (mkRegTbl []) -- For returning unboxed tuples etc, - -- we use all regs - where - non_void_args = filter ((/= VoidArg).fst) args + -- we use all r assign_regs :: [(CgRep,a)] -- Arg or result values to assign -> AvailRegs -- Regs still avail: Vanilla, Float, Double, Longs @@ -297,8 +293,6 @@ assign_regs args supply = go args [] supply where go [] acc _ = (acc, []) -- Return the results reversed (doesn't matter) - go ((VoidArg,_) : args) acc supply -- Skip void arguments; they aren't passed, and - = go args acc supply -- there's nothing to bind them to go ((rep,arg) : args) acc supply = case assign_reg rep supply of Just (reg, supply') -> go args ((arg,reg):acc) supply' diff --git a/compiler/codeGen/CgCase.lhs b/compiler/codeGen/CgCase.lhs index dd607de1fc..043934af10 100644 --- a/compiler/codeGen/CgCase.lhs +++ b/compiler/codeGen/CgCase.lhs @@ -42,10 +42,12 @@ import PrimOp import Type import TyCon import Util +import UniqSupply +import MonadUtils import Outputable import FastString -import Control.Monad (when) +import Control.Monad \end{code} \begin{code} @@ -110,10 +112,10 @@ Special case #1: case of literal. \begin{code} cgCase (StgLit lit) _live_in_whole_case _live_in_alts bndr alt_type@(PrimAlt _) alts - = do { tmp_reg <- bindNewToTemp bndr + = do { [tmp_reg] <- bindNewToTemp bndr ; cm_lit <- cgLit lit ; stmtC (CmmAssign (CmmLocal tmp_reg) (CmmLit cm_lit)) - ; cgPrimAlts NoGC alt_type (CmmLocal tmp_reg) alts } + ; cgPrimAlts NoGC alt_type [CmmLocal tmp_reg] alts } \end{code} Special case #2: scrutinising a primitive-typed variable. No @@ -124,15 +126,9 @@ allocating more heap than strictly necessary, but it will sometimes eliminate a heap check altogether. \begin{code} -cgCase (StgApp _v []) _live_in_whole_case _live_in_alts bndr - (PrimAlt _) [(DEFAULT,bndrs,_,rhs)] - | isVoidArg (idCgRep bndr) - = ASSERT( null bndrs ) - WARN( True, ptext (sLit "Case of void constant; missing optimisation somewhere") <+> ppr bndr) - cgExpr rhs - cgCase (StgApp v []) _live_in_whole_case _live_in_alts bndr - alt_type@(PrimAlt _) alts + alt_type alts + | case alt_type of PrimAlt _ -> True; UbxTupAlt _ -> True; _ -> False -- Note [ticket #3132]: we might be looking at a case of a lifted Id -- that was cast to an unlifted type. The Id will always be bottom, -- but we don't want the code generator to fall over here. If we @@ -140,7 +136,7 @@ cgCase (StgApp v []) _live_in_whole_case _live_in_alts bndr -- type-incorrect Cmm. Hence we check that the types match, and if -- they don't we'll fall through and emit the usual enter/return -- code. Test case: codeGen/should_compile/3132.hs - | isUnLiftedType (idType v) + , isUnLiftedType (idType v) -- However, we also want to allow an assignment to be generated -- in the case when the types are compatible, because this allows @@ -151,19 +147,31 @@ cgCase (StgApp v []) _live_in_whole_case _live_in_alts bndr -- the HValue really is a MutVar#. The types are compatible though, -- so we can just generate an assignment. || reps_compatible - = do { when (not reps_compatible) $ + = WARN( null (idCgRep v), ptext (sLit "Case of void constant; missing optimisation somewhere") <+> ppr bndr) + do { when (not reps_compatible) $ panic "cgCase: reps do not match, perhaps a dodgy unsafeCoerce?" - -- Careful! we can't just bind the default binder to the same thing - -- as the scrutinee, since it might be a stack location, and having - -- two bindings pointing at the same stack locn doesn't work (it - -- confuses nukeDeadBindings). Hence, use a new temp. - ; v_info <- getCgIdInfo v - ; amode <- idInfoToAmode v_info - ; tmp_reg <- bindNewToTemp bndr - ; stmtC (CmmAssign (CmmLocal tmp_reg) amode) + -- TODO: could just bind the default binder to the same thing as the scrutinee, + -- rather than allocating these temporaries. + -- Having two Ids share locations doesn't confuse nukeDeadBindings any longer. + ; (tmp_regs, do_rhs) <- case alt_type of + PrimAlt _ -> do + tmp_regs <- bindNewToTemp bndr + return (tmp_regs, cgPrimAlts NoGC alt_type (map CmmLocal tmp_regs) alts) + UbxTupAlt _ + | [(DEFAULT, [], _, rhs)] <- alts -> do + tmp_regs <- bindNewToTemp bndr + return (tmp_regs, cgExpr rhs) + | [(DataAlt _, args, _, rhs)] <- alts -> do + tmp_regss <- mapM bindNewToTemp args + bindToRegs bndr (concat tmp_regss) + return (concat tmp_regss, cgExpr rhs) + _ -> panic "cgCase: weird UbxTupAlt?" - ; cgPrimAlts NoGC alt_type (CmmLocal tmp_reg) alts } + ; v_info <- getCgIdInfo v + ; amodes <- idInfoToAmodes v_info + ; forM_ (zipEqual "cgCase" tmp_regs amodes) $ \(tmp_reg, amode) -> stmtC (CmmAssign (CmmLocal tmp_reg) amode) + ; do_rhs } where reps_compatible = idCgRep v == idCgRep bndr \end{code} @@ -211,13 +219,12 @@ cgCase (StgOpApp (StgFCallOp fcall _) args _) = ASSERT( isSingleton alts ) do -- *must* be an unboxed tuple alt. -- exactly like the cgInlinePrimOp case for unboxed tuple alts.. - { res_tmps <- mapFCs bindNewToTemp non_void_res_ids - ; let res_hints = map (typeForeignHint.idType) non_void_res_ids - ; cgForeignCall (zipWith CmmHinted res_tmps res_hints) fcall args live_in_alts + { res_tmps <- concatMapM bindNewToTemp res_ids + ; let res_hints = concatMap (typeForeignHint.idType) res_ids + ; cgForeignCall (zipWithEqual "cgCase" CmmHinted res_tmps res_hints) fcall args live_in_alts ; cgExpr rhs } where (_, res_ids, _, rhs) = head alts - non_void_res_ids = filter (nonVoidArg . idCgRep) res_ids unsafe_foreign_call = case fcall of @@ -232,7 +239,7 @@ we can reuse/trim the stack slot holding the variable (if it is in one). cgCase (StgApp fun args) _live_in_whole_case live_in_alts bndr alt_type alts = do { fun_info <- getCgIdInfo fun - ; arg_amodes <- getArgAmodes args + ; arg_amodes <- mapM getArgAmodes args -- Nuking dead bindings *before* calculating the saves is the -- value-add here. We might end up freeing up some slots currently @@ -327,36 +334,28 @@ anywhere within the record). cgInlinePrimOp :: PrimOp -> [StgArg] -> Id -> AltType -> StgLiveVars -> [(AltCon, [Id], [Bool], StgExpr)] -> Code -cgInlinePrimOp primop args bndr (PrimAlt _) live_in_alts alts - | isVoidArg (idCgRep bndr) - = ASSERT( con == DEFAULT && isSingleton alts && null bs ) - do { -- VOID RESULT; just sequencing, - -- so get in there and do it - -- The bndr should not occur, so no need to bind it - cgPrimOp [] primop args live_in_alts - ; cgExpr rhs } - where - (con,bs,_,rhs) = head alts - cgInlinePrimOp primop args bndr (PrimAlt tycon) live_in_alts alts - = do { -- PRIMITIVE ALTS, with non-void result - tmp_reg <- bindNewToTemp bndr - ; cgPrimOp [tmp_reg] primop args live_in_alts - ; cgPrimAlts NoGC (PrimAlt tycon) (CmmLocal tmp_reg) alts } + = do { -- PRIMITIVE ALTS, with void OR non-void result + tmp_regs <- bindNewToTemp bndr + ; cgPrimOp tmp_regs primop args live_in_alts + ; cgPrimAlts NoGC (PrimAlt tycon) (map CmmLocal tmp_regs) alts } -cgInlinePrimOp primop args _ (UbxTupAlt _) live_in_alts alts - = ASSERT( isSingleton alts ) - do { -- UNBOXED TUPLE ALTS +cgInlinePrimOp primop args bndr (UbxTupAlt _) live_in_alts alts + = do { -- UNBOXED TUPLE ALTS -- No heap check, no yield, just get in there and do it. - -- NB: the case binder isn't bound to anything; - -- it has a unboxed tuple type - res_tmps <- mapFCs bindNewToTemp non_void_res_ids + ; (res_tmps, rhs) <- case alts of + [(DEFAULT, [], _, rhs)] | Just (_, tys) <- splitTyConApp_maybe (idType bndr) -> do + us <- newUniqSupply + let res_tmps = zipWith LocalReg (uniqsFromSupply us) (concatMap (map (argMachRep . primRepToCgRep) . typePrimRep) tys) + return (res_tmps, rhs) + [(DataAlt _, res_ids, _, rhs)] -> do + res_tmps <- concatMapM bindNewToTemp res_ids + return (res_tmps, rhs) + _ -> panic "cgInlinePrimOp" + ; bindToRegs bndr res_tmps ; cgPrimOp res_tmps primop args live_in_alts ; cgExpr rhs } - where - (_, res_ids, _, rhs) = head alts - non_void_res_ids = filter (nonVoidArg . idCgRep) res_ids cgInlinePrimOp primop args bndr (AlgAlt tycon) live_in_alts alts = do { -- ENUMERATION TYPE RETURN @@ -370,7 +369,7 @@ cgInlinePrimOp primop args bndr (AlgAlt tycon) live_in_alts alts -- (avoiding it avoids the assignment) -- The deadness info is set by StgVarInfo ; whenC (not (isDeadBinder bndr)) - (do { tmp_reg <- bindNewToTemp bndr + (do { [tmp_reg] <- bindNewToTemp bndr ; stmtC (CmmAssign (CmmLocal tmp_reg) (tagToClosure tycon tag_amode)) }) @@ -387,7 +386,7 @@ cgInlinePrimOp primop args bndr (AlgAlt tycon) live_in_alts alts do_enum_primop :: PrimOp -> FCode CmmExpr -- Returns amode for result do_enum_primop TagToEnumOp -- No code! | [arg] <- args = do - (_,e) <- getArgAmode arg + [(_,e)] <- getArgAmodes arg return e do_enum_primop primop = do tmp <- newTemp bWord @@ -418,32 +417,34 @@ cgEvalAlts :: Maybe VirtualSpOffset -- Offset of cost-centre to be restored, -- without risk of duplicating code cgEvalAlts cc_slot bndr alt_type@(PrimAlt tycon) alts - = do { let rep = tyConCgRep tycon - reg = dataReturnConvPrim rep -- Bottom for voidRep + = do { let reps = tyConCgRep tycon + regs = map dataReturnConvPrim reps ; abs_c <- forkProc $ do - { -- Bind the case binder, except if it's void - -- (reg is bottom in that case) - whenC (nonVoidArg rep) $ - bindNewToReg bndr reg (mkLFArgument bndr) + { -- Bind the case binder + bindNewToReg bndr (zipEqual "cgEvalAlts" regs (mkLFArgument (idType bndr))) ; restoreCurrentCostCentre cc_slot True - ; cgPrimAlts GCMayHappen alt_type reg alts } + ; cgPrimAlts GCMayHappen alt_type regs alts } ; lbl <- emitReturnTarget (idName bndr) abs_c ; returnFC (CaseAlts lbl Nothing bndr) } cgEvalAlts cc_slot bndr (UbxTupAlt _) [(con,args,_,rhs)] - = -- Unboxed tuple case - -- By now, the simplifier should have have turned it - -- into case e of (# a,b #) -> e - -- There shouldn't be a - -- case e of DEFAULT -> e - ASSERT2( case con of { DataAlt _ -> True; _ -> False }, - text "cgEvalAlts: dodgy case of unboxed tuple type" ) - do { -- forkAbsC for the RHS, so that the envt is + = do { -- forkAbsC for the RHS, so that the envt is -- not changed for the emitReturn call abs_c <- forkProc $ do - { (live_regs, ptrs, nptrs, _) <- bindUnboxedTupleComponents args + { (flat_arg_locs, live_regs, ptrs, nptrs) <- case con of + DEFAULT + | Just (_, tys) <- splitTyConApp_maybe (idType bndr) + , [] <- args -> do + (arg_locs, live_regs, ptrs, nptrs, _) <- bindUnboxedTupleComponents [((), typeCgRep ty) | ty <- tys] + return (concatMap snd arg_locs, live_regs, ptrs, nptrs) + DataAlt _ -> do + (arg_locs, live_regs, ptrs, nptrs, _) <- bindUnboxedTupleComponents [(arg, idCgRep arg) | arg <- args] + bindArgsToRegOrStack arg_locs + return (concatMap snd arg_locs, live_regs, ptrs, nptrs) + _ -> panic "cgEvalAlts" + ; bindArgsToRegOrStack [(bndr, flat_arg_locs)] -- Restore the CC *after* binding the tuple components, -- so that we get the stack offset of the saved CC right. ; restoreCurrentCostCentre cc_slot True @@ -457,7 +458,7 @@ cgEvalAlts cc_slot bndr (UbxTupAlt _) [(con,args,_,rhs)] cgEvalAlts cc_slot bndr alt_type alts = -- Algebraic and polymorphic case do { -- Bind the default binder - bindNewToReg bndr nodeReg (mkLFArgument bndr) + bindNewToReg bndr [(nodeReg, only (mkLFArgument (idType bndr)))] -- Generate sequel info for use downstream -- At the moment, we only do it if the type is vector-returnable. @@ -559,7 +560,7 @@ As usual, no binders in the alternatives are yet bound. \begin{code} cgPrimAlts :: GCFlag -> AltType -- Always PrimAlt, but passed to maybeAltHeapCheck - -> CmmReg -- Scrutinee + -> [CmmReg] -- Scrutinee registers: either unary or nullary (if void) -> [StgAlt] -- Alternatives -> Code -- NB: cgPrimAlts emits code that does the case analysis. @@ -568,11 +569,14 @@ cgPrimAlts :: GCFlag -- different to cgAlgAlts -- -- INVARIANT: the default binder is already bound -cgPrimAlts gc_flag alt_type scrutinee alts +cgPrimAlts gc_flag alt_type scrutinees alts = do { tagged_absCs <- forkAlts (map (cgPrimAlt gc_flag alt_type) alts) ; let ((DEFAULT, deflt_absC) : others) = tagged_absCs -- There is always a default alt_absCs = [(lit,rhs) | (LitAlt lit, rhs) <- others] - ; emitLitSwitch (CmmReg scrutinee) alt_absCs deflt_absC } + ; case scrutinees of + [] -> emitCgStmts deflt_absC + [scrut] -> emitLitSwitch (CmmReg scrut) alt_absCs deflt_absC + _ -> panic "cgPrimAlts: unboxed tuple scrutinee" } cgPrimAlt :: GCFlag -> AltType @@ -621,21 +625,19 @@ saveVolatileVars :: StgLiveVars -- Vars which should be made safe -> FCode CmmStmts -- Assignments to to the saves saveVolatileVars vars - = do { stmts_s <- mapFCs save_it (varSetElems vars) + = do { stmts_s <- concatMapM save_it (varSetElems vars) ; return (foldr plusStmts noStmts stmts_s) } where save_it var - = do { v <- getCAddrModeIfVolatile var - ; case v of - Nothing -> return noStmts -- Non-volatile - Just vol_amode -> save_var var vol_amode -- Aha! It's volatile - } - - save_var var vol_amode - = do { slot <- allocPrimStack (idCgRep var) - ; rebindToStack var slot - ; sp_rel <- getSpRelOffset slot - ; returnFC (oneStmt (CmmStore sp_rel vol_amode)) } + = do { vol_amodes <- getVolatilesCAddrModes var -- If non-volatile, empty list + ; (stmts, slots) <- liftM unzip $ forM vol_amodes $ \mb_vol_amode -> case mb_vol_amode of + Nothing -> return (noStmts, Nothing) + Just (rep, vol_amode) -> do + slot <- allocPrimStack rep + sp_rel <- getSpRelOffset slot + returnFC (oneStmt (CmmStore sp_rel vol_amode), Just slot) + ; rebindToStack var slots + ; return stmts } \end{code} --------------------------------------------------------------------------- diff --git a/compiler/codeGen/CgClosure.lhs b/compiler/codeGen/CgClosure.lhs index 4d1ce50099..78308854b0 100644 --- a/compiler/codeGen/CgClosure.lhs +++ b/compiler/codeGen/CgClosure.lhs @@ -53,7 +53,10 @@ import StaticFlags import DynFlags import Outputable import FastString +import MonadUtils +import Control.Arrow (second) +import Control.Monad import Data.List \end{code} @@ -118,7 +121,7 @@ cgStdRhsClosure cgStdRhsClosure bndr _cc _bndr_info _fvs _args _body lf_info payload = do -- AHA! A STANDARD-FORM THUNK { -- LAY OUT THE OBJECT - amodes <- getArgAmodes payload + amodes <- concatMapM getArgAmodes payload ; mod_name <- getModuleName ; let (tot_wds, ptr_wds, amodes_w_offsets) = mkVirtHeapOffsets (isLFThunk lf_info) amodes @@ -169,11 +172,20 @@ cgRhsClosure bndr cc bndr_info fvs upd_flag args body = do ; fv_infos <- mapFCs getCgIdInfo reduced_fvs ; srt_info <- getSRTInfo ; mod_name <- getModuleName - ; let bind_details :: [(CgIdInfo, VirtualHpOffset)] - (tot_wds, ptr_wds, bind_details) - = mkVirtHeapOffsets (isLFThunk lf_info) (map add_rep fv_infos) - - add_rep info = (cgIdInfoArgRep info, info) + ; let flat_bind_details :: [((Id, CgIdElemInfo), VirtualHpOffset)] + (tot_wds, ptr_wds, flat_bind_details) + = mkVirtHeapOffsets (isLFThunk lf_info) + [(cgIdElemInfoArgRep elem_info, + (cgIdInfoId info, elem_info)) + | info <- fv_infos + , elem_info <- cgIdInfoElems info] + + bind_details :: [(Id, [(VirtualHpOffset, CgIdElemInfo)])] + bind_details = [(info_id, [ (offset, elem_info) + | ((id, elem_info), offset) <- flat_bind_details + , id == info_id ]) + | info <- fv_infos + , let info_id = cgIdInfoId info] descr = closureDescription mod_name name closure_info = mkClosureInfo False -- Not static @@ -187,25 +199,26 @@ cgRhsClosure bndr cc bndr_info fvs upd_flag args body = do -- A function closure pointer may be tagged, so we -- must take it into account when accessing the free variables. mbtag = tagForArity (length args) - bind_fv (info, offset) + bind_fv (id, offset_infos) | Just tag <- mbtag - = bindNewToUntagNode (cgIdInfoId info) offset (cgIdInfoLF info) tag + = bindNewToUntagNode id (map (second cgIdElemInfoLF) offset_infos) tag | otherwise - = bindNewToNode (cgIdInfoId info) offset (cgIdInfoLF info) + = bindNewToNode id (map (second cgIdElemInfoLF) offset_infos) ; mapCs bind_fv bind_details -- Bind the binder itself, if it is a free var - ; whenC bndr_is_a_fv (bindNewToReg bndr nodeReg lf_info) + ; whenC bndr_is_a_fv (bindNewToReg bndr [(nodeReg, lf_info)]) -- Compile the body ; closureCodeBody bndr_info closure_info cc args body }) -- BUILD THE OBJECT - ; let - to_amode (info, offset) = do { amode <- idInfoToAmode info - ; return (amode, offset) } + ; let -- info_offsets :: [(CgIdElemInfo, LambdaFormInfo)] + to_amode (_id, offset_infos) = forM offset_infos $ \(offset, info) -> do + amode <- idElemInfoToAmode info + return (amode, offset) -- ; (use_cc, blame_cc) <- chooseDynCostCentres cc args body - ; amodes_w_offsets <- mapFCs to_amode bind_details + ; amodes_w_offsets <- concatMapM to_amode bind_details ; heap_offset <- allocDynClosure closure_info curCCS curCCS amodes_w_offsets -- RETURN @@ -274,7 +287,8 @@ closureCodeBody _binder_info cl_info cc args body do { -- Get the current virtual Sp (it might not be zero, -- eg. if we're compiling a let-no-escape). vSp <- getVirtSp - ; let (reg_args, other_args) = assignCallRegs (addIdReps args) + ; let args_with_reps = addIdReps args + (reg_args, other_args) = assignCallRegs args_with_reps (sp_top, stk_args) = mkVirtStkOffsets vSp other_args -- Allocate the global ticky counter @@ -286,34 +300,35 @@ closureCodeBody _binder_info cl_info cc args body ; setTickyCtrLabel ticky_ctr_lbl $ do -- Emit the slow-entry code - { reg_save_code <- mkSlowEntryCode cl_info reg_args + { reg_save_code <- mkSlowEntryCode cl_info [(idCgRep arg !! i , reg) | ((arg, i), reg) <- reg_args] -- Emit the main entry code ; blks <- forkProc $ - mkFunEntryCode cl_info cc reg_args stk_args + mkFunEntryCode cl_info cc (lookupArgLocs reg_args stk_args args) sp_top reg_save_code body ; emitClosureCodeAndInfoTable cl_info [] blks }} - mkFunEntryCode :: ClosureInfo -> CostCentreStack - -> [(Id,GlobalReg)] -- Args in regs - -> [(Id,VirtualSpOffset)] -- Args on stack + -> [(Id,[Either GlobalReg VirtualSpOffset])] -- Args in regs/stack -> VirtualSpOffset -- Last allocated word on stack -> CmmStmts -- Register-save code in case of GC -> StgExpr -> Code -- The main entry code for the closure -mkFunEntryCode cl_info cc reg_args stk_args sp_top reg_save_code body = do +mkFunEntryCode cl_info cc args sp_top reg_save_code body = do { -- Bind args to regs/stack as appropriate, -- and record expected position of sps - ; bindArgsToRegs reg_args - ; bindArgsToStack stk_args + ; bindArgsToRegOrStack args ; setRealAndVirtualSp sp_top -- Do the business + ; let reg_args :: [(CgRep, GlobalReg)] + reg_args = [ (rep, reg) + | (id, ei_reg_offs) <- args + , (rep, Left reg) <- zipEqual "mkFunEntryCode" (idCgRep id) ei_reg_offs ] ; funWrapper cl_info reg_args reg_save_code $ do { tickyEnterFun cl_info ; enterCostCentreFun cc @@ -337,7 +352,7 @@ The slow entry point is used in two places: (b) returning from a heap-check failure \begin{code} -mkSlowEntryCode :: ClosureInfo -> [(Id,GlobalReg)] -> FCode CmmStmts +mkSlowEntryCode :: ClosureInfo -> [(CgRep,GlobalReg)] -> FCode CmmStmts -- If this function doesn't have a specialised ArgDescr, we need -- to generate the function's arg bitmap, slow-entry code, and -- register-save code for the heap-check failure @@ -357,7 +372,7 @@ mkSlowEntryCode cl_info reg_args save_stmts = oneStmt stk_adj_push `plusStmts` mkStmts save_assts reps_w_regs :: [(CgRep,GlobalReg)] - reps_w_regs = [(idCgRep id, reg) | (id,reg) <- reverse reg_args] + reps_w_regs = reverse $ reg_args (final_stk_offset, stk_offsets) = mapAccumL (\off (rep,_) -> (off + cgRepSizeW rep, off)) 0 reps_w_regs @@ -407,10 +422,10 @@ thunkWrapper closure_info thunk_code = do -- setupUpdate *encloses* the thunk_code } -funWrapper :: ClosureInfo -- Closure whose code body this is - -> [(Id,GlobalReg)] -- List of argument registers (if any) - -> CmmStmts -- reg saves for the heap check failure - -> Code -- Body of function being compiled +funWrapper :: ClosureInfo -- Closure whose code body this is + -> [(CgRep,GlobalReg)] -- List of argument registers (if any) + -> CmmStmts -- reg saves for the heap check failure + -> Code -- Body of function being compiled -> Code funWrapper closure_info arg_regs reg_save_code fun_body = do { let node_points = nodeMustPointToIt (closureLFInfo closure_info) diff --git a/compiler/codeGen/CgCon.lhs b/compiler/codeGen/CgCon.lhs index 9049504dca..22a7c792c7 100644 --- a/compiler/codeGen/CgCon.lhs +++ b/compiler/codeGen/CgCon.lhs @@ -41,7 +41,6 @@ import TyCon import DataCon import Id import IdInfo -import Type import PrelInfo import Outputable import ListSetOps @@ -51,6 +50,7 @@ import DynFlags import FastString import Platform import StaticFlags +import MonadUtils import Control.Monad \end{code} @@ -75,7 +75,7 @@ cgTopRhsCon id con args ; ASSERT( args `lengthIs` dataConRepArity con ) return () -- LAY IT OUT - ; amodes <- getArgAmodes args + ; amodes <- concatMapM getArgAmodes args ; let platform = targetPlatform dflags @@ -250,11 +250,13 @@ bindConArgs con args let -- The binding below forces the masking out of the tag bits -- when accessing the constructor field. - bind_arg (arg, offset) = bindNewToUntagNode arg offset (mkLFArgument arg) (tagForCon con) (_, args_w_offsets) = layOutDynConstr con (addIdReps args) -- ASSERT(not (isUnboxedTupleCon con)) return () - mapCs bind_arg args_w_offsets + forM_ args $ \arg -> do + let offset_lf_infos = zipWith (\i lf_info -> (assoc "bindConArgs" args_w_offsets (arg, i), lf_info)) + [0..] (mkLFArgument (idType arg)) + bindNewToUntagNode arg offset_lf_infos (tagForCon con) \end{code} Unboxed tuples are handled slightly differently - the object is @@ -262,20 +264,21 @@ returned in registers and on the stack instead of the heap. \begin{code} bindUnboxedTupleComponents - :: [Id] -- Args - -> FCode ([(Id,GlobalReg)], -- Regs assigned + :: [(a, [CgRep])] -- Arg reps + -> FCode ([(a, [Either GlobalReg VirtualSpOffset])], -- Argument locations + [(CgRep,GlobalReg)], -- Regs assigned WordOff, -- Number of pointer stack slots WordOff, -- Number of non-pointer stack slots VirtualSpOffset) -- Offset of return address slot -- (= realSP on entry) -bindUnboxedTupleComponents args +bindUnboxedTupleComponents repss = do { vsp <- getVirtSp ; rsp <- getRealSp -- Assign as many components as possible to registers - ; let (reg_args, stk_args) = assignReturnRegs (addIdReps args) + ; let (reg_args, stk_args) = assignReturnRegs $ addIdReps' (map snd repss) -- Separate the rest of the args into pointers and non-pointers (ptr_args, nptr_args) = separateByPtrFollowness stk_args @@ -299,11 +302,9 @@ bindUnboxedTupleComponents args -- (trimming back the virtual SP), but the real SP still points to that slot ; freeStackSlots [vsp+1,vsp+2 .. rsp] - ; bindArgsToRegs reg_args - ; bindArgsToStack ptr_offsets - ; bindArgsToStack nptr_offsets + ; let arg_locs = lookupArgLocs' reg_args (ptr_offsets ++ nptr_offsets) repss - ; returnFC (reg_args, ptrs, nptrs, rsp) } + ; returnFC (arg_locs, [((snd (repss !! n)) !! i, reg) | ((n, i), reg) <- reg_args], ptrs, nptrs, rsp) } \end{code} %************************************************************************ @@ -324,7 +325,8 @@ cgReturnDataCon con amodes -- for it to be marked as "used" for LDV profiling. | opt_SccProfilingOn = build_it_then enter_it | otherwise - = ASSERT( amodes `lengthIs` dataConRepArity con ) + = -- NB: this assert is not true because some elements may be void/unboxed tuples + -- ASSERT( length amodes == dataConArity con ) do { EndOfBlockInfo _ sequel <- getEndOfBlockInfo ; case sequel of CaseAlts _ (Just (alts, deflt_lbl)) bndr @@ -369,7 +371,7 @@ cgReturnDataCon con amodes -- out as '54' :-) tickyReturnNewCon (length amodes) ; idinfo <- buildDynCon (dataConWorkId con) currentCCS con amodes - ; amode <- idInfoToAmode idinfo + ; amode <- idElemInfoToAmode (cgIdInfoSingleElem "cgReturnDataCon" idinfo) ; checkedAbsC (CmmAssign nodeReg amode) ; performReturn return_code } \end{code} @@ -466,8 +468,8 @@ cgDataCon data_con ; ldvEnter (CmmReg nodeReg) ; body_code } - arg_reps :: [(CgRep, Type)] - arg_reps = [(typeCgRep ty, ty) | ty <- dataConRepArgTys data_con] + arg_reps :: [(CgRep, ())] + arg_reps = [(rep, ()) | ty <- dataConRepArgTys data_con, rep <- typeCgRep ty] body_code = do { -- NB: We don't set CC when entering data (WDP 94/06) diff --git a/compiler/codeGen/CgExpr.lhs b/compiler/codeGen/CgExpr.lhs index cb3a86ef7f..41d713bde4 100644 --- a/compiler/codeGen/CgExpr.lhs +++ b/compiler/codeGen/CgExpr.lhs @@ -48,6 +48,7 @@ import Maybes import ListSetOps import BasicTypes import Util +import MonadUtils import Outputable import StaticFlags \end{code} @@ -83,7 +84,7 @@ cgExpr (StgApp fun args) = cgTailCall fun args \begin{code} cgExpr (StgConApp con args) - = do { amodes <- getArgAmodes args + = do { amodes <- concatMapM getArgAmodes args ; cgReturnDataCon con amodes } \end{code} @@ -94,9 +95,9 @@ top of the stack. \begin{code} cgExpr (StgLit lit) = do { cmm_lit <- cgLit lit - ; performPrimReturn rep (CmmLit cmm_lit) } + ; performPrimReturn [(rep, CmmLit cmm_lit)] } where - rep = (typeCgRep) (literalType lit) + [rep] = typeCgRep (literalType lit) \end{code} @@ -122,16 +123,15 @@ cgExpr (StgOpApp (StgFCallOp fcall _) stg_args res_ty) = do a return address right before doing the call, so the args must be out of the way. -} - reps_n_amodes <- getArgAmodes stg_args + reps_n_amodes <- mapM getArgAmodes stg_args let -- Get the *non-void* args, and jiggle them with shimForeignCall - arg_exprs = [ (shimForeignCallArg stg_arg expr, stg_arg) - | (stg_arg, (rep,expr)) <- stg_args `zip` reps_n_amodes, - nonVoidArg rep] + arg_exprs = [ expr + | (stg_arg, rep_exprs) <- stg_args `zip` reps_n_amodes + , expr <- shimForeignCallArg stg_arg (map snd rep_exprs) ] - arg_tmps <- sequence [ assignTemp arg - | (arg, _) <- arg_exprs] - let arg_hints = zipWith CmmHinted arg_tmps (map (typeForeignHint.stgArgType) stg_args) + arg_tmps <- mapM assignTemp arg_exprs + let arg_hints = zipWith CmmHinted arg_tmps (concatMap (typeForeignHint.stgArgType) stg_args) {- Now, allocate some result regs. -} @@ -145,7 +145,7 @@ cgExpr (StgOpApp (StgFCallOp fcall _) stg_args res_ty) = do cgExpr (StgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty) = ASSERT(isEnumerationTyCon tycon) - do { (_rep,amode) <- getArgAmode arg + do { [(_rep,amode)] <- getArgAmodes arg ; amode' <- assignTemp amode -- We're going to use it twice, -- so save in a temp if non-trivial ; stmtC (CmmAssign nodeReg (tagToClosure tycon amode')) @@ -170,15 +170,17 @@ cgExpr (StgOpApp (StgPrimOp primop) args res_ty) | primOpOutOfLine primop = tailCallPrimOp primop args - | ReturnsPrim VoidRep <- result_info + | ReturnsPrim [] <- result_info = do cgPrimOp [] primop args emptyVarSet -- ToDo: STG Live -- worried about this performReturn $ emitReturnInstr (Just []) - | ReturnsPrim rep <- result_info - = do res <- newTemp (typeCmmType res_ty) - cgPrimOp [res] primop args emptyVarSet - performPrimReturn (primRepToCgRep rep) (CmmReg (CmmLocal res)) + | ReturnsPrim reps <- result_info + = do ress <- mapM newTemp (typeCmmType res_ty) + cgPrimOp ress primop args emptyVarSet + performPrimReturn $ zipWithEqual "cgExpr" + (\rep res -> (primRepToCgRep rep, CmmReg (CmmLocal res))) + reps ress | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon = do (reps, regs, _hints) <- newUnboxedTupleRegs res_ty @@ -305,7 +307,7 @@ cgRhs :: Id -> StgRhs -> FCode (Id, CgIdInfo) -- the Id is passed along so a binding can be set up cgRhs name (StgRhsCon maybe_cc con args) - = do { amodes <- getArgAmodes args + = do { amodes <- concatMapM getArgAmodes args ; idinfo <- buildDynCon name maybe_cc con amodes ; returnFC (name, idinfo) } @@ -345,9 +347,10 @@ mkRhsClosure bndr cc bi (AlgAlt _) [(DataAlt con, params, _use_mask, (StgApp selectee [{-no args-}]))]) - | the_fv == scrutinee -- Scrutinee is the only free variable - && maybeToBool maybe_offset -- Selectee is a component of the tuple - && offset_into_int <= mAX_SPEC_SELECTEE_SIZE -- Offset is small enough + | the_fv == scrutinee -- Scrutinee is the only free variable + , [_] <- idCgRep selectee -- Selectee is unary (so guaranteed contiguous layout) + , maybeToBool maybe_offset -- Selectee is a component of the tuple + , offset_into_int <= mAX_SPEC_SELECTEE_SIZE -- Offset is small enough = -- NOT TRUE: ASSERT(is_single_constructor) -- The simplifier may have statically determined that the single alternative -- is the only possible case and eliminated the others, even if there are @@ -360,7 +363,7 @@ mkRhsClosure bndr cc bi (isUpdatable upd_flag) (_, params_w_offsets) = layOutDynConstr con (addIdReps params) -- Just want the layout - maybe_offset = assocMaybe params_w_offsets selectee + maybe_offset = assocMaybe params_w_offsets (selectee, 0) Just the_offset = maybe_offset offset_into_int = the_offset - fixedHdrSize \end{code} @@ -389,7 +392,8 @@ mkRhsClosure bndr cc bi body@(StgApp fun_id args) | args `lengthIs` (arity-1) - && all isFollowableArg (map idCgRep fvs) + && all (\fv -> case idCgRep fv of [rep] | isFollowableArg rep -> True; _ -> False) + fvs && isUpdatable upd_flag && arity <= mAX_SPEC_AP_SIZE && not opt_SccProfilingOn -- not when profiling: we don't want to @@ -481,9 +485,9 @@ newUnboxedTupleRegs :: Type -> FCode ([CgRep], [LocalReg], [ForeignHint]) newUnboxedTupleRegs res_ty = let ty_args = tyConAppArgs (repType res_ty) - (reps,hints) = unzip [ (rep, typeForeignHint ty) | ty <- ty_args, - let rep = typeCgRep ty, - nonVoidArg rep ] + (reps,hints) = unzip [ res + | ty <- ty_args + , res <- zipEqual "newUnboxedTupleRegs" (typeCgRep ty) (typeForeignHint ty) ] make_new_temp rep = newTemp (argMachRep rep) in do regs <- mapM make_new_temp reps diff --git a/compiler/codeGen/CgForeignCall.hs b/compiler/codeGen/CgForeignCall.hs index 16e77eca35..4b714d552b 100644 --- a/compiler/codeGen/CgForeignCall.hs +++ b/compiler/codeGen/CgForeignCall.hs @@ -24,7 +24,6 @@ import CgMonad import CgUtils import Type import TysPrim -import ClosureInfo( nonVoidArg ) import CLabel import OldCmm import OldCmmUtils @@ -36,6 +35,7 @@ import Outputable import Module import FastString import BasicTypes +import Util import Control.Monad @@ -50,15 +50,14 @@ cgForeignCall -> Code cgForeignCall results fcall stg_args live = do - reps_n_amodes <- getArgAmodes stg_args + reps_n_amodess <- mapM getArgAmodes stg_args let - -- Get the *non-void* args, and jiggle them with shimForeignCall - arg_exprs = [ shimForeignCallArg stg_arg expr - | (stg_arg, (rep,expr)) <- stg_args `zip` reps_n_amodes, - nonVoidArg rep] - - arg_hints = zipWith CmmHinted - arg_exprs (map (typeForeignHint.stgArgType) stg_args) + -- Get the args, and jiggle them with shimForeignCall + arg_hints = [ CmmHinted shimmed_expr hint + | (stg_arg, reps_n_amodes) <- zipEqual "cgForeignCall" stg_args reps_n_amodess + , let exprs = map snd reps_n_amodes + , (shimmed_expr, hint) <- zipEqual "cgForeignCall" (shimForeignCallArg stg_arg exprs) + (typeForeignHint (stgArgType stg_arg)) ] -- in emitForeignCall results fcall arg_hints live @@ -300,15 +299,14 @@ hpAlloc = CmmGlobal HpAlloc -- value passed to the call. For ByteArray#/Array# we pass the -- address of the actual array, not the address of the heap object. -shimForeignCallArg :: StgArg -> CmmExpr -> CmmExpr -shimForeignCallArg arg expr +shimForeignCallArg :: StgArg -> [CmmExpr] -> [CmmExpr] +shimForeignCallArg arg [expr] | tycon == arrayPrimTyCon || tycon == mutableArrayPrimTyCon - = cmmOffsetB expr arrPtrsHdrSize + = [cmmOffsetB expr arrPtrsHdrSize] | tycon == byteArrayPrimTyCon || tycon == mutableByteArrayPrimTyCon - = cmmOffsetB expr arrWordsHdrSize - - | otherwise = expr + = [cmmOffsetB expr arrWordsHdrSize] where -- should be a tycon app, since this is a foreign call tycon = tyConAppTyCon (repType (stgArgType arg)) +shimForeignCallArg _ exprs = exprs diff --git a/compiler/codeGen/CgHeapery.lhs b/compiler/codeGen/CgHeapery.lhs index dfe146dfc8..4571fe0a24 100644 --- a/compiler/codeGen/CgHeapery.lhs +++ b/compiler/codeGen/CgHeapery.lhs @@ -43,11 +43,9 @@ import SMRep import OldCmm import OldCmmUtils -import Id import DataCon import TyCon import CostCentre -import Util import Module import Constants import Outputable @@ -158,8 +156,7 @@ mkVirtHeapOffsets -- First in list gets lowest offset, which is initial offset + 1. mkVirtHeapOffsets is_thunk things - = let non_void_things = filterOut (isVoidArg . fst) things - (ptrs, non_ptrs) = separateByPtrFollowness non_void_things + = let (ptrs, non_ptrs) = separateByPtrFollowness things (wds_of_ptrs, ptrs_w_offsets) = mapAccumL computeOffset 0 ptrs (tot_wds, non_ptrs_w_offsets) = mapAccumL computeOffset wds_of_ptrs non_ptrs in @@ -374,17 +371,18 @@ altHeapCheck alt_type code -- Enter R1 after the heap check; it's a pointer gc_info (PrimAlt tc) - = case primRepToCgRep (tyConPrimRep tc) of - VoidArg -> (mkL "stg_gc_noregs", Just []) - FloatArg -> (mkL "stg_gc_f1", Just [FloatReg 1]) - DoubleArg -> (mkL "stg_gc_d1", Just [DoubleReg 1]) - LongArg -> (mkL "stg_gc_l1", Just [LongReg 1]) + = case map primRepToCgRep (tyConPrimRep tc) of + [] -> (mkL "stg_gc_noregs", Just []) + [FloatArg] -> (mkL "stg_gc_f1", Just [FloatReg 1]) + [DoubleArg] -> (mkL "stg_gc_d1", Just [DoubleReg 1]) + [LongArg] -> (mkL "stg_gc_l1", Just [LongReg 1]) -- R1 is boxed but unlifted: - PtrArg -> (mkL "stg_gc_unpt_r1", Just [node]) + [PtrArg] -> (mkL "stg_gc_unpt_r1", Just [node]) -- R1 is unboxed: - NonPtrArg -> (mkL "stg_gc_unbx_r1", Just [node]) + [NonPtrArg] -> (mkL "stg_gc_unbx_r1", Just [node]) + _ -> panic "altHeapCheck: n-ary type bound in PrimAlt" - gc_info (UbxTupAlt _) = panic "altHeapCheck" + gc_info (UbxTupAlt _) = panic "altHeapCheck: unboxed tuple" \end{code} @@ -397,7 +395,7 @@ non-pointers, and pass the number of each to the heap check code. \begin{code} unbxTupleHeapCheck - :: [(Id, GlobalReg)] -- Live registers + :: [(CgRep, GlobalReg)] -- Live registers -> WordOff -- no. of stack slots containing ptrs -> WordOff -- no. of stack slots containing nonptrs -> CmmStmts -- code to insert in the failure path diff --git a/compiler/codeGen/CgInfoTbls.hs b/compiler/codeGen/CgInfoTbls.hs index 1e80616887..a9bac49d20 100644 --- a/compiler/codeGen/CgInfoTbls.hs +++ b/compiler/codeGen/CgInfoTbls.hs @@ -42,6 +42,7 @@ import OldCmm import CLabel import Name import Unique +import UniqSupply import StaticFlags import Constants @@ -178,24 +179,27 @@ mkStackLayout = do [(offset - frame_sp - retAddrSizeW, b) | (offset, b) <- binds] + us <- newUniqSupply WARN( not (all (\bind -> fst bind >= 0) rel_binds), - pprPlatform platform binds $$ pprPlatform platform rel_binds $$ + pprPlatform platform (map fst binds) $$ pprPlatform platform (map fst rel_binds) $$ ppr frame_size $$ ppr real_sp $$ ppr frame_sp ) - return $ stack_layout rel_binds frame_size + return $ stack_layout us rel_binds frame_size -stack_layout :: [(VirtualSpOffset, CgIdInfo)] +stack_layout :: UniqSupply + -> [(VirtualSpOffset, CgRep)] -> WordOff -> [Maybe LocalReg] -stack_layout [] sizeW = replicate sizeW Nothing -stack_layout ((off, bind):binds) sizeW | off == sizeW - 1 = - (Just stack_bind) : (stack_layout binds (sizeW - rep_size)) +stack_layout _ [] sizeW = replicate sizeW Nothing +stack_layout us ((off, rep):binds) sizeW | off == sizeW - 1 = + (Just stack_bind) : (stack_layout us' binds (sizeW - rep_size)) where - rep_size = cgRepSizeW (cgIdInfoArgRep bind) + rep_size = cgRepSizeW rep stack_bind = LocalReg unique machRep - unique = getUnique (cgIdInfoId bind) - machRep = argMachRep (cgIdInfoArgRep bind) -stack_layout binds@(_:_) sizeW | otherwise = - Nothing : (stack_layout binds (sizeW - 1)) + (unique, us') = takeUniqFromSupply us + machRep = argMachRep rep +stack_layout us binds@(_:_) sizeW + | sizeW < 0 = panic "stack_layout: infinite loop?" + | otherwise = Nothing : (stack_layout us binds (sizeW - 1)) {- Another way to write the function that might be less error prone (untested) stack_layout offsets sizeW = result diff --git a/compiler/codeGen/CgLetNoEscape.lhs b/compiler/codeGen/CgLetNoEscape.lhs index 2fb603baed..8f13918279 100644 --- a/compiler/codeGen/CgLetNoEscape.lhs +++ b/compiler/codeGen/CgLetNoEscape.lhs @@ -188,7 +188,8 @@ cgLetNoEscapeBody :: Id -- Name of the joint point -> Code cgLetNoEscapeBody bndr _ cc_slot all_args body = do - { (arg_regs, ptrs, nptrs, ret_slot) <- bindUnboxedTupleComponents all_args + { (arg_locs, arg_regs, ptrs, nptrs, ret_slot) <- bindUnboxedTupleComponents [(arg, idCgRep arg) | arg <- all_args] + ; bindArgsToRegOrStack arg_locs -- restore the saved cost centre. BUT: we must not free the stack slot -- containing the cost centre, because it might be needed for a diff --git a/compiler/codeGen/CgParallel.hs b/compiler/codeGen/CgParallel.hs index 2804104708..af4c094de7 100644 --- a/compiler/codeGen/CgParallel.hs +++ b/compiler/codeGen/CgParallel.hs @@ -21,9 +21,9 @@ module CgParallel( doGranAllocate ) where +import ClosureInfo (CgRep) import CgMonad import CgCallConv -import Id import OldCmm import StaticFlags import Outputable @@ -50,7 +50,7 @@ doGranAllocate _hp ------------------------- -granFetchAndReschedule :: [(Id,GlobalReg)] -- Live registers +granFetchAndReschedule :: [(CgRep,GlobalReg)] -- Live registers -> Bool -- Node reqd? -> Code -- Emit code for simulating a fetch and then reschedule. @@ -89,7 +89,7 @@ reschedule _liveness _node_reqd = panic "granReschedule" -- that they are not inlined (see @CgCases.lhs@). These alternatives will -- be turned into separate functions. -granYield :: [(Id,GlobalReg)] -- Live registers +granYield :: [(CgRep,GlobalReg)] -- Live registers -> Bool -- Node reqd? -> Code diff --git a/compiler/codeGen/CgPrimOp.hs b/compiler/codeGen/CgPrimOp.hs index 3f1187f6be..17f508b666 100644 --- a/compiler/codeGen/CgPrimOp.hs +++ b/compiler/codeGen/CgPrimOp.hs @@ -32,6 +32,7 @@ import Constants import Outputable import FastString import StaticFlags +import MonadUtils import Control.Monad @@ -45,9 +46,8 @@ cgPrimOp :: [CmmFormal] -- where to put the results -> Code cgPrimOp results op args live - = do arg_exprs <- getArgAmodes args - let non_void_args = [ e | (r,e) <- arg_exprs, nonVoidArg r ] - emitPrimOp results op non_void_args live + = do arg_exprs <- concatMapM getArgAmodes args + emitPrimOp results op (map snd arg_exprs) live emitPrimOp :: [CmmFormal] -- where to put the results diff --git a/compiler/codeGen/CgStackery.lhs b/compiler/codeGen/CgStackery.lhs index 2628760183..5053150bb9 100644 --- a/compiler/codeGen/CgStackery.lhs +++ b/compiler/codeGen/CgStackery.lhs @@ -119,14 +119,12 @@ mkVirtStkOffsets :: VirtualSpOffset -- Offset of the last allocated thing -> [(CgRep,a)] -- things to make offsets for -> (VirtualSpOffset, -- OUTPUTS: Topmost allocated word - [(a, VirtualSpOffset)]) -- things with offsets (voids filtered out) + [(a, VirtualSpOffset)]) -- things with offsets mkVirtStkOffsets init_Sp_offset things = loop init_Sp_offset [] (reverse things) where loop offset offs [] = (offset,offs) - loop offset offs ((VoidArg,_):things) = loop offset offs things - -- ignore Void arguments loop offset offs ((rep,t):things) = loop thing_slot ((t,thing_slot):offs) things where diff --git a/compiler/codeGen/CgTailCall.lhs b/compiler/codeGen/CgTailCall.lhs index 499529d841..ff5fc47586 100644 --- a/compiler/codeGen/CgTailCall.lhs +++ b/compiler/codeGen/CgTailCall.lhs @@ -43,9 +43,11 @@ import StgSyn import PrimOp import Outputable import StaticFlags +import Util +import Maybes +import MonadUtils import Control.Monad -import Data.Maybe ----------------------------------------------------------------------------- -- Tail Calls @@ -78,11 +80,11 @@ cgTailCall fun args ; if isUnLiftedType (idType fun) then -- Primitive return ASSERT( null args ) - do { fun_amode <- idInfoToAmode fun_info - ; performPrimReturn (cgIdInfoArgRep fun_info) fun_amode } + do { fun_amodes <- idInfoToAmodes fun_info + ; performPrimReturn (zipEqual "cgTail" (map cgIdElemInfoArgRep (cgIdInfoElems fun_info)) fun_amodes) } else -- Normal case, fun is boxed - do { arg_amodes <- getArgAmodes args + do { arg_amodes <- mapM getArgAmodes args ; performTailCall fun_info arg_amodes noStmts } } @@ -91,26 +93,28 @@ cgTailCall fun args -- The guts of a tail-call performTailCall - :: CgIdInfo -- The function - -> [(CgRep,CmmExpr)] -- Args - -> CmmStmts -- Pending simultaneous assignments - -- *** GUARANTEED to contain only stack assignments. + :: CgIdInfo -- The function + -> [[(CgRep,CmmExpr)]] -- Args + -> CmmStmts -- Pending simultaneous assignments + -- *** GUARANTEED to contain only stack assignments. -> Code performTailCall fun_info arg_amodes pending_assts - | Just join_sp <- maybeLetNoEscape fun_info + | Just join_sp <- maybeLetNoEscape fun_elem_info = -- A let-no-escape is slightly different, because we -- arrange the stack arguments into pointers and non-pointers -- to make the heap check easier. The tail-call sequence -- is very similar to returning an unboxed tuple, so we -- share some code. - do { (final_sp, arg_assts, live) <- pushUnboxedTuple join_sp arg_amodes + -- + -- NB: let-no-escapes calls are always saturated or better! + do { (final_sp, arg_assts, live) <- pushUnboxedTuple join_sp (concat arg_amodes) ; emitSimultaneously (pending_assts `plusStmts` arg_assts) ; let lbl = enterReturnPtLabel (idUnique (cgIdInfoId fun_info)) ; doFinalJump final_sp True $ jumpToLbl lbl (Just live) } | otherwise - = do { fun_amode <- idInfoToAmode fun_info + = do { fun_amode <- idElemInfoToAmode fun_elem_info ; let assignSt = CmmAssign nodeReg fun_amode node_asst = oneStmt assignSt node_live = Just [node] @@ -160,7 +164,7 @@ performTailCall fun_info arg_amodes pending_assts { if (isKnownFun lf_info) then tickyKnownCallTooFewArgs else tickyUnknownCall - ; tickySlowCallPat (map fst arg_amodes) + ; tickySlowCallPat (concatMap (map fst) arg_amodes) } ; let (apply_lbl, args, extra_args) @@ -173,24 +177,25 @@ performTailCall fun_info arg_amodes pending_assts -- A direct function call (possibly with some left-over arguments) DirectEntry lbl arity -> do - { if arity == length arg_amodes - then tickyKnownCallExact - else do tickyKnownCallExtraArgs - tickySlowCallPat (map fst (drop arity arg_amodes)) + { if length arg_amodes == arity + then tickyKnownCallExact + else do tickyKnownCallExtraArgs + tickySlowCallPat (concatMap (map fst) (drop arity arg_amodes)) ; let -- The args beyond the arity go straight on the stack (arity_args, extra_args) = splitAt arity arg_amodes - ; directCall sp lbl arity_args extra_args opt_node_live + ; directCall sp lbl (concat arity_args) extra_args opt_node_live (opt_node_asst `plusStmts` pending_assts) } } where fun_id = cgIdInfoId fun_info fun_name = idName fun_id - lf_info = cgIdInfoLF fun_info - fun_has_cafs = idCafInfo fun_id + fun_elem_info = cgIdInfoSingleElem ("performTailCall: " ++ showPpr fun_id) fun_info + lf_info = cgIdElemInfoLF fun_elem_info + fun_has_cafs = idCafInfo fun_id untag_node = CmmAssign nodeReg (cmmUntag (CmmReg nodeReg)) -- Test if closure is a constructor maybeSwitchOnCons enterClosure eob @@ -247,7 +252,7 @@ performTailCall fun_info arg_amodes pending_assts -} directCall :: VirtualSpOffset -> CLabel -> [(CgRep, CmmExpr)] - -> [(CgRep, CmmExpr)] -> Maybe [GlobalReg] -> CmmStmts + -> [[(CgRep, CmmExpr)]] -> Maybe [GlobalReg] -> CmmStmts -> Code directCall sp lbl args extra_args live_node assts = do let @@ -302,22 +307,17 @@ performReturn finish_code -- ---------------------------------------------------------------------------- -- Primitive Returns --- Just load the return value into the right register, and return. +-- Just load the return values into the right registers, and return. -performPrimReturn :: CgRep -> CmmExpr -> Code +performPrimReturn :: [(CgRep, CmmExpr)] -> Code --- non-void return value -performPrimReturn rep amode | not (isVoidArg rep) - = do { stmtC (CmmAssign ret_reg amode) - ; performReturn $ emitReturnInstr live_regs } - where - -- careful here as 'dataReturnConvPrim' will panic if given a Void rep - ret_reg@(CmmGlobal r) = dataReturnConvPrim rep - live_regs = Just [r] - --- void return value -performPrimReturn _ _ - = performReturn $ emitReturnInstr (Just []) +-- works for both void, non-void and unboxed-tuple Id return values +performPrimReturn rep_amodes + = do { live_regs <- forM rep_amodes $ \(rep, amode) -> do + let ret_reg@(CmmGlobal r) = dataReturnConvPrim rep + stmtC (CmmAssign ret_reg amode) + return r + ; performReturn $ emitReturnInstr (Just live_regs) } -- --------------------------------------------------------------------------- @@ -412,7 +412,7 @@ tailCallPrim lbl args = do { -- We're going to perform a normal-looking tail call, -- except that *all* the arguments will be in registers. -- Hence the ASSERT( null leftovers ) - arg_amodes <- getArgAmodes args + arg_amodes <- concatMapM getArgAmodes args ; let (arg_regs, leftovers) = assignPrimOpCallRegs arg_amodes live_regs = Just $ map snd arg_regs jump_to_primop = jumpToLbl lbl live_regs diff --git a/compiler/codeGen/CgTicky.hs b/compiler/codeGen/CgTicky.hs index 0ff440e6bf..2b7ed902d5 100644 --- a/compiler/codeGen/CgTicky.hs +++ b/compiler/codeGen/CgTicky.hs @@ -34,7 +34,7 @@ module CgTicky ( tickyUpdateBhCaf, tickyBlackHole, - tickyUnboxedTupleReturn, tickyVectoredReturn, + tickyUnboxedTupleReturn, tickyReturnOldCon, tickyReturnNewCon, tickyKnownCallTooFewArgs, tickyKnownCallExact, tickyKnownCallExtraArgs, @@ -62,6 +62,7 @@ import FastString import Constants import Outputable import Module +import Maybes -- Turgid imports for showTypeCategory import PrelNames @@ -71,8 +72,6 @@ import TyCon import DynFlags -import Data.Maybe - ----------------------------------------------------------------------------- -- -- Ticky-ticky profiling @@ -200,16 +199,11 @@ tickyReturnNewCon arity = ifTicky $ do { bumpTickyCounter (fsLit "RET_NEW_ctr") ; bumpHistogram (fsLit "RET_NEW_hst") arity } -tickyUnboxedTupleReturn :: Int -> Code +tickyUnboxedTupleReturn :: Arity -> Code tickyUnboxedTupleReturn arity = ifTicky $ do { bumpTickyCounter (fsLit "RET_UNBOXED_TUP_ctr") ; bumpHistogram (fsLit "RET_UNBOXED_TUP_hst") arity } -tickyVectoredReturn :: Int -> Code -tickyVectoredReturn family_size - = ifTicky $ do { bumpTickyCounter (fsLit "VEC_RETURN_ctr") - ; bumpHistogram (fsLit "RET_VEC_RETURN_hst") family_size } - -- ----------------------------------------------------------------------------- -- Ticky calls diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs index f971a0500a..0092cad5b7 100644 --- a/compiler/codeGen/CgUtils.hs +++ b/compiler/codeGen/CgUtils.hs @@ -7,7 +7,8 @@ ----------------------------------------------------------------------------- module CgUtils ( - addIdReps, + addIdReps, lookupArgLocs, + addIdReps', lookupArgLocs', cgLit, emitDataLits, mkDataLits, emitRODataLits, mkRODataLits, @@ -80,8 +81,43 @@ import Data.Maybe -- ------------------------------------------------------------------------- -addIdReps :: [Id] -> [(CgRep, Id)] -addIdReps ids = [(idCgRep id, id) | id <- ids] +-- FIXME: perhaps nicer to just use the primed versions everywhere? + +addIdReps :: [Id] -> [(CgRep, (Id, Int))] +addIdReps ids = [(rep, (id, i)) + | id <- ids + , (i, rep) <- [0..] `zip` idCgRep id] + +addIdReps' :: [[CgRep]] -> [(CgRep, (Int, Int))] +addIdReps' repss = [(rep, (n, i)) + | (n, reps) <- [0..] `zip` repss + , (i, rep) <- [0..] `zip` reps] + +lookupArgLocs :: [((Id, Int), GlobalReg)] + -> [((Id, Int), VirtualSpOffset)] + -> [Id] + -> [(Id, [Either GlobalReg VirtualSpOffset])] +lookupArgLocs reg_args stk_args args + = [(arg, [case lookup (arg, i) reg_args of + Just reg -> Left reg + Nothing -> case lookup (arg, i) stk_args of + Just off -> Right off + _ -> pprPanic "lookupArgLocs" (ppr (arg, i)) + | (i, _rep) <- [0..] `zip` idCgRep arg]) + | arg <- args] + +lookupArgLocs' :: [((Int, Int), GlobalReg)] + -> [((Int, Int), VirtualSpOffset)] + -> [(a, [CgRep])] + -> [(a, [Either GlobalReg VirtualSpOffset])] +lookupArgLocs' reg_args stk_args repss + = [(x, [case lookup (n, i) reg_args of + Just reg -> Left reg + Nothing -> case lookup (n, i) stk_args of + Just off -> Right off + _ -> pprPanic "lookupArgLocs'" (ppr (n, i)) + | (i, _rep) <- [0..] `zip` reps]) + | (n, (x, reps)) <- [0..] `zip` repss] ------------------------------------------------------------------------- -- diff --git a/compiler/codeGen/ClosureInfo.lhs b/compiler/codeGen/ClosureInfo.lhs index 34746984c2..de23091973 100644 --- a/compiler/codeGen/ClosureInfo.lhs +++ b/compiler/codeGen/ClosureInfo.lhs @@ -61,10 +61,9 @@ module ClosureInfo ( staticClosureNeedsLink, -- CgRep and its functions - CgRep(..), nonVoidArg, + CgRep(..), argMachRep, primRepToCgRep, - isFollowableArg, isVoidArg, - isFloatingArg, is64BitArg, + isFollowableArg, isFloatingArg, is64BitArg, separateByPtrFollowness, cgRepSizeW, cgRepSizeB, retAddrSizeW, @@ -156,7 +155,7 @@ ClosureInfo contains a LambdaFormInfo. data LambdaFormInfo = LFReEntrant -- Reentrant closure (a function) TopLevelFlag -- True if top level - !Int -- Arity. Invariant: always > 0 + !Arity -- Arity. INVARIANT: > 0 !Bool -- True <=> no fvs ArgDescr -- Argument descriptor (should reall be in ClosureInfo) @@ -180,7 +179,7 @@ data LambdaFormInfo | LFLetNoEscape -- See LetNoEscape module for precise description of -- these "lets". - !Int -- arity; + !Arity -- arity; | LFBlackHole -- Used for the closures allocated to hold the result -- of a CAF. We want the target of the update frame to @@ -211,7 +210,7 @@ data StandardFormInfo -- The code for the thunk just pushes x2..xn on the stack and enters x1. -- There are a few of these (for 1 <= n <= MAX_SPEC_AP_SIZE) pre-compiled -- in the RTS to save space. - Int -- Arity, n + Arity -- Arity, n \end{code} @@ -228,14 +227,10 @@ arguments are used to decide which of the RTS's generic apply functions to call when applying an unknown function. It contains more information than the back-end data type MachRep, -so one can easily convert from CgRep -> MachRep. (Except that -there's no MachRep for a VoidRep.) +so one can easily convert from CgRep -> MachRep. -It distinguishes - pointers from non-pointers (we sort the pointers together - when building closures) - - void from other types: a void argument is different from no argument +It distinguishes pointers from non-pointers (we sort the pointers +together when building closures) All 64-bit types map to the same CgRep, because they're passed in the same register, but a PtrArg is still different from an NonPtrArg @@ -245,8 +240,7 @@ entry to the garbage collector. \begin{code} data CgRep - = VoidArg -- Void - | PtrArg -- Word-sized heap pointer, followed + = PtrArg -- Word-sized heap pointer, followed -- by the garbage collector | NonPtrArg -- Word-sized non-pointer -- (including addresses not followed by GC) @@ -256,7 +250,6 @@ data CgRep deriving Eq instance Outputable CgRep where - ppr VoidArg = ptext (sLit "V_") ppr PtrArg = ptext (sLit "P_") ppr NonPtrArg = ptext (sLit "I_") ppr LongArg = ptext (sLit "L_") @@ -269,10 +262,8 @@ argMachRep NonPtrArg = bWord argMachRep LongArg = b64 argMachRep FloatArg = f32 argMachRep DoubleArg = f64 -argMachRep VoidArg = panic "argMachRep:VoidRep" primRepToCgRep :: PrimRep -> CgRep -primRepToCgRep VoidRep = VoidArg primRepToCgRep PtrRep = PtrArg primRepToCgRep IntRep = NonPtrArg primRepToCgRep WordRep = NonPtrArg @@ -282,14 +273,14 @@ primRepToCgRep AddrRep = NonPtrArg primRepToCgRep FloatRep = FloatArg primRepToCgRep DoubleRep = DoubleArg -idCgRep :: Id -> CgRep +idCgRep :: Id -> [CgRep] idCgRep x = typeCgRep . idType $ x -tyConCgRep :: TyCon -> CgRep -tyConCgRep = primRepToCgRep . tyConPrimRep +tyConCgRep :: TyCon -> [CgRep] +tyConCgRep = map primRepToCgRep . tyConPrimRep -typeCgRep :: Type -> CgRep -typeCgRep = primRepToCgRep . typePrimRep +typeCgRep :: Type -> [CgRep] +typeCgRep = map primRepToCgRep . typePrimRep \end{code} Whether or not the thing is a pointer that the garbage-collector @@ -305,14 +296,6 @@ isFollowableArg :: CgRep -> Bool -- True <=> points to a heap object isFollowableArg PtrArg = True isFollowableArg _ = False -isVoidArg :: CgRep -> Bool -isVoidArg VoidArg = True -isVoidArg _ = False - -nonVoidArg :: CgRep -> Bool -nonVoidArg VoidArg = False -nonVoidArg _ = True - -- isFloatingArg is used to distinguish @Double@ and @Float@ which -- cause inadvertent numeric conversions if you aren't jolly careful. -- See codeGen/CgCon:cgTopRhsCon. @@ -343,13 +326,11 @@ separateByPtrFollowness things cgRepSizeB :: CgRep -> ByteOff cgRepSizeB DoubleArg = dOUBLE_SIZE cgRepSizeB LongArg = wORD64_SIZE -cgRepSizeB VoidArg = 0 cgRepSizeB _ = wORD_SIZE cgRepSizeW :: CgRep -> ByteOff cgRepSizeW DoubleArg = dOUBLE_SIZE `quot` wORD_SIZE cgRepSizeW LongArg = wORD64_SIZE `quot` wORD_SIZE -cgRepSizeW VoidArg = 0 cgRepSizeW _ = 1 retAddrSizeW :: WordOff @@ -404,7 +385,7 @@ mkSelectorLFInfo id offset updatable = LFThunk NotTopLevel False updatable (SelectorThunk offset) (might_be_a_function (idType id)) -mkApLFInfo :: Id -> UpdateFlag -> Int -> LambdaFormInfo +mkApLFInfo :: Id -> UpdateFlag -> Arity -> LambdaFormInfo mkApLFInfo id upd_flag arity = LFThunk NotTopLevel (arity == 0) (isUpdatable upd_flag) (ApThunk arity) (might_be_a_function (idType id)) @@ -413,17 +394,36 @@ mkApLFInfo id upd_flag arity Miscellaneous LF-infos. \begin{code} -mkLFArgument :: Id -> LambdaFormInfo -mkLFArgument id = LFUnknown (might_be_a_function (idType id)) +mkLFArgument :: Type -> [LambdaFormInfo] +mkLFArgument ty + | [] <- typePrimRep ty + = [] + | Just (tc, tys) <- splitTyConApp_maybe ty + , isUnboxedTupleTyCon tc + = concatMap mkLFArgument tys + | otherwise + = [LFUnknown (might_be_a_function ty)] mkLFLetNoEscape :: Int -> LambdaFormInfo mkLFLetNoEscape = LFLetNoEscape -mkLFImported :: Id -> LambdaFormInfo +-- Returns Nothing if the imported Id has void representation +mkLFImported :: Id -> Maybe LambdaFormInfo mkLFImported id - = case idArity id of - n | n > 0 -> LFReEntrant TopLevel n True (panic "arg_descr") -- n > 0 - _ -> mkLFArgument id -- Not sure of exact arity + | Just con <- isDataConWorkId_maybe id + , isNullaryRepDataCon con + = Just $ LFCon con -- An imported nullary constructor + -- We assume that the constructor is evaluated so that + -- the id really does point directly to the constructor + + | idArity id > 0 + = Just $ LFReEntrant TopLevel (idArity id) True (panic "arg_descr") -- n > 0 + + | otherwise + = case mkLFArgument (idType id) of + [] -> Nothing + [lf] -> Just lf -- Not sure of exact arity + _ -> pprPanic "mkLFImported: unboxed tuple import?" (ppr id) \end{code} \begin{code} @@ -634,13 +634,13 @@ data CallMethod | DirectEntry -- Jump directly, with args in regs CLabel -- The code label - Int -- Its arity + Arity -- Its arity getCallMethod :: DynFlags - -> Name -- Function being applied - -> CafInfo -- Can it refer to CAF's? - -> LambdaFormInfo -- Its info - -> Int -- Number of available arguments + -> Name -- Function being applied + -> CafInfo -- Can it refer to CAF's? + -> LambdaFormInfo -- Its info + -> Arity -- Number of available arguments, Nothing if thunk use (i.e. no StgArgs at all, not even a void one) -> CallMethod getCallMethod _ _ _ lf_info _ @@ -651,10 +651,13 @@ getCallMethod _ _ _ lf_info _ EnterIt getCallMethod _ name caf (LFReEntrant _ arity _ _) n_args - | n_args == 0 = ASSERT( arity /= 0 ) - ReturnIt -- No args at all - | n_args < arity = SlowCall -- Not enough args - | otherwise = DirectEntry (enterIdLabel name caf) arity + | n_args == 0 + = ASSERT( arity /= 0 ) + ReturnIt -- No args at all + | n_args < arity + = SlowCall -- Not enough args + | otherwise + = DirectEntry (enterIdLabel name caf) arity getCallMethod _ _ _ (LFCon con) n_args | opt_SccProfilingOn -- when profiling, we must always enter @@ -695,7 +698,7 @@ getCallMethod _ _ _ (LFUnknown True) _ = SlowCall -- Might be a function getCallMethod _ name _ (LFUnknown False) n_args - | n_args > 0 + | n_args > 0 = WARN( True, ppr name <+> ppr n_args ) SlowCall -- Note [Unsafe coerce complications] @@ -711,8 +714,10 @@ getCallMethod _ name _ (LFLetNoEscape 0) _ = JumpToIt (enterReturnPtLabel (nameUnique name)) getCallMethod _ name _ (LFLetNoEscape arity) n_args - | n_args == arity = DirectEntry (enterReturnPtLabel (nameUnique name)) arity - | otherwise = pprPanic "let-no-escape: " (ppr name <+> ppr arity) + | n_args == arity + = DirectEntry (enterReturnPtLabel (nameUnique name)) arity + | otherwise + = pprPanic "let-no-escape: " (ppr name <+> ppr arity) blackHoleOnEntry :: ClosureInfo -> Bool @@ -911,11 +916,11 @@ isConstrClosure_maybe :: ClosureInfo -> Maybe DataCon isConstrClosure_maybe (ConInfo { closureCon = data_con }) = Just data_con isConstrClosure_maybe _ = Nothing -closureFunInfo :: ClosureInfo -> Maybe (Int, ArgDescr) +closureFunInfo :: ClosureInfo -> Maybe (Arity, ArgDescr) closureFunInfo (ClosureInfo { closureLFInfo = lf_info }) = lfFunInfo lf_info closureFunInfo _ = Nothing -lfFunInfo :: LambdaFormInfo -> Maybe (Int, ArgDescr) +lfFunInfo :: LambdaFormInfo -> Maybe (Arity, ArgDescr) lfFunInfo (LFReEntrant _ arity _ arg_desc) = Just (arity, arg_desc) lfFunInfo _ = Nothing @@ -935,7 +940,7 @@ funTagLFInfo lf | otherwise = 0 -tagForArity :: Int -> Maybe Int +tagForArity :: Arity -> Maybe Int tagForArity i | i <= mAX_PTR_TAG = Just i | otherwise = Nothing diff --git a/compiler/codeGen/StgCmm.hs b/compiler/codeGen/StgCmm.hs index 7aa159844b..79e5c5d8af 100644 --- a/compiler/codeGen/StgCmm.hs +++ b/compiler/codeGen/StgCmm.hs @@ -253,7 +253,7 @@ cgDataCon data_con = do { let (tot_wds, -- #ptr_wds + #nonptr_wds ptr_wds, -- #ptr_wds - arg_things) = mkVirtConstrOffsets arg_reps + _) = mkVirtConstrOffsets arg_reps nonptr_wds = tot_wds - ptr_wds @@ -268,13 +268,13 @@ cgDataCon data_con = -- NB: We don't set CC when entering data (WDP 94/06) do { _ <- ticky_code ; ldvEnter (CmmReg nodeReg) - ; tickyReturnOldCon (length arg_things) + ; tickyReturnOldCon (length arg_reps) ; emitReturn [cmmOffsetB (CmmReg nodeReg) (tagForCon data_con)] } -- The case continuation code expects a tagged pointer - arg_reps :: [(PrimRep, Type)] - arg_reps = [(typePrimRep ty, ty) | ty <- dataConRepArgTys data_con] + arg_reps :: [(PrimRep, ())] + arg_reps = [(rep, ()) | ty <- dataConRepArgTys data_con, rep <- typePrimRep ty] -- Dynamic closure code for non-nullary constructors only ; whenC (not (isNullaryRepDataCon data_con)) diff --git a/compiler/codeGen/StgCmmBind.hs b/compiler/codeGen/StgCmmBind.hs index 9bf57b1cb4..11e8d9e712 100644 --- a/compiler/codeGen/StgCmmBind.hs +++ b/compiler/codeGen/StgCmmBind.hs @@ -44,6 +44,8 @@ import CLabel import StgSyn import CostCentre import Id +import Type ( PrimRep ) +import Control.Arrow ( second ) import Control.Monad import Name import Module @@ -53,7 +55,7 @@ import BasicTypes import Constants import Outputable import FastString -import Maybes +import MonadUtils ( concatMapM ) import DynFlags import StaticFlags @@ -89,12 +91,8 @@ cgTopRhsClosure id ccs _ upd_flag srt args body = do -- BUILD THE OBJECT, AND GENERATE INFO TABLE (IF NECESSARY) ; emitDataLits closure_label closure_rep - ; let fv_details :: [(NonVoid Id, VirtualHpOffset)] - (_, _, fv_details) = mkVirtHeapOffsets (isLFThunk lf_info) - (addIdReps []) - -- Don't drop the non-void args until the closure info has been made ; forkClosureBody (closureCodeBody True id closure_info ccs - (nonVoidIds args) (length args) body fv_details) + args body ([], [])) ; returnFC cg_id_info } @@ -162,14 +160,14 @@ cgRhs name (StgRhsCon cc con args) = buildDynCon name cc con args cgRhs name (StgRhsClosure cc bi fvs upd_flag srt args body) - = mkRhsClosure name cc bi (nonVoidIds fvs) upd_flag srt args body + = mkRhsClosure name cc bi fvs upd_flag srt args body ------------------------------------------------------------------------ -- Non-constructor right hand sides ------------------------------------------------------------------------ mkRhsClosure :: Id -> CostCentreStack -> StgBinderInfo - -> [NonVoid Id] -- Free vars + -> [Id] -- Free vars -> UpdateFlag -> SRT -> [Id] -- Args -> StgExpr @@ -212,7 +210,7 @@ for semi-obvious reasons. ---------- Note [Selectors] ------------------ mkRhsClosure bndr cc bi - [NonVoid the_fv] -- Just one free var + [the_fv] -- Just one free var upd_flag -- Updatable thunk _srt [] -- A thunk @@ -221,9 +219,11 @@ mkRhsClosure bndr cc bi (AlgAlt _) [(DataAlt _, params, _use_mask, (StgApp selectee [{-no args-}]))]) - | the_fv == scrutinee -- Scrutinee is the only free variable - && maybeToBool maybe_offset -- Selectee is a component of the tuple - && offset_into_int <= mAX_SPEC_SELECTEE_SIZE -- Offset is small enough + | the_fv == scrutinee -- Scrutinee is the only free variable + , [_] <- idPrimRep selectee -- Selectee is unary (so guaranteed contiguous layout) + , Just the_offset <- maybe_offset -- Selectee is a component of the tuple + , let offset_into_int = the_offset - fixedHdrSize + , offset_into_int <= mAX_SPEC_SELECTEE_SIZE -- Offset is small enough = -- NOT TRUE: ASSERT(is_single_constructor) -- The simplifier may have statically determined that the single alternative -- is the only possible case and eliminated the others, even if there are @@ -232,15 +232,12 @@ mkRhsClosure bndr cc bi -- will evaluate to. -- -- srt is discarded; it must be empty - cgStdThunk bndr cc bi body lf_info [StgVarArg the_fv] + cgStdThunk bndr cc bi body (mkSelectorLFInfo bndr offset_into_int (isUpdatable upd_flag)) + [StgVarArg the_fv] where - lf_info = mkSelectorLFInfo bndr offset_into_int - (isUpdatable upd_flag) - (_, _, params_w_offsets) = mkVirtConstrOffsets (addIdReps params) - -- Just want the layout - maybe_offset = assocMaybe params_w_offsets (NonVoid selectee) - Just the_offset = maybe_offset - offset_into_int = the_offset - fixedHdrSize + (_, _, params_w_offsets) = mkVirtConstrOffsets [(rep, param) | param <- params, rep <- idPrimRep param] + -- Just want the offset of the first and only PrimRep belonging to this Id + maybe_offset = assocMaybe params_w_offsets selectee ---------- Note [Ap thunks] ------------------ mkRhsClosure bndr cc bi @@ -251,7 +248,7 @@ mkRhsClosure bndr cc bi body@(StgApp fun_id args) | args `lengthIs` (arity-1) - && all (isGcPtrRep . idPrimRep . stripNV) fvs + && all (\fv -> case idPrimRep fv of [rep] -> isGcPtrRep rep; _ -> False) fvs && isUpdatable upd_flag && arity <= mAX_SPEC_AP_SIZE && not opt_SccProfilingOn -- not when profiling: we don't want to @@ -279,8 +276,8 @@ mkRhsClosure bndr cc _ fvs upd_flag srt args body -- Node points to it... ; let is_elem = isIn "cgRhsClosure" - bndr_is_a_fv = (NonVoid bndr) `is_elem` fvs - reduced_fvs | bndr_is_a_fv = fvs `minusList` [NonVoid bndr] + bndr_is_a_fv = bndr `is_elem` fvs + reduced_fvs | bndr_is_a_fv = filter (/= bndr) fvs | otherwise = fvs @@ -288,12 +285,13 @@ mkRhsClosure bndr cc _ fvs upd_flag srt args body ; lf_info <- mkClosureLFInfo bndr NotTopLevel fvs upd_flag args ; mod_name <- getModuleName ; c_srt <- getSRTInfo srt + ; fvs_regss <- idsToRegs reduced_fvs ; let name = idName bndr descr = closureDescription mod_name name - fv_details :: [(NonVoid Id, VirtualHpOffset)] - (tot_wds, ptr_wds, fv_details) + regs_offsets :: [(LocalReg, VirtualHpOffset)] + (tot_wds, ptr_wds, regs_offsets) = mkVirtHeapOffsets (isLFThunk lf_info) - (addIdReps (map stripNV reduced_fvs)) + (concatMap snd fvs_regss) closure_info = mkClosureInfo False -- Not static bndr lf_info tot_wds ptr_wds c_srt descr @@ -303,24 +301,24 @@ mkRhsClosure bndr cc _ fvs upd_flag srt args body -- forkClosureBody: (a) ensure that bindings in here are not seen elsewhere -- (b) ignore Sequel from context; use empty Sequel -- And compile the body - closureCodeBody False bndr closure_info cc (nonVoidIds args) - (length args) body fv_details + closureCodeBody False bndr closure_info cc args body (map (second (map snd)) fvs_regss, regs_offsets) -- BUILD THE OBJECT -- ; (use_cc, blame_cc) <- chooseDynCostCentres cc args body ; let use_cc = curCCS; blame_cc = curCCS ; emit (mkComment $ mkFastString "calling allocDynClosure") - ; let toVarArg (NonVoid a, off) = (NonVoid (StgVarArg a), off) ; let info_tbl = mkCmmInfo closure_info - ; (tmp, init) <- allocDynClosure info_tbl lf_info use_cc blame_cc - (map toVarArg fv_details) + ; fvs_exprs <- concatMapM (liftM idInfoToAmodes . getCgIdInfo) reduced_fvs + ; (tmp, init) <- allocDynClosureCmm info_tbl lf_info use_cc blame_cc + (zipWithEqual "mkRhsClosure" (\expr (_, offset) -> (expr, offset)) fvs_exprs regs_offsets) -- RETURN ; regIdInfo bndr lf_info tmp init } --- Use with care; if used inappropriately, it could break invariants. -stripNV :: NonVoid a -> a -stripNV (NonVoid a) = a +idsToRegs :: [Id] -> FCode [(Id, [(PrimRep, LocalReg)])] +idsToRegs ids = forM ids $ \id -> do + regs <- idToReg id + return (id, zipEqual "idsToRegs" (idPrimRep id) regs) ------------------------- cgStdThunk @@ -336,8 +334,9 @@ cgStdThunk bndr _cc _bndr_info _body lf_info payload = do -- AHA! A STANDARD-FORM THUNK { -- LAY OUT THE OBJECT mod_name <- getModuleName + ; payload_reps <- concatMapM addArgReps payload ; let (tot_wds, ptr_wds, payload_w_offsets) - = mkVirtHeapOffsets (isLFThunk lf_info) (addArgReps payload) + = mkVirtHeapOffsets (isLFThunk lf_info) payload_reps descr = closureDescription mod_name (idName bndr) closure_info = mkClosureInfo False -- Not static @@ -350,22 +349,22 @@ cgStdThunk bndr _cc _bndr_info _body lf_info payload -- BUILD THE OBJECT ; let info_tbl = mkCmmInfo closure_info - ; (tmp, init) <- allocDynClosure info_tbl lf_info - use_cc blame_cc payload_w_offsets + ; (tmp, init) <- allocDynClosureCmm info_tbl lf_info + use_cc blame_cc payload_w_offsets -- RETURN ; regIdInfo bndr lf_info tmp init } mkClosureLFInfo :: Id -- The binder -> TopLevelFlag -- True of top level - -> [NonVoid Id] -- Free vars + -> [Id] -- Free vars -> UpdateFlag -- Update flag -> [Id] -- Args -> FCode LambdaFormInfo mkClosureLFInfo bndr top fvs upd_flag args - | null args = return (mkLFThunk (idType bndr) top (map stripNV fvs) upd_flag) + | null args = return (mkLFThunk (idType bndr) top fvs upd_flag) | otherwise = do { arg_descr <- mkArgDescr (idName bndr) args - ; return (mkLFReEntrant top (map stripNV fvs) args arg_descr) } + ; return (mkLFReEntrant top fvs args arg_descr) } ------------------------------------------------------------------------ @@ -376,10 +375,9 @@ closureCodeBody :: Bool -- whether this is a top-level binding -> Id -- the closure's name -> ClosureInfo -- Lots of information about this closure -> CostCentreStack -- Optional cost centre attached to closure - -> [NonVoid Id] -- incoming args to the closure - -> Int -- arity, including void args + -> [Id] -- incoming args to the closure -> StgExpr - -> [(NonVoid Id, VirtualHpOffset)] -- the closure's free vars + -> ([(Id, [LocalReg])], [(LocalReg, VirtualHpOffset)]) -- the closure's free vars -> FCode () {- There are two main cases for the code for closures. @@ -395,15 +393,15 @@ closureCodeBody :: Bool -- whether this is a top-level binding argSatisfactionCheck (by calling fetchAndReschedule). There info if Node points to closure is available. -- HWL -} -closureCodeBody top_lvl bndr cl_info cc args arity body fv_details - | length args == 0 -- No args i.e. thunk +closureCodeBody top_lvl bndr cl_info cc args body fv_details + | null args -- No args i.e. thunk = emitClosureProcAndInfoTable top_lvl bndr lf_info info_tbl [] $ - \(_, node, _) -> thunkCode cl_info fv_details cc node arity body + \(_, node, _) -> thunkCode cl_info fv_details cc node body where lf_info = closureLFInfo cl_info info_tbl = mkCmmInfo cl_info -closureCodeBody top_lvl bndr cl_info _cc args arity body fv_details +closureCodeBody top_lvl bndr cl_info _cc args body (fv_regs, regs_offsets) = ASSERT( length args > 0 ) do { -- Allocate the global ticky counter, -- and establish the ticky-counter @@ -411,7 +409,7 @@ closureCodeBody top_lvl bndr cl_info _cc args arity body fv_details ; dflags <- getDynFlags ; let platform = targetPlatform dflags ticky_ctr_lbl = closureRednCountsLabel platform cl_info - ; emitTickyCounter cl_info (map stripNV args) + ; emitTickyCounter cl_info args ; setTickyCtrLabel ticky_ctr_lbl $ do ; let @@ -432,20 +430,17 @@ closureCodeBody top_lvl bndr cl_info _cc args arity body fv_details ; granYield arg_regs node_points -- Main payload - ; entryHeapCheck cl_info offset node' arity arg_regs $ do - { fv_bindings <- mapM bind_fv fv_details + ; entryHeapCheck cl_info offset node' False{- not a thunk -} arg_regs $ do + { -- A function closure pointer may be tagged, so we + -- must take it into account when accessing the free variables. + ; mapM_ (uncurry rebindToReg) fv_regs -- Load free vars out of closure *after* -- heap check, to reduce live vars over check - ; if node_points then load_fvs node lf_info fv_bindings + ; if node_points then load_fvs node lf_info regs_offsets else return () ; cgExpr body }} } --- A function closure pointer may be tagged, so we --- must take it into account when accessing the free variables. -bind_fv :: (NonVoid Id, VirtualHpOffset) -> FCode (LocalReg, WordOff) -bind_fv (id, off) = do { reg <- rebindToReg id; return (reg, off) } - load_fvs :: LocalReg -> LambdaFormInfo -> [(LocalReg, WordOff)] -> FCode () load_fvs node lf_info = mapCs (\ (reg, off) -> emit $ mkTaggedObjectLoad reg node off tag) @@ -479,9 +474,9 @@ mkSlowEntryCode cl_info arg_regs -- function closure is already in `Node' | otherwise = return () ----------------------------------------- -thunkCode :: ClosureInfo -> [(NonVoid Id, VirtualHpOffset)] -> CostCentreStack - -> LocalReg -> Int -> StgExpr -> FCode () -thunkCode cl_info fv_details _cc node arity body +thunkCode :: ClosureInfo -> ([(Id, [LocalReg])], [(LocalReg, VirtualHpOffset)]) -> CostCentreStack + -> LocalReg -> StgExpr -> FCode () +thunkCode cl_info (fv_regs, regs_offsets) _cc node body = do { let node_points = nodeMustPointToIt (closureLFInfo cl_info) node' = if node_points then Just node else Nothing ; tickyEnterThunk cl_info @@ -489,7 +484,7 @@ thunkCode cl_info fv_details _cc node arity body ; granThunk node_points -- Heap overflow check - ; entryHeapCheck cl_info 0 node' arity [] $ do + ; entryHeapCheck cl_info 0 node' True{- Is a thunk -} [] $ do { -- Overwrite with black hole if necessary -- but *after* the heap-overflow check ; whenC (blackHoleOnEntry cl_info && node_points) @@ -503,8 +498,8 @@ thunkCode cl_info fv_details _cc node arity body -- subsumed by this enclosing cc do { enterCostCentreThunk (CmmReg nodeReg) ; let lf_info = closureLFInfo cl_info - ; fv_bindings <- mapM bind_fv fv_details - ; load_fvs node lf_info fv_bindings + ; mapM_ (uncurry rebindToReg) fv_regs + ; load_fvs node lf_info regs_offsets ; cgExpr body }}} diff --git a/compiler/codeGen/StgCmmClosure.hs b/compiler/codeGen/StgCmmClosure.hs index 5c0741a65e..f15b5a60fe 100644 --- a/compiler/codeGen/StgCmmClosure.hs +++ b/compiler/codeGen/StgCmmClosure.hs @@ -21,7 +21,7 @@ module StgCmmClosure ( DynTag, tagForCon, isSmallFamily, ConTagZ, dataConTagZ, - isVoidRep, isGcPtrRep, addIdReps, addArgReps, + isGcPtrRep, addIdReps, argPrimRep, -- * LambdaFormInfo @@ -97,19 +97,12 @@ import DynFlags -- Why are these here? -addIdReps :: [Id] -> [(PrimRep, Id)] +addIdReps :: [Id] -> [([PrimRep], Id)] addIdReps ids = [(idPrimRep id, id) | id <- ids] -addArgReps :: [StgArg] -> [(PrimRep, StgArg)] -addArgReps args = [(argPrimRep arg, arg) | arg <- args] - -argPrimRep :: StgArg -> PrimRep +argPrimRep :: StgArg -> [PrimRep] argPrimRep arg = typePrimRep (stgArgType arg) -isVoidRep :: PrimRep -> Bool -isVoidRep VoidRep = True -isVoidRep _other = False - isGcPtrRep :: PrimRep -> Bool isGcPtrRep PtrRep = True isGcPtrRep _ = False @@ -127,7 +120,7 @@ isGcPtrRep _ = False data LambdaFormInfo = LFReEntrant -- Reentrant closure (a function) TopLevelFlag -- True if top level - !Int -- Arity. Invariant: always > 0 + !Arity -- Arity. INVARIANT: > 0 !Bool -- True <=> no fvs ArgDescr -- Argument descriptor (should really be in ClosureInfo) @@ -188,20 +181,20 @@ data StandardFormInfo -- The code for the thunk just pushes x2..xn on the stack and enters x1. -- There are a few of these (for 1 <= n <= MAX_SPEC_AP_SIZE) pre-compiled -- in the RTS to save space. - Int -- Arity, n + Arity -- Arity, n ------------------------------------------------------ -- Building LambdaFormInfo ------------------------------------------------------ -mkLFArgument :: Id -> LambdaFormInfo -mkLFArgument id - | isUnLiftedType ty = LFUnLifted - | might_be_a_function ty = LFUnknown True - | otherwise = LFUnknown False - where - ty = idType id +mkLFArgument :: Type -> [LambdaFormInfo] +mkLFArgument ty + | [] <- typePrimRep ty = [] + | Just (tc, tys) <- splitTyConApp_maybe ty + , isUnboxedTupleTyCon tc = concatMap mkLFArgument tys + | isUnLiftedType ty = [LFUnLifted] + | otherwise = [LFUnknown (might_be_a_function ty)] ------------- mkLFLetNoEscape :: LambdaFormInfo @@ -252,21 +245,24 @@ mkApLFInfo id upd_flag arity (might_be_a_function (idType id)) ------------- -mkLFImported :: Id -> LambdaFormInfo + +-- Returns Nothing info for an Id with Void representation +mkLFImported :: Id -> Maybe LambdaFormInfo mkLFImported id | Just con <- isDataConWorkId_maybe id , isNullaryRepDataCon con - = LFCon con -- An imported nullary constructor + = Just $ LFCon con -- An imported nullary constructor -- We assume that the constructor is evaluated so that -- the id really does point directly to the constructor - | arity > 0 - = LFReEntrant TopLevel arity True (panic "arg_descr") + | idArity id > 0 + = Just $ LFReEntrant TopLevel (idArity id) True (panic "arg_descr") | otherwise - = mkLFArgument id -- Not sure of exact arity - where - arity = idArity id + = case mkLFArgument (idType id) of + [] -> Nothing + [lf] -> Just lf -- Not sure of exact arity + _ -> pprPanic "mkLFImported: unboxed-tuple import?" (ppr id) ------------ mkLFBlackHole :: LambdaFormInfo @@ -309,7 +305,7 @@ tagForCon con con_tag = dataConTagZ con fam_size = tyConFamilySize (dataConTyCon con) -tagForArity :: Int -> DynTag +tagForArity :: Arity -> DynTag tagForArity arity | isSmallFamily arity = arity | otherwise = 0 @@ -458,13 +454,13 @@ data CallMethod | DirectEntry -- Jump directly, with args in regs CLabel -- The code label - Int -- Its arity + Arity -- Its arity getCallMethod :: DynFlags - -> Name -- Function being applied - -> CafInfo -- Can it refer to CAF's? - -> LambdaFormInfo -- Its info - -> Int -- Number of available arguments + -> Name -- Function being applied + -> CafInfo -- Can it refer to CAF's? + -> LambdaFormInfo -- Its info + -> Arity -- Number of available arguments -> CallMethod getCallMethod _ _name _ lf_info _n_args @@ -475,10 +471,12 @@ getCallMethod _ _name _ lf_info _n_args EnterIt getCallMethod _ name caf (LFReEntrant _ arity _ _) n_args - | n_args == 0 = ASSERT( arity /= 0 ) - ReturnIt -- No args at all - | n_args < arity = SlowCall -- Not enough args - | otherwise = DirectEntry (enterIdLabel name caf) arity + | n_args == 0 + = ReturnIt -- No args at all + | n_args < arity + = SlowCall -- Not enough args + | otherwise + = DirectEntry (enterIdLabel name caf) arity getCallMethod _ _name _ LFUnLifted n_args = ASSERT( n_args == 0 ) ReturnIt @@ -513,8 +511,8 @@ getCallMethod dflags name caf (LFThunk _ _ updatable std_form_info is_fun) n_arg getCallMethod _ _name _ (LFUnknown True) _n_args = SlowCall -- might be a function -getCallMethod _ name _ (LFUnknown False) n_args - = ASSERT2 ( n_args == 0, ppr name <+> ppr n_args ) +getCallMethod _ name _ (LFUnknown False) _n_args + = ASSERT2 ( _n_args == 0, ppr name <+> ppr _n_args ) EnterIt -- Not a function getCallMethod _ _name _ LFBlackHole _n_args @@ -744,10 +742,10 @@ closureReEntrant :: ClosureInfo -> Bool closureReEntrant (ClosureInfo { closureLFInfo = LFReEntrant _ _ _ _ }) = True closureReEntrant _ = False -closureFunInfo :: ClosureInfo -> Maybe (Int, ArgDescr) +closureFunInfo :: ClosureInfo -> Maybe (Arity, ArgDescr) closureFunInfo (ClosureInfo { closureLFInfo = lf_info }) = lfFunInfo lf_info -lfFunInfo :: LambdaFormInfo -> Maybe (Int, ArgDescr) +lfFunInfo :: LambdaFormInfo -> Maybe (Arity, ArgDescr) lfFunInfo (LFReEntrant _ arity _ arg_desc) = Just (arity, arg_desc) lfFunInfo _ = Nothing diff --git a/compiler/codeGen/StgCmmCon.hs b/compiler/codeGen/StgCmmCon.hs index e17ac4fd32..25b9b4c975 100644 --- a/compiler/codeGen/StgCmmCon.hs +++ b/compiler/codeGen/StgCmmCon.hs @@ -41,7 +41,8 @@ import PrelInfo import Outputable import Platform import StaticFlags -import Util ( lengthIs ) +import MonadUtils +import Util ( lengthIs, zipEqual ) import Control.Monad import Data.Char @@ -65,6 +66,7 @@ cgTopRhsCon id con args ; ASSERT( args `lengthIs` dataConRepArity con ) return () -- LAY IT OUT + ; args_reps <- concatMapM addArgReps args ; let name = idName id caffy = idCafInfo id -- any stgArgHasCafRefs args @@ -72,7 +74,7 @@ cgTopRhsCon id con args (tot_wds, -- #ptr_wds + #nonptr_wds ptr_wds, -- #ptr_wds - nv_args_w_offsets) = mkVirtConstrOffsets (addArgReps args) + nv_args_w_offsets) = mkVirtConstrOffsets args_reps nonptr_wds = tot_wds - ptr_wds @@ -81,14 +83,13 @@ cgTopRhsCon id con args -- needs to poke around inside it. info_tbl = mkDataConInfoTable con True ptr_wds nonptr_wds - get_lit (arg, _offset) = do { CmmLit lit <- getArgAmode arg - ; return lit } - - ; payload <- mapM get_lit nv_args_w_offsets + payload = flip map nv_args_w_offsets $ \(cmm, _offset) -> case cmm of + CmmLit lit -> lit + _ -> panic "cgTopRhsCon" -- NB1: nv_args_w_offsets is sorted into ptrs then non-ptrs -- NB2: all the amodes should be Lits! - ; let closure_rep = mkStaticClosureFields + closure_rep = mkStaticClosureFields info_tbl dontCareCCS -- Because it's static data caffy -- Has CAF refs @@ -204,13 +205,14 @@ buildDynCon' platform binder _cc con [arg] -------- buildDynCon': the general case ----------- buildDynCon' _ binder ccs con args - = do { let (tot_wds, ptr_wds, args_w_offsets) - = mkVirtConstrOffsets (addArgReps args) + = do { args_reps <- concatMapM addArgReps args + ; let (tot_wds, ptr_wds, args_w_offsets) + = mkVirtConstrOffsets args_reps -- No void args in args_w_offsets nonptr_wds = tot_wds - ptr_wds info_tbl = mkDataConInfoTable con False ptr_wds nonptr_wds - ; (tmp, init) <- allocDynClosure info_tbl lf_info - use_cc blame_cc args_w_offsets + ; (tmp, init) <- allocDynClosureCmm info_tbl lf_info + use_cc blame_cc args_w_offsets ; regIdInfo binder lf_info tmp init } where lf_info = mkConLFInfo con @@ -233,18 +235,19 @@ bindConArgs :: AltCon -> LocalReg -> [Id] -> FCode [LocalReg] -- found a con bindConArgs (DataAlt con) base args = ASSERT(not (isUnboxedTupleCon con)) - mapM bind_arg args_w_offsets + do { args_regs <- mapM (\id -> liftM ((,) id) $ idToReg id) args + ; let (_, _, regs_w_offsets) = mkVirtConstrOffsets [it | (arg, regs) <- args_regs, it <- zipEqual "bindConArgs" (idPrimRep arg) regs] + ; mapM_ initialise_reg regs_w_offsets + -- The binding below forces the masking out of the tag bits + -- when accessing the constructor field. + ; mapM_ (uncurry bindArgToReg) args_regs + ; return (concatMap snd args_regs) } where - (_, _, args_w_offsets) = mkVirtConstrOffsets (addIdReps args) - tag = tagForCon con - -- The binding below forces the masking out of the tag bits - -- when accessing the constructor field. - bind_arg :: (NonVoid Id, VirtualHpOffset) -> FCode LocalReg - bind_arg (arg, offset) - = do { emit $ mkTaggedObjectLoad (idToReg arg) base offset tag - ; bindArgToReg arg } + initialise_reg :: (LocalReg, VirtualHpOffset) -> FCode () + initialise_reg (reg, offset) + = emit $ mkTaggedObjectLoad reg base offset tag bindConArgs _other_con _base args = ASSERT( null args ) return [] diff --git a/compiler/codeGen/StgCmmEnv.hs b/compiler/codeGen/StgCmmEnv.hs index d8a7061eec..5a159c4a35 100644 --- a/compiler/codeGen/StgCmmEnv.hs +++ b/compiler/codeGen/StgCmmEnv.hs @@ -16,25 +16,23 @@ module StgCmmEnv ( CgIdInfo, - cgIdInfoId, cgIdInfoLF, + cgIdInfoId, cgIdInfoElems, cgIdInfoSingleElem, + cgIdElemInfoLF, litIdInfo, lneIdInfo, regIdInfo, - idInfoToAmode, - - NonVoid(..), isVoidId, nonVoidIds, + idInfoToAmodes, idElemInfoToAmode, addBindC, addBindsC, bindArgsToRegs, bindToReg, rebindToReg, bindArgToReg, idToReg, - getArgAmode, getNonVoidArgAmodes, + addArgReps, getArgAmodes, getCgIdInfo, maybeLetNoEscape, ) where #include "HsVersions.h" -import TyCon import StgCmmMonad import StgCmmUtils import StgCmmClosure @@ -45,7 +43,9 @@ import BlockId import CmmExpr import CmmUtils import MkGraph (CmmAGraph, mkAssign, (<*>)) +import UniqSupply (uniqsFromSupply) import FastString +import Type (PrimRep) import Id import VarEnv import Control.Monad @@ -53,48 +53,43 @@ import Name import StgSyn import DynFlags import Outputable - -------------------------------------- --- Non-void types -------------------------------------- --- We frequently need the invariant that an Id or a an argument --- is of a non-void type. This type is a witness to the invariant. - -newtype NonVoid a = NonVoid a - deriving (Eq, Show) - -instance (Outputable a) => Outputable (NonVoid a) where - ppr (NonVoid a) = ppr a - -isVoidId :: Id -> Bool -isVoidId = isVoidRep . idPrimRep - -nonVoidIds :: [Id] -> [NonVoid Id] -nonVoidIds ids = [NonVoid id | id <- ids, not (isVoidRep (idPrimRep id))] +import Util ------------------------------------- -- Manipulating CgIdInfo ------------------------------------- +mkCgIdElemInfo :: LambdaFormInfo -> CmmExpr -> CgIdElemInfo +mkCgIdElemInfo lf expr + = CgIdElemInfo { cg_lf = lf + , cg_loc = CmmLoc expr, + cg_tag = lfDynTag lf } + mkCgIdInfo :: Id -> LambdaFormInfo -> CmmExpr -> CgIdInfo mkCgIdInfo id lf expr - = CgIdInfo { cg_id = id, cg_lf = lf - , cg_loc = CmmLoc expr, - cg_tag = lfDynTag lf } + = CgIdInfo { cg_id = id + , cg_elems = [mkCgIdElemInfo lf expr] + } +-- Used for building info for external names (which are always lifted) +-- and closures/constructors (which are always represented as a single pointer) litIdInfo :: Id -> LambdaFormInfo -> CmmLit -> CgIdInfo litIdInfo id lf lit - = CgIdInfo { cg_id = id, cg_lf = lf - , cg_loc = CmmLoc (addDynTag (CmmLit lit) tag) - , cg_tag = tag } + = CgIdInfo { cg_id = id + , cg_elems = [CgIdElemInfo { cg_lf = lf + , cg_loc = CmmLoc (addDynTag (CmmLit lit) tag) + , cg_tag = tag }] + } where tag = lfDynTag lf lneIdInfo :: Id -> [LocalReg] -> CgIdInfo lneIdInfo id regs - = CgIdInfo { cg_id = id, cg_lf = lf - , cg_loc = LneLoc blk_id regs - , cg_tag = lfDynTag lf } + = CgIdInfo { cg_id = id + , cg_elems = [CgIdElemInfo { cg_lf = lf + , cg_loc = LneLoc blk_id regs + , cg_tag = lfDynTag lf }] + } where lf = mkLFLetNoEscape blk_id = mkBlockId (idUnique id) @@ -105,18 +100,21 @@ lneIdInfo id regs -- a new register in order to keep single-assignment and help out the -- inliner. -- EZY regIdInfo :: Id -> LambdaFormInfo -> LocalReg -> CmmAGraph -> FCode (CgIdInfo, CmmAGraph) -regIdInfo id lf_info reg init +regIdInfo id lf_info reg init = do { reg' <- newTemp (localRegType reg) ; let init' = init <*> mkAssign (CmmLocal reg') (addDynTag (CmmReg (CmmLocal reg)) (lfDynTag lf_info)) ; return (mkCgIdInfo id lf_info (CmmReg (CmmLocal reg')), init') } -idInfoToAmode :: CgIdInfo -> CmmExpr +idElemInfoToAmode :: CgIdElemInfo -> CmmExpr -- Returns a CmmExpr for the *tagged* pointer -idInfoToAmode (CgIdInfo { cg_loc = CmmLoc e }) = e -idInfoToAmode cg_info - = pprPanic "idInfoToAmode" (ppr (cg_id cg_info)) -- LneLoc +idElemInfoToAmode (CgIdElemInfo { cg_loc = CmmLoc e }) = e +idElemInfoToAmode _cg_info + = panic "idElemInfoToAmode: LneLoc" + +idInfoToAmodes :: CgIdInfo -> [CmmExpr] +idInfoToAmodes = map idElemInfoToAmode . cg_elems addDynTag :: CmmExpr -> DynTag -> CmmExpr -- A tag adds a byte offset to the pointer @@ -125,12 +123,21 @@ addDynTag expr tag = cmmOffsetB expr tag cgIdInfoId :: CgIdInfo -> Id cgIdInfoId = cg_id -cgIdInfoLF :: CgIdInfo -> LambdaFormInfo -cgIdInfoLF = cg_lf +cgIdInfoElems :: CgIdInfo -> [CgIdElemInfo] +cgIdInfoElems = cg_elems -maybeLetNoEscape :: CgIdInfo -> Maybe (BlockId, [LocalReg]) -maybeLetNoEscape (CgIdInfo { cg_loc = LneLoc blk_id args}) = Just (blk_id, args) -maybeLetNoEscape _other = Nothing +-- Used for where the caller knows there will only be one alternative (commonly +-- because it knows the info is for a thunk, closure or some data) +cgIdInfoSingleElem :: CgIdInfo -> CgIdElemInfo +cgIdInfoSingleElem (CgIdInfo { cg_elems = [elem_info] }) = elem_info +cgIdInfoSingleElem _ = panic "cgIdInfoSingleElem" + +cgIdElemInfoLF :: CgIdElemInfo -> LambdaFormInfo +cgIdElemInfoLF = cg_lf + +maybeLetNoEscape :: CgIdElemInfo -> Maybe (BlockId, [LocalReg]) +maybeLetNoEscape (CgIdElemInfo { cg_loc = LneLoc blk_id args}) = Just (blk_id, args) +maybeLetNoEscape _other = Nothing @@ -141,6 +148,18 @@ maybeLetNoEscape _other = Nothing -- modifying(modifyBindC) and looking up (getCgIdInfo) bindings. --------------------------------------------------------- +-- Note [CgIdInfo knot] +-- ~~~~~~~~~~~~~~~~~~~~ +-- +-- We can't be too strict in the CgIdInfo, because in e.g. letrecs the CgIdInfo +-- is knot-tied. A loop I build in practice was +-- cgExpr LetRec -> cgRhs StgRhsCon -> buildDynCon' +-- from code like (let xs = (:) y xs in xs) because we fixpoint the CgIdInfo for +-- xs and buildDynCon' is strict in the length of the CgIdElemInfo list. +-- +-- To work around this we try to be yield the length of the CgIdInfo element list +-- lazily by lazily zipping it with the idCgReps. + addBindC :: Id -> CgIdInfo -> FCode () addBindC name stuff_to_bind = do binds <- getBinds @@ -154,9 +173,16 @@ addBindsC new_bindings = do new_bindings setBinds new_binds +-- See: Note [CgIdInfo knot] +etaCgIdInfo :: Id -> CgIdInfo -> CgIdInfo +etaCgIdInfo id ~(CgIdInfo { cg_id = lazy_id, cg_elems = elems }) + = CgIdInfo { cg_id = lazy_id + , cg_elems = zipLazyWith (showPpr (id, idPrimRep id, length elems)) (\_ elem -> elem) (idPrimRep id) elems } + getCgIdInfo :: Id -> FCode CgIdInfo getCgIdInfo id - = do { -- Try local bindings first + = liftM (etaCgIdInfo id) $ + do { -- Try local bindings first ; local_binds <- getBinds ; case lookupVarEnv local_binds id of { Just info -> return info ; @@ -173,8 +199,11 @@ getCgIdInfo id name = idName id in if isExternalName name then do - let ext_lbl = CmmLabel (mkClosureLabel name $ idCafInfo id) - return (litIdInfo id (mkLFImported id) ext_lbl) + { let ext_lbl = CmmLabel (mkClosureLabel name $ idCafInfo id) + ; return $ case mkLFImported id of + Just lf_info -> litIdInfo id lf_info ext_lbl + Nothing -> CgIdInfo id [] } + else -- Bug cgLookupPanic id @@ -197,48 +226,41 @@ cgLookupPanic id -------------------- -getArgAmode :: NonVoid StgArg -> FCode CmmExpr -getArgAmode (NonVoid (StgVarArg var)) = - do { info <- getCgIdInfo var; return (idInfoToAmode info) } -getArgAmode (NonVoid (StgLitArg lit)) = liftM CmmLit $ cgLit lit -getArgAmode (NonVoid (StgTypeArg _)) = panic "getArgAmode: type arg" - -getNonVoidArgAmodes :: [StgArg] -> FCode [CmmExpr] --- NB: Filters out void args, --- so the result list may be shorter than the argument list -getNonVoidArgAmodes [] = return [] -getNonVoidArgAmodes (arg:args) - | isVoidRep (argPrimRep arg) = getNonVoidArgAmodes args - | otherwise = do { amode <- getArgAmode (NonVoid arg) - ; amodes <- getNonVoidArgAmodes args - ; return ( amode : amodes ) } +getArgAmodes :: StgArg -> FCode [CmmExpr] +getArgAmodes (StgVarArg var) = + do { info <- getCgIdInfo var; return (idInfoToAmodes info) } +getArgAmodes (StgLitArg lit) = liftM (return . CmmLit) $ cgLit lit +getArgAmodes (StgTypeArg _) = return [] +addArgReps :: StgArg -> FCode [(PrimRep, CmmExpr)] +addArgReps arg = do + exprs <- getArgAmodes arg + return (zipEqual "addArgReps" (argPrimRep arg) exprs) ------------------------------------------------------------------------ -- Interface functions for binding and re-binding names ------------------------------------------------------------------------ -bindToReg :: NonVoid Id -> LambdaFormInfo -> FCode LocalReg +bindToReg :: Id -> [(LocalReg, LambdaFormInfo)] -> FCode () -- Bind an Id to a fresh LocalReg -bindToReg nvid@(NonVoid id) lf_info - = do { let reg = idToReg nvid - ; addBindC id (mkCgIdInfo id lf_info (CmmReg (CmmLocal reg))) - ; return reg } +bindToReg id regs_lf_infos + = do { addBindC id (CgIdInfo { cg_id = id + , cg_elems = map (\(reg, lf_info) -> mkCgIdElemInfo lf_info (CmmReg (CmmLocal reg))) regs_lf_infos }) } -rebindToReg :: NonVoid Id -> FCode LocalReg +rebindToReg :: Id -> [LocalReg] -> FCode () -- Like bindToReg, but the Id is already in scope, so -- get its LF info from the envt -rebindToReg nvid@(NonVoid id) +rebindToReg id regs = do { info <- getCgIdInfo id - ; bindToReg nvid (cgIdInfoLF info) } + ; bindToReg id (zipEqual "rebindToReg" regs (map cgIdElemInfoLF (cg_elems info))) } -bindArgToReg :: NonVoid Id -> FCode LocalReg -bindArgToReg nvid@(NonVoid id) = bindToReg nvid (mkLFArgument id) +bindArgToReg :: Id -> [LocalReg] -> FCode () +bindArgToReg id regs = bindToReg id (zipEqual "bindArgToReg" regs (mkLFArgument (idType id))) -bindArgsToRegs :: [NonVoid Id] -> FCode [LocalReg] -bindArgsToRegs args = mapM bindArgToReg args +bindArgsToRegs :: [(Id, [LocalReg])] -> FCode () +bindArgsToRegs args = mapM_ (uncurry bindArgToReg) args -idToReg :: NonVoid Id -> LocalReg +idToReg :: Id -> FCode [LocalReg] -- Make a register from an Id, typically a function argument, -- free variable, or case binder -- @@ -246,8 +268,6 @@ idToReg :: NonVoid Id -> LocalReg -- -- By now the Ids should be uniquely named; else one would worry -- about accidental collision -idToReg (NonVoid id) = LocalReg (idUnique id) - (case idPrimRep id of VoidRep -> pprPanic "idToReg" (ppr id) - _ -> primRepCmmType (idPrimRep id)) - - +idToReg id = do + us <- newUniqSupply + return $ zipWith LocalReg (idUnique id : uniqsFromSupply us) (map primRepCmmType (idPrimRep id)) diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs index 5ea935984d..2dd254a734 100644 --- a/compiler/codeGen/StgCmmExpr.hs +++ b/compiler/codeGen/StgCmmExpr.hs @@ -45,8 +45,9 @@ import PrimOp import TyCon import Type import CostCentre ( CostCentreStack, currentCCS ) -import Control.Monad (when) +import Control.Monad (when, zipWithM_) import Maybes +import MonadUtils (concatMapM) import Util import FastString import Outputable @@ -129,7 +130,7 @@ cgLetNoEscapeRhs cgLetNoEscapeRhs join_id local_cc bndr rhs = do { (info, rhs_body) <- getCodeR $ cgLetNoEscapeRhsBody local_cc bndr rhs - ; let (bid, _) = expectJust "cgLetNoEscapeRhs" $ maybeLetNoEscape info + ; let (bid, _) = expectJust "cgLetNoEscapeRhs" $ maybeLetNoEscape (cgIdInfoSingleElem info) ; emit (outOfLine $ mkLabel bid <*> rhs_body <*> mkBranch join_id) ; return info } @@ -140,7 +141,7 @@ cgLetNoEscapeRhsBody -> StgRhs -> FCode CgIdInfo cgLetNoEscapeRhsBody local_cc bndr (StgRhsClosure cc _bi _ _upd _ args body) - = cgLetNoEscapeClosure bndr local_cc cc (nonVoidIds args) body + = cgLetNoEscapeClosure bndr local_cc cc args body cgLetNoEscapeRhsBody local_cc bndr (StgRhsCon cc con args) = cgLetNoEscapeClosure bndr local_cc cc [] (StgConApp con args) -- For a constructor RHS we want to generate a single chunk of @@ -153,19 +154,19 @@ cgLetNoEscapeClosure :: Id -- binder -> Maybe LocalReg -- Slot for saved current cost centre -> CostCentreStack -- XXX: *** NOT USED *** why not? - -> [NonVoid Id] -- Args (as in \ args -> body) + -> [Id] -- Args (as in \ args -> body) -> StgExpr -- Body (as in above) -> FCode CgIdInfo -cgLetNoEscapeClosure bndr cc_slot _unused_cc args body - = do { arg_regs <- forkProc $ do - { restoreCurrentCostCentre cc_slot - ; arg_regs <- bindArgsToRegs args - ; altHeapCheck arg_regs (cgExpr body) - -- Using altHeapCheck just reduces - -- instructions to save on stack - ; return arg_regs } - ; return $ lneIdInfo bndr arg_regs} +cgLetNoEscapeClosure bndr cc_slot _unused_cc args body = forkProc $ + do { restoreCurrentCostCentre cc_slot + ; arg_regss <- mapM idToReg args + ; bindArgsToRegs (zipEqual "cgLetNoEscapeClosure" args arg_regss) + ; let arg_regs = concat arg_regss + ; altHeapCheck arg_regs (cgExpr body) + -- Using altHeapCheck just reduces + -- instructions to save on stack + ; return (lneIdInfo bndr arg_regs) } ------------------------------------------------------------------------ @@ -319,16 +320,18 @@ cgCase (StgApp v []) bndr _ alt_type@(PrimAlt _) alts do { when (not reps_compatible) $ panic "cgCase: reps do not match, perhaps a dodgy unsafeCoerce?" ; v_info <- getCgIdInfo v - ; emit (mkAssign (CmmLocal (idToReg (NonVoid bndr))) (idInfoToAmode v_info)) - ; _ <- bindArgsToRegs [NonVoid bndr] - ; cgAlts NoGcInAlts (NonVoid bndr) alt_type alts } + ; regs <- idToReg bndr + ; zipWithM_ (\reg expr -> emit (mkAssign (CmmLocal reg) expr)) regs (idInfoToAmodes v_info) + ; bindArgToReg bndr regs + ; cgAlts NoGcInAlts regs bndr alt_type alts } where reps_compatible = idPrimRep v == idPrimRep bndr cgCase scrut@(StgApp v []) _ _ (PrimAlt _) _ = -- fail at run-time, not compile-time do { mb_cc <- maybeSaveCostCentre True - ; withSequel (AssignTo [idToReg (NonVoid v)] False) (cgExpr scrut) + ; regs <- idToReg v + ; withSequel (AssignTo regs False) (cgExpr scrut) ; restoreCurrentCostCentre mb_cc ; emit $ mkComment $ mkFastString "should be unreachable code" ; emit $ withFreshLabel "l" (\l -> mkLabel l <*> mkBranch l)} @@ -353,7 +356,8 @@ cgCase scrut bndr srt alt_type alts = -- the general case do { up_hp_usg <- getVirtHp -- Upstream heap usage ; let ret_bndrs = chooseReturnBndrs bndr alt_type alts - alt_regs = map idToReg ret_bndrs + ; alts_regss <- mapM idToReg ret_bndrs + ; let alt_regs = concat alts_regss simple_scrut = isSimpleScrut scrut alt_type gcInAlts | not simple_scrut = True | isSingleton alts = False @@ -366,8 +370,8 @@ cgCase scrut bndr srt alt_type alts ; restoreCurrentCostCentre mb_cc -- JD: We need Note: [Better Alt Heap Checks] - ; _ <- bindArgsToRegs ret_bndrs - ; cgAlts gc_plan (NonVoid bndr) alt_type alts } + ; bindArgsToRegs (zipEqual "cgCase" ret_bndrs alts_regss) + ; cgAlts gc_plan alt_regs bndr alt_type alts } ----------------- maybeSaveCostCentre :: Bool -> FCode (Maybe LocalReg) @@ -394,39 +398,40 @@ isSimpleOp (StgPrimOp op) = not (primOpOutOfLine op) isSimpleOp (StgPrimCallOp _) = False ----------------- -chooseReturnBndrs :: Id -> AltType -> [StgAlt] -> [NonVoid Id] +chooseReturnBndrs :: Id -> AltType -> [StgAlt] -> [Id] -- These are the binders of a case that are assigned -- by the evaluation of the scrutinee --- Only non-void ones come back chooseReturnBndrs bndr (PrimAlt _) _alts - = nonVoidIds [bndr] + = [bndr] chooseReturnBndrs _bndr (UbxTupAlt _) [(_, ids, _, _)] - = nonVoidIds ids -- 'bndr' is not assigned! + = ids -- 'bndr' will be assigned by cgAlts chooseReturnBndrs bndr (AlgAlt _) _alts - = nonVoidIds [bndr] -- Only 'bndr' is assigned + = [bndr] -- Only 'bndr' is assigned chooseReturnBndrs bndr PolyAlt _alts - = nonVoidIds [bndr] -- Only 'bndr' is assigned + = [bndr] -- Only 'bndr' is assigned chooseReturnBndrs _ _ _ = panic "chooseReturnBndrs" -- UbxTupALt has only one alternative ------------------------------------- -cgAlts :: GcPlan -> NonVoid Id -> AltType -> [StgAlt] -> FCode () +cgAlts :: GcPlan -> [LocalReg] -> Id -> AltType -> [StgAlt] -> FCode () -- At this point the result of the case are in the binders -cgAlts gc_plan _bndr PolyAlt [(_, _, _, rhs)] +cgAlts gc_plan _alt_regs _bndr PolyAlt [(_, _, _, rhs)] = maybeAltHeapCheck gc_plan (cgExpr rhs) -cgAlts gc_plan _bndr (UbxTupAlt _) [(_, _, _, rhs)] - = maybeAltHeapCheck gc_plan (cgExpr rhs) - -- Here bndrs are *already* in scope, so don't rebind them +cgAlts gc_plan alt_regs bndr (UbxTupAlt _) [(_, _, _, rhs)] + = do { bindArgToReg bndr alt_regs + ; maybeAltHeapCheck gc_plan (cgExpr rhs) } + -- Here alt bndrs are *already* in scope, so don't rebind them, + -- but we do need to set up bndr to expand to the scrutinee result -cgAlts gc_plan bndr (PrimAlt _) alts - = do { tagged_cmms <- cgAltRhss gc_plan bndr alts +cgAlts gc_plan [alt_reg] _bndr (PrimAlt _) alts + = do { tagged_cmms <- cgAltRhss gc_plan alt_reg alts - ; let bndr_reg = CmmLocal (idToReg bndr) + ; let bndr_reg = CmmLocal alt_reg (DEFAULT,deflt) = head tagged_cmms -- PrimAlts always have a DEFAULT case -- and it always comes first @@ -435,11 +440,11 @@ cgAlts gc_plan bndr (PrimAlt _) alts | (LitAlt lit, code) <- tagged_cmms] ; emit (mkCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' deflt) } -cgAlts gc_plan bndr (AlgAlt tycon) alts - = do { tagged_cmms <- cgAltRhss gc_plan bndr alts +cgAlts gc_plan [alt_reg] _bndr (AlgAlt tycon) alts + = do { tagged_cmms <- cgAltRhss gc_plan alt_reg alts ; let fam_sz = tyConFamilySize tycon - bndr_reg = CmmLocal (idToReg bndr) + bndr_reg = CmmLocal alt_reg mb_deflt = case tagged_cmms of ((DEFAULT,rhs) : _) -> Just rhs _other -> Nothing @@ -464,15 +469,14 @@ cgAlts gc_plan bndr (AlgAlt tycon) alts in emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1) } -cgAlts _ _ _ _ = panic "cgAlts" +cgAlts _ _ _ _ _ = panic "cgAlts" -- UbxTupAlt and PolyAlt have only one alternative ------------------- -cgAltRhss :: GcPlan -> NonVoid Id -> [StgAlt] -> FCode [(AltCon, CmmAGraph)] -cgAltRhss gc_plan bndr alts +cgAltRhss :: GcPlan -> LocalReg -> [StgAlt] -> FCode [(AltCon, CmmAGraph)] +cgAltRhss gc_plan base_reg alts = forkAlts (map cg_alt alts) where - base_reg = idToReg bndr cg_alt :: StgAlt -> FCode (AltCon, CmmAGraph) cg_alt (con, bndrs, _uses, rhs) = getCodeR $ @@ -492,7 +496,7 @@ maybeAltHeapCheck (GcInAlts regs _) code = altHeapCheck regs code cgConApp :: DataCon -> [StgArg] -> FCode () cgConApp con stg_args | isUnboxedTupleCon con -- Unboxed tuple: assign and return - = do { arg_exprs <- getNonVoidArgAmodes stg_args + = do { arg_exprs <- concatMapM getArgAmodes stg_args ; tickyUnboxedTupleReturn (length arg_exprs) ; emitReturn arg_exprs } @@ -503,24 +507,32 @@ cgConApp con stg_args -- is "con", which is a bit of a fudge, but it only affects profiling ; emit init - ; emitReturn [idInfoToAmode idinfo] } + ; emitReturn (idInfoToAmodes idinfo) } cgIdApp :: Id -> [StgArg] -> FCode () -cgIdApp fun_id [] | isVoidId fun_id = emitReturn [] cgIdApp fun_id args = do { fun_info <- getCgIdInfo fun_id - ; case maybeLetNoEscape fun_info of - Just (blk_id, lne_regs) -> cgLneJump blk_id lne_regs args - Nothing -> cgTailCall fun_id fun_info args } + ; case cg_elems fun_info of + -- If we mention an Id with a void representation, return nothing immediately + [] -> ASSERT( null args ) + emitReturn [] + -- Similarly for unboxed tuples, return the components immediately + elem_infos | isUnboxedTupleType (idType fun_id) -> ASSERT( null args ) + emitReturn (map idElemInfoToAmode elem_infos) + -- For standard function application, just try let-no-escape and then tailcall + [fun_info] -> case maybeLetNoEscape fun_info of + Just (blk_id, lne_regs) -> cgLneJump blk_id lne_regs args + Nothing -> cgTailCall fun_id fun_info args + _ -> panic "cgIdApp" } cgLneJump :: BlockId -> [LocalReg] -> [StgArg] -> FCode () cgLneJump blk_id lne_regs args -- Join point; discard sequel - = do { cmm_args <- getNonVoidArgAmodes args + = do { cmm_args <- concatMapM getArgAmodes args ; emit (mkMultiAssign lne_regs cmm_args <*> mkBranch blk_id) } -cgTailCall :: Id -> CgIdInfo -> [StgArg] -> FCode () +cgTailCall :: Id -> CgIdElemInfo -> [StgArg] -> FCode () cgTailCall fun_id fun_info args = do dflags <- getDynFlags case (getCallMethod dflags fun_name (idCafInfo fun_id) lf_info (length args)) of @@ -555,10 +567,10 @@ cgTailCall fun_id fun_info args = do JumpToIt {} -> panic "cgTailCall" -- ??? where - fun_name = idName fun_id - fun = idInfoToAmode fun_info - lf_info = cgIdInfoLF fun_info - node_points = nodeMustPointToIt lf_info + fun_name = idName fun_id + fun = idElemInfoToAmode fun_info + lf_info = cgIdElemInfoLF fun_info + node_points = nodeMustPointToIt lf_info {- Note [case on Bool] diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs index c41832a0ab..d64a2a7640 100644 --- a/compiler/codeGen/StgCmmForeign.hs +++ b/compiler/codeGen/StgCmmForeign.hs @@ -21,7 +21,6 @@ import StgCmmProf import StgCmmEnv import StgCmmMonad import StgCmmUtils -import StgCmmClosure import BlockId import Cmm @@ -35,9 +34,9 @@ import SMRep import ForeignCall import Constants import StaticFlags -import Maybes import Outputable import BasicTypes +import MonadUtils ( concatMapM ) import Control.Monad @@ -278,20 +277,14 @@ currentNursery = CmmGlobal CurrentNursery getFCallArgs :: [StgArg] -> FCode [(CmmExpr, ForeignHint)] -- (a) Drop void args -- (b) Add foreign-call shim code --- It's (b) that makes this differ from getNonVoidArgAmodes +-- It's (b) that makes this differ from getArgsAmodes -getFCallArgs args - = do { mb_cmms <- mapM get args - ; return (catMaybes mb_cmms) } +getFCallArgs args = concatMapM get args where - get arg | isVoidRep arg_rep - = return Nothing - | otherwise - = do { cmm <- getArgAmode (NonVoid arg) - ; return (Just (add_shim arg_ty cmm, hint)) } + get arg = do { cmm <- getArgAmodes arg + ; return (map (add_shim arg_ty) cmm `zip` hint) } where arg_ty = stgArgType arg - arg_rep = typePrimRep arg_ty hint = typeForeignHint arg_ty add_shim :: Type -> CmmExpr -> CmmExpr diff --git a/compiler/codeGen/StgCmmHeap.hs b/compiler/codeGen/StgCmmHeap.hs index 690b0a9622..68a1658ac1 100644 --- a/compiler/codeGen/StgCmmHeap.hs +++ b/compiler/codeGen/StgCmmHeap.hs @@ -15,13 +15,12 @@ module StgCmmHeap ( mkVirtHeapOffsets, mkVirtConstrOffsets, mkStaticClosureFields, mkStaticClosure, - allocDynClosure, allocDynClosureCmm, emitSetDynHdr + allocDynClosureCmm, emitSetDynHdr ) where #include "HsVersions.h" import CmmType -import StgSyn import CLabel import StgCmmLayout import StgCmmUtils @@ -30,7 +29,6 @@ import StgCmmProf import StgCmmTicky import StgCmmGran import StgCmmClosure -import StgCmmEnv import MkGraph @@ -49,24 +47,12 @@ import DynFlags -- Initialise dynamic heap objects ----------------------------------------------------------- -allocDynClosure - :: CmmInfoTable - -> LambdaFormInfo - -> CmmExpr -- Cost Centre to stick in the object - -> CmmExpr -- Cost Centre to blame for this alloc - -- (usually the same; sometimes "OVERHEAD") - - -> [(NonVoid StgArg, VirtualHpOffset)] -- Offsets from start of object - -- ie Info ptr has offset zero. - -- No void args in here - -> FCode (LocalReg, CmmAGraph) - allocDynClosureCmm :: CmmInfoTable -> LambdaFormInfo -> CmmExpr -> CmmExpr -> [(CmmExpr, VirtualHpOffset)] -> FCode (LocalReg, CmmAGraph) --- allocDynClosure allocates the thing in the heap, +-- allocDynClosureCmm allocates the thing in the heap, -- and modifies the virtual Hp to account for this. -- The second return value is the graph that sets the value of the -- returned LocalReg, which should point to the closure after executing @@ -74,7 +60,7 @@ allocDynClosureCmm -- Note [Return a LocalReg] -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- allocDynClosure returns a LocalReg, not a (Hp+8) CmmExpr. +-- allocDynClosureCmm returns a LocalReg, not a (Hp+8) CmmExpr. -- Reason: -- ...allocate object... -- obj = Hp + 8 @@ -83,13 +69,6 @@ allocDynClosureCmm -- but Hp+8 means something quite different... -allocDynClosure info_tbl lf_info use_cc _blame_cc args_w_offsets - = do { let (args, offsets) = unzip args_w_offsets - ; cmm_args <- mapM getArgAmode args -- No void args - ; allocDynClosureCmm info_tbl lf_info - use_cc _blame_cc (zip cmm_args offsets) - } - allocDynClosureCmm info_tbl lf_info use_cc _blame_cc amodes_w_offsets = do { virt_hp <- getVirtHp @@ -322,17 +301,16 @@ These are used in the following circumstances entryHeapCheck :: ClosureInfo -> Int -- Arg Offset -> Maybe LocalReg -- Function (closure environment) - -> Int -- Arity -- not same as len args b/c of voids + -> Bool -- Heap check for a *thunk*? -> [LocalReg] -- Non-void args (empty for thunk) -> FCode () -> FCode () -entryHeapCheck cl_info offset nodeSet arity args code +entryHeapCheck cl_info offset nodeSet is_thunk args code = do dflags <- getDynFlags let platform = targetPlatform dflags - is_thunk = arity == 0 is_fastf = case closureFunInfo cl_info of Just (_, ArgGen _) -> False _otherwise -> True diff --git a/compiler/codeGen/StgCmmLayout.hs b/compiler/codeGen/StgCmmLayout.hs index 9afcd029a4..aa7b65d298 100644 --- a/compiler/codeGen/StgCmmLayout.hs +++ b/compiler/codeGen/StgCmmLayout.hs @@ -49,13 +49,14 @@ import CLabel import StgSyn import Id import Name +import BasicTypes ( Arity ) import TyCon ( PrimRep(..) ) -import BasicTypes ( Arity ) import DynFlags import StaticFlags import Constants import Util +import Control.Monad import Data.List import Outputable import FastString ( mkFastString, FastString, fsLit ) @@ -133,76 +134,75 @@ directCall :: CLabel -> Arity -> [StgArg] -> FCode () -- calls f(arg1, ..., argn), and applies the result to the remaining args -- The function f has arity n, and there are guaranteed at least n args -- Both arity and args include void args +-- +-- NB: f is guaranteed to be a function, not a thunk directCall lbl arity stg_args - = do { cmm_args <- getNonVoidArgAmodes stg_args - ; direct_call "directCall" lbl arity cmm_args (argsReps stg_args) } + = do { cmm_args <- mapM addArgReps stg_args + ; direct_call "directCall" lbl arity cmm_args } slowCall :: CmmExpr -> [StgArg] -> FCode () -- (slowCall fun args) applies fun to args, returning the results to Sequel slowCall fun stg_args - = do { cmm_args <- getNonVoidArgAmodes stg_args - ; slow_call fun cmm_args (argsReps stg_args) } + = do { cmm_args <- mapM addArgReps stg_args + ; slow_call fun cmm_args } -------------- -direct_call :: String -> CLabel -> Arity -> [CmmExpr] -> [ArgRep] -> FCode () --- NB1: (length args) may be less than (length reps), because --- the args exclude the void ones +direct_call :: String -> CLabel -> Arity -> [[(PrimRep, CmmExpr)]] -> FCode () -- NB2: 'arity' refers to the *reps* -direct_call caller lbl arity args reps - | debugIsOn && arity > length reps -- Too few args +direct_call caller lbl arity arg_reps + | debugIsOn && arity > length arg_reps -- Too few args = do -- Caller should ensure that there enough args! dflags <- getDynFlags let platform = targetPlatform dflags pprPanic "direct_call" (text caller <+> ppr arity - <+> pprPlatform platform lbl <+> ppr (length reps) - <+> pprPlatform platform args <+> ppr reps ) + <+> pprPlatform platform lbl <+> ppr (length arg_reps) + <+> pprPlatform platform (map (map snd) arg_reps) <+> ppr (map (map fst) arg_reps) ) - | null rest_reps -- Precisely the right number of arguments - = emitCall (NativeDirectCall, NativeReturn) target args + | null rest_arg_reps -- Precisely the right number of arguments + = emitCall (NativeDirectCall, NativeReturn) target (concatMap (map snd) arg_reps) | otherwise -- Over-saturated call - = ASSERT( arity == length initial_reps ) + = ASSERT( arity == length fast_arg_reps ) do { pap_id <- newTemp gcWord ; withSequel (AssignTo [pap_id] True) - (emitCall (NativeDirectCall, NativeReturn) target fast_args) + (emitCall (NativeDirectCall, NativeReturn) target (concatMap (map snd) fast_arg_reps)) ; slow_call (CmmReg (CmmLocal pap_id)) - rest_args rest_reps } + rest_arg_reps } where target = CmmLit (CmmLabel lbl) - (initial_reps, rest_reps) = splitAt arity reps - arg_arity = count isNonV initial_reps - (fast_args, rest_args) = splitAt arg_arity args + (fast_arg_reps, rest_arg_reps) = splitAt arity arg_reps -------------- -slow_call :: CmmExpr -> [CmmExpr] -> [ArgRep] -> FCode () -slow_call fun args reps +slow_call :: CmmExpr -> [[(PrimRep, CmmExpr)]] -> FCode () +slow_call fun arg_reps = do dflags <- getDynFlags let platform = targetPlatform dflags - call <- getCode $ direct_call "slow_call" (mkRtsApFastLabel rts_fun) arity args reps + call <- getCode $ direct_call "slow_call" (mkRtsApFastLabel rts_fun) arity arg_reps emit $ mkComment $ mkFastString ("slow_call for " ++ showSDoc (pprPlatform platform fun) ++ " with pat " ++ showSDoc (ftext rts_fun)) emit (mkAssign nodeReg fun <*> call) where - (rts_fun, arity) = slowCallPattern reps + (rts_fun, arity) = slowCallPattern (map (map (toArgRep . fst)) arg_reps) -- These cases were found to cover about 99% of all slow calls: -slowCallPattern :: [ArgRep] -> (FastString, Arity) +slowCallPattern :: [[ArgRep]] -> (FastString, Arity) -- Returns the generic apply function and arity -slowCallPattern (P: P: P: P: P: P: _) = (fsLit "stg_ap_pppppp", 6) -slowCallPattern (P: P: P: P: P: _) = (fsLit "stg_ap_ppppp", 5) -slowCallPattern (P: P: P: P: _) = (fsLit "stg_ap_pppp", 4) -slowCallPattern (P: P: P: V: _) = (fsLit "stg_ap_pppv", 4) -slowCallPattern (P: P: P: _) = (fsLit "stg_ap_ppp", 3) -slowCallPattern (P: P: V: _) = (fsLit "stg_ap_ppv", 3) -slowCallPattern (P: P: _) = (fsLit "stg_ap_pp", 2) -slowCallPattern (P: V: _) = (fsLit "stg_ap_pv", 2) -slowCallPattern (P: _) = (fsLit "stg_ap_p", 1) -slowCallPattern (V: _) = (fsLit "stg_ap_v", 1) -slowCallPattern (N: _) = (fsLit "stg_ap_n", 1) -slowCallPattern (F: _) = (fsLit "stg_ap_f", 1) -slowCallPattern (D: _) = (fsLit "stg_ap_d", 1) -slowCallPattern (L: _) = (fsLit "stg_ap_l", 1) -slowCallPattern [] = (fsLit "stg_ap_0", 0) +slowCallPattern ([P]: [P]: [P]: [P]: [P]: [P]: _) = (fsLit "stg_ap_pppppp", 6) +slowCallPattern ([P]: [P]: [P]: [P]: [P]: _) = (fsLit "stg_ap_ppppp", 5) +slowCallPattern ([P]: [P]: [P]: [P]: _) = (fsLit "stg_ap_pppp", 4) +slowCallPattern ([P]: [P]: [P]: []: _) = (fsLit "stg_ap_pppv", 4) +slowCallPattern ([P]: [P]: [P]: _) = (fsLit "stg_ap_ppp", 3) +slowCallPattern ([P]: [P]: []: _) = (fsLit "stg_ap_ppv", 3) +slowCallPattern ([P]: [P]: _) = (fsLit "stg_ap_pp", 2) +slowCallPattern ([P]: []: _) = (fsLit "stg_ap_pv", 2) +slowCallPattern ([P]: _) = (fsLit "stg_ap_p", 1) +slowCallPattern ([N]: _) = (fsLit "stg_ap_n", 1) +slowCallPattern ([F]: _) = (fsLit "stg_ap_f", 1) +slowCallPattern ([D]: _) = (fsLit "stg_ap_d", 1) +slowCallPattern ([L]: _) = (fsLit "stg_ap_l", 1) +slowCallPattern ([]: _) = (fsLit "stg_ap_v", 1) +slowCallPattern (rs: _) = (error "FIXME" rs, 1) +slowCallPattern [] = (fsLit "stg_ap_0", 0) ------------------------------------------------------------------------- @@ -215,19 +215,16 @@ slowCallPattern [] = (fsLit "stg_ap_0", 0) data ArgRep = P -- GC Ptr | N -- One-word non-ptr | L -- Two-word non-ptr (long) - | V -- Void | F -- Float | D -- Double instance Outputable ArgRep where ppr P = text "P" ppr N = text "N" ppr L = text "L" - ppr V = text "V" ppr F = text "F" ppr D = text "D" toArgRep :: PrimRep -> ArgRep -toArgRep VoidRep = V toArgRep PtrRep = P toArgRep IntRep = N toArgRep WordRep = N @@ -237,23 +234,15 @@ toArgRep Word64Rep = L toArgRep FloatRep = F toArgRep DoubleRep = D -isNonV :: ArgRep -> Bool -isNonV V = False -isNonV _ = True - -argsReps :: [StgArg] -> [ArgRep] -argsReps = map (toArgRep . argPrimRep) - argRepSizeW :: ArgRep -> WordOff -- Size in words argRepSizeW N = 1 argRepSizeW P = 1 argRepSizeW F = 1 argRepSizeW L = wORD64_SIZE `quot` wORD_SIZE argRepSizeW D = dOUBLE_SIZE `quot` wORD_SIZE -argRepSizeW V = 0 -idArgRep :: Id -> ArgRep -idArgRep = toArgRep . idPrimRep +idArgRep :: Id -> [ArgRep] +idArgRep = map toArgRep . idPrimRep ------------------------------------------------------------------------- ---- Laying out objects on the heap and stack @@ -275,7 +264,7 @@ mkVirtHeapOffsets -> [(PrimRep,a)] -- Things to make offsets for -> (WordOff, -- _Total_ number of words allocated WordOff, -- Number of words allocated for *pointers* - [(NonVoid a, VirtualHpOffset)]) + [(a, VirtualHpOffset)]) -- Things with their offsets from start of object in order of -- increasing offset; BUT THIS MAY BE DIFFERENT TO INPUT ORDER @@ -288,8 +277,7 @@ mkVirtHeapOffsets -- than the unboxed things mkVirtHeapOffsets is_thunk things - = let non_void_things = filterOut (isVoidRep . fst) things - (ptrs, non_ptrs) = partition (isGcPtrRep . fst) non_void_things + = let (ptrs, non_ptrs) = partition (isGcPtrRep . fst) things (wds_of_ptrs, ptrs_w_offsets) = mapAccumL computeOffset 0 ptrs (tot_wds, non_ptrs_w_offsets) = mapAccumL computeOffset wds_of_ptrs non_ptrs in @@ -300,9 +288,9 @@ mkVirtHeapOffsets is_thunk things computeOffset wds_so_far (rep, thing) = (wds_so_far + argRepSizeW (toArgRep rep), - (NonVoid thing, hdr_size + wds_so_far)) + (thing, hdr_size + wds_so_far)) -mkVirtConstrOffsets :: [(PrimRep,a)] -> (WordOff, WordOff, [(NonVoid a, VirtualHpOffset)]) +mkVirtConstrOffsets :: [(PrimRep,a)] -> (WordOff, WordOff, [(a, VirtualHpOffset)]) -- Just like mkVirtHeapOffsets, but for constructors mkVirtConstrOffsets = mkVirtHeapOffsets False @@ -329,7 +317,7 @@ mkArgDescr _nm args Nothing -> return (ArgGen arg_bits) where arg_bits = argBits arg_reps - arg_reps = filter isNonV (map idArgRep args) + arg_reps = concatMap idArgRep args -- Getting rid of voids eases matching of standard patterns argBits :: [ArgRep] -> [Bool] -- True for non-ptr, False for ptr @@ -384,19 +372,20 @@ emitClosureProcAndInfoTable :: Bool -- top-level? -> Id -- name of the closure -> LambdaFormInfo -> CmmInfoTable - -> [NonVoid Id] -- incoming arguments + -> [Id] -- incoming arguments -> ((Int, LocalReg, [LocalReg]) -> FCode ()) -- function body -> FCode () emitClosureProcAndInfoTable top_lvl bndr lf_info info_tbl args body - = do { + = do { [node] <- idToReg bndr -- Bind the binder itself, but only if it's not a top-level -- binding. We need non-top let-bindings to refer to the -- top-level binding, which this binding would incorrectly shadow. - ; node <- if top_lvl then return $ idToReg (NonVoid bndr) - else bindToReg (NonVoid bndr) lf_info + ; unless top_lvl $ bindToReg bndr [(node, lf_info)] ; let node_points = nodeMustPointToIt lf_info - ; arg_regs <- bindArgsToRegs args - ; let args' = if node_points then (node : arg_regs) else arg_regs + ; args_regs <- mapM idToReg args + ; bindArgsToRegs (args `zip` args_regs) + ; let arg_regs = concat args_regs + args' = if node_points then (node : arg_regs) else arg_regs conv = if nodeMustPointToIt lf_info then NativeNodeCall else NativeDirectCall (offset, _) = mkCallEntry conv args' diff --git a/compiler/codeGen/StgCmmMonad.hs b/compiler/codeGen/StgCmmMonad.hs index 71457c530c..80200f16bf 100644 --- a/compiler/codeGen/StgCmmMonad.hs +++ b/compiler/codeGen/StgCmmMonad.hs @@ -47,7 +47,7 @@ module StgCmmMonad ( getState, setState, getInfoDown, getDynFlags, getThisPackage, -- more localised access to monad state - CgIdInfo(..), CgLoc(..), + CgIdInfo(..), CgIdElemInfo(..), CgLoc(..), getBinds, setBinds, getStaticBinds, -- out of general friendliness, we also export ... @@ -178,11 +178,25 @@ data CgInfoDownwards -- information only passed *downwards* by the monad type CgBindings = IdEnv CgIdInfo data CgIdInfo - = CgIdInfo - { cg_id :: Id -- Id that this is the info for - -- Can differ from the Id at occurrence sites by - -- virtue of being externalised, for splittable C - , cg_lf :: LambdaFormInfo + = CgIdInfo + { cg_id :: Id -- Id that this is the info for + -- Can differ from the Id at occurrence sites by + -- virtue of being externalised, for splittable C + -- + -- This is only really meaningful for cases where the + -- IdInfo is a singleton, because only top-level names + -- get externalised and all top-level names are lifted. + -- However, we keep it around even in the other cases + -- as it is useful for debugging purposes. + , cg_elems :: [CgIdElemInfo] -- Info for each of the things the Id expands to during + -- code generation. Most Ids expand to a single thing, + -- but ones of void representation expand to nothing + -- and unboxed tuples expand to an arbitrary number. + } + +data CgIdElemInfo + = CgIdElemInfo + { cg_lf :: LambdaFormInfo , cg_loc :: CgLoc -- CmmExpr for the *tagged* value , cg_tag :: {-# UNPACK #-} !DynTag -- Cache for (lfDynTag cg_lf) } @@ -198,8 +212,12 @@ data CgLoc -- and branch to the block id instance PlatformOutputable CgIdInfo where - pprPlatform platform (CgIdInfo { cg_id = id, cg_loc = loc }) - = ppr id <+> ptext (sLit "-->") <+> pprPlatform platform loc + pprPlatform platform (CgIdInfo { cg_id = id, cg_elems = elems }) + = ppr id <+> ptext (sLit "-->") <+> hsep (map (pprPlatform platform) elems) + +instance PlatformOutputable CgIdElemInfo where + pprPlatform platform (CgIdElemInfo { cg_loc = loc }) + = pprPlatform platform loc instance PlatformOutputable CgLoc where pprPlatform platform (CmmLoc e) = ptext (sLit "cmm") <+> pprPlatform platform e diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs index 6518c5b5b0..b81479caa8 100644 --- a/compiler/codeGen/StgCmmPrim.hs +++ b/compiler/codeGen/StgCmmPrim.hs @@ -36,6 +36,7 @@ import Type ( Type, tyConAppTyCon ) import TyCon import CLabel import CmmUtils +import MonadUtils import PrimOp import SMRep import Constants @@ -61,7 +62,7 @@ might be a Haskell closure pointer, we don't want to evaluate it. -} ---------------------------------- cgOpApp :: StgOp -- The op -> [StgArg] -- Arguments - -> Type -- Result type (always an unboxed tuple) + -> Type -- Result type -> FCode () -- Foreign calls @@ -79,7 +80,7 @@ cgOpApp (StgFCallOp fcall _) stg_args res_ty cgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty = ASSERT(isEnumerationTyCon tycon) - do { args' <- getNonVoidArgAmodes [arg] + do { args' <- getArgAmodes arg ; let amode = case args' of [amode] -> amode _ -> panic "TagToEnumOp had void arg" ; emitReturn [tagToClosure tycon amode] } @@ -91,25 +92,16 @@ cgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty -- That won't work. tycon = tyConAppTyCon res_ty -cgOpApp (StgPrimOp primop) args res_ty +cgOpApp (StgPrimOp primop) args _res_ty | primOpOutOfLine primop - = do { cmm_args <- getNonVoidArgAmodes args + = do { cmm_args <- concatMapM getArgAmodes args ; let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop)) ; emitCall (PrimOpCall, PrimOpReturn) fun cmm_args } - | ReturnsPrim VoidRep <- result_info - = do cgPrimOp [] primop args - emitReturn [] - | ReturnsPrim rep <- result_info - = do res <- newTemp (primRepCmmType rep) - cgPrimOp [res] primop args - emitReturn [CmmReg (CmmLocal res)] - - | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon - = do (regs, _hints) <- newUnboxedTupleRegs res_ty - cgPrimOp regs primop args - emitReturn (map (CmmReg . CmmLocal) regs) + = do (res, _hints) <- newSequelRegs rep + cgPrimOp res primop args + emitReturn (map (CmmReg . CmmLocal) res) | ReturnsAlg tycon <- result_info , isEnumerationTyCon tycon @@ -124,7 +116,7 @@ cgOpApp (StgPrimOp primop) args res_ty result_info = getPrimOpResultInfo primop cgOpApp (StgPrimCallOp primcall) args _res_ty - = do { cmm_args <- getNonVoidArgAmodes args + = do { cmm_args <- concatMapM getArgAmodes args ; let fun = CmmLit (CmmLabel (mkPrimCallLabel primcall)) ; emitCall (PrimOpCall, PrimOpReturn) fun cmm_args } @@ -135,7 +127,7 @@ cgPrimOp :: [LocalReg] -- where to put the results -> FCode () cgPrimOp results op args - = do arg_exprs <- getNonVoidArgAmodes args + = do arg_exprs <- concatMapM getArgAmodes args emitPrimOp results op arg_exprs diff --git a/compiler/codeGen/StgCmmTicky.hs b/compiler/codeGen/StgCmmTicky.hs index a6c592cfd8..0aa949b7a8 100644 --- a/compiler/codeGen/StgCmmTicky.hs +++ b/compiler/codeGen/StgCmmTicky.hs @@ -36,7 +36,7 @@ module StgCmmTicky ( tickyUpdateBhCaf, tickyBlackHole, - tickyUnboxedTupleReturn, tickyVectoredReturn, + tickyUnboxedTupleReturn, tickyReturnOldCon, tickyReturnNewCon, tickyKnownCallTooFewArgs, tickyKnownCallExact, tickyKnownCallExtraArgs, @@ -67,6 +67,7 @@ import BasicTypes import FastString import Constants import Outputable +import Maybes import DynFlags @@ -76,8 +77,6 @@ import TcType import Type import TyCon -import Data.Maybe - ----------------------------------------------------------------------------- -- -- Ticky-ticky profiling @@ -205,16 +204,11 @@ tickyReturnNewCon arity = ifTicky $ do { bumpTickyCounter (fsLit "RET_NEW_ctr") ; bumpHistogram (fsLit "RET_NEW_hst") arity } -tickyUnboxedTupleReturn :: Int -> FCode () +tickyUnboxedTupleReturn :: Arity -> FCode () tickyUnboxedTupleReturn arity = ifTicky $ do { bumpTickyCounter (fsLit "RET_UNBOXED_TUP_ctr") ; bumpHistogram (fsLit "RET_UNBOXED_TUP_hst") arity } -tickyVectoredReturn :: Int -> FCode () -tickyVectoredReturn family_size - = ifTicky $ do { bumpTickyCounter (fsLit "VEC_RETURN_ctr") - ; bumpHistogram (fsLit "RET_VEC_RETURN_hst") family_size } - -- ----------------------------------------------------------------------------- -- Ticky calls @@ -223,7 +217,7 @@ tickyDirectCall :: Arity -> [StgArg] -> FCode () tickyDirectCall arity args | arity == length args = tickyKnownCallExact | otherwise = do tickyKnownCallExtraArgs - tickySlowCallPat (map argPrimRep (drop arity args)) + tickySlowCallPat (concatMap argPrimRep (drop arity args)) tickyKnownCallTooFewArgs :: FCode () tickyKnownCallTooFewArgs = ifTicky $ bumpTickyCounter (fsLit "KNOWN_CALL_TOO_FEW_ARGS_ctr") @@ -246,7 +240,7 @@ tickySlowCall lf_info args = do { if (isKnownFun lf_info) then tickyKnownCallTooFewArgs else tickyUnknownCall - ; tickySlowCallPat (map argPrimRep args) } + ; tickySlowCallPat (concatMap argPrimRep args) } tickySlowCallPat :: [PrimRep] -> FCode () tickySlowCallPat _args = return () diff --git a/compiler/codeGen/StgCmmUtils.hs b/compiler/codeGen/StgCmmUtils.hs index c3327138b3..9a2e82daf5 100644 --- a/compiler/codeGen/StgCmmUtils.hs +++ b/compiler/codeGen/StgCmmUtils.hs @@ -20,7 +20,7 @@ module StgCmmUtils ( emitRtsCall, emitRtsCallWithVols, emitRtsCallWithResult, emitRtsCallGen, assignTemp, newTemp, withTemp, - newUnboxedTupleRegs, + newUnboxedTupleRegs, newSequelRegs, mkMultiAssign, mkCmmSwitch, mkCmmLitSwitch, emitSwitch, @@ -447,25 +447,28 @@ newTemp rep = do { uniq <- newUnique ; return (LocalReg uniq rep) } newUnboxedTupleRegs :: Type -> FCode ([LocalReg], [ForeignHint]) --- Choose suitable local regs to use for the components --- of an unboxed tuple that we are about to return to --- the Sequel. If the Sequel is a join point, using the --- regs it wants will save later assignments. newUnboxedTupleRegs res_ty = ASSERT( isUnboxedTupleType res_ty ) - do { sequel <- getSequel - ; regs <- choose_regs sequel - ; ASSERT( regs `equalLength` reps ) - return (regs, map primRepForeignHint reps) } + newSequelRegs reps where ty_args = tyConAppArgs (repType res_ty) reps = [ rep | ty <- ty_args - , let rep = typePrimRep ty - , not (isVoidRep rep) ] - choose_regs (AssignTo regs _) = return regs - choose_regs _other = mapM (newTemp . primRepCmmType) reps + , rep <- typePrimRep ty ] +newSequelRegs :: [PrimRep] -> FCode ([LocalReg], [ForeignHint]) +-- Choose suitable local regs to use for the components +-- of e.g. an unboxed tuple that we are about to return to +-- the Sequel. If the Sequel is a join point, using the +-- regs it wants will save later assignments. +newSequelRegs reps + = do { sequel <- getSequel + ; regs <- choose_regs sequel + ; ASSERT( regs `equalLength` reps ) + return (regs, map primRepForeignHint reps) } + where + choose_regs (AssignTo regs _) = return regs + choose_regs _other = mapM (newTemp . primRepCmmType) reps ------------------------------------------------------------------------- |