diff options
author | Simon Marlow <simonmar@microsoft.com> | 2007-07-27 10:41:57 +0000 |
---|---|---|
committer | Simon Marlow <simonmar@microsoft.com> | 2007-07-27 10:41:57 +0000 |
commit | 6015a94f9108a502150565577b66c23650796639 (patch) | |
tree | 20d499d1a9644c2c98374d99f511a4a1c2cb7d1d /compiler | |
parent | 04d444716b2e5415fb8f13771e49f1192ef8c8f8 (diff) | |
download | haskell-6015a94f9108a502150565577b66c23650796639.tar.gz |
Pointer Tagging
This patch implements pointer tagging as per our ICFP'07 paper "Faster
laziness using dynamic pointer tagging". It improves performance by
10-15% for most workloads, including GHC itself.
The original patches were by Alexey Rodriguez Yakushev
<mrchebas@gmail.com>, with additions and improvements by me. I've
re-recorded the development as a single patch.
The basic idea is this: we use the low 2 bits of a pointer to a heap
object (3 bits on a 64-bit architecture) to encode some information
about the object pointed to. For a constructor, we encode the "tag"
of the constructor (e.g. True vs. False), for a function closure its
arity. This enables some decisions to be made without dereferencing
the pointer, which speeds up some common operations. In particular it
enables us to avoid costly indirect jumps in many cases.
More information in the commentary:
http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/HaskellExecution/PointerTagging
Diffstat (limited to 'compiler')
-rw-r--r-- | compiler/cmm/CmmLint.hs | 11 | ||||
-rw-r--r-- | compiler/cmm/PprC.hs | 3 | ||||
-rw-r--r-- | compiler/codeGen/CgBindery.lhs | 68 | ||||
-rw-r--r-- | compiler/codeGen/CgClosure.lhs | 24 | ||||
-rw-r--r-- | compiler/codeGen/CgCon.lhs | 27 | ||||
-rw-r--r-- | compiler/codeGen/CgHeapery.lhs | 3 | ||||
-rw-r--r-- | compiler/codeGen/CgInfoTbls.hs | 25 | ||||
-rw-r--r-- | compiler/codeGen/CgPrimOp.hs | 3 | ||||
-rw-r--r-- | compiler/codeGen/CgProf.hs | 15 | ||||
-rw-r--r-- | compiler/codeGen/CgTailCall.lhs | 70 | ||||
-rw-r--r-- | compiler/codeGen/CgUtils.hs | 62 | ||||
-rw-r--r-- | compiler/codeGen/ClosureInfo.lhs | 38 | ||||
-rw-r--r-- | compiler/main/Constants.lhs | 10 | ||||
-rw-r--r-- | compiler/nativeGen/MachCodeGen.hs | 12 |
14 files changed, 326 insertions, 45 deletions
diff --git a/compiler/cmm/CmmLint.hs b/compiler/cmm/CmmLint.hs index 130dba05f9..d8d6c9bb46 100644 --- a/compiler/cmm/CmmLint.hs +++ b/compiler/cmm/CmmLint.hs @@ -88,7 +88,8 @@ cmmCheckMachOp op args = return (resultRepOfMachOp op) isWordOffsetReg (CmmGlobal Sp) = True -isWordOffsetReg (CmmGlobal Hp) = True +-- No warnings for unaligned arithmetic, which is used to tag dynamic constructor closures. +--isWordOffsetReg (CmmGlobal Hp) = True isWordOffsetReg _ = False isOffsetOp (MO_Add _) = True @@ -98,14 +99,18 @@ isOffsetOp _ = False -- This expression should be an address from which a word can be loaded: -- check for funny-looking sub-word offsets. cmmCheckWordAddress e@(CmmMachOp op [arg, CmmLit (CmmInt i _)]) - | isOffsetOp op && i `rem` fromIntegral wORD_SIZE /= 0 + | isOffsetOp op && notNodeReg arg && i `rem` fromIntegral wORD_SIZE /= 0 = cmmLintDubiousWordOffset e cmmCheckWordAddress e@(CmmMachOp op [CmmLit (CmmInt i _), arg]) - | isOffsetOp op && i `rem` fromIntegral wORD_SIZE /= 0 + | isOffsetOp op && notNodeReg arg && i `rem` fromIntegral wORD_SIZE /= 0 = cmmLintDubiousWordOffset e cmmCheckWordAddress _ = return () +-- No warnings for unaligned arithmetic with the node register, +-- which is used to extract fields from tagged constructor closures. +notNodeReg (CmmReg reg) | reg == nodeReg = False +notNodeReg _ = True lintCmmStmt :: CmmStmt -> CmmLint () lintCmmStmt stmt@(CmmAssign reg expr) = do diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs index 77d337df93..6032dc255c 100644 --- a/compiler/cmm/PprC.hs +++ b/compiler/cmm/PprC.hs @@ -322,8 +322,9 @@ pprExpr e = case e of -> char '*' <> pprAsPtrReg r CmmLoad (CmmRegOff r off) rep - | isPtrReg r && rep == wordRep + | isPtrReg r && rep == wordRep && (off `rem` wORD_SIZE == 0) -- ToDo: check that the offset is a word multiple? + -- (For tagging to work, I had to avoid unaligned loads. --ARY) -> pprAsPtrReg r <> brackets (ppr (off `shiftR` wordShift)) CmmLoad expr rep -> diff --git a/compiler/codeGen/CgBindery.lhs b/compiler/codeGen/CgBindery.lhs index d5a2c69d60..7447222d45 100644 --- a/compiler/codeGen/CgBindery.lhs +++ b/compiler/codeGen/CgBindery.lhs @@ -11,7 +11,8 @@ module CgBindery ( cgIdInfoId, cgIdInfoArgRep, cgIdInfoLF, - stableIdInfo, heapIdInfo, + stableIdInfo, heapIdInfo, + taggedStableIdInfo, taggedHeapIdInfo, letNoEscapeIdInfo, idInfoToAmode, addBindC, addBindsC, @@ -22,7 +23,7 @@ module CgBindery ( getLiveStackBindings, bindArgsToStack, rebindToStack, - bindNewToNode, bindNewToReg, bindArgsToRegs, + bindNewToNode, bindNewToUntagNode, bindNewToReg, bindArgsToRegs, bindNewToTemp, getArgAmode, getArgAmodes, getCgIdInfo, @@ -38,11 +39,13 @@ import CgStackery import CgUtils import CLabel import ClosureInfo +import Constants import Cmm import PprCmm ( {- instance Outputable -} ) import SMRep import Id +import DataCon import VarEnv import VarSet import Literal @@ -52,6 +55,7 @@ import StgSyn import Unique import UniqSet import Outputable + \end{code} @@ -80,23 +84,44 @@ data CgIdInfo , cg_rep :: CgRep , cg_vol :: VolatileLoc , cg_stb :: StableLoc - , cg_lf :: LambdaFormInfo } + , cg_lf :: LambdaFormInfo + , cg_tag :: {-# UNPACK #-} !Int -- tag to be added in idInfoToAmode + } mkCgIdInfo id vol stb lf = CgIdInfo { cg_id = id, cg_vol = vol, cg_stb = stb, - cg_lf = lf, cg_rep = idCgRep id } + cg_lf = lf, cg_rep = idCgRep id, cg_tag = tag } + where + tag + | Just con <- isDataConWorkId_maybe id, + {- Is this an identifier for a static constructor closure? -} + isNullaryRepDataCon con + {- If yes, is this a nullary constructor? + If yes, we assume that the constructor is evaluated and can + be tagged. + -} + = tagForCon con + + | otherwise + = funTagLFInfo lf voidIdInfo id = CgIdInfo { cg_id = id, cg_vol = NoVolatileLoc , cg_stb = VoidLoc, cg_lf = mkLFArgument id - , cg_rep = VoidArg } + , cg_rep = VoidArg, cg_tag = 0 } -- Used just for VoidRep things data VolatileLoc -- These locations die across a call = NoVolatileLoc | RegLoc CmmReg -- In one of the registers (global or local) | VirHpLoc VirtualHpOffset -- Hp+offset (address of closure) - | VirNodeLoc VirtualHpOffset -- Cts of offset indirect from Node - -- ie *(Node+offset) + | VirNodeLoc ByteOff -- Cts of offset indirect from Node + -- ie *(Node+offset). + -- NB. Byte offset, because we subtract R1's + -- tag from the offset. + +mkTaggedCgIdInfo id vol stb lf con + = CgIdInfo { cg_id = id, cg_vol = vol, cg_stb = stb, + cg_lf = lf, cg_rep = idCgRep id, cg_tag = tagForCon con } \end{code} @StableLoc@ encodes where an Id can be found, used by @@ -121,7 +146,7 @@ data StableLoc \begin{code} instance Outputable CgIdInfo where - ppr (CgIdInfo id rep vol stb lf) + ppr (CgIdInfo id rep vol stb lf _) -- TODO, pretty pring the tag info = ppr id <+> ptext SLIT("-->") <+> vcat [ppr vol, ppr stb] instance Outputable VolatileLoc where @@ -149,19 +174,29 @@ stableIdInfo id amode lf_info = mkCgIdInfo id NoVolatileLoc (StableLoc amode) heapIdInfo id offset lf_info = mkCgIdInfo id (VirHpLoc offset) NoStableLoc lf_info letNoEscapeIdInfo id sp lf_info = mkCgIdInfo id NoVolatileLoc (VirStkLNE sp) lf_info stackIdInfo id sp lf_info = mkCgIdInfo id NoVolatileLoc (VirStkLoc sp) lf_info -nodeIdInfo id offset lf_info = mkCgIdInfo id (VirNodeLoc offset) NoStableLoc lf_info +nodeIdInfo id offset lf_info = mkCgIdInfo id (VirNodeLoc (wORD_SIZE*offset)) NoStableLoc lf_info regIdInfo id reg lf_info = mkCgIdInfo id (RegLoc reg) NoStableLoc lf_info +taggedStableIdInfo id amode lf_info con + = mkTaggedCgIdInfo id NoVolatileLoc (StableLoc amode) lf_info con +taggedHeapIdInfo id offset lf_info con + = mkTaggedCgIdInfo id (VirHpLoc offset) NoStableLoc lf_info con +untagNodeIdInfo id offset lf_info tag + = mkCgIdInfo id (VirNodeLoc (wORD_SIZE*offset - tag)) NoStableLoc lf_info + + idInfoToAmode :: CgIdInfo -> FCode CmmExpr idInfoToAmode info = case cg_vol info of { RegLoc reg -> returnFC (CmmReg reg) ; - VirNodeLoc nd_off -> returnFC (CmmLoad (cmmOffsetW (CmmReg nodeReg) nd_off) mach_rep) ; - VirHpLoc hp_off -> getHpRelOffset hp_off ; + VirNodeLoc nd_off -> returnFC (CmmLoad (cmmOffsetB (CmmReg nodeReg) nd_off) + mach_rep) ; + VirHpLoc hp_off -> do { off <- getHpRelOffset hp_off + ; return $! maybeTag off }; NoVolatileLoc -> case cg_stb info of - StableLoc amode -> returnFC amode + StableLoc amode -> returnFC $! maybeTag amode VirStkLoc sp_off -> do { sp_rel <- getSpRelOffset sp_off ; return (CmmLoad sp_rel mach_rep) } @@ -177,6 +212,11 @@ idInfoToAmode info where mach_rep = argMachRep (cg_rep info) + maybeTag amode -- add the tag, if we have one + | tag == 0 = amode + | otherwise = cmmOffsetB amode tag + where tag = cg_tag info + cgIdInfoId :: CgIdInfo -> Id cgIdInfoId = cg_id @@ -389,6 +429,10 @@ bindNewToNode :: Id -> VirtualHpOffset -> LambdaFormInfo -> Code bindNewToNode id offset lf_info = addBindC id (nodeIdInfo id offset lf_info) +bindNewToUntagNode :: Id -> VirtualHpOffset -> LambdaFormInfo -> Int -> Code +bindNewToUntagNode id offset lf_info tag + = addBindC id (untagNodeIdInfo id offset lf_info tag) + -- Create a new temporary whose unique is that in the id, -- bind the id to it, and return the addressing mode for the -- temporary. diff --git a/compiler/codeGen/CgClosure.lhs b/compiler/codeGen/CgClosure.lhs index fabf434d07..86e13ab383 100644 --- a/compiler/codeGen/CgClosure.lhs +++ b/compiler/codeGen/CgClosure.lhs @@ -177,7 +177,14 @@ cgRhsClosure bndr cc bndr_info fvs upd_flag args body = do -- BUILD ITS INFO TABLE AND CODE ; forkClosureBody (do { -- Bind the fvs - let bind_fv (info, offset) + let + -- A function closure pointer may be tagged, so we + -- must take it into account when accessing the free variables. + mbtag = tagForArity (length args) + bind_fv (info, offset) + | Just tag <- mbtag + = bindNewToUntagNode (cgIdInfoId info) offset (cgIdInfoLF info) tag + | otherwise = bindNewToNode (cgIdInfoId info) offset (cgIdInfoLF info) ; mapCs bind_fv bind_details @@ -236,7 +243,7 @@ NB: Thunks cannot have a primitive type! closureCodeBody binder_info cl_info cc [{- No args i.e. thunk -}] body = do { body_absC <- getCgStmts $ do { tickyEnterThunk cl_info - ; ldvEnter (CmmReg nodeReg) -- NB: Node always points when profiling + ; ldvEnterClosure cl_info -- NB: Node always points when profiling ; thunkWrapper cl_info $ do -- We only enter cc after setting up update so -- that cc of enclosing scope will be recorded @@ -400,8 +407,19 @@ funWrapper :: ClosureInfo -- Closure whose code body this is funWrapper closure_info arg_regs reg_save_code fun_body = do { let node_points = nodeMustPointToIt (closureLFInfo closure_info) + {- + -- Debugging: check that R1 has the correct tag + ; let tag = funTag closure_info + ; whenC (tag /= 0 && node_points) $ do + l <- newLabelC + stmtC (CmmCondBranch (CmmMachOp mo_wordEq [cmmGetTag (CmmReg nodeReg), + CmmLit (mkIntCLit tag)]) l) + stmtC (CmmStore (CmmLit (mkWordCLit 0)) (CmmLit (mkWordCLit 0))) + labelC l + -} + -- Enter for Ldv profiling - ; whenC node_points (ldvEnter (CmmReg nodeReg)) + ; whenC node_points (ldvEnterClosure closure_info) -- GranSim yeild poin ; granYield arg_regs node_points diff --git a/compiler/codeGen/CgCon.lhs b/compiler/codeGen/CgCon.lhs index a2c8578d18..91d7098f3e 100644 --- a/compiler/codeGen/CgCon.lhs +++ b/compiler/codeGen/CgCon.lhs @@ -43,8 +43,10 @@ import Id import Type import PrelInfo import Outputable -import Util import ListSetOps +#ifdef DEBUG +import Util ( lengthIs ) +#endif \end{code} @@ -93,7 +95,7 @@ cgTopRhsCon id con args ; emitDataLits closure_label closure_rep -- RETURN - ; returnFC (id, stableIdInfo id (mkLblExpr closure_label) lf_info) } + ; returnFC (id, taggedStableIdInfo id (mkLblExpr closure_label) lf_info con) } \end{code} %************************************************************************ @@ -134,9 +136,10 @@ at all. \begin{code} buildDynCon binder cc con [] = do this_pkg <- getThisPackage - returnFC (stableIdInfo binder + returnFC (taggedStableIdInfo binder (mkLblExpr (mkClosureLabel this_pkg (dataConName con))) - (mkConLFInfo con)) + (mkConLFInfo con) + con) \end{code} The following three paragraphs about @Char@-like and @Int@-like @@ -170,7 +173,7 @@ buildDynCon binder cc con [arg_amode] offsetW = (val_int - mIN_INTLIKE) * (fixedHdrSize + 1) -- INTLIKE closures consist of a header and one word payload intlike_amode = CmmLit (cmmLabelOffW intlike_lbl offsetW) - ; returnFC (stableIdInfo binder intlike_amode (mkConLFInfo con)) } + ; returnFC (taggedStableIdInfo binder intlike_amode (mkConLFInfo con) con) } buildDynCon binder cc con [arg_amode] | maybeCharLikeCon con @@ -181,7 +184,7 @@ buildDynCon binder cc con [arg_amode] offsetW = (val_int - mIN_CHARLIKE) * (fixedHdrSize + 1) -- CHARLIKE closures consist of a header and one word payload charlike_amode = CmmLit (cmmLabelOffW charlike_lbl offsetW) - ; returnFC (stableIdInfo binder charlike_amode (mkConLFInfo con)) } + ; returnFC (taggedStableIdInfo binder charlike_amode (mkConLFInfo con) con) } \end{code} Now the general case. @@ -194,7 +197,7 @@ buildDynCon binder ccs con args (closure_info, amodes_w_offsets) = layOutDynConstr this_pkg con args ; hp_off <- allocDynClosure closure_info use_cc blame_cc amodes_w_offsets - ; returnFC (heapIdInfo binder hp_off lf_info) } + ; returnFC (taggedHeapIdInfo binder hp_off lf_info con) } where lf_info = mkConLFInfo con @@ -223,7 +226,9 @@ bindConArgs :: DataCon -> [Id] -> Code bindConArgs con args = do this_pkg <- getThisPackage let - bind_arg (arg, offset) = bindNewToNode arg offset (mkLFArgument arg) + -- The binding below forces the masking out of the tag bits + -- when accessing the constructor field. + bind_arg (arg, offset) = bindNewToUntagNode arg offset (mkLFArgument arg) (tagForCon con) (_, args_w_offsets) = layOutDynConstr this_pkg con (addIdReps args) -- ASSERT(not (isUnboxedTupleCon con)) return () @@ -386,11 +391,12 @@ cgTyCon tycon -- Put the table after the data constructor decls, because the -- datatype closure table (for enumeration types) -- to (say) PrelBase_$wTrue_closure, which is defined in code_stuff + -- Note that the closure pointers are tagged. ; extra <- if isEnumerationTyCon tycon then do tbl <- getCmm (emitRODataLits (mkLocalClosureTableLabel (tyConName tycon)) - [ CmmLabel (mkLocalClosureLabel (dataConName con)) + [ CmmLabelOff (mkLocalClosureLabel (dataConName con)) (tagForCon con) | con <- tyConDataCons tycon]) return [tbl] else @@ -434,6 +440,9 @@ cgDataCon data_con body_code = do { -- NB: We don't set CC when entering data (WDP 94/06) tickyReturnOldCon (length arg_things) + -- The case continuation code is expecting a tagged pointer + ; stmtC (CmmAssign nodeReg + (tagCons data_con (CmmReg nodeReg))) ; performReturn emitReturnInstr } -- noStmts: Ptr to thing already in Node diff --git a/compiler/codeGen/CgHeapery.lhs b/compiler/codeGen/CgHeapery.lhs index 3bba211aa1..b89452e1de 100644 --- a/compiler/codeGen/CgHeapery.lhs +++ b/compiler/codeGen/CgHeapery.lhs @@ -288,6 +288,9 @@ hpStkCheck cl_info is_fun reg_save_code code = noStmts | otherwise = oneStmt (CmmAssign nodeReg (CmmLit (CmmLabel closure_lbl))) + -- Strictly speaking, we should tag node here. But if + -- node doesn't point to the closure, the code for the closure + -- cannot depend on the value of R1 anyway, so we're safe. closure_lbl = closureLabelFromCI cl_info full_save_code = node_asst `plusStmts` reg_save_code diff --git a/compiler/codeGen/CgInfoTbls.hs b/compiler/codeGen/CgInfoTbls.hs index 4e38485455..e9751fa748 100644 --- a/compiler/codeGen/CgInfoTbls.hs +++ b/compiler/codeGen/CgInfoTbls.hs @@ -15,6 +15,7 @@ module CgInfoTbls ( stdInfoTableSizeB, entryCode, closureInfoPtr, getConstrTag, + cmmGetClosureType, infoTable, infoTableClosureType, infoTablePtrs, infoTableNonPtrs, funInfoTable, makeRelativeRefTo @@ -273,14 +274,24 @@ emitAlgReturnTarget emitAlgReturnTarget name branches mb_deflt fam_sz = do { blks <- getCgStmts $ - emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1) - -- NB: tag_expr is zero-based + -- is the constructor tag in the node reg? + if isSmallFamily fam_sz + then do -- yes, node has constr. tag + let tag_expr = cmmConstrTag1 (CmmReg nodeReg) + branches' = [(tag+1,branch)|(tag,branch)<-branches] + emitSwitch tag_expr branches' mb_deflt 1 fam_sz + else do -- no, get tag from info table + let -- Note that ptr _always_ has tag 1 + -- when the family size is big enough + untagged_ptr = cmmRegOffB nodeReg (-1) + tag_expr = getConstrTag (untagged_ptr) + emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1) ; lbl <- emitReturnTarget name blks ; return (lbl, Nothing) } -- Nothing: the internal branches in the switch don't have -- global labels, so we can't use them at the 'call site' where - tag_expr = getConstrTag (CmmReg nodeReg) + uniq = getUnique name -------------------------------- emitReturnInstr :: Code @@ -346,6 +357,14 @@ getConstrTag closure_ptr where info_table = infoTable (closureInfoPtr closure_ptr) +cmmGetClosureType :: CmmExpr -> CmmExpr +-- Takes a closure pointer, and return the closure type +-- obtained from the info table +cmmGetClosureType closure_ptr + = CmmMachOp (MO_U_Conv halfWordRep wordRep) [infoTableClosureType info_table] + where + info_table = infoTable (closureInfoPtr closure_ptr) + infoTable :: CmmExpr -> CmmExpr -- Takes an info pointer (the first word of a closure) -- and returns a pointer to the first word of the standard-form diff --git a/compiler/codeGen/CgPrimOp.hs b/compiler/codeGen/CgPrimOp.hs index d26d9c6901..e489d73646 100644 --- a/compiler/codeGen/CgPrimOp.hs +++ b/compiler/codeGen/CgPrimOp.hs @@ -183,8 +183,9 @@ emitPrimOp [res] AddrToHValueOp [arg] live = stmtC (CmmAssign (CmmLocal res) arg) -- #define dataToTagzh(r,a) r=(GET_TAG(((StgClosure *)a)->header.info)) +-- Note: argument may be tagged! emitPrimOp [res] DataToTagOp [arg] live - = stmtC (CmmAssign (CmmLocal res) (getConstrTag arg)) + = stmtC (CmmAssign (CmmLocal res) (getConstrTag (cmmUntag arg))) {- Freezing arrays-of-ptrs requires changing an info table, for the benefit of the generational collector. It needs to scavenge mutable diff --git a/compiler/codeGen/CgProf.hs b/compiler/codeGen/CgProf.hs index 27ee54c50d..651f0eaa82 100644 --- a/compiler/codeGen/CgProf.hs +++ b/compiler/codeGen/CgProf.hs @@ -20,7 +20,7 @@ module CgProf ( emitSetCCC, emitCCS, -- Lag/drag/void stuff - ldvEnter, ldvRecordCreate + ldvEnter, ldvEnterClosure, ldvRecordCreate ) where #include "HsVersions.h" @@ -242,9 +242,12 @@ enter_cost_centre closure_info ccs body where enc_ccs = CmmLit (mkCCostCentreStack ccs) re_entrant = closureReEntrant closure_info - node_ccs = costCentreFrom (CmmReg nodeReg) + node_ccs = costCentreFrom (cmmOffsetB (CmmReg nodeReg) (-node_tag)) is_box = isBox body + -- if this is a function, then node will be tagged; we must subract the tag + node_tag = funTag closure_info + -- set the current CCS when entering a PAP enterCostCentrePAP :: CmmExpr -> Code enterCostCentrePAP closure = @@ -448,9 +451,14 @@ ldvRecordCreate closure = stmtC $ CmmStore (ldvWord closure) dynLdvInit -- The closure is not IND or IND_OLDGEN because neither is considered for LDV -- profiling. -- +ldvEnterClosure :: ClosureInfo -> Code +ldvEnterClosure closure_info = ldvEnter (cmmOffsetB (CmmReg nodeReg) (-tag)) + where tag = funTag closure_info + -- don't forget to substract node's tag + ldvEnter :: CmmExpr -> Code -- Argument is a closure pointer -ldvEnter cl_ptr +ldvEnter cl_ptr = ifProfiling $ -- if (era > 0) { -- LDVW((c)) = (LDVW((c)) & LDV_CREATE_MASK) | @@ -458,6 +466,7 @@ ldvEnter cl_ptr emitIf (CmmMachOp mo_wordUGt [loadEra, CmmLit zeroCLit]) (stmtC (CmmStore ldv_wd new_ldv_wd)) where + -- don't forget to substract node's tag ldv_wd = ldvWord cl_ptr new_ldv_wd = cmmOrWord (cmmAndWord (CmmLoad ldv_wd wordRep) (CmmLit (mkWordCLit lDV_CREATE_MASK))) diff --git a/compiler/codeGen/CgTailCall.lhs b/compiler/codeGen/CgTailCall.lhs index 22cecb7249..952702674f 100644 --- a/compiler/codeGen/CgTailCall.lhs +++ b/compiler/codeGen/CgTailCall.lhs @@ -27,6 +27,7 @@ import CgUtils import CgTicky import ClosureInfo import SMRep +import MachOp import Cmm import CmmUtils import CLabel @@ -102,7 +103,8 @@ performTailCall fun_info arg_amodes pending_assts | otherwise = do { fun_amode <- idInfoToAmode fun_info - ; let node_asst = oneStmt (CmmAssign nodeReg fun_amode) + ; let assignSt = CmmAssign nodeReg fun_amode + node_asst = oneStmt assignSt opt_node_asst | nodeMustPointToIt lf_info = node_asst | otherwise = noStmts ; EndOfBlockInfo sp _ <- getEndOfBlockInfo @@ -113,8 +115,15 @@ performTailCall fun_info arg_amodes pending_assts -- Node must always point to things we enter EnterIt -> do { emitSimultaneously (node_asst `plusStmts` pending_assts) - ; let target = entryCode (closureInfoPtr (CmmReg nodeReg)) - ; doFinalJump sp False (stmtC (CmmJump target [])) } + ; let target = entryCode (closureInfoPtr (CmmReg nodeReg)) + enterClosure = stmtC (CmmJump target []) + -- If this is a scrutinee + -- let's check if the closure is a constructor + -- so we can directly jump to the alternatives switch + -- statement. + jumpInstr = getEndOfBlockInfo >>= + maybeSwitchOnCons enterClosure + ; doFinalJump sp False jumpInstr } -- A function, but we have zero arguments. It is already in WHNF, -- so we can just return it. @@ -149,6 +158,7 @@ performTailCall fun_info arg_amodes pending_assts ; directCall sp apply_lbl args extra_args (node_asst `plusStmts` pending_assts) + } -- A direct function call (possibly with some left-over arguments) @@ -169,8 +179,58 @@ performTailCall fun_info arg_amodes pending_assts where fun_name = idName (cgIdInfoId fun_info) lf_info = cgIdInfoLF fun_info - - + untag_node = CmmAssign nodeReg (cmmUntag (CmmReg nodeReg)) + -- Test if closure is a constructor + maybeSwitchOnCons enterClosure eob + | EndOfBlockInfo _ (CaseAlts lbl _ _) <- eob + = do { is_constr <- newLabelC + -- Is the pointer tagged? + -- Yes, jump to switch statement + ; stmtC (CmmCondBranch (cmmIsTagged (CmmReg nodeReg)) + is_constr) + -- No, enter the closure. + ; enterClosure + ; labelC is_constr + ; stmtC (CmmJump (CmmLit (CmmLabel lbl)) []) + } +{- + -- This is a scrutinee for a case expression + -- so let's see if we can directly inspect the closure + | EndOfBlockInfo _ (CaseAlts lbl _ _ _) <- eob + = do { no_cons <- newLabelC + -- Both the NCG and gcc optimize away the temp + ; z <- newTemp wordRep + ; stmtC (CmmAssign z tag_expr) + ; let tag = CmmReg z + -- Is the closure a cons? + ; stmtC (CmmCondBranch (cond1 tag) no_cons) + ; stmtC (CmmCondBranch (cond2 tag) no_cons) + -- Yes, jump to switch statement + ; stmtC (CmmJump (CmmLit (CmmLabel lbl)) []) + ; labelC no_cons + -- No, enter the closure. + ; enterClosure + } +-} + -- No case expression involved, enter the closure. + | otherwise + = do { stmtC untag_node + ; enterClosure + } + where + --cond1 tag = cmmULtWord tag lowCons + -- More efficient than the above? + tag_expr = cmmGetClosureType (CmmReg nodeReg) + cond1 tag = cmmEqWord tag (CmmLit (mkIntCLit 0)) + cond2 tag = cmmUGtWord tag highCons + lowCons = CmmLit (mkIntCLit 1) + -- CONSTR + highCons = CmmLit (mkIntCLit 8) + -- CONSTR_NOCAF_STATIC (from ClosureType.h) + + +untagCmmAssign (CmmAssign r cmmExpr) = CmmAssign r (cmmUntag cmmExpr) +untagCmmAssign stmt = stmt directCall sp lbl args extra_args assts = do let diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs index c66fc9ebdd..8d3578e1ef 100644 --- a/compiler/codeGen/CgUtils.hs +++ b/compiler/codeGen/CgUtils.hs @@ -22,12 +22,17 @@ module CgUtils ( callerSaveVolatileRegs, get_GlobalReg_addr, cmmAndWord, cmmOrWord, cmmNegate, cmmEqWord, cmmNeWord, + cmmUGtWord, cmmOffsetExprW, cmmOffsetExprB, cmmRegOffW, cmmRegOffB, cmmLabelOffW, cmmLabelOffB, cmmOffsetW, cmmOffsetB, cmmOffsetLitW, cmmOffsetLitB, cmmLoadIndexW, + cmmConstrTag, cmmConstrTag1, + + tagForCon, tagCons, isSmallFamily, + cmmUntag, cmmIsTagged, cmmGetTag, addToMem, addToMemE, mkWordCLit, @@ -43,6 +48,7 @@ module CgUtils ( import CgMonad import TyCon +import DataCon import Id import Constants import SMRep @@ -61,7 +67,9 @@ import Util import DynFlags import FastString import PackageConfig +#ifdef DEBUG import Outputable +#endif import Data.Char import Data.Bits @@ -164,6 +172,9 @@ cmmEqWord e1 e2 = CmmMachOp mo_wordEq [e1, e2] cmmULtWord e1 e2 = CmmMachOp mo_wordULt [e1, e2] cmmUGeWord e1 e2 = CmmMachOp mo_wordUGe [e1, e2] cmmUGtWord e1 e2 = CmmMachOp mo_wordUGt [e1, e2] +--cmmShlWord e1 e2 = CmmMachOp mo_wordShl [e1, e2] +--cmmUShrWord e1 e2 = CmmMachOp mo_wordUShr [e1, e2] +cmmSubWord e1 e2 = CmmMachOp mo_wordSub [e1, e2] cmmNegate :: CmmExpr -> CmmExpr cmmNegate (CmmLit (CmmInt n rep)) = CmmLit (CmmInt (-n) rep) @@ -172,6 +183,57 @@ cmmNegate e = CmmMachOp (MO_S_Neg (cmmExprRep e)) [e] blankWord :: CmmStatic blankWord = CmmUninitialised wORD_SIZE +-- Tagging -- +-- Tag bits mask +--cmmTagBits = CmmLit (mkIntCLit tAG_BITS) +cmmTagMask = CmmLit (mkIntCLit tAG_MASK) +cmmPointerMask = CmmLit (mkIntCLit (complement tAG_MASK)) + +-- Used to untag a possibly tagged pointer +-- A static label need not be untagged +cmmUntag e@(CmmLit (CmmLabel _)) = e +-- Default case +cmmUntag e = (e `cmmAndWord` cmmPointerMask) + +cmmGetTag e = (e `cmmAndWord` cmmTagMask) + +-- Test if a closure pointer is untagged +cmmIsTagged e = (e `cmmAndWord` cmmTagMask) + `cmmNeWord` CmmLit zeroCLit + +cmmConstrTag e = (e `cmmAndWord` cmmTagMask) `cmmSubWord` (CmmLit (mkIntCLit 1)) +-- Get constructor tag, but one based. +cmmConstrTag1 e = e `cmmAndWord` cmmTagMask + +{- + The family size of a data type (the number of constructors) + can be either: + * small, if the family size < 2**tag_bits + * big, otherwise. + + Small families can have the constructor tag in the tag + bits. + Big families only use the tag value 1 to represent + evaluatedness. +-} +isSmallFamily fam_size = fam_size <= mAX_PTR_TAG + +tagForCon con = tag + where + con_tag = dataConTagZ con + fam_size = tyConFamilySize (dataConTyCon con) + tag | isSmallFamily fam_size = con_tag + 1 + | otherwise = 1 + +--Tag an expression, to do: refactor, this appears in some other module. +tagCons con expr = cmmOffsetB expr (tagForCon con) + +-- Copied from CgInfoTbls.hs +-- We keep the *zero-indexed* tag in the srt_len field of the info +-- table of a data constructor. +dataConTagZ :: DataCon -> ConTagZ +dataConTagZ con = dataConTag con - fIRST_TAG + ----------------------- -- Making literals diff --git a/compiler/codeGen/ClosureInfo.lhs b/compiler/codeGen/ClosureInfo.lhs index d0d2ed98b2..d537a7b3d9 100644 --- a/compiler/codeGen/ClosureInfo.lhs +++ b/compiler/codeGen/ClosureInfo.lhs @@ -23,7 +23,7 @@ module ClosureInfo ( mkLFThunk, mkLFReEntrant, mkConLFInfo, mkSelectorLFInfo, mkApLFInfo, mkLFImported, mkLFArgument, mkLFLetNoEscape, - mkClosureInfo, mkConInfo, + mkClosureInfo, mkConInfo, maybeIsLFCon, closureSize, closureNonHdrSize, closureGoodStuffSize, closurePtrsSize, @@ -35,6 +35,7 @@ module ClosureInfo ( closureNeedsUpdSpace, closureIsThunk, closureSingleEntry, closureReEntrant, isConstrClosure_maybe, closureFunInfo, isStandardFormThunk, isKnownFun, + funTag, funTagLFInfo, tagForArity, enterIdLabel, enterLocalIdLabel, enterReturnPtLabel, @@ -58,6 +59,7 @@ module ClosureInfo ( #include "../includes/MachDeps.h" #include "HsVersions.h" +--import CgUtils import StgSyn import SMRep @@ -277,6 +279,10 @@ might_be_a_function ty mkConLFInfo :: DataCon -> LambdaFormInfo mkConLFInfo con = LFCon con +maybeIsLFCon :: LambdaFormInfo -> Maybe DataCon +maybeIsLFCon (LFCon con) = Just con +maybeIsLFCon _ = Nothing + mkSelectorLFInfo id offset updatable = LFThunk NotTopLevel False updatable (SelectorThunk offset) (might_be_a_function (idType id)) @@ -804,10 +810,32 @@ isConstrClosure_maybe (ConInfo { closureCon = data_con }) = Just data_con isConstrClosure_maybe _ = Nothing closureFunInfo :: ClosureInfo -> Maybe (Int, ArgDescr) -closureFunInfo (ClosureInfo { closureLFInfo = LFReEntrant _ arity _ arg_desc}) - = Just (arity, arg_desc) -closureFunInfo _ - = Nothing +closureFunInfo (ClosureInfo { closureLFInfo = lf_info }) = lfFunInfo lf_info +closureFunInfo _ = Nothing + +lfFunInfo :: LambdaFormInfo -> Maybe (Int, ArgDescr) +lfFunInfo (LFReEntrant _ arity _ arg_desc) = Just (arity, arg_desc) +lfFunInfo _ = Nothing + +funTag :: ClosureInfo -> Int +funTag (ClosureInfo { closureLFInfo = lf_info }) = funTagLFInfo lf_info +funTag _ = 0 + +-- maybe this should do constructor tags too? +funTagLFInfo :: LambdaFormInfo -> Int +funTagLFInfo lf + -- A function is tagged with its arity + | Just (arity,_) <- lfFunInfo lf, + Just tag <- tagForArity arity + = tag + + -- other closures (and unknown ones) are not tagged + | otherwise + = 0 + +tagForArity :: Int -> Maybe Int +tagForArity i | i <= mAX_PTR_TAG = Just i + | otherwise = Nothing \end{code} \begin{code} diff --git a/compiler/main/Constants.lhs b/compiler/main/Constants.lhs index 4f13af8828..2e0c4d4095 100644 --- a/compiler/main/Constants.lhs +++ b/compiler/main/Constants.lhs @@ -6,6 +6,8 @@ \begin{code} module Constants (module Constants) where +import Data.Bits (shiftL) + -- This magical #include brings in all the everybody-knows-these magic -- constants unfortunately, we need to be *explicit* about which one -- we want; if we just hope a -I... will get the right one, we could @@ -108,6 +110,14 @@ wORD_SIZE = (SIZEOF_HSWORD :: Int) wORD_SIZE_IN_BITS = wORD_SIZE * 8 :: Int \end{code} +Amount of pointer bits used for semi-tagging constructor closures + +\begin{code} +tAG_BITS = (TAG_BITS :: Int) +tAG_MASK = ((1 `shiftL` tAG_BITS) - 1) :: Int +mAX_PTR_TAG = tAG_MASK :: Int +\end{code} + Size of a C int, in bytes. May be smaller than wORD_SIZE. \begin{code} diff --git a/compiler/nativeGen/MachCodeGen.hs b/compiler/nativeGen/MachCodeGen.hs index 2c07016a4f..cc940749f9 100644 --- a/compiler/nativeGen/MachCodeGen.hs +++ b/compiler/nativeGen/MachCodeGen.hs @@ -2216,6 +2216,18 @@ condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do -- return (CondCode False cond code) +-- anything vs zero, using a mask +-- TODO: Add some sanity checking!!!! +condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk)) + | (CmmLit (CmmInt mask pk2)) <- o2 + = do + (x_reg, x_code) <- getSomeReg x + let + code = x_code `snocOL` + TEST pk (OpImm (ImmInteger mask)) (OpReg x_reg) + -- + return (CondCode False cond code) + -- anything vs zero condIntCode cond x (CmmLit (CmmInt 0 pk)) = do (x_reg, x_code) <- getSomeReg x |