diff options
Diffstat (limited to 'compiler/codeGen')
-rw-r--r-- | compiler/codeGen/CgCallConv.hs | 15 | ||||
-rw-r--r-- | compiler/codeGen/CgClosure.lhs | 31 | ||||
-rw-r--r-- | compiler/codeGen/CgCon.lhs | 6 | ||||
-rw-r--r-- | compiler/codeGen/CgExpr.lhs | 2 | ||||
-rw-r--r-- | compiler/codeGen/CgHeapery.lhs | 8 | ||||
-rw-r--r-- | compiler/codeGen/CgProf.hs | 195 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmBind.hs | 23 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmCon.hs | 4 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmExpr.hs | 2 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmHeap.hs | 5 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmProf.hs | 177 |
11 files changed, 82 insertions, 386 deletions
diff --git a/compiler/codeGen/CgCallConv.hs b/compiler/codeGen/CgCallConv.hs index 32f6727b04..345b65cd3c 100644 --- a/compiler/codeGen/CgCallConv.hs +++ b/compiler/codeGen/CgCallConv.hs @@ -28,6 +28,7 @@ module CgCallConv ( ) where import CgMonad +import CgProf import SMRep import OldCmm @@ -160,10 +161,16 @@ constructSlowCall amodes -- fewer arguments than we currently have. slowArgs :: [(CgRep,CmmExpr)] -> [(CgRep,CmmExpr)] slowArgs [] = [] -slowArgs amodes = (NonPtrArg, mkLblExpr stg_ap_pat) : args ++ slowArgs rest - where (arg_pat, args, rest) = matchSlowPattern amodes - stg_ap_pat = mkCmmRetInfoLabel rtsPackageId arg_pat - +slowArgs amodes + | opt_SccProfilingOn = save_cccs ++ this_pat ++ slowArgs rest + | otherwise = this_pat ++ slowArgs rest + where + (arg_pat, args, rest) = matchSlowPattern amodes + stg_ap_pat = mkCmmRetInfoLabel rtsPackageId arg_pat + this_pat = (NonPtrArg, mkLblExpr stg_ap_pat) : args + save_cccs = [(NonPtrArg, mkLblExpr save_cccs_lbl), (NonPtrArg, curCCS)] + save_cccs_lbl = mkCmmRetInfoLabel rtsPackageId (fsLit "stg_restore_cccs") + matchSlowPattern :: [(CgRep,CmmExpr)] -> (FastString, [(CgRep,CmmExpr)], [(CgRep,CmmExpr)]) matchSlowPattern amodes = (arg_pat, these, rest) diff --git a/compiler/codeGen/CgClosure.lhs b/compiler/codeGen/CgClosure.lhs index bccadb5a5d..32190a3c9c 100644 --- a/compiler/codeGen/CgClosure.lhs +++ b/compiler/codeGen/CgClosure.lhs @@ -108,7 +108,7 @@ cgStdRhsClosure -> [StgArg] -- payload -> FCode (Id, CgIdInfo) -cgStdRhsClosure bndr cc _bndr_info _fvs args body lf_info payload +cgStdRhsClosure bndr _cc _bndr_info _fvs _args _body lf_info payload = do -- AHA! A STANDARD-FORM THUNK { -- LAY OUT THE OBJECT amodes <- getArgAmodes payload @@ -122,10 +122,10 @@ cgStdRhsClosure bndr cc _bndr_info _fvs args body lf_info payload NoC_SRT -- No SRT for a std-form closure descr - ; (use_cc, blame_cc) <- chooseDynCostCentres cc args body +-- ; (use_cc, blame_cc) <- chooseDynCostCentres cc args body -- BUILD THE OBJECT - ; heap_offset <- allocDynClosure closure_info use_cc blame_cc amodes_w_offsets + ; heap_offset <- allocDynClosure closure_info curCCS curCCS amodes_w_offsets -- RETURN ; returnFC (bndr, heapIdInfo bndr heap_offset lf_info) } @@ -197,9 +197,9 @@ cgRhsClosure bndr cc bndr_info fvs upd_flag args body = do ; let to_amode (info, offset) = do { amode <- idInfoToAmode info ; return (amode, offset) } - ; (use_cc, blame_cc) <- chooseDynCostCentres cc args body +-- ; (use_cc, blame_cc) <- chooseDynCostCentres cc args body ; amodes_w_offsets <- mapFCs to_amode bind_details - ; heap_offset <- allocDynClosure closure_info use_cc blame_cc amodes_w_offsets + ; heap_offset <- allocDynClosure closure_info curCCS curCCS amodes_w_offsets -- RETURN ; returnFC (bndr, heapIdInfo bndr heap_offset lf_info) } @@ -239,16 +239,15 @@ So it should set up an update frame (if it is shared). NB: Thunks cannot have a primitive type! \begin{code} -closureCodeBody _binder_info cl_info cc [{- No args i.e. thunk -}] body = do +closureCodeBody _binder_info cl_info _cc [{- No args i.e. thunk -}] body = do { body_absC <- getCgStmts $ do { tickyEnterThunk cl_info ; ldvEnterClosure cl_info -- NB: Node always points when profiling ; thunkWrapper cl_info $ do -- We only enter cc after setting up update so -- that cc of enclosing scope will be recorded - -- in update frame CAF/DICT functions will be - -- subsumed by this enclosing cc - { enterCostCentre cl_info cc body + -- in the update frame + { enterCostCentreThunk (CmmReg nodeReg) ; cgExpr body } } @@ -307,16 +306,14 @@ mkFunEntryCode cl_info cc reg_args stk_args sp_top reg_save_code body = do ; bindArgsToStack stk_args ; setRealAndVirtualSp sp_top - -- Enter the cost-centre, if required - -- ToDo: It's not clear why this is outside the funWrapper, - -- but the tickyEnterFun is inside. Perhaps we can put - -- them together? - ; enterCostCentre cl_info cc body - - -- Do the business + -- Do the business ; funWrapper cl_info reg_args reg_save_code $ do { tickyEnterFun cl_info - ; cgExpr body } + ; enterCostCentreFun cc $ + CmmMachOp mo_wordSub [ CmmReg nodeReg + , CmmLit (mkIntCLit (funTag cl_info)) ] + + ; cgExpr body } } \end{code} diff --git a/compiler/codeGen/CgCon.lhs b/compiler/codeGen/CgCon.lhs index 26489e5945..17bb9d0ad8 100644 --- a/compiler/codeGen/CgCon.lhs +++ b/compiler/codeGen/CgCon.lhs @@ -223,9 +223,9 @@ buildDynCon' _ binder ccs con args where lf_info = mkConLFInfo con - use_cc -- cost-centre to stick in the object - | currentOrSubsumedCCS ccs = curCCS - | otherwise = CmmLit (mkCCostCentreStack ccs) + use_cc -- cost-centre to stick in the object + | isCurrentCCS ccs = curCCS + | otherwise = panic "buildDynCon: non-current CCS not implemented" blame_cc = use_cc -- cost-centre on which to blame the alloc (same) \end{code} diff --git a/compiler/codeGen/CgExpr.lhs b/compiler/codeGen/CgExpr.lhs index 1f16c1feee..5ffa4e550d 100644 --- a/compiler/codeGen/CgExpr.lhs +++ b/compiler/codeGen/CgExpr.lhs @@ -258,7 +258,7 @@ SCC expressions are treated specially. They set the current cost centre. \begin{code} -cgExpr (StgSCC cc expr) = do emitSetCCC cc; cgExpr expr +cgExpr (StgSCC cc tick push expr) = do emitSetCCC cc tick push; cgExpr expr \end{code} %******************************************************** diff --git a/compiler/codeGen/CgHeapery.lhs b/compiler/codeGen/CgHeapery.lhs index ebdde2d31a..9e9cc8d1d8 100644 --- a/compiler/codeGen/CgHeapery.lhs +++ b/compiler/codeGen/CgHeapery.lhs @@ -575,13 +575,7 @@ allocDynClosure cl_info use_cc _blame_cc amodes_with_offsets -- SAY WHAT WE ARE ABOUT TO DO ; profDynAlloc cl_info use_cc - -- ToDo: This is almost certainly wrong - -- We're ignoring blame_cc. But until we've - -- fixed the boxing hack in chooseDynCostCentres etc, - -- we're worried about making things worse by "fixing" - -- this part to use blame_cc! - - ; tickyDynAlloc cl_info + ; tickyDynAlloc cl_info -- ALLOCATE THE OBJECT ; base <- getHpRelOffset info_offset diff --git a/compiler/codeGen/CgProf.hs b/compiler/codeGen/CgProf.hs index b58fbb4238..b43751361c 100644 --- a/compiler/codeGen/CgProf.hs +++ b/compiler/codeGen/CgProf.hs @@ -10,13 +10,13 @@ module CgProf ( mkCCostCentre, mkCCostCentreStack, -- Cost-centre Profiling - dynProfHdr, profDynAlloc, profAlloc, staticProfHdr, initUpdFrameProf, - enterCostCentre, enterCostCentrePAP, enterCostCentreThunk, - chooseDynCostCentres, - costCentreFrom, + dynProfHdr, profDynAlloc, profAlloc, staticProfHdr, initUpdFrameProf, + enterCostCentreThunk, + enterCostCentreFun, + costCentreFrom, curCCS, curCCSAddr, emitCostCentreDecl, emitCostCentreStackDecl, - emitSetCCC, emitCCS, + emitSetCCC, -- Lag/drag/void stuff ldvEnter, ldvEnterClosure, ldvRecordCreate @@ -40,10 +40,8 @@ import OldCmm import OldCmmUtils import CLabel -import Id import qualified Module import CostCentre -import StgSyn import StaticFlags import FastString import Module @@ -108,6 +106,9 @@ profDynAlloc cl_info ccs -- | Record the allocation of a closure (size is given by a CmmExpr) -- The size must be in words, because the allocation counter in a CCS counts -- in words. +-- +-- This API is used by the @CCS_ALLOC()@ macro in @.cmm@ code. +-- profAlloc :: CmmExpr -> CmmExpr -> Code profAlloc words ccs = ifProfiling $ @@ -121,160 +122,21 @@ profAlloc words ccs where alloc_rep = typeWidth REP_CostCentreStack_mem_alloc --- ---------------------------------------------------------------------- --- Setting the cost centre in a new closure - -chooseDynCostCentres :: CostCentreStack - -> [Id] -- Args - -> StgExpr -- Body - -> FCode (CmmExpr, CmmExpr) --- Called when alllcating a closure --- Tells which cost centre to put in the object, and which --- to blame the cost of allocation on -chooseDynCostCentres ccs args body = do - -- Cost-centre we record in the object - use_ccs <- emitCCS ccs - - -- Cost-centre on whom we blame the allocation - let blame_ccs - | null args && isBox body = CmmLit (mkCCostCentreStack overheadCCS) - | otherwise = use_ccs - - return (use_ccs, blame_ccs) - - --- Some CostCentreStacks are a sequence of pushes on top of CCCS. --- These pushes must be performed before we can refer to the stack in --- an expression. -emitCCS :: CostCentreStack -> FCode CmmExpr -emitCCS ccs = push_em (ccsExpr ccs') (reverse cc's) - where - (cc's, ccs') = decomposeCCS ccs - - push_em ccs [] = return ccs - push_em ccs (cc:rest) = do - tmp <- newTemp bWord -- TODO FIXME NOW - pushCostCentre tmp ccs cc - push_em (CmmReg (CmmLocal tmp)) rest - -ccsExpr :: CostCentreStack -> CmmExpr -ccsExpr ccs - | isCurrentCCS ccs = curCCS - | otherwise = CmmLit (mkCCostCentreStack ccs) - - -isBox :: StgExpr -> Bool --- If it's an utterly trivial RHS, then it must be --- one introduced by boxHigherOrderArgs for profiling, --- so we charge it to "OVERHEAD". --- This looks like a GROSS HACK to me --SDM -isBox (StgApp _ []) = True -isBox _ = False - - -- ----------------------------------------------------------------------- -- Setting the current cost centre on entry to a closure --- For lexically scoped profiling we have to load the cost centre from --- the closure entered, if the costs are not supposed to be inherited. --- This is done immediately on entering the fast entry point. - --- Load current cost centre from closure, if not inherited. --- Node is guaranteed to point to it, if profiling and not inherited. - -enterCostCentre - :: ClosureInfo - -> CostCentreStack - -> StgExpr -- The RHS of the closure - -> Code - --- We used to have a special case for bindings of form --- f = g True --- where g has arity 2. The RHS is a thunk, but we don't --- need to update it; and we want to subsume costs. --- We don't have these sort of PAPs any more, so the special --- case has gone away. - -enterCostCentre closure_info ccs body - = ifProfiling $ - ASSERT2(not (noCCSAttached ccs), ppr (closureName closure_info) <+> ppr ccs) - enter_cost_centre closure_info ccs body - -enter_cost_centre :: ClosureInfo -> CostCentreStack -> StgExpr -> Code -enter_cost_centre closure_info ccs body - | isSubsumedCCS ccs - = ASSERT(isToplevClosure closure_info) - ASSERT(re_entrant) - enter_ccs_fsub - - | isDerivedFromCurrentCCS ccs - = do { - if re_entrant && not is_box - then - enter_ccs_fun node_ccs - else - stmtC (CmmStore curCCSAddr node_ccs) - - -- don't forget to bump the scc count. This closure might have been - -- of the form let x = _scc_ "x" e in ...x..., which the SCCfinal - -- pass has turned into simply let x = e in ...x... and attached - -- the _scc_ as PushCostCentre(x,CCCS) on the x closure. So that - -- we don't lose the scc counter, bump it in the entry code for x. - -- ToDo: for a multi-push we should really bump the counter for - -- each of the intervening CCSs, not just the top one. - ; when (not (isCurrentCCS ccs)) $ - stmtC (bumpSccCount curCCS) - } - - | isCafCCS ccs - = ASSERT(isToplevClosure closure_info) - ASSERT(not re_entrant) - do { -- This is just a special case of the isDerivedFromCurrentCCS - -- case above. We could delete this, but it's a micro - -- optimisation and saves a bit of code. - stmtC (CmmStore curCCSAddr enc_ccs) - ; stmtC (bumpSccCount node_ccs) - } - - | otherwise - = panic "enterCostCentre" - where - enc_ccs = CmmLit (mkCCostCentreStack ccs) - re_entrant = closureReEntrant closure_info - node_ccs = costCentreFrom (cmmOffsetB (CmmReg nodeReg) (-node_tag)) - is_box = isBox body - - -- if this is a function, then node will be tagged; we must subract the tag - node_tag = funTag closure_info - --- set the current CCS when entering a PAP -enterCostCentrePAP :: CmmExpr -> Code -enterCostCentrePAP closure = - ifProfiling $ do - enter_ccs_fun (costCentreFrom closure) - enteringPAP 1 - enterCostCentreThunk :: CmmExpr -> Code enterCostCentreThunk closure = ifProfiling $ do stmtC $ CmmStore curCCSAddr (costCentreFrom closure) -enter_ccs_fun :: CmmExpr -> Code -enter_ccs_fun stack = emitRtsCall rtsPackageId (fsLit "EnterFunCCS") [CmmHinted stack AddrHint] False - -- ToDo: vols - -enter_ccs_fsub :: Code -enter_ccs_fsub = enteringPAP 0 - --- When entering a PAP, EnterFunCCS is called by both the PAP entry --- code and the function entry code; we don't want the function's --- entry code to also update CCCS in the event that it was called via --- a PAP, so we set the flag entering_PAP to indicate that we are --- entering via a PAP. -enteringPAP :: Integer -> Code -enteringPAP n - = stmtC (CmmStore (CmmLit (CmmLabel (mkCmmDataLabel rtsPackageId (fsLit "entering_PAP")))) - (CmmLit (CmmInt n cIntWidth))) +enterCostCentreFun :: CostCentreStack -> CmmExpr -> Code +enterCostCentreFun ccs closure = + ifProfiling $ do + if isCurrentCCS ccs + then emitRtsCall rtsPackageId (fsLit "enterFunCCS") + [CmmHinted (costCentreFrom closure) AddrHint] False + else return () -- top-level function, nothing to do ifProfiling :: Code -> Code ifProfiling code @@ -286,7 +148,6 @@ ifProfilingL xs | opt_SccProfilingOn = xs | otherwise = [] - -- --------------------------------------------------------------------------- -- Initialising Cost Centres & CCSs @@ -306,15 +167,15 @@ emitCostCentreDecl cc = do modl, -- char *module, zero, -- StgWord time_ticks zero64, -- StgWord64 mem_alloc - subsumed, -- StgInt is_caf - zero -- struct _CostCentre *link + is_caf, -- StgInt is_caf + zero -- struct _CostCentre *link ] ; emitDataLits (mkCCLabel cc) lits } where - subsumed | isCafCC cc = mkIntCLit (ord 'c') -- 'c' == is a CAF - | otherwise = mkIntCLit (ord 'B') -- 'B' == is boring - + is_caf | isCafCC cc = mkIntCLit (ord 'c') -- 'c' == is a CAF + | otherwise = zero + emitCostCentreStackDecl :: CostCentreStack @@ -349,23 +210,21 @@ sizeof_ccs_words -- --------------------------------------------------------------------------- -- Set the current cost centre stack -emitSetCCC :: CostCentre -> Code -emitSetCCC cc +emitSetCCC :: CostCentre -> Bool -> Bool -> Code +emitSetCCC cc tick push | not opt_SccProfilingOn = nopC | otherwise = do tmp <- newTemp bWord -- TODO FIXME NOW - ASSERT( sccAbleCostCentre cc ) - pushCostCentre tmp curCCS cc - stmtC (CmmStore curCCSAddr (CmmReg (CmmLocal tmp))) - when (isSccCountCostCentre cc) $ - stmtC (bumpSccCount curCCS) + pushCostCentre tmp curCCS cc + when tick $ stmtC (bumpSccCount (CmmReg (CmmLocal tmp))) + when push $ stmtC (CmmStore curCCSAddr (CmmReg (CmmLocal tmp))) pushCostCentre :: LocalReg -> CmmExpr -> CostCentre -> Code pushCostCentre result ccs cc = emitRtsCallWithResult result AddrHint rtsPackageId - (fsLit "PushCostCentre") [CmmHinted ccs AddrHint, - CmmHinted (CmmLit (mkCCostCentre cc)) AddrHint] + (fsLit "pushCostCentre") [CmmHinted ccs AddrHint, + CmmHinted (CmmLit (mkCCostCentre cc)) AddrHint] False bumpSccCount :: CmmExpr -> CmmStmt diff --git a/compiler/codeGen/StgCmmBind.hs b/compiler/codeGen/StgCmmBind.hs index 67dcd2d90f..4f60d0a6cf 100644 --- a/compiler/codeGen/StgCmmBind.hs +++ b/compiler/codeGen/StgCmmBind.hs @@ -151,8 +151,8 @@ cgRhs :: Id -> StgRhs -> FCode (CgIdInfo, CmmAGraph) -- The returned values are the binding for the environment -- and the Initialization Code that witnesses the binding -cgRhs name (StgRhsCon maybe_cc con args) - = buildDynCon name maybe_cc con args +cgRhs name (StgRhsCon cc con args) + = buildDynCon name cc con args cgRhs name (StgRhsClosure cc bi fvs upd_flag srt args body) = mkRhsClosure name cc bi (nonVoidIds fvs) upd_flag srt args body @@ -300,12 +300,13 @@ mkRhsClosure bndr cc _ fvs upd_flag srt args body (length args) body fv_details -- BUILD THE OBJECT - ; (use_cc, blame_cc) <- chooseDynCostCentres cc args body +-- ; (use_cc, blame_cc) <- chooseDynCostCentres cc args body + ; let use_cc = curCCS; blame_cc = curCCS ; emit (mkComment $ mkFastString "calling allocDynClosure") ; let toVarArg (NonVoid a, off) = (NonVoid (StgVarArg a), off) ; let info_tbl = mkCmmInfo closure_info ; (tmp, init) <- allocDynClosure info_tbl lf_info use_cc blame_cc - (map toVarArg fv_details) + (map toVarArg fv_details) -- RETURN ; regIdInfo bndr lf_info tmp init } @@ -324,7 +325,7 @@ cgStdThunk -> [StgArg] -- payload -> FCode (CgIdInfo, CmmAGraph) -cgStdThunk bndr cc _bndr_info body lf_info payload +cgStdThunk bndr _cc _bndr_info _body lf_info payload = do -- AHA! A STANDARD-FORM THUNK { -- LAY OUT THE OBJECT mod_name <- getModuleName @@ -337,7 +338,8 @@ cgStdThunk bndr cc _bndr_info body lf_info payload NoC_SRT -- No SRT for a std-form closure descr - ; (use_cc, blame_cc) <- chooseDynCostCentres cc [{- no args-}] body +-- ; (use_cc, blame_cc) <- chooseDynCostCentres cc [{- no args-}] body + ; let use_cc = curCCS; blame_cc = curCCS -- BUILD THE OBJECT ; let info_tbl = mkCmmInfo closure_info @@ -394,7 +396,7 @@ closureCodeBody top_lvl bndr cl_info cc args arity body fv_details lf_info = closureLFInfo cl_info info_tbl = mkCmmInfo cl_info -closureCodeBody top_lvl bndr cl_info cc args arity body fv_details +closureCodeBody top_lvl bndr cl_info _cc args arity body fv_details = ASSERT( length args > 0 ) do { -- Allocate the global ticky counter, -- and establish the ticky-counter @@ -424,8 +426,7 @@ closureCodeBody top_lvl bndr cl_info cc args arity body fv_details -- Main payload ; entryHeapCheck cl_info offset node' arity arg_regs $ do - { enterCostCentre cl_info cc body - ; fv_bindings <- mapM bind_fv fv_details + { fv_bindings <- mapM bind_fv fv_details -- Load free vars out of closure *after* -- heap check, to reduce live vars over check ; if node_points then load_fvs node lf_info fv_bindings @@ -473,7 +474,7 @@ mkSlowEntryCode cl_info arg_regs -- function closure is already in `Node' ----------------------------------------- thunkCode :: ClosureInfo -> [(NonVoid Id, VirtualHpOffset)] -> CostCentreStack -> LocalReg -> Int -> StgExpr -> FCode () -thunkCode cl_info fv_details cc node arity body +thunkCode cl_info fv_details _cc node arity body = do { let node_points = nodeMustPointToIt (closureLFInfo cl_info) node' = if node_points then Just node else Nothing ; tickyEnterThunk cl_info @@ -493,7 +494,7 @@ thunkCode cl_info fv_details cc node arity body -- that cc of enclosing scope will be recorded -- in update frame CAF/DICT functions will be -- subsumed by this enclosing cc - do { enterCostCentre cl_info cc body + do { enterCostCentreThunk (CmmReg nodeReg) ; let lf_info = closureLFInfo cl_info ; fv_bindings <- mapM bind_fv fv_details ; load_fvs node lf_info fv_bindings diff --git a/compiler/codeGen/StgCmmCon.hs b/compiler/codeGen/StgCmmCon.hs index a357db4d05..e17ac4fd32 100644 --- a/compiler/codeGen/StgCmmCon.hs +++ b/compiler/codeGen/StgCmmCon.hs @@ -216,8 +216,8 @@ buildDynCon' _ binder ccs con args lf_info = mkConLFInfo con use_cc -- cost-centre to stick in the object - | currentOrSubsumedCCS ccs = curCCS - | otherwise = CmmLit (mkCCostCentreStack ccs) + | isCurrentCCS ccs = curCCS + | otherwise = panic "buildDynCon: non-current CCS not implemented" blame_cc = use_cc -- cost-centre on which to blame the alloc (same) diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs index cb68f51bd4..bc4e9df44f 100644 --- a/compiler/codeGen/StgCmmExpr.hs +++ b/compiler/codeGen/StgCmmExpr.hs @@ -59,7 +59,7 @@ cgExpr (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _res_ty) = cgExpr (StgOpApp op args ty) = cgOpApp op args ty cgExpr (StgConApp con args) = cgConApp con args -cgExpr (StgSCC cc expr) = do { emitSetCCC cc; cgExpr expr } +cgExpr (StgSCC cc tick push expr) = do { emitSetCCC cc tick push; cgExpr expr } cgExpr (StgTick m n expr) = do { emit (mkTickBox m n); cgExpr expr } cgExpr (StgLit lit) = do cmm_lit <- cgLit lit emitReturn [CmmLit cmm_lit] diff --git a/compiler/codeGen/StgCmmHeap.hs b/compiler/codeGen/StgCmmHeap.hs index 857fd38e27..690b0a9622 100644 --- a/compiler/codeGen/StgCmmHeap.hs +++ b/compiler/codeGen/StgCmmHeap.hs @@ -97,11 +97,6 @@ allocDynClosureCmm info_tbl lf_info use_cc _blame_cc amodes_w_offsets ; let rep = cit_rep info_tbl ; tickyDynAlloc rep lf_info ; profDynAlloc rep use_cc - -- ToDo: This is almost certainly wrong - -- We're ignoring blame_cc. But until we've - -- fixed the boxing hack in chooseDynCostCentres etc, - -- we're worried about making things worse by "fixing" - -- this part to use blame_cc! -- FIND THE OFFSET OF THE INFO-PTR WORD ; let info_offset = virt_hp + 1 diff --git a/compiler/codeGen/StgCmmProf.hs b/compiler/codeGen/StgCmmProf.hs index b1aca6e37e..17b61c6a59 100644 --- a/compiler/codeGen/StgCmmProf.hs +++ b/compiler/codeGen/StgCmmProf.hs @@ -12,11 +12,10 @@ module StgCmmProf ( -- Cost-centre Profiling dynProfHdr, profDynAlloc, profAlloc, staticProfHdr, initUpdFrameProf, - enterCostCentre, enterCostCentrePAP, enterCostCentreThunk, - chooseDynCostCentres, - costCentreFrom, + enterCostCentreThunk, + costCentreFrom, curCCS, curCCSAddr, - emitSetCCC, emitCCS, + emitSetCCC, saveCurrentCostCentre, restoreCurrentCostCentre, @@ -43,17 +42,14 @@ import Cmm import CmmUtils import CLabel -import Id import qualified Module import CostCentre -import StgSyn import StaticFlags import FastString import Module -import Constants -- Lots of field offsets +import Constants -- Lots of field offsets import Outputable -import Data.Char import Control.Monad ----------------------------------------------------------------------------- @@ -177,161 +173,14 @@ profAlloc words ccs where alloc_rep = REP_CostCentreStack_mem_alloc --- ---------------------------------------------------------------------- --- Setting the cost centre in a new closure - -chooseDynCostCentres :: CostCentreStack - -> [Id] -- Args - -> StgExpr -- Body - -> FCode (CmmExpr, CmmExpr) --- Called when allocating a closure --- Tells which cost centre to put in the object, and which --- to blame the cost of allocation on -chooseDynCostCentres ccs args body = do - -- Cost-centre we record in the object - use_ccs <- emitCCS ccs - - -- Cost-centre on whom we blame the allocation - let blame_ccs - | null args && isBox body = CmmLit (mkCCostCentreStack overheadCCS) - | otherwise = use_ccs - - return (use_ccs, blame_ccs) - - --- Some CostCentreStacks are a sequence of pushes on top of CCCS. --- These pushes must be performed before we can refer to the stack in --- an expression. -emitCCS :: CostCentreStack -> FCode CmmExpr -emitCCS ccs = push_em (ccsExpr ccs') (reverse cc's) - where - (cc's, ccs') = decomposeCCS ccs - - push_em ccs [] = return ccs - push_em ccs (cc:rest) = do - tmp <- newTemp ccsType - pushCostCentre tmp ccs cc - push_em (CmmReg (CmmLocal tmp)) rest - -ccsExpr :: CostCentreStack -> CmmExpr -ccsExpr ccs - | isCurrentCCS ccs = curCCS - | otherwise = CmmLit (mkCCostCentreStack ccs) - - -isBox :: StgExpr -> Bool --- If it's an utterly trivial RHS, then it must be --- one introduced by boxHigherOrderArgs for profiling, --- so we charge it to "OVERHEAD". --- This looks like a GROSS HACK to me --SDM -isBox (StgApp _ []) = True -isBox _ = False - - -- ----------------------------------------------------------------------- -- Setting the current cost centre on entry to a closure --- For lexically scoped profiling we have to load the cost centre from --- the closure entered, if the costs are not supposed to be inherited. --- This is done immediately on entering the fast entry point. - --- Load current cost centre from closure, if not inherited. --- Node is guaranteed to point to it, if profiling and not inherited. - -enterCostCentre - :: ClosureInfo - -> CostCentreStack - -> StgExpr -- The RHS of the closure - -> FCode () - --- We used to have a special case for bindings of form --- f = g True --- where g has arity 2. The RHS is a thunk, but we don't --- need to update it; and we want to subsume costs. --- We don't have these sort of PAPs any more, so the special --- case has gone away. - -enterCostCentre closure_info ccs body - = ifProfiling $ - ASSERT2(not (noCCSAttached ccs), ppr (closureName closure_info) <+> ppr ccs) - enter_cost_centre closure_info ccs body - -enter_cost_centre :: ClosureInfo -> CostCentreStack -> StgExpr -> FCode () -enter_cost_centre closure_info ccs body - | isSubsumedCCS ccs - = ASSERT(isToplevClosure closure_info) - ASSERT(re_entrant) - enter_ccs_fsub - - | isDerivedFromCurrentCCS ccs - = do { - if re_entrant && not is_box - then - enter_ccs_fun node_ccs - else - emit (mkStore curCCSAddr node_ccs) - - -- don't forget to bump the scc count. This closure might have been - -- of the form let x = _scc_ "x" e in ...x..., which the SCCfinal - -- pass has turned into simply let x = e in ...x... and attached - -- the _scc_ as PushCostCentre(x,CCCS) on the x closure. So that - -- we don't lose the scc counter, bump it in the entry code for x. - -- ToDo: for a multi-push we should really bump the counter for - -- each of the intervening CCSs, not just the top one. - ; when (not (isCurrentCCS ccs)) $ - emit (bumpSccCount curCCS) - } - - | isCafCCS ccs - = ASSERT(isToplevClosure closure_info) - ASSERT(not re_entrant) - do { -- This is just a special case of the isDerivedFromCurrentCCS - -- case above. We could delete this, but it's a micro - -- optimisation and saves a bit of code. - emit (mkStore curCCSAddr enc_ccs) - ; emit (bumpSccCount node_ccs) - } - - | otherwise - = panic "enterCostCentre" - where - enc_ccs = CmmLit (mkCCostCentreStack ccs) - re_entrant = closureReEntrant closure_info - node_ccs = costCentreFrom (cmmOffsetB (CmmReg nodeReg) (-node_tag)) - is_box = isBox body - - -- if this is a function, then node will be tagged; we must subract the tag - node_tag = funTag closure_info - --- set the current CCS when entering a PAP -enterCostCentrePAP :: CmmExpr -> FCode () -enterCostCentrePAP closure = - ifProfiling $ do - enter_ccs_fun (costCentreFrom closure) - enteringPAP 1 - enterCostCentreThunk :: CmmExpr -> FCode () enterCostCentreThunk closure = ifProfiling $ do emit $ mkStore curCCSAddr (costCentreFrom closure) -enter_ccs_fun :: CmmExpr -> FCode () -enter_ccs_fun stack = emitRtsCall rtsPackageId (fsLit "EnterFunCCS") [(stack,AddrHint)] False - -- ToDo: vols - -enter_ccs_fsub :: FCode () -enter_ccs_fsub = enteringPAP 0 - --- When entering a PAP, EnterFunCCS is called by both the PAP entry --- code and the function entry code; we don't want the function's --- entry code to also update CCCS in the event that it was called via --- a PAP, so we set the flag entering_PAP to indicate that we are --- entering via a PAP. -enteringPAP :: Integer -> FCode () -enteringPAP n - = emit (mkStore (CmmLit (CmmLabel (mkCmmDataLabel rtsPackageId (fsLit "entering_PAP")))) - (CmmLit (CmmInt n cIntWidth))) - ifProfiling :: FCode () -> FCode () ifProfiling code | opt_SccProfilingOn = code @@ -368,14 +217,10 @@ emitCostCentreDecl cc = do modl, -- char *module, zero, -- StgWord time_ticks zero64, -- StgWord64 mem_alloc - subsumed, -- StgInt is_caf - zero -- struct _CostCentre *link + zero -- struct _CostCentre *link ] ; emitDataLits (mkCCLabel cc) lits } - where - subsumed | isCafCC cc = mkIntCLit (ord 'c') -- 'c' == is a CAF - | otherwise = mkIntCLit (ord 'B') -- 'B' == is boring emitCostCentreStackDecl :: CostCentreStack -> FCode () emitCostCentreStackDecl ccs @@ -408,16 +253,14 @@ sizeof_ccs_words -- --------------------------------------------------------------------------- -- Set the current cost centre stack -emitSetCCC :: CostCentre -> FCode () -emitSetCCC cc +emitSetCCC :: CostCentre -> Bool -> Bool -> FCode () +emitSetCCC cc tick push | not opt_SccProfilingOn = nopC | otherwise = do tmp <- newTemp ccsType -- TODO FIXME NOW - ASSERT( sccAbleCostCentre cc ) - pushCostCentre tmp curCCS cc - emit (mkStore curCCSAddr (CmmReg (CmmLocal tmp))) - when (isSccCountCostCentre cc) $ - emit (bumpSccCount curCCS) + pushCostCentre tmp curCCS cc + when tick $ emit (bumpSccCount (CmmReg (CmmLocal tmp))) + when push $ emit (mkStore curCCSAddr (CmmReg (CmmLocal tmp))) pushCostCentre :: LocalReg -> CmmExpr -> CostCentre -> FCode () pushCostCentre result ccs cc |