diff options
Diffstat (limited to 'compiler/codeGen/StgCmmProf.hs')
| -rw-r--r-- | compiler/codeGen/StgCmmProf.hs | 553 |
1 files changed, 553 insertions, 0 deletions
diff --git a/compiler/codeGen/StgCmmProf.hs b/compiler/codeGen/StgCmmProf.hs new file mode 100644 index 0000000000..f442295d25 --- /dev/null +++ b/compiler/codeGen/StgCmmProf.hs @@ -0,0 +1,553 @@ +{-# OPTIONS -w #-} +-- Lots of missing type sigs etc + +----------------------------------------------------------------------------- +-- +-- Code generation for profiling +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +module StgCmmProf ( + initCostCentres, ccType, ccsType, + mkCCostCentre, mkCCostCentreStack, + + -- Cost-centre Profiling + dynProfHdr, profDynAlloc, profAlloc, staticProfHdr, initUpdFrameProf, + enterCostCentre, enterCostCentrePAP, enterCostCentreThunk, + chooseDynCostCentres, + costCentreFrom, + curCCS, curCCSAddr, + emitSetCCC, emitCCS, + + saveCurrentCostCentre, restoreCurrentCostCentre, + + -- Lag/drag/void stuff + ldvEnter, ldvEnterClosure, ldvRecordCreate + ) where + +#include "HsVersions.h" +#include "MachDeps.h" + -- For WORD_SIZE_IN_BITS only. +#include "../includes/Constants.h" + -- For LDV_CREATE_MASK, LDV_STATE_USE + -- which are StgWords +#include "../includes/DerivedConstants.h" + -- For REP_xxx constants, which are MachReps + +import StgCmmClosure +import StgCmmUtils +import StgCmmMonad +import SMRep + +import MkZipCfgCmm +import Cmm +import TyCon ( PrimRep(..) ) +import CmmUtils +import CLabel + +import Id +import qualified Module +import CostCentre +import StgSyn +import StaticFlags +import FastString +import Constants -- Lots of field offsets +import Outputable + +import Data.Maybe +import Data.Char +import Control.Monad + +----------------------------------------------------------------------------- +-- +-- Cost-centre-stack Profiling +-- +----------------------------------------------------------------------------- + +-- Expression representing the current cost centre stack +ccsType :: CmmType -- Type of a cost-centre stack +ccsType = bWord + +ccType :: CmmType -- Type of a cost centre +ccType = bWord + +curCCS :: CmmExpr +curCCS = CmmLoad curCCSAddr ccsType + +-- Address of current CCS variable, for storing into +curCCSAddr :: CmmExpr +curCCSAddr = CmmLit (CmmLabel (mkRtsDataLabel (sLit "CCCS"))) + +mkCCostCentre :: CostCentre -> CmmLit +mkCCostCentre cc = CmmLabel (mkCCLabel cc) + +mkCCostCentreStack :: CostCentreStack -> CmmLit +mkCCostCentreStack ccs = CmmLabel (mkCCSLabel ccs) + +costCentreFrom :: CmmExpr -- A closure pointer + -> CmmExpr -- The cost centre from that closure +costCentreFrom cl = CmmLoad (cmmOffsetB cl oFFSET_StgHeader_ccs) ccsType + +staticProfHdr :: CostCentreStack -> [CmmLit] +-- The profiling header words in a static closure +-- Was SET_STATIC_PROF_HDR +staticProfHdr ccs = ifProfilingL [mkCCostCentreStack ccs, + staticLdvInit] + +dynProfHdr :: CmmExpr -> [CmmExpr] +-- Profiling header words in a dynamic closure +dynProfHdr ccs = ifProfilingL [ccs, dynLdvInit] + +initUpdFrameProf :: CmmExpr -> FCode () +-- Initialise the profiling field of an update frame +initUpdFrameProf frame_amode + = ifProfiling $ -- frame->header.prof.ccs = CCCS + emit (mkStore (cmmOffsetB frame_amode oFFSET_StgHeader_ccs) curCCS) + -- frame->header.prof.hp.rs = NULL (or frame-header.prof.hp.ldvw = 0) + -- is unnecessary because it is not used anyhow. + +--------------------------------------------------------------------------- +-- Saving and restoring the current cost centre +--------------------------------------------------------------------------- + +{- Note [Saving the current cost centre] + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The current cost centre is like a global register. Like other +global registers, it's a caller-saves one. But consider + case (f x) of (p,q) -> rhs +Since 'f' may set the cost centre, we must restore it +before resuming rhs. So we want code like this: + local_cc = CCC -- save + r = f( x ) + CCC = local_cc -- restore +That is, we explicitly "save" the current cost centre in +a LocalReg, local_cc; and restore it after the call. The +C-- infrastructure will arrange to save local_cc across the +call. + +The same goes for join points; + let j x = join-stuff + in blah-blah +We want this kind of code: + local_cc = CCC -- save + blah-blah + J: + CCC = local_cc -- restore +-} + +saveCurrentCostCentre :: FCode (Maybe LocalReg) + -- Returns Nothing if profiling is off +saveCurrentCostCentre + | not opt_SccProfilingOn + = return Nothing + | otherwise + = do { local_cc <- newTemp ccType + ; emit (mkAssign (CmmLocal local_cc) curCCS) + ; return (Just local_cc) } + +restoreCurrentCostCentre :: Maybe LocalReg -> FCode () +restoreCurrentCostCentre Nothing + = return () +restoreCurrentCostCentre (Just local_cc) + = emit (mkStore curCCSAddr (CmmReg (CmmLocal local_cc))) + + +------------------------------------------------------------------------------- +-- Recording allocation in a cost centre +------------------------------------------------------------------------------- + +-- | Record the allocation of a closure. The CmmExpr is the cost +-- centre stack to which to attribute the allocation. +profDynAlloc :: ClosureInfo -> CmmExpr -> FCode () +profDynAlloc cl_info ccs + = ifProfiling $ + profAlloc (CmmLit (mkIntCLit (closureSize cl_info))) ccs + +-- | Record the allocation of a closure (size is given by a CmmExpr) +-- The size must be in words, because the allocation counter in a CCS counts +-- in words. +profAlloc :: CmmExpr -> CmmExpr -> FCode () +profAlloc words ccs + = ifProfiling $ + emit (addToMemE alloc_rep + (cmmOffsetB ccs oFFSET_CostCentreStack_mem_alloc) + (CmmMachOp (MO_UU_Conv wordWidth (typeWidth alloc_rep)) $ + [CmmMachOp mo_wordSub [words, + CmmLit (mkIntCLit profHdrSize)]])) + -- subtract the "profiling overhead", which is the + -- profiling header in a closure. + where + alloc_rep = REP_CostCentreStack_mem_alloc + +-- ---------------------------------------------------------------------- +-- Setting the cost centre in a new closure + +chooseDynCostCentres :: CostCentreStack + -> [Id] -- Args + -> StgExpr -- Body + -> FCode (CmmExpr, CmmExpr) +-- Called when alllcating a closure +-- Tells which cost centre to put in the object, and which +-- to blame the cost of allocation on +chooseDynCostCentres ccs args body = do + -- Cost-centre we record in the object + use_ccs <- emitCCS ccs + + -- Cost-centre on whom we blame the allocation + let blame_ccs + | null args && isBox body = CmmLit (mkCCostCentreStack overheadCCS) + | otherwise = use_ccs + + return (use_ccs, blame_ccs) + + +-- Some CostCentreStacks are a sequence of pushes on top of CCCS. +-- These pushes must be performed before we can refer to the stack in +-- an expression. +emitCCS :: CostCentreStack -> FCode CmmExpr +emitCCS ccs = push_em (ccsExpr ccs') (reverse cc's) + where + (cc's, ccs') = decomposeCCS ccs + + push_em ccs [] = return ccs + push_em ccs (cc:rest) = do + tmp <- newTemp ccsType + pushCostCentre tmp ccs cc + push_em (CmmReg (CmmLocal tmp)) rest + +ccsExpr :: CostCentreStack -> CmmExpr +ccsExpr ccs + | isCurrentCCS ccs = curCCS + | otherwise = CmmLit (mkCCostCentreStack ccs) + + +isBox :: StgExpr -> Bool +-- If it's an utterly trivial RHS, then it must be +-- one introduced by boxHigherOrderArgs for profiling, +-- so we charge it to "OVERHEAD". +-- This looks like a GROSS HACK to me --SDM +isBox (StgApp fun []) = True +isBox other = False + + +-- ----------------------------------------------------------------------- +-- Setting the current cost centre on entry to a closure + +-- For lexically scoped profiling we have to load the cost centre from +-- the closure entered, if the costs are not supposed to be inherited. +-- This is done immediately on entering the fast entry point. + +-- Load current cost centre from closure, if not inherited. +-- Node is guaranteed to point to it, if profiling and not inherited. + +enterCostCentre + :: ClosureInfo + -> CostCentreStack + -> StgExpr -- The RHS of the closure + -> FCode () + +-- We used to have a special case for bindings of form +-- f = g True +-- where g has arity 2. The RHS is a thunk, but we don't +-- need to update it; and we want to subsume costs. +-- We don't have these sort of PAPs any more, so the special +-- case has gone away. + +enterCostCentre closure_info ccs body + = ifProfiling $ + ASSERT2(not (noCCSAttached ccs), ppr (closureName closure_info) <+> ppr ccs) + enter_cost_centre closure_info ccs body + +enter_cost_centre closure_info ccs body + | isSubsumedCCS ccs + = ASSERT(isToplevClosure closure_info) + ASSERT(re_entrant) + enter_ccs_fsub + + | isDerivedFromCurrentCCS ccs + = do { + if re_entrant && not is_box + then + enter_ccs_fun node_ccs + else + emit (mkStore curCCSAddr node_ccs) + + -- don't forget to bump the scc count. This closure might have been + -- of the form let x = _scc_ "x" e in ...x..., which the SCCfinal + -- pass has turned into simply let x = e in ...x... and attached + -- the _scc_ as PushCostCentre(x,CCCS) on the x closure. So that + -- we don't lose the scc counter, bump it in the entry code for x. + -- ToDo: for a multi-push we should really bump the counter for + -- each of the intervening CCSs, not just the top one. + ; when (not (isCurrentCCS ccs)) $ + emit (bumpSccCount curCCS) + } + + | isCafCCS ccs + = ASSERT(isToplevClosure closure_info) + ASSERT(not re_entrant) + do { -- This is just a special case of the isDerivedFromCurrentCCS + -- case above. We could delete this, but it's a micro + -- optimisation and saves a bit of code. + emit (mkStore curCCSAddr enc_ccs) + ; emit (bumpSccCount node_ccs) + } + + | otherwise + = panic "enterCostCentre" + where + enc_ccs = CmmLit (mkCCostCentreStack ccs) + re_entrant = closureReEntrant closure_info + node_ccs = costCentreFrom (cmmOffsetB (CmmReg nodeReg) (-node_tag)) + is_box = isBox body + + -- if this is a function, then node will be tagged; we must subract the tag + node_tag = funTag closure_info + +-- set the current CCS when entering a PAP +enterCostCentrePAP :: CmmExpr -> FCode () +enterCostCentrePAP closure = + ifProfiling $ do + enter_ccs_fun (costCentreFrom closure) + enteringPAP 1 + +enterCostCentreThunk :: CmmExpr -> FCode () +enterCostCentreThunk closure = + ifProfiling $ do + emit $ mkStore curCCSAddr (costCentreFrom closure) + +enter_ccs_fun stack = emitRtsCall (sLit "EnterFunCCS") [(stack,AddrHint)] False + -- ToDo: vols + +enter_ccs_fsub = enteringPAP 0 + +-- When entering a PAP, EnterFunCCS is called by both the PAP entry +-- code and the function entry code; we don't want the function's +-- entry code to also update CCCS in the event that it was called via +-- a PAP, so we set the flag entering_PAP to indicate that we are +-- entering via a PAP. +enteringPAP :: Integer -> FCode () +enteringPAP n + = emit (mkStore (CmmLit (CmmLabel (mkRtsDataLabel (sLit "entering_PAP")))) + (CmmLit (CmmInt n cIntWidth))) + +ifProfiling :: FCode () -> FCode () +ifProfiling code + | opt_SccProfilingOn = code + | otherwise = nopC + +ifProfilingL :: [a] -> [a] +ifProfilingL xs + | opt_SccProfilingOn = xs + | otherwise = [] + + +--------------------------------------------------------------- +-- Initialising Cost Centres & CCSs +--------------------------------------------------------------- + +initCostCentres :: CollectedCCs -> FCode CmmAGraph +-- Emit the declarations, and return code to register them +initCostCentres (local_CCs, ___extern_CCs, singleton_CCSs) + = getCode $ whenC opt_SccProfilingOn $ + do { mapM_ emitCostCentreDecl local_CCs + ; mapM_ emitCostCentreStackDecl singleton_CCSs + ; emit $ catAGraphs $ map mkRegisterCC local_CCs + ; emit $ catAGraphs $ map mkRegisterCCS singleton_CCSs } + + +emitCostCentreDecl :: CostCentre -> FCode () +emitCostCentreDecl cc = do + { label <- mkStringCLit (costCentreUserName cc) + ; modl <- mkStringCLit (Module.moduleNameString + (Module.moduleName (cc_mod cc))) + -- All cost centres will be in the main package, since we + -- don't normally use -auto-all or add SCCs to other packages. + -- Hence don't emit the package name in the module here. + ; let lits = [ zero, -- StgInt ccID, + label, -- char *label, + modl, -- char *module, + zero, -- StgWord time_ticks + zero64, -- StgWord64 mem_alloc + subsumed, -- StgInt is_caf + zero -- struct _CostCentre *link + ] + ; emitDataLits (mkCCLabel cc) lits + } + where + subsumed | isCafCC cc = mkIntCLit (ord 'c') -- 'c' == is a CAF + | otherwise = mkIntCLit (ord 'B') -- 'B' == is boring + +emitCostCentreStackDecl :: CostCentreStack -> FCode () +emitCostCentreStackDecl ccs + = case maybeSingletonCCS ccs of + Just cc -> emitDataLits (mkCCSLabel ccs) (mk_lits cc) + Nothing -> pprPanic "emitCostCentreStackDecl" (ppr ccs) + where + mk_lits cc = zero : + mkCCostCentre cc : + replicate (sizeof_ccs_words - 2) zero + -- Note: to avoid making any assumptions about how the + -- C compiler (that compiles the RTS, in particular) does + -- layouts of structs containing long-longs, simply + -- pad out the struct with zero words until we hit the + -- size of the overall struct (which we get via DerivedConstants.h) + +zero = mkIntCLit 0 +zero64 = CmmInt 0 W64 + +sizeof_ccs_words :: Int +sizeof_ccs_words + -- round up to the next word. + | ms == 0 = ws + | otherwise = ws + 1 + where + (ws,ms) = SIZEOF_CostCentreStack `divMod` wORD_SIZE + +-- --------------------------------------------------------------------------- +-- Registering CCs and CCSs + +-- (cc)->link = CC_LIST; +-- CC_LIST = (cc); +-- (cc)->ccID = CC_ID++; + +mkRegisterCC :: CostCentre -> CmmAGraph +mkRegisterCC cc + = withTemp cInt $ \tmp -> + catAGraphs [ + mkStore (cmmOffsetB cc_lit oFFSET_CostCentre_link) + (CmmLoad cC_LIST bWord), + mkStore cC_LIST cc_lit, + mkAssign (CmmLocal tmp) (CmmLoad cC_ID cInt), + mkStore (cmmOffsetB cc_lit oFFSET_CostCentre_ccID) (CmmReg (CmmLocal tmp)), + mkStore cC_ID (cmmRegOffB (CmmLocal tmp) 1) + ] + where + cc_lit = CmmLit (CmmLabel (mkCCLabel cc)) + +-- (ccs)->prevStack = CCS_LIST; +-- CCS_LIST = (ccs); +-- (ccs)->ccsID = CCS_ID++; + +mkRegisterCCS :: CostCentreStack -> CmmAGraph +mkRegisterCCS ccs + = withTemp cInt $ \ tmp -> + catAGraphs [ + mkStore (cmmOffsetB ccs_lit oFFSET_CostCentreStack_prevStack) + (CmmLoad cCS_LIST bWord), + mkStore cCS_LIST ccs_lit, + mkAssign (CmmLocal tmp) (CmmLoad cCS_ID cInt), + mkStore (cmmOffsetB ccs_lit oFFSET_CostCentreStack_ccsID) (CmmReg (CmmLocal tmp)), + mkStore cCS_ID (cmmRegOffB (CmmLocal tmp) 1) + ] + where + ccs_lit = CmmLit (CmmLabel (mkCCSLabel ccs)) + + +cC_LIST = CmmLit (CmmLabel (mkRtsDataLabel (sLit "CC_LIST"))) +cC_ID = CmmLit (CmmLabel (mkRtsDataLabel (sLit "CC_ID"))) + +cCS_LIST = CmmLit (CmmLabel (mkRtsDataLabel (sLit "CCS_LIST"))) +cCS_ID = CmmLit (CmmLabel (mkRtsDataLabel (sLit "CCS_ID"))) + +-- --------------------------------------------------------------------------- +-- Set the current cost centre stack + +emitSetCCC :: CostCentre -> FCode () +emitSetCCC cc + | not opt_SccProfilingOn = nopC + | otherwise = do + tmp <- newTemp ccsType -- TODO FIXME NOW + ASSERT( sccAbleCostCentre cc ) + pushCostCentre tmp curCCS cc + emit (mkStore curCCSAddr (CmmReg (CmmLocal tmp))) + when (isSccCountCostCentre cc) $ + emit (bumpSccCount curCCS) + +pushCostCentre :: LocalReg -> CmmExpr -> CostCentre -> FCode () +pushCostCentre result ccs cc + = emitRtsCallWithResult result AddrHint + (sLit "PushCostCentre") [(ccs,AddrHint), + (CmmLit (mkCCostCentre cc), AddrHint)] + False + +bumpSccCount :: CmmExpr -> CmmAGraph +bumpSccCount ccs + = addToMem REP_CostCentreStack_scc_count + (cmmOffsetB ccs oFFSET_CostCentreStack_scc_count) 1 + +----------------------------------------------------------------------------- +-- +-- Lag/drag/void stuff +-- +----------------------------------------------------------------------------- + +-- +-- Initial value for the LDV field in a static closure +-- +staticLdvInit :: CmmLit +staticLdvInit = zeroCLit + +-- +-- Initial value of the LDV field in a dynamic closure +-- +dynLdvInit :: CmmExpr +dynLdvInit = -- (era << LDV_SHIFT) | LDV_STATE_CREATE + CmmMachOp mo_wordOr [ + CmmMachOp mo_wordShl [loadEra, CmmLit (mkIntCLit lDV_SHIFT) ], + CmmLit (mkWordCLit lDV_STATE_CREATE) + ] + +-- +-- Initialise the LDV word of a new closure +-- +ldvRecordCreate :: CmmExpr -> FCode () +ldvRecordCreate closure = emit $ mkStore (ldvWord closure) dynLdvInit + +-- +-- Called when a closure is entered, marks the closure as having been "used". +-- The closure is not an 'inherently used' one. +-- The closure is not IND or IND_OLDGEN because neither is considered for LDV +-- profiling. +-- +ldvEnterClosure :: ClosureInfo -> FCode () +ldvEnterClosure closure_info = ldvEnter (cmmOffsetB (CmmReg nodeReg) (-tag)) + where tag = funTag closure_info + -- don't forget to substract node's tag + +ldvEnter :: CmmExpr -> FCode () +-- Argument is a closure pointer +ldvEnter cl_ptr + = ifProfiling $ + -- if (era > 0) { + -- LDVW((c)) = (LDVW((c)) & LDV_CREATE_MASK) | + -- era | LDV_STATE_USE } + emit (mkCmmIfThenElse (CmmMachOp mo_wordUGt [loadEra, CmmLit zeroCLit]) + (mkStore ldv_wd new_ldv_wd) + mkNop) + where + -- don't forget to substract node's tag + ldv_wd = ldvWord cl_ptr + new_ldv_wd = cmmOrWord (cmmAndWord (CmmLoad ldv_wd bWord) + (CmmLit (mkWordCLit lDV_CREATE_MASK))) + (cmmOrWord loadEra (CmmLit (mkWordCLit lDV_STATE_USE))) + +loadEra :: CmmExpr +loadEra = CmmMachOp (MO_UU_Conv cIntWidth wordWidth) + [CmmLoad (mkLblExpr (mkRtsDataLabel (sLit "era"))) cInt] + +ldvWord :: CmmExpr -> CmmExpr +-- Takes the address of a closure, and returns +-- the address of the LDV word in the closure +ldvWord closure_ptr = cmmOffsetB closure_ptr oFFSET_StgHeader_ldvw + +-- LDV constants, from ghc/includes/Constants.h +lDV_SHIFT = (LDV_SHIFT :: Int) +--lDV_STATE_MASK = (LDV_STATE_MASK :: StgWord) +lDV_CREATE_MASK = (LDV_CREATE_MASK :: StgWord) +--lDV_LAST_MASK = (LDV_LAST_MASK :: StgWord) +lDV_STATE_CREATE = (LDV_STATE_CREATE :: StgWord) +lDV_STATE_USE = (LDV_STATE_USE :: StgWord) + |
