summaryrefslogtreecommitdiff
path: root/compiler/codeGen/CgProf.hs
diff options
context:
space:
mode:
author: Simon Marlow <marlowsd@gmail.com> 2011-11-28 16:48:43 +0000
committer: Simon Marlow <marlowsd@gmail.com> 2011-11-29 12:21:18 +0000
commit: 50de6034343abc93a7b01daccff34121042c0e7c (patch)
tree: 24496a5fc6bc39c6baaa574608e53c5d76c169f6 /compiler/codeGen/CgProf.hs
parent: 1c2b838131134d44004dfdff18c302131478390d (diff)
download: haskell-50de6034343abc93a7b01daccff34121042c0e7c.tar.gz
Make profiling work with multiple capabilities (+RTS -N)
This means that both time and heap profiling work for parallel programs.

Main internal changes:

- CCCS is no longer a global variable; it is now another pseudo-register in the StgRegTable struct. Thus every Capability has its own CCCS.
- There is a new built-in CCS called "IDLE", which records ticks for Capabilities in the idle state. If you profile a single-threaded program with +RTS -N2, you'll see about 50% of time in "IDLE".
- There is appropriate locking in rts/Profiling.c to protect the shared cost-centre-stack data structures.

This patch does enough to get it working, but I have cut one big corner: the cost-centre-stack data structure is still shared amongst all Capabilities, which means that multiple Capabilities will race when updating the "allocations" and "entries" fields of a CCS. Not only does this give unpredictable results, but it also runs very slowly due to cache line bouncing. It is strongly recommended that you use -fno-prof-count-entries to disable the "entries" count when profiling parallel programs. (I shall add a note to this effect to the docs.)
Diffstat (limited to 'compiler/codeGen/CgProf.hs')
-rw-r--r-- compiler/codeGen/CgProf.hs 22
1 files changed, 11 insertions, 11 deletions
diff --git a/compiler/codeGen/CgProf.hs b/compiler/codeGen/CgProf.hs
index 13667c399a..3e247ff4d6 100644
--- a/compiler/codeGen/CgProf.hs
+++ b/compiler/codeGen/CgProf.hs
@@ -21,7 +21,7 @@ module CgProf (
enterCostCentreThunk,
enterCostCentreFun,
costCentreFrom,
- curCCS, curCCSAddr,
+ curCCS, storeCurCCS,
emitCostCentreDecl, emitCostCentreStackDecl,
emitSetCCC,
@@ -66,11 +66,10 @@ import Control.Monad
-- Expression representing the current cost centre stack
curCCS :: CmmExpr
-curCCS = CmmLoad curCCSAddr bWord
+curCCS = CmmReg (CmmGlobal CCCS)
--- Address of current CCS variable, for storing into
-curCCSAddr :: CmmExpr
-curCCSAddr = CmmLit (CmmLabel (mkCmmDataLabel rtsPackageId (fsLit "CCCS")))
+storeCurCCS :: CmmExpr -> CmmStmt
+storeCurCCS e = CmmAssign (CmmGlobal CCCS) e
mkCCostCentre :: CostCentre -> CmmLit
mkCCostCentre cc = CmmLabel (mkCCLabel cc)
@@ -135,14 +134,15 @@ profAlloc words ccs
enterCostCentreThunk :: CmmExpr -> Code
enterCostCentreThunk closure =
ifProfiling $ do
- stmtC $ CmmStore curCCSAddr (costCentreFrom closure)
+ stmtC $ storeCurCCS (costCentreFrom closure)
-enterCostCentreFun :: CostCentreStack -> CmmExpr -> Code
-enterCostCentreFun ccs closure =
+enterCostCentreFun :: CostCentreStack -> CmmExpr -> [GlobalReg] -> Code
+enterCostCentreFun ccs closure vols =
ifProfiling $ do
if isCurrentCCS ccs
- then emitRtsCall rtsPackageId (fsLit "enterFunCCS")
- [CmmHinted (costCentreFrom closure) AddrHint]
+ then emitRtsCallWithVols rtsPackageId (fsLit "enterFunCCS")
+ [CmmHinted (CmmReg (CmmGlobal BaseReg)) AddrHint,
+ CmmHinted (costCentreFrom closure) AddrHint] vols
else return () -- top-level function, nothing to do
ifProfiling :: Code -> Code
@@ -226,7 +226,7 @@ emitSetCCC cc tick push
tmp <- newTemp bWord -- TODO FIXME NOW
pushCostCentre tmp curCCS cc
when tick $ stmtC (bumpSccCount (CmmReg (CmmLocal tmp)))
- when push $ stmtC (CmmStore curCCSAddr (CmmReg (CmmLocal tmp)))
+ when push $ stmtC (storeCurCCS (CmmReg (CmmLocal tmp)))
pushCostCentre :: LocalReg -> CmmExpr -> CostCentre -> Code
pushCostCentre result ccs cc