summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDimitrios Vytiniotis <dimitris@microsoft.com>2011-11-29 19:20:33 +0000
committerDimitrios Vytiniotis <dimitris@microsoft.com>2011-11-29 19:20:33 +0000
commit42e3b5bd3ee9c555adaaf1e5a12f7ddd71423c0c (patch)
tree0e8ca1886f847d878ab9b9126c2268e4c8b54a1b
parente4d87e140697bcb380cc51a5aee598409930281e (diff)
parent1f7433b7b998dda4dde6d09f22a37f637745c079 (diff)
downloadhaskell-42e3b5bd3ee9c555adaaf1e5a12f7ddd71423c0c.tar.gz
Merge branch 'master' of http://darcs.haskell.org//ghc
-rw-r--r--compiler/cmm/CmmCvt.hs2
-rw-r--r--compiler/cmm/CmmExpr.hs6
-rw-r--r--compiler/cmm/CmmLex.x5
-rw-r--r--compiler/cmm/CmmLint.hs2
-rw-r--r--compiler/cmm/CmmOpt.hs6
-rw-r--r--compiler/cmm/CmmParse.y8
-rw-r--r--compiler/cmm/OldCmm.hs3
-rw-r--r--compiler/cmm/OldPprCmm.hs7
-rw-r--r--compiler/cmm/PprC.hs20
-rw-r--r--compiler/cmm/PprCmmExpr.hs1
-rw-r--r--compiler/codeGen/CgCase.lhs2
-rw-r--r--compiler/codeGen/CgClosure.lhs11
-rw-r--r--compiler/codeGen/CgForeignCall.hs12
-rw-r--r--compiler/codeGen/CgPrimOp.hs2
-rw-r--r--compiler/codeGen/CgProf.hs23
-rw-r--r--compiler/codeGen/CgUtils.hs33
-rw-r--r--compiler/codeGen/StgCmmForeign.hs4
-rw-r--r--compiler/codeGen/StgCmmPrim.hs3
-rw-r--r--compiler/codeGen/StgCmmProf.hs15
-rw-r--r--compiler/codeGen/StgCmmUtils.hs8
-rw-r--r--compiler/llvmGen/LlvmCodeGen/CodeGen.hs2
-rw-r--r--compiler/nativeGen/AsmCodeGen.lhs4
-rw-r--r--compiler/nativeGen/PPC/CodeGen.hs2
-rw-r--r--compiler/nativeGen/SPARC/CodeGen.hs2
-rw-r--r--compiler/nativeGen/X86/CodeGen.hs4
-rw-r--r--compiler/prelude/primops.txt.pp3
-rw-r--r--docs/users_guide/profiling.xml44
-rw-r--r--includes/Cmm.h2
-rw-r--r--includes/RtsAPI.h9
-rw-r--r--includes/mkDerivedConstants.c1
-rw-r--r--includes/rts/prof/CCS.h7
-rw-r--r--includes/stg/MiscClosures.h1
-rw-r--r--includes/stg/Regs.h1
-rw-r--r--rts/Apply.cmm2
-rw-r--r--rts/AutoApply.h12
-rw-r--r--rts/Capability.c24
-rw-r--r--rts/Exception.cmm4
-rw-r--r--rts/HeapStackCheck.cmm13
-rw-r--r--rts/Interpreter.c4
-rw-r--r--rts/PrimOps.cmm40
-rw-r--r--rts/Profiling.c136
-rw-r--r--rts/Proftimer.c6
-rw-r--r--rts/RetainerProfile.h3
-rw-r--r--rts/RtsFlags.c6
-rw-r--r--rts/Schedule.c4
-rw-r--r--rts/StgMiscClosures.cmm2
-rw-r--r--rts/StgStdThunks.cmm4
-rw-r--r--rts/sm/GC.c18
-rw-r--r--rts/sm/Storage.c4
-rw-r--r--utils/genapply/GenApply.hs2
50 files changed, 335 insertions, 204 deletions
diff --git a/compiler/cmm/CmmCvt.hs b/compiler/cmm/CmmCvt.hs
index c8a1d85597..c82f517849 100644
--- a/compiler/cmm/CmmCvt.hs
+++ b/compiler/cmm/CmmCvt.hs
@@ -91,7 +91,7 @@ ofZgraph g = Old.ListGraph $ mapMaybe convert_block $ postorderDfs g
Old.CmmCall (cmm_target target)
(add_hints (get_conv target) Results ress)
(add_hints (get_conv target) Arguments args)
- Old.CmmUnsafe Old.CmmMayReturn
+ Old.CmmMayReturn
last :: CmmNode O C -> () -> [Old.CmmStmt]
last node _ = stmts
diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs
index ef97a82aa9..885639b874 100644
--- a/compiler/cmm/CmmExpr.hs
+++ b/compiler/cmm/CmmExpr.hs
@@ -343,7 +343,8 @@ data GlobalReg
| SpLim -- Stack limit
| Hp -- Heap ptr; points to last occupied heap location.
| HpLim -- Heap limit register
- | CurrentTSO -- pointer to current thread's TSO
+ | CCCS -- Current cost-centre stack
+ | CurrentTSO -- pointer to current thread's TSO
| CurrentNursery -- pointer to allocation area
| HpAlloc -- allocation count for heap check failure
@@ -395,6 +396,7 @@ instance Ord GlobalReg where
compare SpLim SpLim = EQ
compare Hp Hp = EQ
compare HpLim HpLim = EQ
+ compare CCCS CCCS = EQ
compare CurrentTSO CurrentTSO = EQ
compare CurrentNursery CurrentNursery = EQ
compare HpAlloc HpAlloc = EQ
@@ -419,6 +421,8 @@ instance Ord GlobalReg where
compare _ Hp = GT
compare HpLim _ = LT
compare _ HpLim = GT
+ compare CCCS _ = LT
+ compare _ CCCS = GT
compare CurrentTSO _ = LT
compare _ CurrentTSO = GT
compare CurrentNursery _ = LT
diff --git a/compiler/cmm/CmmLex.x b/compiler/cmm/CmmLex.x
index 1e2b20d4b3..ddd681d25e 100644
--- a/compiler/cmm/CmmLex.x
+++ b/compiler/cmm/CmmLex.x
@@ -106,8 +106,9 @@ $white_no_nl+ ;
SpLim { global_reg SpLim }
Hp { global_reg Hp }
HpLim { global_reg HpLim }
- CurrentTSO { global_reg CurrentTSO }
- CurrentNursery { global_reg CurrentNursery }
+ CCCS { global_reg CCCS }
+ CurrentTSO { global_reg CurrentTSO }
+ CurrentNursery { global_reg CurrentNursery }
HpAlloc { global_reg HpAlloc }
BaseReg { global_reg BaseReg }
diff --git a/compiler/cmm/CmmLint.hs b/compiler/cmm/CmmLint.hs
index e03da8ccd7..ee53c1b6c7 100644
--- a/compiler/cmm/CmmLint.hs
+++ b/compiler/cmm/CmmLint.hs
@@ -133,7 +133,7 @@ lintCmmStmt platform labels = lint
_ <- lintCmmExpr platform l
_ <- lintCmmExpr platform r
return ()
- lint (CmmCall target _res args _ _) =
+ lint (CmmCall target _res args _) =
lintTarget platform target >> mapM_ (lintCmmExpr platform . hintlessCmm) args
lint (CmmCondBranch e id) = checkTarget id >> lintCmmExpr platform e >> checkCond platform e
lint (CmmSwitch e branches) = do
diff --git a/compiler/cmm/CmmOpt.hs b/compiler/cmm/CmmOpt.hs
index 1005448894..007b7a715e 100644
--- a/compiler/cmm/CmmOpt.hs
+++ b/compiler/cmm/CmmOpt.hs
@@ -59,7 +59,7 @@ cmmEliminateDeadBlocks blocks@(BasicBlock base_id _:_) =
stmt m (CmmComment _) = m
stmt m (CmmAssign _ e) = expr m e
stmt m (CmmStore e1 e2) = expr (expr m e1) e2
- stmt m (CmmCall c _ as _ _) = f (actuals m as) c
+ stmt m (CmmCall c _ as _) = f (actuals m as) c
where f m (CmmCallee e _) = expr m e
f m (CmmPrim _) = m
stmt m (CmmBranch b) = b:m
@@ -266,8 +266,8 @@ lookForInline' u expr regset (stmt : rest)
inlineStmt :: Unique -> CmmExpr -> CmmStmt -> CmmStmt
inlineStmt u a (CmmAssign r e) = CmmAssign r (inlineExpr u a e)
inlineStmt u a (CmmStore e1 e2) = CmmStore (inlineExpr u a e1) (inlineExpr u a e2)
-inlineStmt u a (CmmCall target regs es srt ret)
- = CmmCall (infn target) regs es' srt ret
+inlineStmt u a (CmmCall target regs es ret)
+ = CmmCall (infn target) regs es' ret
where infn (CmmCallee fn cconv) = CmmCallee (inlineExpr u a fn) cconv
infn (CmmPrim p) = CmmPrim p
es' = [ (CmmHinted (inlineExpr u a e) hint) | (CmmHinted e hint) <- es ]
diff --git a/compiler/cmm/CmmParse.y b/compiler/cmm/CmmParse.y
index bdb2c4c918..4e315ddbdf 100644
--- a/compiler/cmm/CmmParse.y
+++ b/compiler/cmm/CmmParse.y
@@ -775,8 +775,9 @@ isPtrGlobalReg Sp = True
isPtrGlobalReg SpLim = True
isPtrGlobalReg Hp = True
isPtrGlobalReg HpLim = True
-isPtrGlobalReg CurrentTSO = True
-isPtrGlobalReg CurrentNursery = True
+isPtrGlobalReg CCCS = True
+isPtrGlobalReg CurrentTSO = True
+isPtrGlobalReg CurrentNursery = True
isPtrGlobalReg (VanillaReg _ VGcPtr) = True
isPtrGlobalReg _ = False
@@ -867,10 +868,9 @@ foreignCall conv_string results_code expr_code args_code vols safety ret
results <- sequence results_code
expr <- expr_code
args <- sequence args_code
- --code (stmtC (CmmCall (CmmCallee expr convention) results args safety))
case convention of
-- Temporary hack so at least some functions are CmmSafe
- CmmCallConv -> code (stmtC (CmmCall (CmmCallee expr convention) results args safety ret))
+ CmmCallConv -> code (stmtC (CmmCall (CmmCallee expr convention) results args ret))
_ ->
let expr' = adjCallTarget convention expr args in
case safety of
diff --git a/compiler/cmm/OldCmm.hs b/compiler/cmm/OldCmm.hs
index 3703de4e32..a8a9d5dde0 100644
--- a/compiler/cmm/OldCmm.hs
+++ b/compiler/cmm/OldCmm.hs
@@ -154,7 +154,6 @@ data CmmStmt -- Old-style
CmmCallTarget
[HintedCmmFormal] -- zero or more results
[HintedCmmActual] -- zero or more arguments
- CmmSafety -- whether to build a continuation
CmmReturnInfo
-- Some care is necessary when handling the arguments of these, see
-- [Register parameter passing] and the hack in cmm/CmmOpt.hs
@@ -192,7 +191,7 @@ instance UserOfLocalRegs CmmStmt where
stmt (CmmComment {}) = id
stmt (CmmAssign _ e) = gen e
stmt (CmmStore e1 e2) = gen e1 . gen e2
- stmt (CmmCall target _ es _ _) = gen target . gen es
+ stmt (CmmCall target _ es _) = gen target . gen es
stmt (CmmBranch _) = id
stmt (CmmCondBranch e _) = gen e
stmt (CmmSwitch e _) = gen e
diff --git a/compiler/cmm/OldPprCmm.hs b/compiler/cmm/OldPprCmm.hs
index d2f03f78b7..07dfbf63bf 100644
--- a/compiler/cmm/OldPprCmm.hs
+++ b/compiler/cmm/OldPprCmm.hs
@@ -122,11 +122,10 @@ pprStmt platform stmt = case stmt of
-- call "ccall" foo(x, y)[r1, r2];
-- ToDo ppr volatile
- CmmCall (CmmCallee fn cconv) results args safety ret ->
+ CmmCall (CmmCallee fn cconv) results args ret ->
sep [ pp_lhs <+> pp_conv
, nest 2 (pprExpr9 platform fn <>
parens (commafy (map ppr_ar args)))
- <> brackets (pprPlatform platform safety)
, case ret of CmmMayReturn -> empty
CmmNeverReturns -> ptext $ sLit (" never returns")
] <> semi
@@ -142,9 +141,9 @@ pprStmt platform stmt = case stmt of
_ -> ptext (sLit("foreign")) <+> doubleQuotes (ppr cconv)
-- Call a CallishMachOp, like sin or cos that might be implemented as a library call.
- CmmCall (CmmPrim op) results args safety ret ->
+ CmmCall (CmmPrim op) results args ret ->
pprStmt platform (CmmCall (CmmCallee (CmmLit lbl) CCallConv)
- results args safety ret)
+ results args ret)
where
-- HACK: A CallishMachOp doesn't really correspond to a ForeignLabel, but we
-- use one to get the label printed.
diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs
index 4f8a061bdd..270ce12670 100644
--- a/compiler/cmm/PprC.hs
+++ b/compiler/cmm/PprC.hs
@@ -193,7 +193,7 @@ pprStmt platform stmt = case stmt of
where
rep = cmmExprType src
- CmmCall (CmmCallee fn cconv) results args safety ret ->
+ CmmCall (CmmCallee fn cconv) results args ret ->
maybe_proto $$
fnCall
where
@@ -215,7 +215,7 @@ pprStmt platform stmt = case stmt of
case fn of
CmmLit (CmmLabel lbl)
| StdCallConv <- cconv ->
- let myCall = pprCall platform (pprCLabel platform lbl) cconv results args safety
+ let myCall = pprCall platform (pprCLabel platform lbl) cconv results args
in (real_fun_proto lbl, myCall)
-- stdcall functions must be declared with
-- a function type, otherwise the C compiler
@@ -223,22 +223,22 @@ pprStmt platform stmt = case stmt of
-- can't add the @n suffix ourselves, because
-- it isn't valid C.
| CmmNeverReturns <- ret ->
- let myCall = pprCall platform (pprCLabel platform lbl) cconv results args safety
+ let myCall = pprCall platform (pprCLabel platform lbl) cconv results args
in (real_fun_proto lbl, myCall)
| not (isMathFun lbl) ->
let myCall = braces (
pprCFunType (char '*' <> text "ghcFunPtr") cconv results args <> semi
$$ text "ghcFunPtr" <+> equals <+> cast_fn <> semi
- $$ pprCall platform (text "ghcFunPtr") cconv results args safety <> semi
+ $$ pprCall platform (text "ghcFunPtr") cconv results args <> semi
)
in (fun_proto lbl, myCall)
_ ->
(empty {- no proto -},
- pprCall platform cast_fn cconv results args safety <> semi)
+ pprCall platform cast_fn cconv results args <> semi)
-- for a dynamic call, no declaration is necessary.
- CmmCall (CmmPrim op) results args safety _ret ->
- pprCall platform ppr_fn CCallConv results args' safety
+ CmmCall (CmmPrim op) results args _ret ->
+ pprCall platform ppr_fn CCallConv results args'
where
ppr_fn = pprCallishMachOp_for_C op
-- The mem primops carry an extra alignment arg, must drop it.
@@ -812,10 +812,10 @@ pprLocalReg (LocalReg uniq _) = char '_' <> ppr uniq
-- Foreign Calls
pprCall :: Platform -> SDoc -> CCallConv
- -> [HintedCmmFormal] -> [HintedCmmActual] -> CmmSafety
+ -> [HintedCmmFormal] -> [HintedCmmActual]
-> SDoc
-pprCall platform ppr_fn cconv results args _
+pprCall platform ppr_fn cconv results args
| not (is_cishCC cconv)
= panic $ "pprCall: unknown calling convention"
@@ -926,7 +926,7 @@ te_Lit _ = return ()
te_Stmt :: CmmStmt -> TE ()
te_Stmt (CmmAssign r e) = te_Reg r >> te_Expr e
te_Stmt (CmmStore l r) = te_Expr l >> te_Expr r
-te_Stmt (CmmCall _ rs es _ _) = mapM_ (te_temp.hintlessCmm) rs >>
+te_Stmt (CmmCall _ rs es _) = mapM_ (te_temp.hintlessCmm) rs >>
mapM_ (te_Expr.hintlessCmm) es
te_Stmt (CmmCondBranch e _) = te_Expr e
te_Stmt (CmmSwitch e _) = te_Expr e
diff --git a/compiler/cmm/PprCmmExpr.hs b/compiler/cmm/PprCmmExpr.hs
index 76fbdcec8d..81ce84c264 100644
--- a/compiler/cmm/PprCmmExpr.hs
+++ b/compiler/cmm/PprCmmExpr.hs
@@ -271,6 +271,7 @@ pprGlobalReg gr
SpLim -> ptext (sLit "SpLim")
Hp -> ptext (sLit "Hp")
HpLim -> ptext (sLit "HpLim")
+ CCCS -> ptext (sLit "CCCS")
CurrentTSO -> ptext (sLit "CurrentTSO")
CurrentNursery -> ptext (sLit "CurrentNursery")
HpAlloc -> ptext (sLit "HpAlloc")
diff --git a/compiler/codeGen/CgCase.lhs b/compiler/codeGen/CgCase.lhs
index e4fe386043..a36621bdaf 100644
--- a/compiler/codeGen/CgCase.lhs
+++ b/compiler/codeGen/CgCase.lhs
@@ -670,6 +670,6 @@ restoreCurrentCostCentre Nothing _freeit = nopC
restoreCurrentCostCentre (Just slot) freeit
= do { sp_rel <- getSpRelOffset slot
; whenC freeit (freeStackSlots [slot])
- ; stmtC (CmmStore curCCSAddr (CmmLoad sp_rel bWord)) }
+ ; stmtC (storeCurCCS (CmmLoad sp_rel bWord)) }
\end{code}
diff --git a/compiler/codeGen/CgClosure.lhs b/compiler/codeGen/CgClosure.lhs
index 85d629dbaf..7bad8516d9 100644
--- a/compiler/codeGen/CgClosure.lhs
+++ b/compiler/codeGen/CgClosure.lhs
@@ -316,9 +316,10 @@ mkFunEntryCode cl_info cc reg_args stk_args sp_top reg_save_code body = do
-- Do the business
; funWrapper cl_info reg_args reg_save_code $ do
{ tickyEnterFun cl_info
- ; enterCostCentreFun cc $
- CmmMachOp mo_wordSub [ CmmReg nodeReg
- , CmmLit (mkIntCLit (funTag cl_info)) ]
+ ; enterCostCentreFun cc
+ (CmmMachOp mo_wordSub [ CmmReg nodeReg
+ , CmmLit (mkIntCLit (funTag cl_info)) ])
+ (node : map snd reg_args) -- live regs
; cgExpr body }
}
@@ -482,7 +483,7 @@ emitBlackHoleCode is_single_entry = do
stmtsC [
CmmStore (cmmOffsetW (CmmReg nodeReg) fixedHdrSize)
(CmmReg (CmmGlobal CurrentTSO)),
- CmmCall (CmmPrim MO_WriteBarrier) [] [] CmmUnsafe CmmMayReturn,
+ CmmCall (CmmPrim MO_WriteBarrier) [] [] CmmMayReturn,
CmmStore (CmmReg nodeReg) (CmmReg (CmmGlobal EagerBlackholeInfo))
]
\end{code}
@@ -580,7 +581,7 @@ link_caf cl_info _is_upd = do
[ CmmHinted (CmmReg (CmmGlobal BaseReg)) AddrHint,
CmmHinted (CmmReg nodeReg) AddrHint,
CmmHinted hp_rel AddrHint ]
- (Just [node]) False
+ (Just [node])
-- node is live, so save it.
-- see Note [atomic CAF entry] in rts/sm/Storage.c
diff --git a/compiler/codeGen/CgForeignCall.hs b/compiler/codeGen/CgForeignCall.hs
index d96e9f8cfc..8d8b97d76a 100644
--- a/compiler/codeGen/CgForeignCall.hs
+++ b/compiler/codeGen/CgForeignCall.hs
@@ -127,7 +127,7 @@ emitForeignCall' safety results target args vols _srt ret
let (caller_save, caller_load) = callerSaveVolatileRegs vols
let caller_load' = if ret == CmmNeverReturns then [] else caller_load
stmtsC caller_save
- stmtC (CmmCall target results temp_args CmmUnsafe ret)
+ stmtC (CmmCall target results temp_args ret)
stmtsC caller_load'
| otherwise = do
@@ -149,12 +149,12 @@ emitForeignCall' safety results target args vols _srt ret
[ CmmHinted id AddrHint ]
[ CmmHinted (CmmReg (CmmGlobal BaseReg)) AddrHint
, CmmHinted (CmmLit (CmmInt (fromIntegral (fromEnum (playInterruptible safety))) wordWidth)) NoHint]
- CmmUnsafe ret)
- stmtC (CmmCall temp_target results temp_args CmmUnsafe ret)
+ ret)
+ stmtC (CmmCall temp_target results temp_args ret)
stmtC (CmmCall (CmmCallee resumeThread CCallConv)
[ CmmHinted new_base AddrHint ]
[ CmmHinted (CmmReg (CmmLocal id)) AddrHint ]
- CmmUnsafe ret)
+ ret)
-- Assign the result to BaseReg: we
-- might now have a different Capability!
stmtC (CmmAssign (CmmGlobal BaseReg) (CmmReg (CmmLocal new_base)))
@@ -240,8 +240,8 @@ emitLoadThreadState = do
emitOpenNursery
-- and load the current cost centre stack from the TSO when profiling:
when opt_SccProfilingOn $
- stmtC (CmmStore curCCSAddr
- (CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_CCCS) bWord))
+ stmtC $ storeCurCCS $
+ CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_CCCS) bWord
emitOpenNursery :: Code
emitOpenNursery = stmtsC [
diff --git a/compiler/codeGen/CgPrimOp.hs b/compiler/codeGen/CgPrimOp.hs
index 6fe934b54c..72bbf6cc58 100644
--- a/compiler/codeGen/CgPrimOp.hs
+++ b/compiler/codeGen/CgPrimOp.hs
@@ -155,6 +155,8 @@ emitPrimOp [res] SparkOp [arg] live = do
where
newspark = CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit "newSpark")))
+emitPrimOp [res] GetCCCSOp [] _live
+ = stmtC (CmmAssign (CmmLocal res) curCCS)
emitPrimOp [res] ReadMutVarOp [mutv] _
= stmtC (CmmAssign (CmmLocal res) (cmmLoadIndexW mutv fixedHdrSize gcWord))
diff --git a/compiler/codeGen/CgProf.hs b/compiler/codeGen/CgProf.hs
index c961e24147..3e247ff4d6 100644
--- a/compiler/codeGen/CgProf.hs
+++ b/compiler/codeGen/CgProf.hs
@@ -21,7 +21,7 @@ module CgProf (
enterCostCentreThunk,
enterCostCentreFun,
costCentreFrom,
- curCCS, curCCSAddr,
+ curCCS, storeCurCCS,
emitCostCentreDecl, emitCostCentreStackDecl,
emitSetCCC,
@@ -66,11 +66,10 @@ import Control.Monad
-- Expression representing the current cost centre stack
curCCS :: CmmExpr
-curCCS = CmmLoad curCCSAddr bWord
+curCCS = CmmReg (CmmGlobal CCCS)
--- Address of current CCS variable, for storing into
-curCCSAddr :: CmmExpr
-curCCSAddr = CmmLit (CmmLabel (mkCmmDataLabel rtsPackageId (fsLit "CCCS")))
+storeCurCCS :: CmmExpr -> CmmStmt
+storeCurCCS e = CmmAssign (CmmGlobal CCCS) e
mkCCostCentre :: CostCentre -> CmmLit
mkCCostCentre cc = CmmLabel (mkCCLabel cc)
@@ -135,14 +134,15 @@ profAlloc words ccs
enterCostCentreThunk :: CmmExpr -> Code
enterCostCentreThunk closure =
ifProfiling $ do
- stmtC $ CmmStore curCCSAddr (costCentreFrom closure)
+ stmtC $ storeCurCCS (costCentreFrom closure)
-enterCostCentreFun :: CostCentreStack -> CmmExpr -> Code
-enterCostCentreFun ccs closure =
+enterCostCentreFun :: CostCentreStack -> CmmExpr -> [GlobalReg] -> Code
+enterCostCentreFun ccs closure vols =
ifProfiling $ do
if isCurrentCCS ccs
- then emitRtsCall rtsPackageId (fsLit "enterFunCCS")
- [CmmHinted (costCentreFrom closure) AddrHint] False
+ then emitRtsCallWithVols rtsPackageId (fsLit "enterFunCCS")
+ [CmmHinted (CmmReg (CmmGlobal BaseReg)) AddrHint,
+ CmmHinted (costCentreFrom closure) AddrHint] vols
else return () -- top-level function, nothing to do
ifProfiling :: Code -> Code
@@ -226,7 +226,7 @@ emitSetCCC cc tick push
tmp <- newTemp bWord -- TODO FIXME NOW
pushCostCentre tmp curCCS cc
when tick $ stmtC (bumpSccCount (CmmReg (CmmLocal tmp)))
- when push $ stmtC (CmmStore curCCSAddr (CmmReg (CmmLocal tmp)))
+ when push $ stmtC (storeCurCCS (CmmReg (CmmLocal tmp)))
pushCostCentre :: LocalReg -> CmmExpr -> CostCentre -> Code
pushCostCentre result ccs cc
@@ -234,7 +234,6 @@ pushCostCentre result ccs cc
rtsPackageId
(fsLit "pushCostCentre") [CmmHinted ccs AddrHint,
CmmHinted (CmmLit (mkCCostCentre cc)) AddrHint]
- False
bumpSccCount :: CmmExpr -> CmmStmt
bumpSccCount ccs
diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs
index a0a5ac2554..5274a176a0 100644
--- a/compiler/codeGen/CgUtils.hs
+++ b/compiler/codeGen/CgUtils.hs
@@ -233,23 +233,22 @@ emitRtsCall
:: PackageId -- ^ package the function is in
-> FastString -- ^ name of function
-> [CmmHinted CmmExpr] -- ^ function args
- -> Bool -- ^ whether this is a safe call
-> Code -- ^ cmm code
-emitRtsCall pkg fun args safe = emitRtsCallGen [] pkg fun args Nothing safe
+emitRtsCall pkg fun args = emitRtsCallGen [] pkg fun args Nothing
-- The 'Nothing' says "save all global registers"
-emitRtsCallWithVols :: PackageId -> FastString -> [CmmHinted CmmExpr] -> [GlobalReg] -> Bool -> Code
-emitRtsCallWithVols pkg fun args vols safe
- = emitRtsCallGen [] pkg fun args (Just vols) safe
+emitRtsCallWithVols :: PackageId -> FastString -> [CmmHinted CmmExpr] -> [GlobalReg] -> Code
+emitRtsCallWithVols pkg fun args vols
+ = emitRtsCallGen [] pkg fun args (Just vols)
emitRtsCallWithResult
:: LocalReg -> ForeignHint
-> PackageId -> FastString
- -> [CmmHinted CmmExpr] -> Bool -> Code
+ -> [CmmHinted CmmExpr] -> Code
-emitRtsCallWithResult res hint pkg fun args safe
- = emitRtsCallGen [CmmHinted res hint] pkg fun args Nothing safe
+emitRtsCallWithResult res hint pkg fun args
+ = emitRtsCallGen [CmmHinted res hint] pkg fun args Nothing
-- Make a call to an RTS C procedure
emitRtsCallGen
@@ -258,14 +257,10 @@ emitRtsCallGen
-> FastString
-> [CmmHinted CmmExpr]
-> Maybe [GlobalReg]
- -> Bool -- True <=> CmmSafe call
-> Code
-emitRtsCallGen res pkg fun args vols safe = do
- safety <- if safe
- then getSRTInfo >>= (return . CmmSafe)
- else return CmmUnsafe
+emitRtsCallGen res pkg fun args vols = do
stmtsC caller_save
- stmtC (CmmCall target res args safety CmmMayReturn)
+ stmtC (CmmCall target res args CmmMayReturn)
stmtsC caller_load
where
(caller_save, caller_load) = callerSaveVolatileRegs vols
@@ -291,7 +286,7 @@ callerSaveVolatileRegs vols = (caller_save, caller_load)
caller_save = foldr ($!) [] (map callerSaveGlobalReg regs_to_save)
caller_load = foldr ($!) [] (map callerRestoreGlobalReg regs_to_save)
- system_regs = [Sp,SpLim,Hp,HpLim,CurrentTSO,CurrentNursery,
+ system_regs = [Sp,SpLim,Hp,HpLim,CCCS,CurrentTSO,CurrentNursery,
{-SparkHd,SparkTl,SparkBase,SparkLim,-}BaseReg ]
regs_to_save = system_regs ++ vol_list
@@ -389,6 +384,9 @@ callerSaves Hp = True
#ifdef CALLER_SAVES_HpLim
callerSaves HpLim = True
#endif
+#ifdef CALLER_SAVES_CCCS
+callerSaves CCCS = True
+#endif
#ifdef CALLER_SAVES_CurrentTSO
callerSaves CurrentTSO = True
#endif
@@ -428,6 +426,7 @@ baseRegOffset (LongReg 1) = oFFSET_StgRegTable_rL1
baseRegOffset (LongReg n) = panic ("Registers above L1 are not supported (tried to use L" ++ show n ++ ")")
baseRegOffset Hp = oFFSET_StgRegTable_rHp
baseRegOffset HpLim = oFFSET_StgRegTable_rHpLim
+baseRegOffset CCCS = oFFSET_StgRegTable_rCCCS
baseRegOffset CurrentTSO = oFFSET_StgRegTable_rCurrentTSO
baseRegOffset CurrentNursery = oFFSET_StgRegTable_rCurrentNursery
baseRegOffset HpAlloc = oFFSET_StgRegTable_rHpAlloc
@@ -1009,13 +1008,13 @@ fixStgRegStmt stmt
CmmStore addr src -> CmmStore (fixStgRegExpr addr) (fixStgRegExpr src)
- CmmCall target regs args srt returns ->
+ CmmCall target regs args returns ->
let target' = case target of
CmmCallee e conv -> CmmCallee (fixStgRegExpr e) conv
other -> other
args' = map (\(CmmHinted arg hint) ->
(CmmHinted (fixStgRegExpr arg) hint)) args
- in CmmCall target' regs args' srt returns
+ in CmmCall target' regs args' returns
CmmCondBranch test dest -> CmmCondBranch (fixStgRegExpr test) dest
diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs
index 78aabd82ce..7c739c61b6 100644
--- a/compiler/codeGen/StgCmmForeign.hs
+++ b/compiler/codeGen/StgCmmForeign.hs
@@ -210,8 +210,8 @@ loadThreadState tso stack = do
openNursery,
-- and load the current cost centre stack from the TSO when profiling:
if opt_SccProfilingOn then
- mkStore curCCSAddr
- (CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_CCCS) ccsType)
+ storeCurCCS
+ (CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_CCCS) ccsType)
else mkNop]
emitLoadThreadState :: LocalReg -> LocalReg -> FCode ()
emitLoadThreadState tso stack = emit $ loadThreadState tso stack
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
index f8cc4256f4..d546c38a90 100644
--- a/compiler/codeGen/StgCmmPrim.hs
+++ b/compiler/codeGen/StgCmmPrim.hs
@@ -228,6 +228,9 @@ emitPrimOp [res] SparkOp [arg]
[(CmmReg (CmmGlobal BaseReg), AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)]
emit (mkAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
+emitPrimOp [res] GetCCCSOp []
+ = emit (mkAssign (CmmLocal res) curCCS)
+
emitPrimOp [res] ReadMutVarOp [mutv]
= emit (mkAssign (CmmLocal res) (cmmLoadIndexW mutv fixedHdrSize gcWord))
diff --git a/compiler/codeGen/StgCmmProf.hs b/compiler/codeGen/StgCmmProf.hs
index 13c1be7f42..d9b3583382 100644
--- a/compiler/codeGen/StgCmmProf.hs
+++ b/compiler/codeGen/StgCmmProf.hs
@@ -21,7 +21,7 @@ module StgCmmProf (
dynProfHdr, profDynAlloc, profAlloc, staticProfHdr, initUpdFrameProf,
enterCostCentreThunk,
costCentreFrom,
- curCCS, curCCSAddr,
+ curCCS, storeCurCCS,
emitSetCCC,
saveCurrentCostCentre, restoreCurrentCostCentre,
@@ -73,11 +73,10 @@ ccType :: CmmType -- Type of a cost centre
ccType = bWord
curCCS :: CmmExpr
-curCCS = CmmLoad curCCSAddr ccsType
+curCCS = CmmReg (CmmGlobal CCCS)
--- Address of current CCS variable, for storing into
-curCCSAddr :: CmmExpr
-curCCSAddr = CmmLit (CmmLabel (mkCmmDataLabel rtsPackageId (fsLit "CCCS")))
+storeCurCCS :: CmmExpr -> CmmAGraph
+storeCurCCS e = mkAssign (CmmGlobal CCCS) e
mkCCostCentre :: CostCentre -> CmmLit
mkCCostCentre cc = CmmLabel (mkCCLabel cc)
@@ -150,7 +149,7 @@ restoreCurrentCostCentre :: Maybe LocalReg -> FCode ()
restoreCurrentCostCentre Nothing
= return ()
restoreCurrentCostCentre (Just local_cc)
- = emit (mkStore curCCSAddr (CmmReg (CmmLocal local_cc)))
+ = emit (storeCurCCS (CmmReg (CmmLocal local_cc)))
-------------------------------------------------------------------------------
@@ -186,7 +185,7 @@ profAlloc words ccs
enterCostCentreThunk :: CmmExpr -> FCode ()
enterCostCentreThunk closure =
ifProfiling $ do
- emit $ mkStore curCCSAddr (costCentreFrom closure)
+ emit $ storeCurCCS (costCentreFrom closure)
ifProfiling :: FCode () -> FCode ()
ifProfiling code
@@ -269,7 +268,7 @@ emitSetCCC cc tick push
tmp <- newTemp ccsType -- TODO FIXME NOW
pushCostCentre tmp curCCS cc
when tick $ emit (bumpSccCount (CmmReg (CmmLocal tmp)))
- when push $ emit (mkStore curCCSAddr (CmmReg (CmmLocal tmp)))
+ when push $ emit (storeCurCCS (CmmReg (CmmLocal tmp)))
pushCostCentre :: LocalReg -> CmmExpr -> CostCentre -> FCode ()
pushCostCentre result ccs cc
diff --git a/compiler/codeGen/StgCmmUtils.hs b/compiler/codeGen/StgCmmUtils.hs
index f209005108..c3327138b3 100644
--- a/compiler/codeGen/StgCmmUtils.hs
+++ b/compiler/codeGen/StgCmmUtils.hs
@@ -253,7 +253,7 @@ callerSaveVolatileRegs = (caller_save, caller_load)
caller_save = catAGraphs (map callerSaveGlobalReg regs_to_save)
caller_load = catAGraphs (map callerRestoreGlobalReg regs_to_save)
- system_regs = [ Sp,SpLim,Hp,HpLim,CurrentTSO,CurrentNursery
+ system_regs = [ Sp,SpLim,Hp,HpLim,CCCS,CurrentTSO,CurrentNursery
{- ,SparkHd,SparkTl,SparkBase,SparkLim -}
, BaseReg ]
@@ -366,6 +366,9 @@ callerSaves Hp = True
#ifdef CALLER_SAVES_HpLim
callerSaves HpLim = True
#endif
+#ifdef CALLER_SAVES_CCCS
+callerSaves CCCS = True
+#endif
#ifdef CALLER_SAVES_CurrentTSO
callerSaves CurrentTSO = True
#endif
@@ -385,7 +388,8 @@ baseRegOffset SpLim = oFFSET_StgRegTable_rSpLim
baseRegOffset (LongReg 1) = oFFSET_StgRegTable_rL1
baseRegOffset Hp = oFFSET_StgRegTable_rHp
baseRegOffset HpLim = oFFSET_StgRegTable_rHpLim
-baseRegOffset CurrentTSO = oFFSET_StgRegTable_rCurrentTSO
+baseRegOffset CCCS = oFFSET_StgRegTable_rCCCS
+baseRegOffset CurrentTSO = oFFSET_StgRegTable_rCurrentTSO
baseRegOffset CurrentNursery = oFFSET_StgRegTable_rCurrentNursery
baseRegOffset HpAlloc = oFFSET_StgRegTable_rHpAlloc
baseRegOffset GCEnter1 = oFFSET_stgGCEnter1
diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
index b039d39960..1ea5d0c038 100644
--- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
+++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
@@ -125,7 +125,7 @@ stmtToInstrs env stmt = case stmt of
CmmSwitch arg ids -> genSwitch env arg ids
-- Foreign Call
- CmmCall target res args _ ret
+ CmmCall target res args ret
-> genCall env target res args ret
-- Tail call
diff --git a/compiler/nativeGen/AsmCodeGen.lhs b/compiler/nativeGen/AsmCodeGen.lhs
index 7ffda3d8f6..0d8aab146b 100644
--- a/compiler/nativeGen/AsmCodeGen.lhs
+++ b/compiler/nativeGen/AsmCodeGen.lhs
@@ -879,7 +879,7 @@ cmmStmtConFold stmt
-> do addr' <- cmmExprConFold JumpReference addr
return $ CmmJump addr' regs
- CmmCall target regs args srt returns
+ CmmCall target regs args returns
-> do target' <- case target of
CmmCallee e conv -> do
e' <- cmmExprConFold CallReference e
@@ -888,7 +888,7 @@ cmmStmtConFold stmt
args' <- mapM (\(CmmHinted arg hint) -> do
arg' <- cmmExprConFold DataReference arg
return (CmmHinted arg' hint)) args
- return $ CmmCall target' regs args' srt returns
+ return $ CmmCall target' regs args' returns
CmmCondBranch test dest
-> do test' <- cmmExprConFold DataReference test
diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs
index 8c80ec40c1..a043af01f8 100644
--- a/compiler/nativeGen/PPC/CodeGen.hs
+++ b/compiler/nativeGen/PPC/CodeGen.hs
@@ -135,7 +135,7 @@ stmtToInstrs stmt = do
where ty = cmmExprType src
size = cmmTypeSize ty
- CmmCall target result_regs args _ _
+ CmmCall target result_regs args _
-> genCCall target result_regs args
CmmBranch id -> genBranch id
diff --git a/compiler/nativeGen/SPARC/CodeGen.hs b/compiler/nativeGen/SPARC/CodeGen.hs
index 91a850d5fc..663b95b236 100644
--- a/compiler/nativeGen/SPARC/CodeGen.hs
+++ b/compiler/nativeGen/SPARC/CodeGen.hs
@@ -135,7 +135,7 @@ stmtToInstrs stmt = case stmt of
where ty = cmmExprType src
size = cmmTypeSize ty
- CmmCall target result_regs args _ _
+ CmmCall target result_regs args _
-> genCCall target result_regs args
CmmBranch id -> genBranch id
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
index 97baeec1ab..5f0f716281 100644
--- a/compiler/nativeGen/X86/CodeGen.hs
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -160,7 +160,7 @@ stmtToInstrs stmt = do
where ty = cmmExprType src
size = cmmTypeSize ty
- CmmCall target result_regs args _ _
+ CmmCall target result_regs args _
-> genCCall is32Bit target result_regs args
CmmBranch id -> genBranch id
@@ -1996,7 +1996,7 @@ outOfLineCmmOp mop res args
targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl
let target = CmmCallee targetExpr CCallConv
- stmtToInstrs (CmmCall target (catMaybes [res]) args' CmmUnsafe CmmMayReturn)
+ stmtToInstrs (CmmCall target (catMaybes [res]) args' CmmMayReturn)
where
-- Assume we can call these functions directly, and that they're not in a dynamic library.
-- TODO: Why is this ok? Under linux this code will be in libm.so
diff --git a/compiler/prelude/primops.txt.pp b/compiler/prelude/primops.txt.pp
index 204dce2e59..ceb9226594 100644
--- a/compiler/prelude/primops.txt.pp
+++ b/compiler/prelude/primops.txt.pp
@@ -1799,6 +1799,9 @@ primop TraceCcsOp "traceCcs#" GenPrimOp
has_side_effects = True
out_of_line = True
+primop GetCCCSOp "getCCCS#" GenPrimOp
+ State# s -> (# State# s, Addr# #)
+
------------------------------------------------------------------------
section "Etc"
{Miscellaneous built-ins}
diff --git a/docs/users_guide/profiling.xml b/docs/users_guide/profiling.xml
index a5a1d4911c..ee3b387e31 100644
--- a/docs/users_guide/profiling.xml
+++ b/docs/users_guide/profiling.xml
@@ -9,12 +9,6 @@
can answer questions like "why is my program so slow?", or "why is
my program using so much memory?".</para>
- <para>Note that multi-processor execution (e.g. <literal>+RTS
- -N2</literal>) is not currently supported with GHC's time and space
- profiling. However, there is a separate tool specifically for
- profiling concurrent and parallel programs: <ulink
- url="http://www.haskell.org/haskellwiki/ThreadScope">ThreadScope</ulink>.</para>
-
<para>Profiling a program is a three-step process:</para>
<orderedlist>
@@ -1359,6 +1353,44 @@ to re-read its input file:
</sect2>
</sect1>
+ <sect1 id="prof-threaded">
+ <title>Profiling Parallel and Concurrent Programs</title>
+
+ <para>Combining <option>-threaded</option>
+ and <option>-prof</option> is perfectly fine, and indeed it is
+ possible to profile a program running on multiple processors
+ with the <option>+RTS -N</option> option.<footnote>This feature
+ was added in GHC 7.4.1.</footnote>
+ </para>
+
+ <para>
+ Some caveats apply, however. In the current implementation, a
+ profiled program is likely to scale much less well than the
+ unprofiled program, because the profiling implementation uses
+ some shared data structures which require locking in the runtime
+ system. Furthermore, the memory allocation statistics collected
+ by the profiled program are stored in shared memory
+ but <emphasis>not</emphasis> locked (for speed), which means
+ that these figures might be inaccurate for parallel programs.
+ </para>
+
+ <para>
+ We strongly recommend that you
+ use <option>-fno-prof-count-entries</option> when compiling a
+ program to be profiled on multiple cores, because the entry
+ counts are also stored in shared memory, and continuously
+ updating them on multiple cores is extremely slow.
+ </para>
+
+ <para>
+ We also recommend
+ using <ulink url="http://www.haskell.org/haskellwiki/ThreadScope">ThreadScope</ulink>
+ for profiling parallel programs; it offers a GUI for visualising
+ parallel execution, and is complementary to the time and space
+ profiling features provided with GHC.
+ </para>
+ </sect1>
+
<sect1 id="hpc">
<title>Observing Code Coverage</title>
<indexterm><primary>code coverage</primary></indexterm>
diff --git a/includes/Cmm.h b/includes/Cmm.h
index 641faa216e..11c02b4e3e 100644
--- a/includes/Cmm.h
+++ b/includes/Cmm.h
@@ -372,7 +372,7 @@
CCCS_ALLOC(bytes);
/* CCS_ALLOC wants the size in words, because ccs->mem_alloc is in words */
-#define CCCS_ALLOC(__alloc) CCS_ALLOC(BYTES_TO_WDS(__alloc), W_[CCCS])
+#define CCCS_ALLOC(__alloc) CCS_ALLOC(BYTES_TO_WDS(__alloc), CCCS)
#define HP_CHK_GEN_TICKY(alloc,liveness,reentry) \
HP_CHK_GEN(alloc,liveness,reentry); \
diff --git a/includes/RtsAPI.h b/includes/RtsAPI.h
index 329b1569ab..8d948f9b49 100644
--- a/includes/RtsAPI.h
+++ b/includes/RtsAPI.h
@@ -37,6 +37,15 @@ typedef struct StgClosure_ *HaskellObj;
*/
typedef struct Capability_ Capability;
+/*
+ * The public view of a Capability: we can be sure it starts with
+ * these two components (but it may have more private fields).
+ */
+typedef struct CapabilityPublic_ {
+ StgFunTable f;
+ StgRegTable r;
+} CapabilityPublic;
+
/* ----------------------------------------------------------------------------
RTS configuration settings, for passing to hs_init_ghc()
------------------------------------------------------------------------- */
diff --git a/includes/mkDerivedConstants.c b/includes/mkDerivedConstants.c
index b02b6c86f0..a2c9160e95 100644
--- a/includes/mkDerivedConstants.c
+++ b/includes/mkDerivedConstants.c
@@ -222,6 +222,7 @@ main(int argc, char *argv[])
field_offset(StgRegTable, rSpLim);
field_offset(StgRegTable, rHp);
field_offset(StgRegTable, rHpLim);
+ field_offset(StgRegTable, rCCCS);
field_offset(StgRegTable, rCurrentTSO);
field_offset(StgRegTable, rCurrentNursery);
field_offset(StgRegTable, rHpAlloc);
diff --git a/includes/rts/prof/CCS.h b/includes/rts/prof/CCS.h
index 4692d166b5..9737fc9c18 100644
--- a/includes/rts/prof/CCS.h
+++ b/includes/rts/prof/CCS.h
@@ -114,8 +114,6 @@ typedef struct _IndexTable {
Pre-defined cost centres and cost centre stacks
-------------------------------------------------------------------------- */
-extern CostCentreStack * RTS_VAR(CCCS); /* current CCS */
-
#if IN_STG_CODE
extern StgWord CC_MAIN[];
@@ -153,6 +151,9 @@ extern CostCentreStack CCS_DONT_CARE[]; // shouldn't ever get set
extern CostCentre CC_PINNED[];
extern CostCentreStack CCS_PINNED[]; // pinned memory
+extern CostCentre CC_IDLE[];
+extern CostCentreStack CCS_IDLE[]; // capability is idle
+
#endif /* IN_STG_CODE */
extern unsigned int RTS_VAR(CC_ID); // global ids
@@ -165,7 +166,7 @@ extern unsigned int RTS_VAR(era);
* ---------------------------------------------------------------------------*/
CostCentreStack * pushCostCentre (CostCentreStack *, CostCentre *);
-void enterFunCCS (CostCentreStack *);
+void enterFunCCS (StgRegTable *reg, CostCentreStack *);
/* -----------------------------------------------------------------------------
Registering CCs and CCSs
diff --git a/includes/stg/MiscClosures.h b/includes/stg/MiscClosures.h
index 45dc8369c7..fcfdede2ff 100644
--- a/includes/stg/MiscClosures.h
+++ b/includes/stg/MiscClosures.h
@@ -488,7 +488,6 @@ extern StgWord RTS_VAR(stable_ptr_table);
// Profiling.c
extern unsigned int RTS_VAR(era);
-extern StgWord RTS_VAR(CCCS); /* current CCS */
extern unsigned int RTS_VAR(entering_PAP);
extern StgWord RTS_VAR(CC_LIST); /* registered CC list */
extern StgWord RTS_VAR(CCS_LIST); /* registered CCS list */
diff --git a/includes/stg/Regs.h b/includes/stg/Regs.h
index 1d0c00c491..b7f0abda7d 100644
--- a/includes/stg/Regs.h
+++ b/includes/stg/Regs.h
@@ -80,6 +80,7 @@ typedef struct StgRegTable_ {
StgPtr rSpLim;
StgPtr rHp;
StgPtr rHpLim;
+ struct _CostCentreStack * rCCCS; // current cost-centre-stack
struct StgTSO_ * rCurrentTSO;
struct nursery_ * rNursery;
struct bdescr_ * rCurrentNursery; /* Hp/HpLim point into this block */
diff --git a/rts/Apply.cmm b/rts/Apply.cmm
index 5397fc55df..a2d4a7e123 100644
--- a/rts/Apply.cmm
+++ b/rts/Apply.cmm
@@ -86,7 +86,7 @@ stg_PAP_apply
TICK_ENT_PAP();
LDV_ENTER(pap);
#ifdef PROFILING
- foreign "C" enterFunCCS(StgHeader_ccs(pap));
+ foreign "C" enterFunCCS(BaseReg "ptr", StgHeader_ccs(pap) "ptr");
#endif
// Reload the stack
diff --git a/rts/AutoApply.h b/rts/AutoApply.h
index 547c5d2f28..d0c5c3fe6b 100644
--- a/rts/AutoApply.h
+++ b/rts/AutoApply.h
@@ -22,7 +22,7 @@
TICK_ALLOC_HEAP_NOCTR(BYTES_TO_WDS(size)); \
TICK_ALLOC_PAP(n+1 /* +1 for the FUN */, 0); \
pap = Hp + WDS(1) - size; \
- SET_HDR(pap, stg_PAP_info, W_[CCCS]); \
+ SET_HDR(pap, stg_PAP_info, CCCS); \
StgPAP_arity(pap) = HALF_W_(arity - m); \
StgPAP_fun(pap) = R1; \
StgPAP_n_args(pap) = HALF_W_(n); \
@@ -52,7 +52,7 @@
TICK_ALLOC_HEAP_NOCTR(BYTES_TO_WDS(size)); \
TICK_ALLOC_PAP(n+1 /* +1 for the FUN */, 0); \
new_pap = Hp + WDS(1) - size; \
- SET_HDR(new_pap, stg_PAP_info, W_[CCCS]); \
+ SET_HDR(new_pap, stg_PAP_info, CCCS); \
StgPAP_arity(new_pap) = HALF_W_(arity - m); \
W_ n_args; \
n_args = TO_W_(StgPAP_n_args(pap)); \
@@ -78,10 +78,10 @@
// Jump to target, saving CCCS and restoring it on return
#if defined(PROFILING)
-#define jump_SAVE_CCCS(target) \
- Sp(-1) = W_[CCCS]; \
- Sp(-2) = stg_restore_cccs_info; \
- Sp_adj(-2); \
+#define jump_SAVE_CCCS(target) \
+ Sp(-1) = CCCS; \
+ Sp(-2) = stg_restore_cccs_info; \
+ Sp_adj(-2); \
jump (target)
#else
#define jump_SAVE_CCCS(target) jump (target)
diff --git a/rts/Capability.c b/rts/Capability.c
index 3b45dec360..6c84d1ec35 100644
--- a/rts/Capability.c
+++ b/rts/Capability.c
@@ -46,7 +46,7 @@ volatile StgWord waiting_for_gc = 0;
/* Let foreign code get the current Capability -- assuming there is one!
* This is useful for unsafe foreign calls because they are called with
* the current Capability held, but they are not passed it. For example,
- * see see the integer-gmp package which calls allocateLocal() in its
+ * see see the integer-gmp package which calls allocate() in its
* stgAllocForGMP() function (which gets called by gmp functions).
* */
Capability * rts_unsafeGetMyCapability (void)
@@ -265,6 +265,12 @@ initCapability( Capability *cap, nat i )
cap->context_switch = 0;
cap->pinned_object_block = NULL;
+#ifdef PROFILING
+ cap->r.rCCCS = CCS_SYSTEM;
+#else
+ cap->r.rCCCS = NULL;
+#endif
+
traceCapsetAssignCap(CAPSET_OSPROCESS_DEFAULT, i);
traceCapsetAssignCap(CAPSET_CLOCKDOMAIN_DEFAULT, i);
#if defined(THREADED_RTS)
@@ -453,6 +459,9 @@ releaseCapability_ (Capability* cap,
}
}
+#ifdef PROFILING
+ cap->r.rCCCS = CCS_IDLE;
+#endif
last_free_capability = cap;
debugTrace(DEBUG_sched, "freeing capability %d", cap->no);
}
@@ -604,6 +613,10 @@ waitForReturnCapability (Capability **pCap, Task *task)
}
+#ifdef PROFILING
+ cap->r.rCCCS = CCS_SYSTEM;
+#endif
+
ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);
debugTrace(DEBUG_sched, "resuming capability %d", cap->no);
@@ -676,14 +689,19 @@ yieldCapability (Capability** pCap, Task *task)
task->next = NULL;
cap->n_spare_workers--;
}
- cap->running_task = task;
+
+ cap->running_task = task;
RELEASE_LOCK(&cap->lock);
break;
}
- debugTrace(DEBUG_sched, "resuming capability %d", cap->no);
+ debugTrace(DEBUG_sched, "resuming capability %d", cap->no);
ASSERT(cap->running_task == task);
+#ifdef PROFILING
+ cap->r.rCCCS = CCS_SYSTEM;
+#endif
+
*pCap = cap;
ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);
diff --git a/rts/Exception.cmm b/rts/Exception.cmm
index 1192db732c..78907c4ba7 100644
--- a/rts/Exception.cmm
+++ b/rts/Exception.cmm
@@ -369,7 +369,7 @@ stg_catchzh
/* Set up the catch frame */
Sp = Sp - SIZEOF_StgCatchFrame;
- SET_HDR(Sp,stg_catch_frame_info,W_[CCCS]);
+ SET_HDR(Sp,stg_catch_frame_info,CCCS);
StgCatchFrame_handler(Sp) = R2;
StgCatchFrame_exceptions_blocked(Sp) =
@@ -427,7 +427,7 @@ stg_raisezh
*/
if (RtsFlags_ProfFlags_showCCSOnException(RtsFlags) != 0::I32) {
SAVE_THREAD_STATE();
- foreign "C" fprintCCS_stderr(W_[CCCS] "ptr",
+ foreign "C" fprintCCS_stderr(CCCS "ptr",
exception "ptr",
CurrentTSO "ptr") [];
LOAD_THREAD_STATE();
diff --git a/rts/HeapStackCheck.cmm b/rts/HeapStackCheck.cmm
index d17961145a..d80e101c59 100644
--- a/rts/HeapStackCheck.cmm
+++ b/rts/HeapStackCheck.cmm
@@ -167,10 +167,17 @@ __stg_gc_enter_1
be an orphaned BLOCKING_QUEUE now.
-------------------------------------------------------------------------- */
-INFO_TABLE_RET( stg_enter_checkbh, RET_SMALL, P_ unused)
+/* The stg_enter_checkbh frame has the same shape as an update frame: */
+#if defined(PROFILING)
+#define UPD_FRAME_PARAMS W_ unused1, W_ unused2, P_ unused3
+#else
+#define UPD_FRAME_PARAMS P_ unused1
+#endif
+
+INFO_TABLE_RET( stg_enter_checkbh, RET_SMALL, UPD_FRAME_PARAMS)
{
- R1 = Sp(1);
- Sp_adj(2);
+ R1 = StgUpdateFrame_updatee(Sp);
+ Sp = Sp + SIZEOF_StgUpdateFrame;
foreign "C" checkBlockingQueues(MyCapability() "ptr",
CurrentTSO) [R1];
ENTER();
diff --git a/rts/Interpreter.c b/rts/Interpreter.c
index da151e1342..2eac1cd834 100644
--- a/rts/Interpreter.c
+++ b/rts/Interpreter.c
@@ -614,7 +614,7 @@ do_apply:
// build a new PAP and return it.
StgPAP *new_pap;
new_pap = (StgPAP *)allocate(cap, PAP_sizeW(pap->n_args + m));
- SET_HDR(new_pap,&stg_PAP_info,CCCS);
+ SET_HDR(new_pap,&stg_PAP_info,cap->r.rCCCS);
new_pap->arity = pap->arity - n;
new_pap->n_args = pap->n_args + m;
new_pap->fun = pap->fun;
@@ -659,7 +659,7 @@ do_apply:
StgPAP *pap;
nat i;
pap = (StgPAP *)allocate(cap, PAP_sizeW(m));
- SET_HDR(pap, &stg_PAP_info,CCCS);
+ SET_HDR(pap, &stg_PAP_info,cap->r.rCCCS);
pap->arity = arity - n;
pap->fun = obj;
pap->n_args = m;
diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm
index 8836d3bfe6..2ca347e803 100644
--- a/rts/PrimOps.cmm
+++ b/rts/PrimOps.cmm
@@ -63,7 +63,7 @@ stg_newByteArrayzh
words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
("ptr" p) = foreign "C" allocate(MyCapability() "ptr",words) [];
TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
- SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
+ SET_HDR(p, stg_ARR_WORDS_info, CCCS);
StgArrWords_bytes(p) = n;
RET_P(p);
}
@@ -96,7 +96,7 @@ stg_newPinnedByteArrayzh
to BA_ALIGN bytes: */
p = p + ((-p - SIZEOF_StgArrWords) & BA_MASK);
- SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
+ SET_HDR(p, stg_ARR_WORDS_info, CCCS);
StgArrWords_bytes(p) = n;
RET_P(p);
}
@@ -136,7 +136,7 @@ stg_newAlignedPinnedByteArrayzh
<alignment> is a power of 2, which is technically not guaranteed */
p = p + ((-p - SIZEOF_StgArrWords) & (alignment - 1));
- SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
+ SET_HDR(p, stg_ARR_WORDS_info, CCCS);
StgArrWords_bytes(p) = n;
RET_P(p);
}
@@ -157,7 +157,7 @@ stg_newArrayzh
("ptr" arr) = foreign "C" allocate(MyCapability() "ptr",words) [R2];
TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);
- SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
+ SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, CCCS);
StgMutArrPtrs_ptrs(arr) = n;
StgMutArrPtrs_size(arr) = size;
@@ -225,7 +225,7 @@ stg_newMutVarzh
ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, stg_newMutVarzh);
mv = Hp - SIZEOF_StgMutVar + WDS(1);
- SET_HDR(mv,stg_MUT_VAR_DIRTY_info,W_[CCCS]);
+ SET_HDR(mv,stg_MUT_VAR_DIRTY_info,CCCS);
StgMutVar_var(mv) = R1;
RET_P(mv);
@@ -297,21 +297,21 @@ stg_atomicModifyMutVarzh
TICK_ALLOC_THUNK_2();
CCCS_ALLOC(THUNK_2_SIZE);
z = Hp - THUNK_2_SIZE + WDS(1);
- SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
+ SET_HDR(z, stg_ap_2_upd_info, CCCS);
LDV_RECORD_CREATE(z);
StgThunk_payload(z,0) = f;
TICK_ALLOC_THUNK_1();
CCCS_ALLOC(THUNK_1_SIZE);
y = z - THUNK_1_SIZE;
- SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
+ SET_HDR(y, stg_sel_0_upd_info, CCCS);
LDV_RECORD_CREATE(y);
StgThunk_payload(y,0) = z;
TICK_ALLOC_THUNK_1();
CCCS_ALLOC(THUNK_1_SIZE);
r = y - THUNK_1_SIZE;
- SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
+ SET_HDR(r, stg_sel_1_upd_info, CCCS);
LDV_RECORD_CREATE(r);
StgThunk_payload(r,0) = z;
@@ -353,7 +353,7 @@ stg_mkWeakzh
ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, stg_mkWeakzh );
w = Hp - SIZEOF_StgWeak + WDS(1);
- SET_HDR(w, stg_WEAK_info, W_[CCCS]);
+ SET_HDR(w, stg_WEAK_info, CCCS);
// We don't care about cfinalizer here.
// Should StgWeak_cfinalizer(w) be stg_NO_FINALIZER_closure or
@@ -397,14 +397,14 @@ stg_mkWeakForeignEnvzh
ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR, stg_mkWeakForeignEnvzh );
w = Hp - SIZEOF_StgWeak + WDS(1);
- SET_HDR(w, stg_WEAK_info, W_[CCCS]);
+ SET_HDR(w, stg_WEAK_info, CCCS);
payload_words = 4;
words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
("ptr" p) = foreign "C" allocate(MyCapability() "ptr", words) [];
TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
- SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
+ SET_HDR(p, stg_ARR_WORDS_info, CCCS);
StgArrWords_bytes(p) = WDS(payload_words);
StgArrWords_payload(p,0) = fptr;
@@ -877,7 +877,7 @@ stg_atomicallyzh
Sp = Sp - SIZEOF_StgAtomicallyFrame;
frame = Sp;
- SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
+ SET_HDR(frame,stg_atomically_frame_info, CCCS);
StgAtomicallyFrame_code(frame) = R1;
StgAtomicallyFrame_result(frame) = NO_TREC;
StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
@@ -903,7 +903,7 @@ stg_catchSTMzh
Sp = Sp - SIZEOF_StgCatchSTMFrame;
frame = Sp;
- SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
+ SET_HDR(frame, stg_catch_stm_frame_info, CCCS);
StgCatchSTMFrame_handler(frame) = R2;
StgCatchSTMFrame_code(frame) = R1;
@@ -941,7 +941,7 @@ stg_catchRetryzh
Sp = Sp - SIZEOF_StgCatchRetryFrame;
frame = Sp;
- SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
+ SET_HDR(frame, stg_catch_retry_frame_info, CCCS);
StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
StgCatchRetryFrame_first_code(frame) = R1;
StgCatchRetryFrame_alt_code(frame) = R2;
@@ -1153,7 +1153,7 @@ stg_newMVarzh
ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, stg_newMVarzh );
mvar = Hp - SIZEOF_StgMVar + WDS(1);
- SET_HDR(mvar,stg_MVAR_DIRTY_info,W_[CCCS]);
+ SET_HDR(mvar,stg_MVAR_DIRTY_info,CCCS);
// MVARs start dirty: generation 0 has no mutable list
StgMVar_head(mvar) = stg_END_TSO_QUEUE_closure;
StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
@@ -1527,7 +1527,7 @@ stg_makeStableNamezh
*/
if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
- SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
+ SET_HDR(sn_obj, stg_STABLE_NAME_info, CCCS);
StgStableName_sn(sn_obj) = index;
snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
} else {
@@ -1578,7 +1578,7 @@ stg_newBCOzh
ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R5_PTR, stg_newBCOzh );
bco = Hp - bytes + WDS(1);
- SET_HDR(bco, stg_BCO_info, W_[CCCS]);
+ SET_HDR(bco, stg_BCO_info, CCCS);
StgBCO_instrs(bco) = R1;
StgBCO_literals(bco) = R2;
@@ -1617,7 +1617,7 @@ stg_mkApUpd0zh
CCCS_ALLOC(SIZEOF_StgAP);
ap = Hp - SIZEOF_StgAP + WDS(1);
- SET_HDR(ap, stg_AP_info, W_[CCCS]);
+ SET_HDR(ap, stg_AP_info, CCCS);
StgAP_n_args(ap) = HALF_W_(0);
StgAP_fun(ap) = R1;
@@ -1668,7 +1668,7 @@ out:
ptrs_arr = Hp - nptrs_arr_sz - ptrs_arr_sz + WDS(1);
nptrs_arr = Hp - nptrs_arr_sz + WDS(1);
- SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, W_[CCCS]);
+ SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, CCCS);
StgMutArrPtrs_ptrs(ptrs_arr) = ptrs;
StgMutArrPtrs_size(ptrs_arr) = ptrs + ptrs_arr_cards;
@@ -1683,7 +1683,7 @@ for:
allocated in the nursery. The GC will fill it in if/when the array
is promoted. */
- SET_HDR(nptrs_arr, stg_ARR_WORDS_info, W_[CCCS]);
+ SET_HDR(nptrs_arr, stg_ARR_WORDS_info, CCCS);
StgArrWords_bytes(nptrs_arr) = WDS(nptrs);
p = 0;
for2:
diff --git a/rts/Profiling.c b/rts/Profiling.c
index c393c8fa83..ac2708eda1 100644
--- a/rts/Profiling.c
+++ b/rts/Profiling.c
@@ -18,6 +18,7 @@
#include "Arena.h"
#include "RetainerProfile.h"
#include "Printer.h"
+#include "Capability.h"
#include <string.h>
@@ -51,16 +52,16 @@ FILE *prof_file;
static char *hp_filename; /* heap profile (hp2ps style) log file */
FILE *hp_file;
-/* The Current Cost Centre Stack (for attributing costs)
- */
-CostCentreStack *CCCS;
-
/* Linked lists to keep track of CCs and CCSs that haven't
* been declared in the log file yet
*/
CostCentre *CC_LIST = NULL;
CostCentreStack *CCS_LIST = NULL;
+#ifdef THREADED_RTS
+Mutex ccs_mutex;
+#endif
+
/*
* Built-in cost centres and cost-centre stacks:
*
@@ -92,6 +93,7 @@ CC_DECLARE(CC_GC, "GC", "GC", CC_NOT_CAF, );
CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", CC_NOT_CAF, );
CC_DECLARE(CC_DONT_CARE, "DONT_CARE", "MAIN", CC_NOT_CAF, );
CC_DECLARE(CC_PINNED, "PINNED", "SYSTEM", CC_NOT_CAF, );
+CC_DECLARE(CC_IDLE, "IDLE", "IDLE", CC_NOT_CAF, );
CCS_DECLARE(CCS_MAIN, CC_MAIN, );
CCS_DECLARE(CCS_SYSTEM, CC_SYSTEM, );
@@ -99,6 +101,7 @@ CCS_DECLARE(CCS_GC, CC_GC, );
CCS_DECLARE(CCS_OVERHEAD, CC_OVERHEAD, );
CCS_DECLARE(CCS_DONT_CARE, CC_DONT_CARE, );
CCS_DECLARE(CCS_PINNED, CC_PINNED, );
+CCS_DECLARE(CCS_IDLE, CC_IDLE, );
/*
* Static Functions
@@ -143,7 +146,12 @@ initProfiling1 (void)
prof_arena = newArena();
/* for the benefit of allocate()... */
- CCCS = CCS_SYSTEM;
+ {
+ nat n;
+ for (n=0; n < n_capabilities; n++) {
+ capabilities[n].r.rCCCS = CCS_SYSTEM;
+ }
+ }
}
void
@@ -157,8 +165,6 @@ initProfiling2 (void)
{
CostCentreStack *ccs, *next;
- CCCS = CCS_SYSTEM;
-
/* Set up the log file, and dump the header and cost centre
* information into it.
*/
@@ -173,12 +179,14 @@ initProfiling2 (void)
REGISTER_CC(CC_OVERHEAD);
REGISTER_CC(CC_DONT_CARE);
REGISTER_CC(CC_PINNED);
+ REGISTER_CC(CC_IDLE);
REGISTER_CCS(CCS_SYSTEM);
REGISTER_CCS(CCS_GC);
REGISTER_CCS(CCS_OVERHEAD);
REGISTER_CCS(CCS_DONT_CARE);
REGISTER_CCS(CCS_PINNED);
+ REGISTER_CCS(CCS_IDLE);
REGISTER_CCS(CCS_MAIN);
/* find all the registered cost centre stacks, and make them
@@ -310,12 +318,17 @@ endProfiling ( void )
// implements c1 ++> c2, where c1 and c2 are equal depth
//
-static void enterFunEqualStacks (CostCentreStack *ccs, CostCentreStack *ccsfn)
+static CostCentreStack *
+enterFunEqualStacks (CostCentreStack *ccs0,
+ CostCentreStack *ccsapp,
+ CostCentreStack *ccsfn)
{
- ASSERT(ccs->depth == ccsfn->depth);
- if (ccs == ccsfn) return;
- enterFunEqualStacks(ccs->prevStack, ccsfn->prevStack);
- CCCS = pushCostCentre(CCCS, ccsfn->cc);
+ ASSERT(ccsapp->depth == ccsfn->depth);
+ if (ccsapp == ccsfn) return ccs0;
+ return pushCostCentre(enterFunEqualStacks(ccs0,
+ ccsapp->prevStack,
+ ccsfn->prevStack),
+ ccsfn->cc);
}
// implements c1 ++> c2, where c2 is deeper than c1.
@@ -323,21 +336,25 @@ static void enterFunEqualStacks (CostCentreStack *ccs, CostCentreStack *ccsfn)
// enterFunEqualStacks(), and then push on the elements that we
// dropped in reverse order.
//
-static void enterFunCurShorter (CostCentreStack *ccsfn, StgWord n)
+static CostCentreStack *
+enterFunCurShorter (CostCentreStack *ccsapp, CostCentreStack *ccsfn, StgWord n)
{
if (n == 0) {
- ASSERT(ccsfn->depth == CCCS->depth);
- enterFunEqualStacks(CCCS,ccsfn);
- return;
+ ASSERT(ccsfn->depth == ccsapp->depth);
+ return enterFunEqualStacks(ccsapp,ccsapp,ccsfn);;
+ } else {
+ ASSERT(ccsfn->depth > ccsapp->depth);
+ return pushCostCentre(enterFunCurShorter(ccsapp, ccsfn->prevStack, n-1),
+ ccsfn->cc);
}
- enterFunCurShorter(ccsfn->prevStack, n-1);
- CCCS = pushCostCentre(CCCS, ccsfn->cc);
}
-void enterFunCCS ( CostCentreStack *ccsfn )
+void enterFunCCS (StgRegTable *reg, CostCentreStack *ccsfn)
{
+ CostCentreStack *ccsapp;
+
// common case 1: both stacks are the same
- if (ccsfn == CCCS) {
+ if (ccsfn == reg->rCCCS) {
return;
}
@@ -346,34 +363,38 @@ void enterFunCCS ( CostCentreStack *ccsfn )
return;
}
+ ccsapp = reg->rCCCS;
+ reg->rCCCS = CCS_OVERHEAD;
+
// common case 3: the stacks are completely different (e.g. one is a
// descendent of MAIN and the other of a CAF): we append the whole
// of the function stack to the current CCS.
- if (ccsfn->root != CCCS->root) {
- CCCS = appendCCS(CCCS,ccsfn);
+ if (ccsfn->root != ccsapp->root) {
+ reg->rCCCS = appendCCS(ccsapp,ccsfn);
return;
}
- // uncommon case 4: CCCS is deeper than ccsfn
- if (CCCS->depth > ccsfn->depth) {
+ // uncommon case 4: ccsapp is deeper than ccsfn
+ if (ccsapp->depth > ccsfn->depth) {
nat i, n;
- CostCentreStack *tmp = CCCS;
- n = CCCS->depth - ccsfn->depth;
+ CostCentreStack *tmp = ccsapp;
+ n = ccsapp->depth - ccsfn->depth;
for (i = 0; i < n; i++) {
tmp = tmp->prevStack;
}
- enterFunEqualStacks(tmp,ccsfn);
+ reg->rCCCS = enterFunEqualStacks(ccsapp,tmp,ccsfn);
return;
}
// uncommon case 5: ccsfn is deeper than CCCS
- if (ccsfn->depth > CCCS->depth) {
- enterFunCurShorter(ccsfn, ccsfn->depth - CCCS->depth);
+ if (ccsfn->depth > ccsapp->depth) {
+ reg->rCCCS = enterFunCurShorter(ccsapp, ccsfn,
+ ccsfn->depth - ccsapp->depth);
return;
}
// uncommon case 6: stacks are equal depth, but different
- enterFunEqualStacks(CCCS,ccsfn);
+ reg->rCCCS = enterFunEqualStacks(ccsapp,ccsapp,ccsfn);
}
/* -----------------------------------------------------------------------------
@@ -477,20 +498,41 @@ appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
CostCentreStack *
pushCostCentre (CostCentreStack *ccs, CostCentre *cc)
{
- CostCentreStack *temp_ccs;
-
- if (ccs == EMPTY_STACK)
- return actualPush(ccs,cc);
- else {
- if (ccs->cc == cc)
+ CostCentreStack *temp_ccs, *ret;
+ IndexTable *ixtable;
+
+ if (ccs == EMPTY_STACK) {
+ ACQUIRE_LOCK(&ccs_mutex);
+ ret = actualPush(ccs,cc);
+ }
+ else
+ {
+ if (ccs->cc == cc) {
return ccs;
- else {
+ } else {
// check if we've already memoized this stack
- temp_ccs = isInIndexTable(ccs->indexTable,cc);
+ ixtable = ccs->indexTable;
+ temp_ccs = isInIndexTable(ixtable,cc);
- if (temp_ccs != EMPTY_STACK)
+ if (temp_ccs != EMPTY_STACK) {
return temp_ccs;
- else {
+ } else {
+
+ // not in the IndexTable, now we take the lock:
+ ACQUIRE_LOCK(&ccs_mutex);
+
+ if (ccs->indexTable != ixtable)
+ {
+ // someone modified ccs->indexTable while
+ // we did not hold the lock, so we must
+ // check it again:
+ temp_ccs = isInIndexTable(ixtable,cc);
+ if (temp_ccs != EMPTY_STACK)
+ {
+ RELEASE_LOCK(&ccs_mutex);
+ return temp_ccs;
+ }
+ }
temp_ccs = checkLoop(ccs,cc);
if (temp_ccs != NULL) {
// This CC is already in the stack somewhere.
@@ -510,13 +552,16 @@ pushCostCentre (CostCentreStack *ccs, CostCentre *cc)
#endif
ccs->indexTable = addToIndexTable (ccs->indexTable,
new_ccs, cc, 1);
- return new_ccs;
+ ret = new_ccs;
} else {
- return actualPush (ccs,cc);
+ ret = actualPush (ccs,cc);
}
}
}
}
+
+ RELEASE_LOCK(&ccs_mutex);
+ return ret;
}
static CostCentreStack *
@@ -801,11 +846,12 @@ reportCCSProfiling( void )
fprintf(prof_file, " %s", prog_argv[count]);
fprintf(prof_file, "\n\n");
- fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d us)\n",
+ fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d us, %d processor%s)\n",
((double) total_prof_ticks *
- (double) RtsFlags.MiscFlags.tickInterval) / TIME_RESOLUTION,
+ (double) RtsFlags.MiscFlags.tickInterval) / (TIME_RESOLUTION * n_capabilities),
(unsigned long) total_prof_ticks,
- (int) TimeToUS(RtsFlags.MiscFlags.tickInterval));
+ (int) TimeToUS(RtsFlags.MiscFlags.tickInterval),
+ n_capabilities, n_capabilities > 1 ? "s" : "");
fprintf(prof_file, "\ttotal alloc = %11s bytes",
showStgWord64(total_alloc * sizeof(W_),
diff --git a/rts/Proftimer.c b/rts/Proftimer.c
index 76d7679000..569f087bb4 100644
--- a/rts/Proftimer.c
+++ b/rts/Proftimer.c
@@ -11,6 +11,7 @@
#include "Profiling.h"
#include "Proftimer.h"
+#include "Capability.h"
#ifdef PROFILING
static rtsBool do_prof_ticks = rtsFalse; // enable profiling ticks
@@ -73,7 +74,10 @@ handleProfTick(void)
#ifdef PROFILING
total_ticks++;
if (do_prof_ticks) {
- CCCS->time_ticks++;
+ nat n;
+ for (n=0; n < n_capabilities; n++) {
+ capabilities[n].r.rCCCS->time_ticks++;
+ }
}
#endif
diff --git a/rts/RetainerProfile.h b/rts/RetainerProfile.h
index b2adf71d9d..0e75327cde 100644
--- a/rts/RetainerProfile.h
+++ b/rts/RetainerProfile.h
@@ -21,6 +21,9 @@ void endRetainerProfiling ( void );
void retainerProfile ( void );
void resetStaticObjectForRetainerProfiling( StgClosure *static_objects );
+// flip is either 1 or 0, changed at the beginning of retainerProfile()
+// It is used to tell whether a retainer set has been touched so far
+// during this pass.
extern StgWord flip;
// extract the retainer set field from c
diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c
index 3e3290dd3d..650c4f9a04 100644
--- a/rts/RtsFlags.c
+++ b/rts/RtsFlags.c
@@ -1147,12 +1147,6 @@ error = rtsTrue;
errorBelch("Using large values for -N is not allowed by default. Link with -rtsopts to allow full control.");
stg_exit(EXIT_FAILURE);
}
-#if defined(PROFILING)
- if (nNodes > 1) {
- errorBelch("bad option %s: only -N1 is supported with profiling", rts_argv[arg]);
- error = rtsTrue;
- }
-#endif
RtsFlags.ParFlags.nNodes = (nat)nNodes;
}
) break;
diff --git a/rts/Schedule.c b/rts/Schedule.c
index 8c305008ae..04a66e31df 100644
--- a/rts/Schedule.c
+++ b/rts/Schedule.c
@@ -501,7 +501,7 @@ run_thread:
// Costs for the scheduler are assigned to CCS_SYSTEM
stopHeapProfTimer();
#if defined(PROFILING)
- CCCS = CCS_SYSTEM;
+ cap->r.rCCCS = CCS_SYSTEM;
#endif
schedulePostRunThread(cap,t);
@@ -2262,7 +2262,7 @@ raiseExceptionHelper (StgRegTable *reg, StgTSO *tso, StgClosure *exception)
if (raise_closure == NULL) {
raise_closure =
(StgThunk *)allocate(cap,sizeofW(StgThunk)+1);
- SET_HDR(raise_closure, &stg_raise_info, CCCS);
+ SET_HDR(raise_closure, &stg_raise_info, cap->r.rCCCS);
raise_closure->payload[0] = exception;
}
updateThunk(cap, tso, ((StgUpdateFrame *)p)->updatee,
diff --git a/rts/StgMiscClosures.cmm b/rts/StgMiscClosures.cmm
index 5ddc1acb40..e4b128f96e 100644
--- a/rts/StgMiscClosures.cmm
+++ b/rts/StgMiscClosures.cmm
@@ -42,7 +42,7 @@ INFO_TABLE_RET (stg_stack_underflow_frame, UNDERFLOW_FRAME, P_ unused)
INFO_TABLE_RET (stg_restore_cccs, RET_SMALL, W_ cccs)
{
#if defined(PROFILING)
- W_[CCCS] = Sp(1);
+ CCCS = Sp(1);
#endif
Sp_adj(2);
jump %ENTRY_CODE(Sp(0));
diff --git a/rts/StgStdThunks.cmm b/rts/StgStdThunks.cmm
index be85999598..171ab52b96 100644
--- a/rts/StgStdThunks.cmm
+++ b/rts/StgStdThunks.cmm
@@ -30,8 +30,8 @@
#define NOUPD_FRAME_SIZE (SIZEOF_StgHeader)
#ifdef PROFILING
-#define SAVE_CCCS(fs) StgHeader_ccs(Sp-fs) = W_[CCCS]
-#define GET_SAVED_CCCS W_[CCCS] = StgHeader_ccs(Sp)
+#define SAVE_CCCS(fs) StgHeader_ccs(Sp-fs) = CCCS
+#define GET_SAVED_CCCS CCCS = StgHeader_ccs(Sp)
#define RET_PARAMS W_ unused1, W_ unused2
#else
#define SAVE_CCCS(fs) /* empty */
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index 7892280dca..733c2d67c2 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -189,7 +189,7 @@ GarbageCollect (rtsBool force_major_gc,
#endif
#ifdef PROFILING
- CostCentreStack *prev_CCS;
+ CostCentreStack *save_CCS[n_capabilities];
#endif
ACQUIRE_SM_LOCK;
@@ -221,8 +221,10 @@ GarbageCollect (rtsBool force_major_gc,
// attribute any costs to CCS_GC
#ifdef PROFILING
- prev_CCS = CCCS;
- CCCS = CCS_GC;
+ for (n = 0; n < n_capabilities; n++) {
+ save_CCS[n] = capabilities[n].r.rCCCS;
+ capabilities[n].r.rCCCS = CCS_GC;
+ }
#endif
/* Approximate how much we allocated.
@@ -626,10 +628,8 @@ GarbageCollect (rtsBool force_major_gc,
#ifdef PROFILING
// resetStaticObjectForRetainerProfiling() must be called before
// zeroing below.
- if (n_gc_threads > 1) {
- barf("profiling is currently broken with multi-threaded GC");
- // ToDo: fix the gct->scavenged_static_objects below
- }
+
+ // ToDo: fix the gct->scavenged_static_objects below
resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
#endif
@@ -704,7 +704,9 @@ GarbageCollect (rtsBool force_major_gc,
// restore enclosing cost centre
#ifdef PROFILING
- CCCS = prev_CCS;
+ for (n = 0; n < n_capabilities; n++) {
+ capabilities[n].r.rCCCS = save_CCS[n];
+ }
#endif
#ifdef DEBUG
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 1dad6c8df0..be3badfbe4 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -608,7 +608,7 @@ allocate (Capability *cap, lnat n)
StgPtr p;
TICK_ALLOC_HEAP_NOCTR(n);
- CCS_ALLOC(CCCS,n);
+ CCS_ALLOC(cap->r.rCCCS,n);
if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
lnat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
@@ -719,7 +719,7 @@ allocatePinned (Capability *cap, lnat n)
}
TICK_ALLOC_HEAP_NOCTR(n);
- CCS_ALLOC(CCCS,n);
+ CCS_ALLOC(cap->r.rCCCS,n);
bd = cap->pinned_object_block;
diff --git a/utils/genapply/GenApply.hs b/utils/genapply/GenApply.hs
index 2ffa81bb76..b255b92d28 100644
--- a/utils/genapply/GenApply.hs
+++ b/utils/genapply/GenApply.hs
@@ -301,7 +301,7 @@ genMkPAP regstatus macro jump ticker disamb
loadSpWordOff "W_" (sp_stk_args+stack_args_size-3)
<> text " = stg_restore_cccs_info;" $$
loadSpWordOff "W_" (sp_stk_args+stack_args_size-2)
- <> text " = W_[CCCS];"
+ <> text " = CCCS;"
else empty) $$
loadSpWordOff "W_" (sp_stk_args+stack_args_size-1)
<> text " = "