diff options
author | Dimitrios Vytiniotis <dimitris@microsoft.com> | 2011-11-29 19:20:33 +0000 |
---|---|---|
committer | Dimitrios Vytiniotis <dimitris@microsoft.com> | 2011-11-29 19:20:33 +0000 |
commit | 42e3b5bd3ee9c555adaaf1e5a12f7ddd71423c0c (patch) | |
tree | 0e8ca1886f847d878ab9b9126c2268e4c8b54a1b | |
parent | e4d87e140697bcb380cc51a5aee598409930281e (diff) | |
parent | 1f7433b7b998dda4dde6d09f22a37f637745c079 (diff) | |
download | haskell-42e3b5bd3ee9c555adaaf1e5a12f7ddd71423c0c.tar.gz |
Merge branch 'master' of http://darcs.haskell.org//ghc
50 files changed, 335 insertions, 204 deletions
diff --git a/compiler/cmm/CmmCvt.hs b/compiler/cmm/CmmCvt.hs index c8a1d85597..c82f517849 100644 --- a/compiler/cmm/CmmCvt.hs +++ b/compiler/cmm/CmmCvt.hs @@ -91,7 +91,7 @@ ofZgraph g = Old.ListGraph $ mapMaybe convert_block $ postorderDfs g Old.CmmCall (cmm_target target) (add_hints (get_conv target) Results ress) (add_hints (get_conv target) Arguments args) - Old.CmmUnsafe Old.CmmMayReturn + Old.CmmMayReturn last :: CmmNode O C -> () -> [Old.CmmStmt] last node _ = stmts diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs index ef97a82aa9..885639b874 100644 --- a/compiler/cmm/CmmExpr.hs +++ b/compiler/cmm/CmmExpr.hs @@ -343,7 +343,8 @@ data GlobalReg | SpLim -- Stack limit | Hp -- Heap ptr; points to last occupied heap location. | HpLim -- Heap limit register - | CurrentTSO -- pointer to current thread's TSO + | CCCS -- Current cost-centre stack + | CurrentTSO -- pointer to current thread's TSO | CurrentNursery -- pointer to allocation area | HpAlloc -- allocation count for heap check failure @@ -395,6 +396,7 @@ instance Ord GlobalReg where compare SpLim SpLim = EQ compare Hp Hp = EQ compare HpLim HpLim = EQ + compare CCCS CCCS = EQ compare CurrentTSO CurrentTSO = EQ compare CurrentNursery CurrentNursery = EQ compare HpAlloc HpAlloc = EQ @@ -419,6 +421,8 @@ instance Ord GlobalReg where compare _ Hp = GT compare HpLim _ = LT compare _ HpLim = GT + compare CCCS _ = LT + compare _ CCCS = GT compare CurrentTSO _ = LT compare _ CurrentTSO = GT compare CurrentNursery _ = LT diff --git a/compiler/cmm/CmmLex.x b/compiler/cmm/CmmLex.x index 1e2b20d4b3..ddd681d25e 100644 --- a/compiler/cmm/CmmLex.x +++ b/compiler/cmm/CmmLex.x @@ -106,8 +106,9 @@ $white_no_nl+ ; SpLim { global_reg SpLim } Hp { global_reg Hp } HpLim { global_reg HpLim } - CurrentTSO { global_reg CurrentTSO } - CurrentNursery { global_reg CurrentNursery } + CCCS { global_reg CCCS } + CurrentTSO { global_reg CurrentTSO } + CurrentNursery { global_reg CurrentNursery } HpAlloc { global_reg HpAlloc } BaseReg { global_reg BaseReg } diff --git a/compiler/cmm/CmmLint.hs b/compiler/cmm/CmmLint.hs index e03da8ccd7..ee53c1b6c7 100644 --- a/compiler/cmm/CmmLint.hs +++ b/compiler/cmm/CmmLint.hs @@ -133,7 +133,7 @@ lintCmmStmt platform labels = lint _ <- lintCmmExpr platform l _ <- lintCmmExpr platform r return () - lint (CmmCall target _res args _ _) = + lint (CmmCall target _res args _) = lintTarget platform target >> mapM_ (lintCmmExpr platform . hintlessCmm) args lint (CmmCondBranch e id) = checkTarget id >> lintCmmExpr platform e >> checkCond platform e lint (CmmSwitch e branches) = do diff --git a/compiler/cmm/CmmOpt.hs b/compiler/cmm/CmmOpt.hs index 1005448894..007b7a715e 100644 --- a/compiler/cmm/CmmOpt.hs +++ b/compiler/cmm/CmmOpt.hs @@ -59,7 +59,7 @@ cmmEliminateDeadBlocks blocks@(BasicBlock base_id _:_) = stmt m (CmmComment _) = m stmt m (CmmAssign _ e) = expr m e stmt m (CmmStore e1 e2) = expr (expr m e1) e2 - stmt m (CmmCall c _ as _ _) = f (actuals m as) c + stmt m (CmmCall c _ as _) = f (actuals m as) c where f m (CmmCallee e _) = expr m e f m (CmmPrim _) = m stmt m (CmmBranch b) = b:m @@ -266,8 +266,8 @@ lookForInline' u expr regset (stmt : rest) inlineStmt :: Unique -> CmmExpr -> CmmStmt -> CmmStmt inlineStmt u a (CmmAssign r e) = CmmAssign r (inlineExpr u a e) inlineStmt u a (CmmStore e1 e2) = CmmStore (inlineExpr u a e1) (inlineExpr u a e2) -inlineStmt u a (CmmCall target regs es srt ret) - = CmmCall (infn target) regs es' srt ret +inlineStmt u a (CmmCall target regs es ret) + = CmmCall (infn target) regs es' ret where infn (CmmCallee fn cconv) = CmmCallee (inlineExpr u a fn) cconv infn (CmmPrim p) = CmmPrim p es' = [ (CmmHinted (inlineExpr u a e) hint) | (CmmHinted e hint) <- es ] diff --git a/compiler/cmm/CmmParse.y b/compiler/cmm/CmmParse.y index bdb2c4c918..4e315ddbdf 100644 --- a/compiler/cmm/CmmParse.y +++ b/compiler/cmm/CmmParse.y @@ -775,8 +775,9 @@ isPtrGlobalReg Sp = True isPtrGlobalReg SpLim = True isPtrGlobalReg Hp = True isPtrGlobalReg HpLim = True -isPtrGlobalReg CurrentTSO = True -isPtrGlobalReg CurrentNursery = True +isPtrGlobalReg CCCS = True +isPtrGlobalReg CurrentTSO = True +isPtrGlobalReg CurrentNursery = True isPtrGlobalReg (VanillaReg _ VGcPtr) = True isPtrGlobalReg _ = False @@ -867,10 +868,9 @@ foreignCall conv_string results_code expr_code args_code vols safety ret results <- sequence results_code expr <- expr_code args <- sequence args_code - --code (stmtC (CmmCall (CmmCallee expr convention) results args safety)) case convention of -- Temporary hack so at least some functions are CmmSafe - CmmCallConv -> code (stmtC (CmmCall (CmmCallee expr convention) results args safety ret)) + CmmCallConv -> code (stmtC (CmmCall (CmmCallee expr convention) results args ret)) _ -> let expr' = adjCallTarget convention expr args in case safety of diff --git a/compiler/cmm/OldCmm.hs b/compiler/cmm/OldCmm.hs index 3703de4e32..a8a9d5dde0 100644 --- a/compiler/cmm/OldCmm.hs +++ b/compiler/cmm/OldCmm.hs @@ -154,7 +154,6 @@ data CmmStmt -- Old-style CmmCallTarget [HintedCmmFormal] -- zero or more results [HintedCmmActual] -- zero or more arguments - CmmSafety -- whether to build a continuation CmmReturnInfo -- Some care is necessary when handling the arguments of these, see -- [Register parameter passing] and the hack in cmm/CmmOpt.hs @@ -192,7 +191,7 @@ instance UserOfLocalRegs CmmStmt where stmt (CmmComment {}) = id stmt (CmmAssign _ e) = gen e stmt (CmmStore e1 e2) = gen e1 . gen e2 - stmt (CmmCall target _ es _ _) = gen target . gen es + stmt (CmmCall target _ es _) = gen target . gen es stmt (CmmBranch _) = id stmt (CmmCondBranch e _) = gen e stmt (CmmSwitch e _) = gen e diff --git a/compiler/cmm/OldPprCmm.hs b/compiler/cmm/OldPprCmm.hs index d2f03f78b7..07dfbf63bf 100644 --- a/compiler/cmm/OldPprCmm.hs +++ b/compiler/cmm/OldPprCmm.hs @@ -122,11 +122,10 @@ pprStmt platform stmt = case stmt of -- call "ccall" foo(x, y)[r1, r2]; -- ToDo ppr volatile - CmmCall (CmmCallee fn cconv) results args safety ret -> + CmmCall (CmmCallee fn cconv) results args ret -> sep [ pp_lhs <+> pp_conv , nest 2 (pprExpr9 platform fn <> parens (commafy (map ppr_ar args))) - <> brackets (pprPlatform platform safety) , case ret of CmmMayReturn -> empty CmmNeverReturns -> ptext $ sLit (" never returns") ] <> semi @@ -142,9 +141,9 @@ pprStmt platform stmt = case stmt of _ -> ptext (sLit("foreign")) <+> doubleQuotes (ppr cconv) -- Call a CallishMachOp, like sin or cos that might be implemented as a library call. - CmmCall (CmmPrim op) results args safety ret -> + CmmCall (CmmPrim op) results args ret -> pprStmt platform (CmmCall (CmmCallee (CmmLit lbl) CCallConv) - results args safety ret) + results args ret) where -- HACK: A CallishMachOp doesn't really correspond to a ForeignLabel, but we -- use one to get the label printed. diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs index 4f8a061bdd..270ce12670 100644 --- a/compiler/cmm/PprC.hs +++ b/compiler/cmm/PprC.hs @@ -193,7 +193,7 @@ pprStmt platform stmt = case stmt of where rep = cmmExprType src - CmmCall (CmmCallee fn cconv) results args safety ret -> + CmmCall (CmmCallee fn cconv) results args ret -> maybe_proto $$ fnCall where @@ -215,7 +215,7 @@ pprStmt platform stmt = case stmt of case fn of CmmLit (CmmLabel lbl) | StdCallConv <- cconv -> - let myCall = pprCall platform (pprCLabel platform lbl) cconv results args safety + let myCall = pprCall platform (pprCLabel platform lbl) cconv results args in (real_fun_proto lbl, myCall) -- stdcall functions must be declared with -- a function type, otherwise the C compiler @@ -223,22 +223,22 @@ pprStmt platform stmt = case stmt of -- can't add the @n suffix ourselves, because -- it isn't valid C. | CmmNeverReturns <- ret -> - let myCall = pprCall platform (pprCLabel platform lbl) cconv results args safety + let myCall = pprCall platform (pprCLabel platform lbl) cconv results args in (real_fun_proto lbl, myCall) | not (isMathFun lbl) -> let myCall = braces ( pprCFunType (char '*' <> text "ghcFunPtr") cconv results args <> semi $$ text "ghcFunPtr" <+> equals <+> cast_fn <> semi - $$ pprCall platform (text "ghcFunPtr") cconv results args safety <> semi + $$ pprCall platform (text "ghcFunPtr") cconv results args <> semi ) in (fun_proto lbl, myCall) _ -> (empty {- no proto -}, - pprCall platform cast_fn cconv results args safety <> semi) + pprCall platform cast_fn cconv results args <> semi) -- for a dynamic call, no declaration is necessary. - CmmCall (CmmPrim op) results args safety _ret -> - pprCall platform ppr_fn CCallConv results args' safety + CmmCall (CmmPrim op) results args _ret -> + pprCall platform ppr_fn CCallConv results args' where ppr_fn = pprCallishMachOp_for_C op -- The mem primops carry an extra alignment arg, must drop it. @@ -812,10 +812,10 @@ pprLocalReg (LocalReg uniq _) = char '_' <> ppr uniq -- Foreign Calls pprCall :: Platform -> SDoc -> CCallConv - -> [HintedCmmFormal] -> [HintedCmmActual] -> CmmSafety + -> [HintedCmmFormal] -> [HintedCmmActual] -> SDoc -pprCall platform ppr_fn cconv results args _ +pprCall platform ppr_fn cconv results args | not (is_cishCC cconv) = panic $ "pprCall: unknown calling convention" @@ -926,7 +926,7 @@ te_Lit _ = return () te_Stmt :: CmmStmt -> TE () te_Stmt (CmmAssign r e) = te_Reg r >> te_Expr e te_Stmt (CmmStore l r) = te_Expr l >> te_Expr r -te_Stmt (CmmCall _ rs es _ _) = mapM_ (te_temp.hintlessCmm) rs >> +te_Stmt (CmmCall _ rs es _) = mapM_ (te_temp.hintlessCmm) rs >> mapM_ (te_Expr.hintlessCmm) es te_Stmt (CmmCondBranch e _) = te_Expr e te_Stmt (CmmSwitch e _) = te_Expr e diff --git a/compiler/cmm/PprCmmExpr.hs b/compiler/cmm/PprCmmExpr.hs index 76fbdcec8d..81ce84c264 100644 --- a/compiler/cmm/PprCmmExpr.hs +++ b/compiler/cmm/PprCmmExpr.hs @@ -271,6 +271,7 @@ pprGlobalReg gr SpLim -> ptext (sLit "SpLim") Hp -> ptext (sLit "Hp") HpLim -> ptext (sLit "HpLim") + CCCS -> ptext (sLit "CCCS") CurrentTSO -> ptext (sLit "CurrentTSO") CurrentNursery -> ptext (sLit "CurrentNursery") HpAlloc -> ptext (sLit "HpAlloc") diff --git a/compiler/codeGen/CgCase.lhs b/compiler/codeGen/CgCase.lhs index e4fe386043..a36621bdaf 100644 --- a/compiler/codeGen/CgCase.lhs +++ b/compiler/codeGen/CgCase.lhs @@ -670,6 +670,6 @@ restoreCurrentCostCentre Nothing _freeit = nopC restoreCurrentCostCentre (Just slot) freeit = do { sp_rel <- getSpRelOffset slot ; whenC freeit (freeStackSlots [slot]) - ; stmtC (CmmStore curCCSAddr (CmmLoad sp_rel bWord)) } + ; stmtC (storeCurCCS (CmmLoad sp_rel bWord)) } \end{code} diff --git a/compiler/codeGen/CgClosure.lhs b/compiler/codeGen/CgClosure.lhs index 85d629dbaf..7bad8516d9 100644 --- a/compiler/codeGen/CgClosure.lhs +++ b/compiler/codeGen/CgClosure.lhs @@ -316,9 +316,10 @@ mkFunEntryCode cl_info cc reg_args stk_args sp_top reg_save_code body = do -- Do the business ; funWrapper cl_info reg_args reg_save_code $ do { tickyEnterFun cl_info - ; enterCostCentreFun cc $ - CmmMachOp mo_wordSub [ CmmReg nodeReg - , CmmLit (mkIntCLit (funTag cl_info)) ] + ; enterCostCentreFun cc + (CmmMachOp mo_wordSub [ CmmReg nodeReg + , CmmLit (mkIntCLit (funTag cl_info)) ]) + (node : map snd reg_args) -- live regs ; cgExpr body } } @@ -482,7 +483,7 @@ emitBlackHoleCode is_single_entry = do stmtsC [ CmmStore (cmmOffsetW (CmmReg nodeReg) fixedHdrSize) (CmmReg (CmmGlobal CurrentTSO)), - CmmCall (CmmPrim MO_WriteBarrier) [] [] CmmUnsafe CmmMayReturn, + CmmCall (CmmPrim MO_WriteBarrier) [] [] CmmMayReturn, CmmStore (CmmReg nodeReg) (CmmReg (CmmGlobal EagerBlackholeInfo)) ] \end{code} @@ -580,7 +581,7 @@ link_caf cl_info _is_upd = do [ CmmHinted (CmmReg (CmmGlobal BaseReg)) AddrHint, CmmHinted (CmmReg nodeReg) AddrHint, CmmHinted hp_rel AddrHint ] - (Just [node]) False + (Just [node]) -- node is live, so save it. -- see Note [atomic CAF entry] in rts/sm/Storage.c diff --git a/compiler/codeGen/CgForeignCall.hs b/compiler/codeGen/CgForeignCall.hs index d96e9f8cfc..8d8b97d76a 100644 --- a/compiler/codeGen/CgForeignCall.hs +++ b/compiler/codeGen/CgForeignCall.hs @@ -127,7 +127,7 @@ emitForeignCall' safety results target args vols _srt ret let (caller_save, caller_load) = callerSaveVolatileRegs vols let caller_load' = if ret == CmmNeverReturns then [] else caller_load stmtsC caller_save - stmtC (CmmCall target results temp_args CmmUnsafe ret) + stmtC (CmmCall target results temp_args ret) stmtsC caller_load' | otherwise = do @@ -149,12 +149,12 @@ emitForeignCall' safety results target args vols _srt ret [ CmmHinted id AddrHint ] [ CmmHinted (CmmReg (CmmGlobal BaseReg)) AddrHint , CmmHinted (CmmLit (CmmInt (fromIntegral (fromEnum (playInterruptible safety))) wordWidth)) NoHint] - CmmUnsafe ret) - stmtC (CmmCall temp_target results temp_args CmmUnsafe ret) + ret) + stmtC (CmmCall temp_target results temp_args ret) stmtC (CmmCall (CmmCallee resumeThread CCallConv) [ CmmHinted new_base AddrHint ] [ CmmHinted (CmmReg (CmmLocal id)) AddrHint ] - CmmUnsafe ret) + ret) -- Assign the result to BaseReg: we -- might now have a different Capability! stmtC (CmmAssign (CmmGlobal BaseReg) (CmmReg (CmmLocal new_base))) @@ -240,8 +240,8 @@ emitLoadThreadState = do emitOpenNursery -- and load the current cost centre stack from the TSO when profiling: when opt_SccProfilingOn $ - stmtC (CmmStore curCCSAddr - (CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_CCCS) bWord)) + stmtC $ storeCurCCS $ + CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_CCCS) bWord emitOpenNursery :: Code emitOpenNursery = stmtsC [ diff --git a/compiler/codeGen/CgPrimOp.hs b/compiler/codeGen/CgPrimOp.hs index 6fe934b54c..72bbf6cc58 100644 --- a/compiler/codeGen/CgPrimOp.hs +++ b/compiler/codeGen/CgPrimOp.hs @@ -155,6 +155,8 @@ emitPrimOp [res] SparkOp [arg] live = do where newspark = CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit "newSpark"))) +emitPrimOp [res] GetCCCSOp [] _live + = stmtC (CmmAssign (CmmLocal res) curCCS) emitPrimOp [res] ReadMutVarOp [mutv] _ = stmtC (CmmAssign (CmmLocal res) (cmmLoadIndexW mutv fixedHdrSize gcWord)) diff --git a/compiler/codeGen/CgProf.hs b/compiler/codeGen/CgProf.hs index c961e24147..3e247ff4d6 100644 --- a/compiler/codeGen/CgProf.hs +++ b/compiler/codeGen/CgProf.hs @@ -21,7 +21,7 @@ module CgProf ( enterCostCentreThunk, enterCostCentreFun, costCentreFrom, - curCCS, curCCSAddr, + curCCS, storeCurCCS, emitCostCentreDecl, emitCostCentreStackDecl, emitSetCCC, @@ -66,11 +66,10 @@ import Control.Monad -- Expression representing the current cost centre stack curCCS :: CmmExpr -curCCS = CmmLoad curCCSAddr bWord +curCCS = CmmReg (CmmGlobal CCCS) --- Address of current CCS variable, for storing into -curCCSAddr :: CmmExpr -curCCSAddr = CmmLit (CmmLabel (mkCmmDataLabel rtsPackageId (fsLit "CCCS"))) +storeCurCCS :: CmmExpr -> CmmStmt +storeCurCCS e = CmmAssign (CmmGlobal CCCS) e mkCCostCentre :: CostCentre -> CmmLit mkCCostCentre cc = CmmLabel (mkCCLabel cc) @@ -135,14 +134,15 @@ profAlloc words ccs enterCostCentreThunk :: CmmExpr -> Code enterCostCentreThunk closure = ifProfiling $ do - stmtC $ CmmStore curCCSAddr (costCentreFrom closure) + stmtC $ storeCurCCS (costCentreFrom closure) -enterCostCentreFun :: CostCentreStack -> CmmExpr -> Code -enterCostCentreFun ccs closure = +enterCostCentreFun :: CostCentreStack -> CmmExpr -> [GlobalReg] -> Code +enterCostCentreFun ccs closure vols = ifProfiling $ do if isCurrentCCS ccs - then emitRtsCall rtsPackageId (fsLit "enterFunCCS") - [CmmHinted (costCentreFrom closure) AddrHint] False + then emitRtsCallWithVols rtsPackageId (fsLit "enterFunCCS") + [CmmHinted (CmmReg (CmmGlobal BaseReg)) AddrHint, + CmmHinted (costCentreFrom closure) AddrHint] vols else return () -- top-level function, nothing to do ifProfiling :: Code -> Code @@ -226,7 +226,7 @@ emitSetCCC cc tick push tmp <- newTemp bWord -- TODO FIXME NOW pushCostCentre tmp curCCS cc when tick $ stmtC (bumpSccCount (CmmReg (CmmLocal tmp))) - when push $ stmtC (CmmStore curCCSAddr (CmmReg (CmmLocal tmp))) + when push $ stmtC (storeCurCCS (CmmReg (CmmLocal tmp))) pushCostCentre :: LocalReg -> CmmExpr -> CostCentre -> Code pushCostCentre result ccs cc @@ -234,7 +234,6 @@ pushCostCentre result ccs cc rtsPackageId (fsLit "pushCostCentre") [CmmHinted ccs AddrHint, CmmHinted (CmmLit (mkCCostCentre cc)) AddrHint] - False bumpSccCount :: CmmExpr -> CmmStmt bumpSccCount ccs diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs index a0a5ac2554..5274a176a0 100644 --- a/compiler/codeGen/CgUtils.hs +++ b/compiler/codeGen/CgUtils.hs @@ -233,23 +233,22 @@ emitRtsCall :: PackageId -- ^ package the function is in -> FastString -- ^ name of function -> [CmmHinted CmmExpr] -- ^ function args - -> Bool -- ^ whether this is a safe call -> Code -- ^ cmm code -emitRtsCall pkg fun args safe = emitRtsCallGen [] pkg fun args Nothing safe +emitRtsCall pkg fun args = emitRtsCallGen [] pkg fun args Nothing -- The 'Nothing' says "save all global registers" -emitRtsCallWithVols :: PackageId -> FastString -> [CmmHinted CmmExpr] -> [GlobalReg] -> Bool -> Code -emitRtsCallWithVols pkg fun args vols safe - = emitRtsCallGen [] pkg fun args (Just vols) safe +emitRtsCallWithVols :: PackageId -> FastString -> [CmmHinted CmmExpr] -> [GlobalReg] -> Code +emitRtsCallWithVols pkg fun args vols + = emitRtsCallGen [] pkg fun args (Just vols) emitRtsCallWithResult :: LocalReg -> ForeignHint -> PackageId -> FastString - -> [CmmHinted CmmExpr] -> Bool -> Code + -> [CmmHinted CmmExpr] -> Code -emitRtsCallWithResult res hint pkg fun args safe - = emitRtsCallGen [CmmHinted res hint] pkg fun args Nothing safe +emitRtsCallWithResult res hint pkg fun args + = emitRtsCallGen [CmmHinted res hint] pkg fun args Nothing -- Make a call to an RTS C procedure emitRtsCallGen @@ -258,14 +257,10 @@ emitRtsCallGen -> FastString -> [CmmHinted CmmExpr] -> Maybe [GlobalReg] - -> Bool -- True <=> CmmSafe call -> Code -emitRtsCallGen res pkg fun args vols safe = do - safety <- if safe - then getSRTInfo >>= (return . CmmSafe) - else return CmmUnsafe +emitRtsCallGen res pkg fun args vols = do stmtsC caller_save - stmtC (CmmCall target res args safety CmmMayReturn) + stmtC (CmmCall target res args CmmMayReturn) stmtsC caller_load where (caller_save, caller_load) = callerSaveVolatileRegs vols @@ -291,7 +286,7 @@ callerSaveVolatileRegs vols = (caller_save, caller_load) caller_save = foldr ($!) [] (map callerSaveGlobalReg regs_to_save) caller_load = foldr ($!) [] (map callerRestoreGlobalReg regs_to_save) - system_regs = [Sp,SpLim,Hp,HpLim,CurrentTSO,CurrentNursery, + system_regs = [Sp,SpLim,Hp,HpLim,CCCS,CurrentTSO,CurrentNursery, {-SparkHd,SparkTl,SparkBase,SparkLim,-}BaseReg ] regs_to_save = system_regs ++ vol_list @@ -389,6 +384,9 @@ callerSaves Hp = True #ifdef CALLER_SAVES_HpLim callerSaves HpLim = True #endif +#ifdef CALLER_SAVES_CCCS +callerSaves CCCS = True +#endif #ifdef CALLER_SAVES_CurrentTSO callerSaves CurrentTSO = True #endif @@ -428,6 +426,7 @@ baseRegOffset (LongReg 1) = oFFSET_StgRegTable_rL1 baseRegOffset (LongReg n) = panic ("Registers above L1 are not supported (tried to use L" ++ show n ++ ")") baseRegOffset Hp = oFFSET_StgRegTable_rHp baseRegOffset HpLim = oFFSET_StgRegTable_rHpLim +baseRegOffset CCCS = oFFSET_StgRegTable_rCCCS baseRegOffset CurrentTSO = oFFSET_StgRegTable_rCurrentTSO baseRegOffset CurrentNursery = oFFSET_StgRegTable_rCurrentNursery baseRegOffset HpAlloc = oFFSET_StgRegTable_rHpAlloc @@ -1009,13 +1008,13 @@ fixStgRegStmt stmt CmmStore addr src -> CmmStore (fixStgRegExpr addr) (fixStgRegExpr src) - CmmCall target regs args srt returns -> + CmmCall target regs args returns -> let target' = case target of CmmCallee e conv -> CmmCallee (fixStgRegExpr e) conv other -> other args' = map (\(CmmHinted arg hint) -> (CmmHinted (fixStgRegExpr arg) hint)) args - in CmmCall target' regs args' srt returns + in CmmCall target' regs args' returns CmmCondBranch test dest -> CmmCondBranch (fixStgRegExpr test) dest diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs index 78aabd82ce..7c739c61b6 100644 --- a/compiler/codeGen/StgCmmForeign.hs +++ b/compiler/codeGen/StgCmmForeign.hs @@ -210,8 +210,8 @@ loadThreadState tso stack = do openNursery, -- and load the current cost centre stack from the TSO when profiling: if opt_SccProfilingOn then - mkStore curCCSAddr - (CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_CCCS) ccsType) + storeCurCCS + (CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_CCCS) ccsType) else mkNop] emitLoadThreadState :: LocalReg -> LocalReg -> FCode () emitLoadThreadState tso stack = emit $ loadThreadState tso stack diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs index f8cc4256f4..d546c38a90 100644 --- a/compiler/codeGen/StgCmmPrim.hs +++ b/compiler/codeGen/StgCmmPrim.hs @@ -228,6 +228,9 @@ emitPrimOp [res] SparkOp [arg] [(CmmReg (CmmGlobal BaseReg), AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)] emit (mkAssign (CmmLocal res) (CmmReg (CmmLocal tmp))) +emitPrimOp [res] GetCCCSOp [] + = emit (mkAssign (CmmLocal res) curCCS) + emitPrimOp [res] ReadMutVarOp [mutv] = emit (mkAssign (CmmLocal res) (cmmLoadIndexW mutv fixedHdrSize gcWord)) diff --git a/compiler/codeGen/StgCmmProf.hs b/compiler/codeGen/StgCmmProf.hs index 13c1be7f42..d9b3583382 100644 --- a/compiler/codeGen/StgCmmProf.hs +++ b/compiler/codeGen/StgCmmProf.hs @@ -21,7 +21,7 @@ module StgCmmProf ( dynProfHdr, profDynAlloc, profAlloc, staticProfHdr, initUpdFrameProf, enterCostCentreThunk, costCentreFrom, - curCCS, curCCSAddr, + curCCS, storeCurCCS, emitSetCCC, saveCurrentCostCentre, restoreCurrentCostCentre, @@ -73,11 +73,10 @@ ccType :: CmmType -- Type of a cost centre ccType = bWord curCCS :: CmmExpr -curCCS = CmmLoad curCCSAddr ccsType +curCCS = CmmReg (CmmGlobal CCCS) --- Address of current CCS variable, for storing into -curCCSAddr :: CmmExpr -curCCSAddr = CmmLit (CmmLabel (mkCmmDataLabel rtsPackageId (fsLit "CCCS"))) +storeCurCCS :: CmmExpr -> CmmAGraph +storeCurCCS e = mkAssign (CmmGlobal CCCS) e mkCCostCentre :: CostCentre -> CmmLit mkCCostCentre cc = CmmLabel (mkCCLabel cc) @@ -150,7 +149,7 @@ restoreCurrentCostCentre :: Maybe LocalReg -> FCode () restoreCurrentCostCentre Nothing = return () restoreCurrentCostCentre (Just local_cc) - = emit (mkStore curCCSAddr (CmmReg (CmmLocal local_cc))) + = emit (storeCurCCS (CmmReg (CmmLocal local_cc))) ------------------------------------------------------------------------------- @@ -186,7 +185,7 @@ profAlloc words ccs enterCostCentreThunk :: CmmExpr -> FCode () enterCostCentreThunk closure = ifProfiling $ do - emit $ mkStore curCCSAddr (costCentreFrom closure) + emit $ storeCurCCS (costCentreFrom closure) ifProfiling :: FCode () -> FCode () ifProfiling code @@ -269,7 +268,7 @@ emitSetCCC cc tick push tmp <- newTemp ccsType -- TODO FIXME NOW pushCostCentre tmp curCCS cc when tick $ emit (bumpSccCount (CmmReg (CmmLocal tmp))) - when push $ emit (mkStore curCCSAddr (CmmReg (CmmLocal tmp))) + when push $ emit (storeCurCCS (CmmReg (CmmLocal tmp))) pushCostCentre :: LocalReg -> CmmExpr -> CostCentre -> FCode () pushCostCentre result ccs cc diff --git a/compiler/codeGen/StgCmmUtils.hs b/compiler/codeGen/StgCmmUtils.hs index f209005108..c3327138b3 100644 --- a/compiler/codeGen/StgCmmUtils.hs +++ b/compiler/codeGen/StgCmmUtils.hs @@ -253,7 +253,7 @@ callerSaveVolatileRegs = (caller_save, caller_load) caller_save = catAGraphs (map callerSaveGlobalReg regs_to_save) caller_load = catAGraphs (map callerRestoreGlobalReg regs_to_save) - system_regs = [ Sp,SpLim,Hp,HpLim,CurrentTSO,CurrentNursery + system_regs = [ Sp,SpLim,Hp,HpLim,CCCS,CurrentTSO,CurrentNursery {- ,SparkHd,SparkTl,SparkBase,SparkLim -} , BaseReg ] @@ -366,6 +366,9 @@ callerSaves Hp = True #ifdef CALLER_SAVES_HpLim callerSaves HpLim = True #endif +#ifdef CALLER_SAVES_CCCS +callerSaves CCCS = True +#endif #ifdef CALLER_SAVES_CurrentTSO callerSaves CurrentTSO = True #endif @@ -385,7 +388,8 @@ baseRegOffset SpLim = oFFSET_StgRegTable_rSpLim baseRegOffset (LongReg 1) = oFFSET_StgRegTable_rL1 baseRegOffset Hp = oFFSET_StgRegTable_rHp baseRegOffset HpLim = oFFSET_StgRegTable_rHpLim -baseRegOffset CurrentTSO = oFFSET_StgRegTable_rCurrentTSO +baseRegOffset CCCS = oFFSET_StgRegTable_rCCCS +baseRegOffset CurrentTSO = oFFSET_StgRegTable_rCurrentTSO baseRegOffset CurrentNursery = oFFSET_StgRegTable_rCurrentNursery baseRegOffset HpAlloc = oFFSET_StgRegTable_rHpAlloc baseRegOffset GCEnter1 = oFFSET_stgGCEnter1 diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs index b039d39960..1ea5d0c038 100644 --- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs +++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs @@ -125,7 +125,7 @@ stmtToInstrs env stmt = case stmt of CmmSwitch arg ids -> genSwitch env arg ids -- Foreign Call - CmmCall target res args _ ret + CmmCall target res args ret -> genCall env target res args ret -- Tail call diff --git a/compiler/nativeGen/AsmCodeGen.lhs b/compiler/nativeGen/AsmCodeGen.lhs index 7ffda3d8f6..0d8aab146b 100644 --- a/compiler/nativeGen/AsmCodeGen.lhs +++ b/compiler/nativeGen/AsmCodeGen.lhs @@ -879,7 +879,7 @@ cmmStmtConFold stmt -> do addr' <- cmmExprConFold JumpReference addr return $ CmmJump addr' regs - CmmCall target regs args srt returns + CmmCall target regs args returns -> do target' <- case target of CmmCallee e conv -> do e' <- cmmExprConFold CallReference e @@ -888,7 +888,7 @@ cmmStmtConFold stmt args' <- mapM (\(CmmHinted arg hint) -> do arg' <- cmmExprConFold DataReference arg return (CmmHinted arg' hint)) args - return $ CmmCall target' regs args' srt returns + return $ CmmCall target' regs args' returns CmmCondBranch test dest -> do test' <- cmmExprConFold DataReference test diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs index 8c80ec40c1..a043af01f8 100644 --- a/compiler/nativeGen/PPC/CodeGen.hs +++ b/compiler/nativeGen/PPC/CodeGen.hs @@ -135,7 +135,7 @@ stmtToInstrs stmt = do where ty = cmmExprType src size = cmmTypeSize ty - CmmCall target result_regs args _ _ + CmmCall target result_regs args _ -> genCCall target result_regs args CmmBranch id -> genBranch id diff --git a/compiler/nativeGen/SPARC/CodeGen.hs b/compiler/nativeGen/SPARC/CodeGen.hs index 91a850d5fc..663b95b236 100644 --- a/compiler/nativeGen/SPARC/CodeGen.hs +++ b/compiler/nativeGen/SPARC/CodeGen.hs @@ -135,7 +135,7 @@ stmtToInstrs stmt = case stmt of where ty = cmmExprType src size = cmmTypeSize ty - CmmCall target result_regs args _ _ + CmmCall target result_regs args _ -> genCCall target result_regs args CmmBranch id -> genBranch id diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs index 97baeec1ab..5f0f716281 100644 --- a/compiler/nativeGen/X86/CodeGen.hs +++ b/compiler/nativeGen/X86/CodeGen.hs @@ -160,7 +160,7 @@ stmtToInstrs stmt = do where ty = cmmExprType src size = cmmTypeSize ty - CmmCall target result_regs args _ _ + CmmCall target result_regs args _ -> genCCall is32Bit target result_regs args CmmBranch id -> genBranch id @@ -1996,7 +1996,7 @@ outOfLineCmmOp mop res args targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl let target = CmmCallee targetExpr CCallConv - stmtToInstrs (CmmCall target (catMaybes [res]) args' CmmUnsafe CmmMayReturn) + stmtToInstrs (CmmCall target (catMaybes [res]) args' CmmMayReturn) where -- Assume we can call these functions directly, and that they're not in a dynamic library. -- TODO: Why is this ok? Under linux this code will be in libm.so diff --git a/compiler/prelude/primops.txt.pp b/compiler/prelude/primops.txt.pp index 204dce2e59..ceb9226594 100644 --- a/compiler/prelude/primops.txt.pp +++ b/compiler/prelude/primops.txt.pp @@ -1799,6 +1799,9 @@ primop TraceCcsOp "traceCcs#" GenPrimOp has_side_effects = True out_of_line = True +primop GetCCCSOp "getCCCS#" GenPrimOp + State# s -> (# State# s, Addr# #) + ------------------------------------------------------------------------ section "Etc" {Miscellaneous built-ins} diff --git a/docs/users_guide/profiling.xml b/docs/users_guide/profiling.xml index a5a1d4911c..ee3b387e31 100644 --- a/docs/users_guide/profiling.xml +++ b/docs/users_guide/profiling.xml @@ -9,12 +9,6 @@ can answer questions like "why is my program so slow?", or "why is my program using so much memory?".</para> - <para>Note that multi-processor execution (e.g. <literal>+RTS - -N2</literal>) is not currently supported with GHC's time and space - profiling. However, there is a separate tool specifically for - profiling concurrent and parallel programs: <ulink - url="http://www.haskell.org/haskellwiki/ThreadScope">ThreadScope</ulink>.</para> - <para>Profiling a program is a three-step process:</para> <orderedlist> @@ -1359,6 +1353,44 @@ to re-read its input file: </sect2> </sect1> + <sect1 id="prof-threaded"> + <title>Profiling Parallel and Concurrent Programs</title> + + <para>Combining <option>-threaded</option> + and <option>-prof</option> is perfectly fine, and indeed it is + possible to profile a program running on multiple processors + with the <option>+RTS -N</option> option.<footnote>This feature + was added in GHC 7.4.1.</footnote> + </para> + + <para> + Some caveats apply, however. In the current implementation, a + profiled program is likely to scale much less well than the + unprofiled program, because the profiling implementation uses + some shared data structures which require locking in the runtime + system. Furthermore, the memory allocation statistics collected + by the profiled program are stored in shared memory + but <emphasis>not</emphasis> locked (for speed), which means + that these figures might be inaccurate for parallel programs. + </para> + + <para> + We strongly recommend that you + use <option>-fno-prof-count-entries</option> when compiling a + program to be profiled on multiple cores, because the entry + counts are also stored in shared memory, and continuously + updating them on multiple cores is extremely slow. + </para> + + <para> + We also recommend + using <ulink url="http://www.haskell.org/haskellwiki/ThreadScope">ThreadScope</ulink> + for profiling parallel programs; it offers a GUI for visualising + parallel execution, and is complementary to the time and space + profiling features provided with GHC. + </para> + </sect1> + <sect1 id="hpc"> <title>Observing Code Coverage</title> <indexterm><primary>code coverage</primary></indexterm> diff --git a/includes/Cmm.h b/includes/Cmm.h index 641faa216e..11c02b4e3e 100644 --- a/includes/Cmm.h +++ b/includes/Cmm.h @@ -372,7 +372,7 @@ CCCS_ALLOC(bytes); /* CCS_ALLOC wants the size in words, because ccs->mem_alloc is in words */ -#define CCCS_ALLOC(__alloc) CCS_ALLOC(BYTES_TO_WDS(__alloc), W_[CCCS]) +#define CCCS_ALLOC(__alloc) CCS_ALLOC(BYTES_TO_WDS(__alloc), CCCS) #define HP_CHK_GEN_TICKY(alloc,liveness,reentry) \ HP_CHK_GEN(alloc,liveness,reentry); \ diff --git a/includes/RtsAPI.h b/includes/RtsAPI.h index 329b1569ab..8d948f9b49 100644 --- a/includes/RtsAPI.h +++ b/includes/RtsAPI.h @@ -37,6 +37,15 @@ typedef struct StgClosure_ *HaskellObj; */ typedef struct Capability_ Capability; +/* + * The public view of a Capability: we can be sure it starts with + * these two components (but it may have more private fields). + */ +typedef struct CapabilityPublic_ { + StgFunTable f; + StgRegTable r; +} CapabilityPublic; + /* ---------------------------------------------------------------------------- RTS configuration settings, for passing to hs_init_ghc() ------------------------------------------------------------------------- */ diff --git a/includes/mkDerivedConstants.c b/includes/mkDerivedConstants.c index b02b6c86f0..a2c9160e95 100644 --- a/includes/mkDerivedConstants.c +++ b/includes/mkDerivedConstants.c @@ -222,6 +222,7 @@ main(int argc, char *argv[]) field_offset(StgRegTable, rSpLim); field_offset(StgRegTable, rHp); field_offset(StgRegTable, rHpLim); + field_offset(StgRegTable, rCCCS); field_offset(StgRegTable, rCurrentTSO); field_offset(StgRegTable, rCurrentNursery); field_offset(StgRegTable, rHpAlloc); diff --git a/includes/rts/prof/CCS.h b/includes/rts/prof/CCS.h index 4692d166b5..9737fc9c18 100644 --- a/includes/rts/prof/CCS.h +++ b/includes/rts/prof/CCS.h @@ -114,8 +114,6 @@ typedef struct _IndexTable { Pre-defined cost centres and cost centre stacks -------------------------------------------------------------------------- */ -extern CostCentreStack * RTS_VAR(CCCS); /* current CCS */ - #if IN_STG_CODE extern StgWord CC_MAIN[]; @@ -153,6 +151,9 @@ extern CostCentreStack CCS_DONT_CARE[]; // shouldn't ever get set extern CostCentre CC_PINNED[]; extern CostCentreStack CCS_PINNED[]; // pinned memory +extern CostCentre CC_IDLE[]; +extern CostCentreStack CCS_IDLE[]; // capability is idle + #endif /* IN_STG_CODE */ extern unsigned int RTS_VAR(CC_ID); // global ids @@ -165,7 +166,7 @@ extern unsigned int RTS_VAR(era); * ---------------------------------------------------------------------------*/ CostCentreStack * pushCostCentre (CostCentreStack *, CostCentre *); -void enterFunCCS (CostCentreStack *); +void enterFunCCS (StgRegTable *reg, CostCentreStack *); /* ----------------------------------------------------------------------------- Registering CCs and CCSs diff --git a/includes/stg/MiscClosures.h b/includes/stg/MiscClosures.h index 45dc8369c7..fcfdede2ff 100644 --- a/includes/stg/MiscClosures.h +++ b/includes/stg/MiscClosures.h @@ -488,7 +488,6 @@ extern StgWord RTS_VAR(stable_ptr_table); // Profiling.c extern unsigned int RTS_VAR(era); -extern StgWord RTS_VAR(CCCS); /* current CCS */ extern unsigned int RTS_VAR(entering_PAP); extern StgWord RTS_VAR(CC_LIST); /* registered CC list */ extern StgWord RTS_VAR(CCS_LIST); /* registered CCS list */ diff --git a/includes/stg/Regs.h b/includes/stg/Regs.h index 1d0c00c491..b7f0abda7d 100644 --- a/includes/stg/Regs.h +++ b/includes/stg/Regs.h @@ -80,6 +80,7 @@ typedef struct StgRegTable_ { StgPtr rSpLim; StgPtr rHp; StgPtr rHpLim; + struct _CostCentreStack * rCCCS; // current cost-centre-stack struct StgTSO_ * rCurrentTSO; struct nursery_ * rNursery; struct bdescr_ * rCurrentNursery; /* Hp/HpLim point into this block */ diff --git a/rts/Apply.cmm b/rts/Apply.cmm index 5397fc55df..a2d4a7e123 100644 --- a/rts/Apply.cmm +++ b/rts/Apply.cmm @@ -86,7 +86,7 @@ stg_PAP_apply TICK_ENT_PAP(); LDV_ENTER(pap); #ifdef PROFILING - foreign "C" enterFunCCS(StgHeader_ccs(pap)); + foreign "C" enterFunCCS(BaseReg "ptr", StgHeader_ccs(pap) "ptr"); #endif // Reload the stack diff --git a/rts/AutoApply.h b/rts/AutoApply.h index 547c5d2f28..d0c5c3fe6b 100644 --- a/rts/AutoApply.h +++ b/rts/AutoApply.h @@ -22,7 +22,7 @@ TICK_ALLOC_HEAP_NOCTR(BYTES_TO_WDS(size)); \ TICK_ALLOC_PAP(n+1 /* +1 for the FUN */, 0); \ pap = Hp + WDS(1) - size; \ - SET_HDR(pap, stg_PAP_info, W_[CCCS]); \ + SET_HDR(pap, stg_PAP_info, CCCS); \ StgPAP_arity(pap) = HALF_W_(arity - m); \ StgPAP_fun(pap) = R1; \ StgPAP_n_args(pap) = HALF_W_(n); \ @@ -52,7 +52,7 @@ TICK_ALLOC_HEAP_NOCTR(BYTES_TO_WDS(size)); \ TICK_ALLOC_PAP(n+1 /* +1 for the FUN */, 0); \ new_pap = Hp + WDS(1) - size; \ - SET_HDR(new_pap, stg_PAP_info, W_[CCCS]); \ + SET_HDR(new_pap, stg_PAP_info, CCCS); \ StgPAP_arity(new_pap) = HALF_W_(arity - m); \ W_ n_args; \ n_args = TO_W_(StgPAP_n_args(pap)); \ @@ -78,10 +78,10 @@ // Jump to target, saving CCCS and restoring it on return #if defined(PROFILING) -#define jump_SAVE_CCCS(target) \ - Sp(-1) = W_[CCCS]; \ - Sp(-2) = stg_restore_cccs_info; \ - Sp_adj(-2); \ +#define jump_SAVE_CCCS(target) \ + Sp(-1) = CCCS; \ + Sp(-2) = stg_restore_cccs_info; \ + Sp_adj(-2); \ jump (target) #else #define jump_SAVE_CCCS(target) jump (target) diff --git a/rts/Capability.c b/rts/Capability.c index 3b45dec360..6c84d1ec35 100644 --- a/rts/Capability.c +++ b/rts/Capability.c @@ -46,7 +46,7 @@ volatile StgWord waiting_for_gc = 0; /* Let foreign code get the current Capability -- assuming there is one! * This is useful for unsafe foreign calls because they are called with * the current Capability held, but they are not passed it. For example, - * see see the integer-gmp package which calls allocateLocal() in its + * see see the integer-gmp package which calls allocate() in its * stgAllocForGMP() function (which gets called by gmp functions). * */ Capability * rts_unsafeGetMyCapability (void) @@ -265,6 +265,12 @@ initCapability( Capability *cap, nat i ) cap->context_switch = 0; cap->pinned_object_block = NULL; +#ifdef PROFILING + cap->r.rCCCS = CCS_SYSTEM; +#else + cap->r.rCCCS = NULL; +#endif + traceCapsetAssignCap(CAPSET_OSPROCESS_DEFAULT, i); traceCapsetAssignCap(CAPSET_CLOCKDOMAIN_DEFAULT, i); #if defined(THREADED_RTS) @@ -453,6 +459,9 @@ releaseCapability_ (Capability* cap, } } +#ifdef PROFILING + cap->r.rCCCS = CCS_IDLE; +#endif last_free_capability = cap; debugTrace(DEBUG_sched, "freeing capability %d", cap->no); } @@ -604,6 +613,10 @@ waitForReturnCapability (Capability **pCap, Task *task) } +#ifdef PROFILING + cap->r.rCCCS = CCS_SYSTEM; +#endif + ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task); debugTrace(DEBUG_sched, "resuming capability %d", cap->no); @@ -676,14 +689,19 @@ yieldCapability (Capability** pCap, Task *task) task->next = NULL; cap->n_spare_workers--; } - cap->running_task = task; + + cap->running_task = task; RELEASE_LOCK(&cap->lock); break; } - debugTrace(DEBUG_sched, "resuming capability %d", cap->no); + debugTrace(DEBUG_sched, "resuming capability %d", cap->no); ASSERT(cap->running_task == task); +#ifdef PROFILING + cap->r.rCCCS = CCS_SYSTEM; +#endif + *pCap = cap; ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task); diff --git a/rts/Exception.cmm b/rts/Exception.cmm index 1192db732c..78907c4ba7 100644 --- a/rts/Exception.cmm +++ b/rts/Exception.cmm @@ -369,7 +369,7 @@ stg_catchzh /* Set up the catch frame */ Sp = Sp - SIZEOF_StgCatchFrame; - SET_HDR(Sp,stg_catch_frame_info,W_[CCCS]); + SET_HDR(Sp,stg_catch_frame_info,CCCS); StgCatchFrame_handler(Sp) = R2; StgCatchFrame_exceptions_blocked(Sp) = @@ -427,7 +427,7 @@ stg_raisezh */ if (RtsFlags_ProfFlags_showCCSOnException(RtsFlags) != 0::I32) { SAVE_THREAD_STATE(); - foreign "C" fprintCCS_stderr(W_[CCCS] "ptr", + foreign "C" fprintCCS_stderr(CCCS "ptr", exception "ptr", CurrentTSO "ptr") []; LOAD_THREAD_STATE(); diff --git a/rts/HeapStackCheck.cmm b/rts/HeapStackCheck.cmm index d17961145a..d80e101c59 100644 --- a/rts/HeapStackCheck.cmm +++ b/rts/HeapStackCheck.cmm @@ -167,10 +167,17 @@ __stg_gc_enter_1 be an orphaned BLOCKING_QUEUE now. -------------------------------------------------------------------------- */ -INFO_TABLE_RET( stg_enter_checkbh, RET_SMALL, P_ unused) +/* The stg_enter_checkbh frame has the same shape as an update frame: */ +#if defined(PROFILING) +#define UPD_FRAME_PARAMS W_ unused1, W_ unused2, P_ unused3 +#else +#define UPD_FRAME_PARAMS P_ unused1 +#endif + +INFO_TABLE_RET( stg_enter_checkbh, RET_SMALL, UPD_FRAME_PARAMS) { - R1 = Sp(1); - Sp_adj(2); + R1 = StgUpdateFrame_updatee(Sp); + Sp = Sp + SIZEOF_StgUpdateFrame; foreign "C" checkBlockingQueues(MyCapability() "ptr", CurrentTSO) [R1]; ENTER(); diff --git a/rts/Interpreter.c b/rts/Interpreter.c index da151e1342..2eac1cd834 100644 --- a/rts/Interpreter.c +++ b/rts/Interpreter.c @@ -614,7 +614,7 @@ do_apply: // build a new PAP and return it. StgPAP *new_pap; new_pap = (StgPAP *)allocate(cap, PAP_sizeW(pap->n_args + m)); - SET_HDR(new_pap,&stg_PAP_info,CCCS); + SET_HDR(new_pap,&stg_PAP_info,cap->r.rCCCS); new_pap->arity = pap->arity - n; new_pap->n_args = pap->n_args + m; new_pap->fun = pap->fun; @@ -659,7 +659,7 @@ do_apply: StgPAP *pap; nat i; pap = (StgPAP *)allocate(cap, PAP_sizeW(m)); - SET_HDR(pap, &stg_PAP_info,CCCS); + SET_HDR(pap, &stg_PAP_info,cap->r.rCCCS); pap->arity = arity - n; pap->fun = obj; pap->n_args = m; diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm index 8836d3bfe6..2ca347e803 100644 --- a/rts/PrimOps.cmm +++ b/rts/PrimOps.cmm @@ -63,7 +63,7 @@ stg_newByteArrayzh words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words; ("ptr" p) = foreign "C" allocate(MyCapability() "ptr",words) []; TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); - SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); + SET_HDR(p, stg_ARR_WORDS_info, CCCS); StgArrWords_bytes(p) = n; RET_P(p); } @@ -96,7 +96,7 @@ stg_newPinnedByteArrayzh to BA_ALIGN bytes: */ p = p + ((-p - SIZEOF_StgArrWords) & BA_MASK); - SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); + SET_HDR(p, stg_ARR_WORDS_info, CCCS); StgArrWords_bytes(p) = n; RET_P(p); } @@ -136,7 +136,7 @@ stg_newAlignedPinnedByteArrayzh <alignment> is a power of 2, which is technically not guaranteed */ p = p + ((-p - SIZEOF_StgArrWords) & (alignment - 1)); - SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); + SET_HDR(p, stg_ARR_WORDS_info, CCCS); StgArrWords_bytes(p) = n; RET_P(p); } @@ -157,7 +157,7 @@ stg_newArrayzh ("ptr" arr) = foreign "C" allocate(MyCapability() "ptr",words) [R2]; TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0); - SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]); + SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, CCCS); StgMutArrPtrs_ptrs(arr) = n; StgMutArrPtrs_size(arr) = size; @@ -225,7 +225,7 @@ stg_newMutVarzh ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, stg_newMutVarzh); mv = Hp - SIZEOF_StgMutVar + WDS(1); - SET_HDR(mv,stg_MUT_VAR_DIRTY_info,W_[CCCS]); + SET_HDR(mv,stg_MUT_VAR_DIRTY_info,CCCS); StgMutVar_var(mv) = R1; RET_P(mv); @@ -297,21 +297,21 @@ stg_atomicModifyMutVarzh TICK_ALLOC_THUNK_2(); CCCS_ALLOC(THUNK_2_SIZE); z = Hp - THUNK_2_SIZE + WDS(1); - SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]); + SET_HDR(z, stg_ap_2_upd_info, CCCS); LDV_RECORD_CREATE(z); StgThunk_payload(z,0) = f; TICK_ALLOC_THUNK_1(); CCCS_ALLOC(THUNK_1_SIZE); y = z - THUNK_1_SIZE; - SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]); + SET_HDR(y, stg_sel_0_upd_info, CCCS); LDV_RECORD_CREATE(y); StgThunk_payload(y,0) = z; TICK_ALLOC_THUNK_1(); CCCS_ALLOC(THUNK_1_SIZE); r = y - THUNK_1_SIZE; - SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]); + SET_HDR(r, stg_sel_1_upd_info, CCCS); LDV_RECORD_CREATE(r); StgThunk_payload(r,0) = z; @@ -353,7 +353,7 @@ stg_mkWeakzh ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, stg_mkWeakzh ); w = Hp - SIZEOF_StgWeak + WDS(1); - SET_HDR(w, stg_WEAK_info, W_[CCCS]); + SET_HDR(w, stg_WEAK_info, CCCS); // We don't care about cfinalizer here. // Should StgWeak_cfinalizer(w) be stg_NO_FINALIZER_closure or @@ -397,14 +397,14 @@ stg_mkWeakForeignEnvzh ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR, stg_mkWeakForeignEnvzh ); w = Hp - SIZEOF_StgWeak + WDS(1); - SET_HDR(w, stg_WEAK_info, W_[CCCS]); + SET_HDR(w, stg_WEAK_info, CCCS); payload_words = 4; words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words; ("ptr" p) = foreign "C" allocate(MyCapability() "ptr", words) []; TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); - SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); + SET_HDR(p, stg_ARR_WORDS_info, CCCS); StgArrWords_bytes(p) = WDS(payload_words); StgArrWords_payload(p,0) = fptr; @@ -877,7 +877,7 @@ stg_atomicallyzh Sp = Sp - SIZEOF_StgAtomicallyFrame; frame = Sp; - SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]); + SET_HDR(frame,stg_atomically_frame_info, CCCS); StgAtomicallyFrame_code(frame) = R1; StgAtomicallyFrame_result(frame) = NO_TREC; StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE; @@ -903,7 +903,7 @@ stg_catchSTMzh Sp = Sp - SIZEOF_StgCatchSTMFrame; frame = Sp; - SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]); + SET_HDR(frame, stg_catch_stm_frame_info, CCCS); StgCatchSTMFrame_handler(frame) = R2; StgCatchSTMFrame_code(frame) = R1; @@ -941,7 +941,7 @@ stg_catchRetryzh Sp = Sp - SIZEOF_StgCatchRetryFrame; frame = Sp; - SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]); + SET_HDR(frame, stg_catch_retry_frame_info, CCCS); StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false; StgCatchRetryFrame_first_code(frame) = R1; StgCatchRetryFrame_alt_code(frame) = R2; @@ -1153,7 +1153,7 @@ stg_newMVarzh ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, stg_newMVarzh ); mvar = Hp - SIZEOF_StgMVar + WDS(1); - SET_HDR(mvar,stg_MVAR_DIRTY_info,W_[CCCS]); + SET_HDR(mvar,stg_MVAR_DIRTY_info,CCCS); // MVARs start dirty: generation 0 has no mutable list StgMVar_head(mvar) = stg_END_TSO_QUEUE_closure; StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure; @@ -1527,7 +1527,7 @@ stg_makeStableNamezh */ if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) { sn_obj = Hp - SIZEOF_StgStableName + WDS(1); - SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]); + SET_HDR(sn_obj, stg_STABLE_NAME_info, CCCS); StgStableName_sn(sn_obj) = index; snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj; } else { @@ -1578,7 +1578,7 @@ stg_newBCOzh ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R5_PTR, stg_newBCOzh ); bco = Hp - bytes + WDS(1); - SET_HDR(bco, stg_BCO_info, W_[CCCS]); + SET_HDR(bco, stg_BCO_info, CCCS); StgBCO_instrs(bco) = R1; StgBCO_literals(bco) = R2; @@ -1617,7 +1617,7 @@ stg_mkApUpd0zh CCCS_ALLOC(SIZEOF_StgAP); ap = Hp - SIZEOF_StgAP + WDS(1); - SET_HDR(ap, stg_AP_info, W_[CCCS]); + SET_HDR(ap, stg_AP_info, CCCS); StgAP_n_args(ap) = HALF_W_(0); StgAP_fun(ap) = R1; @@ -1668,7 +1668,7 @@ out: ptrs_arr = Hp - nptrs_arr_sz - ptrs_arr_sz + WDS(1); nptrs_arr = Hp - nptrs_arr_sz + WDS(1); - SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, W_[CCCS]); + SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, CCCS); StgMutArrPtrs_ptrs(ptrs_arr) = ptrs; StgMutArrPtrs_size(ptrs_arr) = ptrs + ptrs_arr_cards; @@ -1683,7 +1683,7 @@ for: allocated in the nursery. The GC will fill it in if/when the array is promoted. */ - SET_HDR(nptrs_arr, stg_ARR_WORDS_info, W_[CCCS]); + SET_HDR(nptrs_arr, stg_ARR_WORDS_info, CCCS); StgArrWords_bytes(nptrs_arr) = WDS(nptrs); p = 0; for2: diff --git a/rts/Profiling.c b/rts/Profiling.c index c393c8fa83..ac2708eda1 100644 --- a/rts/Profiling.c +++ b/rts/Profiling.c @@ -18,6 +18,7 @@ #include "Arena.h" #include "RetainerProfile.h" #include "Printer.h" +#include "Capability.h" #include <string.h> @@ -51,16 +52,16 @@ FILE *prof_file; static char *hp_filename; /* heap profile (hp2ps style) log file */ FILE *hp_file; -/* The Current Cost Centre Stack (for attributing costs) - */ -CostCentreStack *CCCS; - /* Linked lists to keep track of CCs and CCSs that haven't * been declared in the log file yet */ CostCentre *CC_LIST = NULL; CostCentreStack *CCS_LIST = NULL; +#ifdef THREADED_RTS +Mutex ccs_mutex; +#endif + /* * Built-in cost centres and cost-centre stacks: * @@ -92,6 +93,7 @@ CC_DECLARE(CC_GC, "GC", "GC", CC_NOT_CAF, ); CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", CC_NOT_CAF, ); CC_DECLARE(CC_DONT_CARE, "DONT_CARE", "MAIN", CC_NOT_CAF, ); CC_DECLARE(CC_PINNED, "PINNED", "SYSTEM", CC_NOT_CAF, ); +CC_DECLARE(CC_IDLE, "IDLE", "IDLE", CC_NOT_CAF, ); CCS_DECLARE(CCS_MAIN, CC_MAIN, ); CCS_DECLARE(CCS_SYSTEM, CC_SYSTEM, ); @@ -99,6 +101,7 @@ CCS_DECLARE(CCS_GC, CC_GC, ); CCS_DECLARE(CCS_OVERHEAD, CC_OVERHEAD, ); CCS_DECLARE(CCS_DONT_CARE, CC_DONT_CARE, ); CCS_DECLARE(CCS_PINNED, CC_PINNED, ); +CCS_DECLARE(CCS_IDLE, CC_IDLE, ); /* * Static Functions @@ -143,7 +146,12 @@ initProfiling1 (void) prof_arena = newArena(); /* for the benefit of allocate()... */ - CCCS = CCS_SYSTEM; + { + nat n; + for (n=0; n < n_capabilities; n++) { + capabilities[n].r.rCCCS = CCS_SYSTEM; + } + } } void @@ -157,8 +165,6 @@ initProfiling2 (void) { CostCentreStack *ccs, *next; - CCCS = CCS_SYSTEM; - /* Set up the log file, and dump the header and cost centre * information into it. */ @@ -173,12 +179,14 @@ initProfiling2 (void) REGISTER_CC(CC_OVERHEAD); REGISTER_CC(CC_DONT_CARE); REGISTER_CC(CC_PINNED); + REGISTER_CC(CC_IDLE); REGISTER_CCS(CCS_SYSTEM); REGISTER_CCS(CCS_GC); REGISTER_CCS(CCS_OVERHEAD); REGISTER_CCS(CCS_DONT_CARE); REGISTER_CCS(CCS_PINNED); + REGISTER_CCS(CCS_IDLE); REGISTER_CCS(CCS_MAIN); /* find all the registered cost centre stacks, and make them @@ -310,12 +318,17 @@ endProfiling ( void ) // implements c1 ++> c2, where c1 and c2 are equal depth // -static void enterFunEqualStacks (CostCentreStack *ccs, CostCentreStack *ccsfn) +static CostCentreStack * +enterFunEqualStacks (CostCentreStack *ccs0, + CostCentreStack *ccsapp, + CostCentreStack *ccsfn) { - ASSERT(ccs->depth == ccsfn->depth); - if (ccs == ccsfn) return; - enterFunEqualStacks(ccs->prevStack, ccsfn->prevStack); - CCCS = pushCostCentre(CCCS, ccsfn->cc); + ASSERT(ccsapp->depth == ccsfn->depth); + if (ccsapp == ccsfn) return ccs0; + return pushCostCentre(enterFunEqualStacks(ccs0, + ccsapp->prevStack, + ccsfn->prevStack), + ccsfn->cc); } // implements c1 ++> c2, where c2 is deeper than c1. @@ -323,21 +336,25 @@ static void enterFunEqualStacks (CostCentreStack *ccs, CostCentreStack *ccsfn) // enterFunEqualStacks(), and then push on the elements that we // dropped in reverse order. // -static void enterFunCurShorter (CostCentreStack *ccsfn, StgWord n) +static CostCentreStack * +enterFunCurShorter (CostCentreStack *ccsapp, CostCentreStack *ccsfn, StgWord n) { if (n == 0) { - ASSERT(ccsfn->depth == CCCS->depth); - enterFunEqualStacks(CCCS,ccsfn); - return; + ASSERT(ccsfn->depth == ccsapp->depth); + return enterFunEqualStacks(ccsapp,ccsapp,ccsfn);; + } else { + ASSERT(ccsfn->depth > ccsapp->depth); + return pushCostCentre(enterFunCurShorter(ccsapp, ccsfn->prevStack, n-1), + ccsfn->cc); } - enterFunCurShorter(ccsfn->prevStack, n-1); - CCCS = pushCostCentre(CCCS, ccsfn->cc); } -void enterFunCCS ( CostCentreStack *ccsfn ) +void enterFunCCS (StgRegTable *reg, CostCentreStack *ccsfn) { + CostCentreStack *ccsapp; + // common case 1: both stacks are the same - if (ccsfn == CCCS) { + if (ccsfn == reg->rCCCS) { return; } @@ -346,34 +363,38 @@ void enterFunCCS ( CostCentreStack *ccsfn ) return; } + ccsapp = reg->rCCCS; + reg->rCCCS = CCS_OVERHEAD; + // common case 3: the stacks are completely different (e.g. one is a // descendent of MAIN and the other of a CAF): we append the whole // of the function stack to the current CCS. - if (ccsfn->root != CCCS->root) { - CCCS = appendCCS(CCCS,ccsfn); + if (ccsfn->root != ccsapp->root) { + reg->rCCCS = appendCCS(ccsapp,ccsfn); return; } - // uncommon case 4: CCCS is deeper than ccsfn - if (CCCS->depth > ccsfn->depth) { + // uncommon case 4: ccsapp is deeper than ccsfn + if (ccsapp->depth > ccsfn->depth) { nat i, n; - CostCentreStack *tmp = CCCS; - n = CCCS->depth - ccsfn->depth; + CostCentreStack *tmp = ccsapp; + n = ccsapp->depth - ccsfn->depth; for (i = 0; i < n; i++) { tmp = tmp->prevStack; } - enterFunEqualStacks(tmp,ccsfn); + reg->rCCCS = enterFunEqualStacks(ccsapp,tmp,ccsfn); return; } // uncommon case 5: ccsfn is deeper than CCCS - if (ccsfn->depth > CCCS->depth) { - enterFunCurShorter(ccsfn, ccsfn->depth - CCCS->depth); + if (ccsfn->depth > ccsapp->depth) { + reg->rCCCS = enterFunCurShorter(ccsapp, ccsfn, + ccsfn->depth - ccsapp->depth); return; } // uncommon case 6: stacks are equal depth, but different - enterFunEqualStacks(CCCS,ccsfn); + reg->rCCCS = enterFunEqualStacks(ccsapp,ccsapp,ccsfn); } /* ----------------------------------------------------------------------------- @@ -477,20 +498,41 @@ appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 ) CostCentreStack * pushCostCentre (CostCentreStack *ccs, CostCentre *cc) { - CostCentreStack *temp_ccs; - - if (ccs == EMPTY_STACK) - return actualPush(ccs,cc); - else { - if (ccs->cc == cc) + CostCentreStack *temp_ccs, *ret; + IndexTable *ixtable; + + if (ccs == EMPTY_STACK) { + ACQUIRE_LOCK(&ccs_mutex); + ret = actualPush(ccs,cc); + } + else + { + if (ccs->cc == cc) { return ccs; - else { + } else { // check if we've already memoized this stack - temp_ccs = isInIndexTable(ccs->indexTable,cc); + ixtable = ccs->indexTable; + temp_ccs = isInIndexTable(ixtable,cc); - if (temp_ccs != EMPTY_STACK) + if (temp_ccs != EMPTY_STACK) { return temp_ccs; - else { + } else { + + // not in the IndexTable, now we take the lock: + ACQUIRE_LOCK(&ccs_mutex); + + if (ccs->indexTable != ixtable) + { + // someone modified ccs->indexTable while + // we did not hold the lock, so we must + // check it again: + temp_ccs = isInIndexTable(ixtable,cc); + if (temp_ccs != EMPTY_STACK) + { + RELEASE_LOCK(&ccs_mutex); + return temp_ccs; + } + } temp_ccs = checkLoop(ccs,cc); if (temp_ccs != NULL) { // This CC is already in the stack somewhere. @@ -510,13 +552,16 @@ pushCostCentre (CostCentreStack *ccs, CostCentre *cc) #endif ccs->indexTable = addToIndexTable (ccs->indexTable, new_ccs, cc, 1); - return new_ccs; + ret = new_ccs; } else { - return actualPush (ccs,cc); + ret = actualPush (ccs,cc); } } } } + + RELEASE_LOCK(&ccs_mutex); + return ret; } static CostCentreStack * @@ -801,11 +846,12 @@ reportCCSProfiling( void ) fprintf(prof_file, " %s", prog_argv[count]); fprintf(prof_file, "\n\n"); - fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d us)\n", + fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d us, %d processor%s)\n", ((double) total_prof_ticks * - (double) RtsFlags.MiscFlags.tickInterval) / TIME_RESOLUTION, + (double) RtsFlags.MiscFlags.tickInterval) / (TIME_RESOLUTION * n_capabilities), (unsigned long) total_prof_ticks, - (int) TimeToUS(RtsFlags.MiscFlags.tickInterval)); + (int) TimeToUS(RtsFlags.MiscFlags.tickInterval), + n_capabilities, n_capabilities > 1 ? "s" : ""); fprintf(prof_file, "\ttotal alloc = %11s bytes", showStgWord64(total_alloc * sizeof(W_), diff --git a/rts/Proftimer.c b/rts/Proftimer.c index 76d7679000..569f087bb4 100644 --- a/rts/Proftimer.c +++ b/rts/Proftimer.c @@ -11,6 +11,7 @@ #include "Profiling.h" #include "Proftimer.h" +#include "Capability.h" #ifdef PROFILING static rtsBool do_prof_ticks = rtsFalse; // enable profiling ticks @@ -73,7 +74,10 @@ handleProfTick(void) #ifdef PROFILING total_ticks++; if (do_prof_ticks) { - CCCS->time_ticks++; + nat n; + for (n=0; n < n_capabilities; n++) { + capabilities[n].r.rCCCS->time_ticks++; + } } #endif diff --git a/rts/RetainerProfile.h b/rts/RetainerProfile.h index b2adf71d9d..0e75327cde 100644 --- a/rts/RetainerProfile.h +++ b/rts/RetainerProfile.h @@ -21,6 +21,9 @@ void endRetainerProfiling ( void ); void retainerProfile ( void ); void resetStaticObjectForRetainerProfiling( StgClosure *static_objects ); +// flip is either 1 or 0, changed at the beginning of retainerProfile() +// It is used to tell whether a retainer set has been touched so far +// during this pass. extern StgWord flip; // extract the retainer set field from c diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c index 3e3290dd3d..650c4f9a04 100644 --- a/rts/RtsFlags.c +++ b/rts/RtsFlags.c @@ -1147,12 +1147,6 @@ error = rtsTrue; errorBelch("Using large values for -N is not allowed by default. Link with -rtsopts to allow full control."); stg_exit(EXIT_FAILURE); } -#if defined(PROFILING) - if (nNodes > 1) { - errorBelch("bad option %s: only -N1 is supported with profiling", rts_argv[arg]); - error = rtsTrue; - } -#endif RtsFlags.ParFlags.nNodes = (nat)nNodes; } ) break; diff --git a/rts/Schedule.c b/rts/Schedule.c index 8c305008ae..04a66e31df 100644 --- a/rts/Schedule.c +++ b/rts/Schedule.c @@ -501,7 +501,7 @@ run_thread: // Costs for the scheduler are assigned to CCS_SYSTEM stopHeapProfTimer(); #if defined(PROFILING) - CCCS = CCS_SYSTEM; + cap->r.rCCCS = CCS_SYSTEM; #endif schedulePostRunThread(cap,t); @@ -2262,7 +2262,7 @@ raiseExceptionHelper (StgRegTable *reg, StgTSO *tso, StgClosure *exception) if (raise_closure == NULL) { raise_closure = (StgThunk *)allocate(cap,sizeofW(StgThunk)+1); - SET_HDR(raise_closure, &stg_raise_info, CCCS); + SET_HDR(raise_closure, &stg_raise_info, cap->r.rCCCS); raise_closure->payload[0] = exception; } updateThunk(cap, tso, ((StgUpdateFrame *)p)->updatee, diff --git a/rts/StgMiscClosures.cmm b/rts/StgMiscClosures.cmm index 5ddc1acb40..e4b128f96e 100644 --- a/rts/StgMiscClosures.cmm +++ b/rts/StgMiscClosures.cmm @@ -42,7 +42,7 @@ INFO_TABLE_RET (stg_stack_underflow_frame, UNDERFLOW_FRAME, P_ unused) INFO_TABLE_RET (stg_restore_cccs, RET_SMALL, W_ cccs) { #if defined(PROFILING) - W_[CCCS] = Sp(1); + CCCS = Sp(1); #endif Sp_adj(2); jump %ENTRY_CODE(Sp(0)); diff --git a/rts/StgStdThunks.cmm b/rts/StgStdThunks.cmm index be85999598..171ab52b96 100644 --- a/rts/StgStdThunks.cmm +++ b/rts/StgStdThunks.cmm @@ -30,8 +30,8 @@ #define NOUPD_FRAME_SIZE (SIZEOF_StgHeader) #ifdef PROFILING -#define SAVE_CCCS(fs) StgHeader_ccs(Sp-fs) = W_[CCCS] -#define GET_SAVED_CCCS W_[CCCS] = StgHeader_ccs(Sp) +#define SAVE_CCCS(fs) StgHeader_ccs(Sp-fs) = CCCS +#define GET_SAVED_CCCS CCCS = StgHeader_ccs(Sp) #define RET_PARAMS W_ unused1, W_ unused2 #else #define SAVE_CCCS(fs) /* empty */ diff --git a/rts/sm/GC.c b/rts/sm/GC.c index 7892280dca..733c2d67c2 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -189,7 +189,7 @@ GarbageCollect (rtsBool force_major_gc, #endif #ifdef PROFILING - CostCentreStack *prev_CCS; + CostCentreStack *save_CCS[n_capabilities]; #endif ACQUIRE_SM_LOCK; @@ -221,8 +221,10 @@ GarbageCollect (rtsBool force_major_gc, // attribute any costs to CCS_GC #ifdef PROFILING - prev_CCS = CCCS; - CCCS = CCS_GC; + for (n = 0; n < n_capabilities; n++) { + save_CCS[n] = capabilities[n].r.rCCCS; + capabilities[n].r.rCCCS = CCS_GC; + } #endif /* Approximate how much we allocated. @@ -626,10 +628,8 @@ GarbageCollect (rtsBool force_major_gc, #ifdef PROFILING // resetStaticObjectForRetainerProfiling() must be called before // zeroing below. - if (n_gc_threads > 1) { - barf("profiling is currently broken with multi-threaded GC"); - // ToDo: fix the gct->scavenged_static_objects below - } + + // ToDo: fix the gct->scavenged_static_objects below resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects); #endif @@ -704,7 +704,9 @@ GarbageCollect (rtsBool force_major_gc, // restore enclosing cost centre #ifdef PROFILING - CCCS = prev_CCS; + for (n = 0; n < n_capabilities; n++) { + capabilities[n].r.rCCCS = save_CCS[n]; + } #endif #ifdef DEBUG diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c index 1dad6c8df0..be3badfbe4 100644 --- a/rts/sm/Storage.c +++ b/rts/sm/Storage.c @@ -608,7 +608,7 @@ allocate (Capability *cap, lnat n) StgPtr p; TICK_ALLOC_HEAP_NOCTR(n); - CCS_ALLOC(CCCS,n); + CCS_ALLOC(cap->r.rCCCS,n); if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) { lnat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE; @@ -719,7 +719,7 @@ allocatePinned (Capability *cap, lnat n) } TICK_ALLOC_HEAP_NOCTR(n); - CCS_ALLOC(CCCS,n); + CCS_ALLOC(cap->r.rCCCS,n); bd = cap->pinned_object_block; diff --git a/utils/genapply/GenApply.hs b/utils/genapply/GenApply.hs index 2ffa81bb76..b255b92d28 100644 --- a/utils/genapply/GenApply.hs +++ b/utils/genapply/GenApply.hs @@ -301,7 +301,7 @@ genMkPAP regstatus macro jump ticker disamb loadSpWordOff "W_" (sp_stk_args+stack_args_size-3) <> text " = stg_restore_cccs_info;" $$ loadSpWordOff "W_" (sp_stk_args+stack_args_size-2) - <> text " = W_[CCCS];" + <> text " = CCCS;" else empty) $$ loadSpWordOff "W_" (sp_stk_args+stack_args_size-1) <> text " = " |