summaryrefslogtreecommitdiff
path: root/rts
diff options
context:
space:
mode:
authorSimon Marlow <marlowsd@gmail.com>2011-10-27 13:47:27 +0100
committerSimon Marlow <marlowsd@gmail.com>2011-11-02 16:34:05 +0000
commit7bb0447df9a783c222c2a077e35e5013c7c68d91 (patch)
tree78d6d2a14f7e42df5cda32199c71ced973f169ef /rts
parentbd72eeb184a95ae0ae79ccad19c8ccc2b45a12e0 (diff)
downloadhaskell-7bb0447df9a783c222c2a077e35e5013c7c68d91.tar.gz
Overhaul of infrastructure for profiling, coverage (HPC) and breakpoints
User visible changes ==================== Profilng -------- Flags renamed (the old ones are still accepted for now): OLD NEW --------- ------------ -auto-all -fprof-auto -auto -fprof-exported -caf-all -fprof-cafs New flags: -fprof-auto Annotates all bindings (not just top-level ones) with SCCs -fprof-top Annotates just top-level bindings with SCCs -fprof-exported Annotates just exported bindings with SCCs -fprof-no-count-entries Do not maintain entry counts when profiling (can make profiled code go faster; useful with heap profiling where entry counts are not used) Cost-centre stacks have a new semantics, which should in most cases result in more useful and intuitive profiles. If you find this not to be the case, please let me know. This is the area where I have been experimenting most, and the current solution is probably not the final version, however it does address all the outstanding bugs and seems to be better than GHC 7.2. Stack traces ------------ +RTS -xc now gives more information. If the exception originates from a CAF (as is common, because GHC tends to lift exceptions out to the top-level), then the RTS walks up the stack and reports the stack in the enclosing update frame(s). Result: +RTS -xc is much more useful now - but you still have to compile for profiling to get it. I've played around a little with adding 'head []' to GHC itself, and +RTS -xc does pinpoint the problem quite accurately. I plan to add more facilities for stack tracing (e.g. in GHCi) in the future. Coverage (HPC) -------------- * derived instances are now coloured yellow if they weren't used * likewise record field names * entry counts are more accurate (hpc --fun-entry-count) * tab width is now correct (markup was previously off in source with tabs) Internal changes ================ In Core, the Note constructor has been replaced by Tick (Tickish b) (Expr b) which is used to represent all the kinds of source annotation we support: profiling SCCs, HPC ticks, and GHCi breakpoints. Depending on the properties of the Tickish, different transformations apply to Tick. See CoreUtils.mkTick for details. Tickets ======= This commit closes the following tickets, test cases to follow: - Close #2552: not a bug, but the behaviour is now more intuitive (test is T2552) - Close #680 (test is T680) - Close #1531 (test is result001) - Close #949 (test is T949) - Close #2466: test case has bitrotted (doesn't compile against current version of vector-space package)
Diffstat (limited to 'rts')
-rw-r--r--rts/Apply.cmm25
-rw-r--r--rts/AutoApply.h11
-rw-r--r--rts/Exception.cmm4
-rw-r--r--rts/PrimOps.cmm4
-rw-r--r--rts/ProfHeap.c4
-rw-r--r--rts/Profiling.c1200
-rw-r--r--rts/Profiling.h3
-rw-r--r--rts/Proftimer.c2
-rw-r--r--rts/RaiseAsync.c2
-rw-r--r--rts/RtsFlags.c7
-rw-r--r--rts/StgMiscClosures.cmm16
-rw-r--r--rts/Updates.cmm2
-rw-r--r--rts/sm/Storage.c6
13 files changed, 677 insertions, 609 deletions
diff --git a/rts/Apply.cmm b/rts/Apply.cmm
index f9ac3b353c..5397fc55df 100644
--- a/rts/Apply.cmm
+++ b/rts/Apply.cmm
@@ -85,8 +85,9 @@ stg_PAP_apply
// profiling
TICK_ENT_PAP();
LDV_ENTER(pap);
- // Enter PAP cost centre
- ENTER_CCS_PAP_CL(pap);
+#ifdef PROFILING
+ foreign "C" enterFunCCS(StgHeader_ccs(pap));
+#endif
// Reload the stack
W_ i;
@@ -175,11 +176,9 @@ INFO_TABLE(stg_AP,/*special layout*/0,0,AP,"AP","AP")
TICK_ENT_AP();
LDV_ENTER(ap);
+ ENTER_CCS_THUNK(ap);
- // Enter PAP cost centre
- ENTER_CCS_PAP_CL(ap); // ToDo: ENTER_CC_AP_CL
-
- // Reload the stack
+ // Reload the stack
W_ i;
W_ p;
p = ap + SIZEOF_StgHeader + OFFSET_StgAP_payload;
@@ -245,11 +244,9 @@ INFO_TABLE(stg_AP_NOUPD,/*special layout*/0,0,AP,"AP_NOUPD","AP_NOUPD")
TICK_ENT_AP();
LDV_ENTER(ap);
+ ENTER_CCS_THUNK(ap);
- // Enter PAP cost centre
- ENTER_CCS_PAP_CL(ap); // ToDo: ENTER_CC_AP_CL
-
- // Reload the stack
+ // Reload the stack
W_ i;
W_ p;
p = ap + SIZEOF_StgHeader + OFFSET_StgAP_payload;
@@ -326,9 +323,7 @@ INFO_TABLE(stg_AP_STACK,/*special layout*/0,0,AP_STACK,"AP_STACK","AP_STACK")
TICK_ENT_AP();
LDV_ENTER(ap);
-
- // Enter PAP cost centre
- ENTER_CCS_PAP_CL(ap); // ToDo: ENTER_CC_AP_CL
+ ENTER_CCS_THUNK(ap);
// Reload the stack
W_ i;
@@ -379,9 +374,7 @@ INFO_TABLE(stg_AP_STACK_NOUPD,/*special layout*/0,0,AP_STACK,
TICK_ENT_AP();
LDV_ENTER(ap);
-
- // Enter PAP cost centre
- ENTER_CCS_PAP_CL(ap); // ToDo: ENTER_CC_AP_CL
+ ENTER_CCS_THUNK(ap);
// Reload the stack
W_ i;
diff --git a/rts/AutoApply.h b/rts/AutoApply.h
index bbec1224ff..547c5d2f28 100644
--- a/rts/AutoApply.h
+++ b/rts/AutoApply.h
@@ -76,5 +76,16 @@
Sp_adj(n+1); \
jump %ENTRY_CODE(Sp(0));
+// Jump to target, saving CCCS and restoring it on return
+#if defined(PROFILING)
+#define jump_SAVE_CCCS(target) \
+ Sp(-1) = W_[CCCS]; \
+ Sp(-2) = stg_restore_cccs_info; \
+ Sp_adj(-2); \
+ jump (target)
+#else
+#define jump_SAVE_CCCS(target) jump (target)
+#endif
+
#endif /* APPLY_H */
diff --git a/rts/Exception.cmm b/rts/Exception.cmm
index 591fa7ab9b..9f48f5d8f5 100644
--- a/rts/Exception.cmm
+++ b/rts/Exception.cmm
@@ -426,7 +426,9 @@ stg_raisezh
* the info was only displayed for an *uncaught* exception.
*/
if (RtsFlags_ProfFlags_showCCSOnException(RtsFlags) != 0::I32) {
- foreign "C" fprintCCS_stderr(W_[CCCS] "ptr") [];
+ SAVE_THREAD_STATE();
+ foreign "C" fprintCCS_stderr(W_[CCCS] "ptr", CurrentTSO "ptr") [];
+ LOAD_THREAD_STATE();
}
#endif
diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm
index bb4f73bbab..c96e459975 100644
--- a/rts/PrimOps.cmm
+++ b/rts/PrimOps.cmm
@@ -1204,6 +1204,8 @@ stg_takeMVarzh
// into the heap check generated by the code generator, so we
// have to do it in stg_gc_gen (see HeapStackCheck.cmm).
HP_CHK_GEN_TICKY(SIZEOF_StgMVarTSOQueue, R1_PTR, stg_takeMVarzh);
+ TICK_ALLOC_PRIM(SIZEOF_StgMVarTSOQueue, 0, 0);
+ CCCS_ALLOC(SIZEOF_StgMVarTSOQueue);
q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1);
@@ -1369,6 +1371,8 @@ stg_putMVarzh
// see Note [mvar-heap-check] above
HP_CHK_GEN_TICKY(SIZEOF_StgMVarTSOQueue, R1_PTR & R2_PTR, stg_putMVarzh);
+ TICK_ALLOC_PRIM(SIZEOF_StgMVarTSOQueue, 0, 0);
+ CCCS_ALLOC(SIZEOF_StgMVarTSOQueue);
q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1);
diff --git a/rts/ProfHeap.c b/rts/ProfHeap.c
index 9d95b4ccc0..56c44519fb 100644
--- a/rts/ProfHeap.c
+++ b/rts/ProfHeap.c
@@ -919,12 +919,12 @@ heapCensusChain( Census *census, bdescr *bd )
for (; bd != NULL; bd = bd->link) {
// HACK: pretend a pinned block is just one big ARR_WORDS
- // owned by CCS_SYSTEM. These blocks can be full of holes due
+ // owned by CCS_PINNED. These blocks can be full of holes due
// to alignment constraints so we can't traverse the memory
// and do a proper census.
if (bd->flags & BF_PINNED) {
StgClosure arr;
- SET_HDR(&arr, &stg_ARR_WORDS_info, CCS_SYSTEM);
+ SET_HDR(&arr, &stg_ARR_WORDS_info, CCS_PINNED);
heapProfObject(census, &arr, bd->blocks * BLOCK_SIZE_W, rtsTrue);
continue;
}
diff --git a/rts/Profiling.c b/rts/Profiling.c
index 5648f31e00..55495cdf94 100644
--- a/rts/Profiling.c
+++ b/rts/Profiling.c
@@ -36,12 +36,11 @@ Arena *prof_arena;
unsigned int CC_ID = 1;
unsigned int CCS_ID = 1;
-unsigned int HP_ID = 1;
/* figures for the profiling report.
*/
static StgWord64 total_alloc;
-static lnat total_prof_ticks;
+static lnat total_prof_ticks;
/* Globals for opening the profiling log file(s)
*/
@@ -55,7 +54,7 @@ FILE *hp_file;
*/
CostCentreStack *CCCS;
-/* Linked lists to keep track of cc's and ccs's that haven't
+/* Linked lists to keep track of CCs and CCSs that haven't
* been declared in the log file yet
*/
CostCentre *CC_LIST = NULL;
@@ -78,67 +77,59 @@ CostCentreStack *CCS_LIST = NULL;
* itself. These are costs that would not be incurred
* during non-profiled execution of the program.
*
- * SUBSUMED is the one-and-only CCS placed on top-level functions.
- * It indicates that all costs are to be attributed to the
- * enclosing cost centre stack. SUBSUMED never accumulates
- * any costs. The is_caf flag is set on the subsumed cost
- * centre.
- *
* DONT_CARE is a placeholder cost-centre we assign to static
* constructors. It should *never* accumulate any costs.
+ *
+ * PINNED accumulates memory allocated to pinned objects, which
+ * cannot be profiled separately because we cannot reliably
+ * traverse pinned memory.
*/
-CC_DECLARE(CC_MAIN, "MAIN", "MAIN", CC_IS_BORING, );
-CC_DECLARE(CC_SYSTEM, "SYSTEM", "MAIN", CC_IS_BORING, );
-CC_DECLARE(CC_GC, "GC", "GC", CC_IS_BORING, );
-CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", CC_IS_CAF, );
-CC_DECLARE(CC_SUBSUMED, "SUBSUMED", "MAIN", CC_IS_CAF, );
-CC_DECLARE(CC_DONT_CARE, "DONT_CARE", "MAIN", CC_IS_BORING, );
+CC_DECLARE(CC_MAIN, "MAIN", "MAIN", CC_NOT_CAF, );
+CC_DECLARE(CC_SYSTEM, "SYSTEM", "SYSTEM", CC_NOT_CAF, );
+CC_DECLARE(CC_GC, "GC", "GC", CC_NOT_CAF, );
+CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", CC_NOT_CAF, );
+CC_DECLARE(CC_DONT_CARE, "DONT_CARE", "MAIN", CC_NOT_CAF, );
+CC_DECLARE(CC_PINNED, "PINNED", "SYSTEM", CC_NOT_CAF, );
CCS_DECLARE(CCS_MAIN, CC_MAIN, );
CCS_DECLARE(CCS_SYSTEM, CC_SYSTEM, );
CCS_DECLARE(CCS_GC, CC_GC, );
CCS_DECLARE(CCS_OVERHEAD, CC_OVERHEAD, );
-CCS_DECLARE(CCS_SUBSUMED, CC_SUBSUMED, );
-CCS_DECLARE(CCS_DONT_CARE, CC_DONT_CARE, );
+CCS_DECLARE(CCS_DONT_CARE, CC_DONT_CARE, );
+CCS_DECLARE(CCS_PINNED, CC_PINNED, );
-/*
- * Uniques for the XML log-file format
- */
-#define CC_UQ 1
-#define CCS_UQ 2
-#define TC_UQ 3
-#define HEAP_OBJ_UQ 4
-#define TIME_UPD_UQ 5
-#define HEAP_UPD_UQ 6
-
-/*
+/*
* Static Functions
*/
-static CostCentreStack * ActualPush_ ( CostCentreStack *ccs, CostCentre *cc,
- CostCentreStack *new_ccs );
-static rtsBool ccs_to_ignore ( CostCentreStack *ccs );
-static void count_ticks ( CostCentreStack *ccs );
-static void inherit_costs ( CostCentreStack *ccs );
-static void findCCSMaxLens ( CostCentreStack *ccs, nat indent, nat *max_label_len, nat *max_module_len );
-static void logCCS ( CostCentreStack *ccs, nat indent, nat max_label_len, nat max_module_len );
+static CostCentreStack * appendCCS ( CostCentreStack *ccs1,
+ CostCentreStack *ccs2 );
+static CostCentreStack * actualPush_ ( CostCentreStack *ccs, CostCentre *cc,
+ CostCentreStack *new_ccs );
+static rtsBool ignoreCCS ( CostCentreStack *ccs );
+static void countTickss ( CostCentreStack *ccs );
+static void inheritCosts ( CostCentreStack *ccs );
+static void findCCSMaxLens ( CostCentreStack *ccs,
+ nat indent,
+ nat *max_label_len,
+ nat *max_module_len );
+static void logCCS ( CostCentreStack *ccs,
+ nat indent,
+ nat max_label_len,
+ nat max_module_len );
static void reportCCS ( CostCentreStack *ccs );
-static void DecCCS ( CostCentreStack *ccs );
-static void DecBackEdge ( CostCentreStack *ccs,
- CostCentreStack *oldccs );
-static CostCentreStack * CheckLoop ( CostCentreStack *ccs, CostCentre *cc );
+static CostCentreStack * checkLoop ( CostCentreStack *ccs,
+ CostCentre *cc );
static CostCentreStack * pruneCCSTree ( CostCentreStack *ccs );
-static CostCentreStack * ActualPush ( CostCentreStack *, CostCentre * );
-static CostCentreStack * IsInIndexTable ( IndexTable *, CostCentre * );
-static IndexTable * AddToIndexTable ( IndexTable *, CostCentreStack *,
+static CostCentreStack * actualPush ( CostCentreStack *, CostCentre * );
+static CostCentreStack * isInIndexTable ( IndexTable *, CostCentre * );
+static IndexTable * addToIndexTable ( IndexTable *, CostCentreStack *,
CostCentre *, unsigned int );
static void ccsSetSelected ( CostCentreStack *ccs );
-static void initTimeProfiling ( void );
-static void initProfilingLogFile( void );
-
-static void reportCCS_XML ( CostCentreStack *ccs );
+static void initTimeProfiling ( void );
+static void initProfilingLogFile ( void );
/* -----------------------------------------------------------------------------
Initialise the profiling environment
@@ -147,11 +138,11 @@ static void reportCCS_XML ( CostCentreStack *ccs );
void
initProfiling1 (void)
{
- // initialise our arena
- prof_arena = newArena();
+ // initialise our arena
+ prof_arena = newArena();
- /* for the benefit of allocate()... */
- CCCS = CCS_SYSTEM;
+ /* for the benefit of allocate()... */
+ CCCS = CCS_SYSTEM;
}
void
@@ -163,93 +154,57 @@ freeProfiling (void)
void
initProfiling2 (void)
{
- CostCentreStack *ccs, *next;
-
- CCCS = CCS_SYSTEM;
-
- /* Set up the log file, and dump the header and cost centre
- * information into it. */
- initProfilingLogFile();
-
- /* Register all the cost centres / stacks in the program
- * CC_MAIN gets link = 0, all others have non-zero link.
- */
- REGISTER_CC(CC_MAIN);
- REGISTER_CC(CC_SYSTEM);
- REGISTER_CC(CC_GC);
- REGISTER_CC(CC_OVERHEAD);
- REGISTER_CC(CC_SUBSUMED);
- REGISTER_CC(CC_DONT_CARE);
-
- REGISTER_CCS(CCS_SYSTEM);
- REGISTER_CCS(CCS_GC);
- REGISTER_CCS(CCS_OVERHEAD);
- REGISTER_CCS(CCS_SUBSUMED);
- REGISTER_CCS(CCS_DONT_CARE);
- REGISTER_CCS(CCS_MAIN);
-
- /* find all the "special" cost centre stacks, and make them children
- * of CCS_MAIN.
- */
- ASSERT(CCS_LIST == CCS_MAIN);
- CCS_LIST = CCS_LIST->prevStack;
- CCS_MAIN->prevStack = NULL;
- CCS_MAIN->root = CC_MAIN;
- ccsSetSelected(CCS_MAIN);
- DecCCS(CCS_MAIN);
-
- for (ccs = CCS_LIST; ccs != NULL; ) {
- next = ccs->prevStack;
- ccs->prevStack = NULL;
- ActualPush_(CCS_MAIN,ccs->cc,ccs);
- ccs->root = ccs->cc;
- ccs = next;
- }
-
- if (RtsFlags.CcFlags.doCostCentres) {
- initTimeProfiling();
- }
-
- if (RtsFlags.ProfFlags.doHeapProfile) {
- initHeapProfiling();
- }
-}
-
-// Decide whether closures with this CCS should contribute to the heap
-// profile.
-static void
-ccsSetSelected( CostCentreStack *ccs )
-{
- if (RtsFlags.ProfFlags.modSelector) {
- if (! strMatchesSelector( ccs->cc->module,
- RtsFlags.ProfFlags.modSelector ) ) {
- ccs->selected = 0;
- return;
- }
+ CostCentreStack *ccs, *next;
+
+ CCCS = CCS_SYSTEM;
+
+ /* Set up the log file, and dump the header and cost centre
+ * information into it.
+ */
+ initProfilingLogFile();
+
+ /* Register all the cost centres / stacks in the program
+ * CC_MAIN gets link = 0, all others have non-zero link.
+ */
+ REGISTER_CC(CC_MAIN);
+ REGISTER_CC(CC_SYSTEM);
+ REGISTER_CC(CC_GC);
+ REGISTER_CC(CC_OVERHEAD);
+ REGISTER_CC(CC_DONT_CARE);
+ REGISTER_CC(CC_PINNED);
+
+ REGISTER_CCS(CCS_SYSTEM);
+ REGISTER_CCS(CCS_GC);
+ REGISTER_CCS(CCS_OVERHEAD);
+ REGISTER_CCS(CCS_DONT_CARE);
+ REGISTER_CCS(CCS_PINNED);
+ REGISTER_CCS(CCS_MAIN);
+
+ /* find all the registered cost centre stacks, and make them
+ * children of CCS_MAIN.
+ */
+ ASSERT(CCS_LIST == CCS_MAIN);
+ CCS_LIST = CCS_LIST->prevStack;
+ CCS_MAIN->prevStack = NULL;
+ CCS_MAIN->root = CCS_MAIN;
+ ccsSetSelected(CCS_MAIN);
+
+ // make CCS_MAIN the parent of all the pre-defined CCSs.
+ for (ccs = CCS_LIST; ccs != NULL; ) {
+ next = ccs->prevStack;
+ ccs->prevStack = NULL;
+ actualPush_(CCS_MAIN,ccs->cc,ccs);
+ ccs->root = ccs;
+ ccs = next;
}
- if (RtsFlags.ProfFlags.ccSelector) {
- if (! strMatchesSelector( ccs->cc->label,
- RtsFlags.ProfFlags.ccSelector ) ) {
- ccs->selected = 0;
- return;
- }
- }
- if (RtsFlags.ProfFlags.ccsSelector) {
- CostCentreStack *c;
- for (c = ccs; c != NULL; c = c->prevStack) {
- if ( strMatchesSelector( c->cc->label,
- RtsFlags.ProfFlags.ccsSelector )) {
- break;
- }
- }
- if (c == NULL) {
- ccs->selected = 0;
- return;
- }
+
+ if (RtsFlags.CcFlags.doCostCentres) {
+ initTimeProfiling();
}
- ccs->selected = 1;
- return;
+ if (RtsFlags.ProfFlags.doHeapProfile) {
+ initHeapProfiling();
+ }
}
@@ -294,21 +249,6 @@ initProfilingLogFile(void)
RtsFlags.ProfFlags.doHeapProfile = 0;
return;
}
-
- if (RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
- /* dump the time, and the profiling interval */
- fprintf(prof_file, "\"%s\"\n", time_str());
- fprintf(prof_file, "\"%d ms\"\n", RtsFlags.MiscFlags.tickInterval);
-
- /* declare all the cost centres */
- {
- CostCentre *cc;
- for (cc = CC_LIST; cc != NULL; cc = cc->link) {
- fprintf(prof_file, "%d %ld \"%s\" \"%s\"\n",
- CC_UQ, cc->ccID, cc->label, cc->module);
- }
- }
- }
}
if (RtsFlags.ProfFlags.doHeapProfile) {
@@ -329,40 +269,151 @@ initProfilingLogFile(void)
void
initTimeProfiling(void)
{
- /* Start ticking */
- startProfTimer();
+ /* Start ticking */
+ startProfTimer();
};
void
endProfiling ( void )
{
- if (RtsFlags.CcFlags.doCostCentres) {
- stopProfTimer();
- }
- if (RtsFlags.ProfFlags.doHeapProfile) {
- endHeapProfiling();
- }
+ if (RtsFlags.CcFlags.doCostCentres) {
+ stopProfTimer();
+ }
+ if (RtsFlags.ProfFlags.doHeapProfile) {
+ endHeapProfiling();
+ }
}
/* -----------------------------------------------------------------------------
- Set cost centre stack when entering a function.
+ Set CCCS when entering a function.
+
+ The algorithm is as follows.
+
+ ccs ++> ccsfn = ccs ++ dropCommonPrefix ccs ccsfn
+
+ where
+
+ dropCommonPrefix A B
+ -- returns the suffix of B after removing any prefix common
+ -- to both A and B.
+
+ e.g.
+
+ <a,b,c> ++> <> = <a,b,c>
+ <a,b,c> ++> <d> = <a,b,c,d>
+ <a,b,c> ++> <a,b> = <a,b,c>
+ <a,b> ++> <a,b,c> = <a,b,c>
+ <a,b,c> ++> <a,b,d> = <a,b,c,d>
+
-------------------------------------------------------------------------- */
-rtsBool entering_PAP;
-void
-EnterFunCCS ( CostCentreStack *ccsfn )
+// implements c1 ++> c2, where c1 and c2 are equal depth
+//
+static void enterFunEqualStacks (CostCentreStack *ccs, CostCentreStack *ccsfn)
{
- /* PAP_entry has already set CCCS for us */
- if (entering_PAP) {
- entering_PAP = rtsFalse;
- return;
- }
+ ASSERT(ccs->depth == ccsfn->depth);
+ if (ccs == ccsfn) return;
+ enterFunEqualStacks(ccs->prevStack, ccsfn->prevStack);
+ CCCS = pushCostCentre(CCCS, ccsfn->cc);
+}
+
+// implements c1 ++> c2, where c2 is deeper than c1.
+// Drop elements of c2 until we have equal stacks, call
+// enterFunEqualStacks(), and then push on the elements that we
+// dropped in reverse order.
+//
+static void enterFunCurShorter (CostCentreStack *ccsfn, StgWord n)
+{
+ if (n == 0) {
+ ASSERT(ccsfn->depth == CCCS->depth);
+ enterFunEqualStacks(CCCS,ccsfn);
+ return;
+ }
+ enterFunCurShorter(ccsfn->prevStack, n-1);
+ CCCS = pushCostCentre(CCCS, ccsfn->cc);
+}
+
+void enterFunCCS ( CostCentreStack *ccsfn )
+{
+ // common case 1: both stacks are the same
+ if (ccsfn == CCCS) {
+ return;
+ }
+
+ // common case 2: the function stack is empty, or just CAF
+ if (ccsfn->prevStack == CCS_MAIN) {
+ return;
+ }
+
+ // common case 3: the stacks are completely different (e.g. one is a
+ // descendent of MAIN and the other of a CAF): we append the whole
+ // of the function stack to the current CCS.
+ if (ccsfn->root != CCCS->root) {
+ CCCS = appendCCS(CCCS,ccsfn);
+ return;
+ }
+
+ // uncommon case 4: CCCS is deeper than ccsfn
+ if (CCCS->depth > ccsfn->depth) {
+ nat i, n;
+ CostCentreStack *tmp = CCCS;
+ n = CCCS->depth - ccsfn->depth;
+ for (i = 0; i < n; i++) {
+ tmp = tmp->prevStack;
+ }
+ enterFunEqualStacks(tmp,ccsfn);
+ return;
+ }
+
+ // uncommon case 5: ccsfn is deeper than CCCS
+ if (ccsfn->depth > CCCS->depth) {
+ enterFunCurShorter(ccsfn, ccsfn->depth - CCCS->depth);
+ return;
+ }
- if (ccsfn->root->is_caf == CC_IS_CAF) {
- CCCS = AppendCCS(CCCS,ccsfn);
- } else {
- CCCS = ccsfn;
- }
+ // uncommon case 6: stacks are equal depth, but different
+ enterFunEqualStacks(CCCS,ccsfn);
+}
+
+/* -----------------------------------------------------------------------------
+ Decide whether closures with this CCS should contribute to the heap
+ profile.
+ -------------------------------------------------------------------------- */
+
+static void
+ccsSetSelected (CostCentreStack *ccs)
+{
+ if (RtsFlags.ProfFlags.modSelector) {
+ if (! strMatchesSelector (ccs->cc->module,
+ RtsFlags.ProfFlags.modSelector) ) {
+ ccs->selected = 0;
+ return;
+ }
+ }
+ if (RtsFlags.ProfFlags.ccSelector) {
+ if (! strMatchesSelector (ccs->cc->label,
+ RtsFlags.ProfFlags.ccSelector) ) {
+ ccs->selected = 0;
+ return;
+ }
+ }
+ if (RtsFlags.ProfFlags.ccsSelector) {
+ CostCentreStack *c;
+ for (c = ccs; c != NULL; c = c->prevStack)
+ {
+ if ( strMatchesSelector (c->cc->label,
+ RtsFlags.ProfFlags.ccsSelector) ) {
+ break;
+ }
+ }
+ if (c == NULL) {
+ ccs->selected = 0;
+ return;
+ }
+ }
+
+ ccs->selected = 1;
+ return;
}
/* -----------------------------------------------------------------------------
@@ -370,211 +421,192 @@ EnterFunCCS ( CostCentreStack *ccsfn )
-------------------------------------------------------------------------- */
#ifdef DEBUG
-CostCentreStack * _PushCostCentre ( CostCentreStack *ccs, CostCentre *cc );
+CostCentreStack * _pushCostCentre ( CostCentreStack *ccs, CostCentre *cc );
CostCentreStack *
-PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
-#define PushCostCentre _PushCostCentre
+pushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
+#define pushCostCentre _pushCostCentre
{
IF_DEBUG(prof,
traceBegin("pushing %s on ", cc->label);
debugCCS(ccs);
traceEnd(););
- return PushCostCentre(ccs,cc);
+ return pushCostCentre(ccs,cc);
}
#endif
-CostCentreStack *
-PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
-{
- CostCentreStack *temp_ccs;
-
- if (ccs == EMPTY_STACK)
- return ActualPush(ccs,cc);
- else {
- if (ccs->cc == cc)
- return ccs;
- else {
- /* check if we've already memoized this stack */
- temp_ccs = IsInIndexTable(ccs->indexTable,cc);
-
- if (temp_ccs != EMPTY_STACK)
- return temp_ccs;
- else {
- temp_ccs = CheckLoop(ccs,cc);
- if (temp_ccs != NULL) {
- /* we have recursed to an older CCS. Mark this in
- * the index table, and emit a "back edge" into the
- * log file.
- */
- ccs->indexTable = AddToIndexTable(ccs->indexTable,temp_ccs,cc,1);
- DecBackEdge(temp_ccs,ccs);
- return temp_ccs;
- } else {
- return ActualPush(ccs,cc);
- }
- }
- }
- }
-}
-
-static CostCentreStack *
-CheckLoop ( CostCentreStack *ccs, CostCentre *cc )
-{
- while (ccs != EMPTY_STACK) {
- if (ccs->cc == cc)
- return ccs;
- ccs = ccs->prevStack;
- }
- return NULL;
-}
-
/* Append ccs1 to ccs2 (ignoring any CAF cost centre at the root of ccs1 */
#ifdef DEBUG
-CostCentreStack *_AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 );
+CostCentreStack *_appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 );
CostCentreStack *
-AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
-#define AppendCCS _AppendCCS
-{
- IF_DEBUG(prof,
- if (ccs1 != ccs2) {
- debugBelch("Appending ");
- debugCCS(ccs1);
- debugBelch(" to ");
- debugCCS(ccs2);
- debugBelch("\n");});
- return AppendCCS(ccs1,ccs2);
+appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
+#define appendCCS _appendCCS
+{
+ IF_DEBUG(prof,
+ if (ccs1 != ccs2) {
+ debugBelch("Appending ");
+ debugCCS(ccs1);
+ debugBelch(" to ");
+ debugCCS(ccs2);
+ debugBelch("\n");});
+ return appendCCS(ccs1,ccs2);
}
#endif
CostCentreStack *
-AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
+appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
{
- CostCentreStack *ccs = NULL;
+ if (ccs1 == ccs2) {
+ return ccs1;
+ }
+
+ if (ccs2 == CCS_MAIN || ccs2->cc->is_caf == CC_IS_CAF) {
+ // stop at a CAF element
+ return ccs1;
+ }
- if (ccs1 == ccs2) {
- return ccs1;
- }
+ return pushCostCentre(appendCCS(ccs1, ccs2->prevStack), ccs2->cc);
+}
- if (ccs2->cc->is_caf == CC_IS_CAF) {
- return ccs1;
- }
-
- if (ccs2->prevStack != NULL) {
- ccs = AppendCCS(ccs1, ccs2->prevStack);
- }
+// Pick one:
+// #define RECURSION_DROPS
+#define RECURSION_TRUNCATES
- return PushCostCentre(ccs,ccs2->cc);
+CostCentreStack *
+pushCostCentre (CostCentreStack *ccs, CostCentre *cc)
+{
+ CostCentreStack *temp_ccs;
+
+ if (ccs == EMPTY_STACK)
+ return actualPush(ccs,cc);
+ else {
+ if (ccs->cc == cc)
+ return ccs;
+ else {
+ // check if we've already memoized this stack
+ temp_ccs = isInIndexTable(ccs->indexTable,cc);
+
+ if (temp_ccs != EMPTY_STACK)
+ return temp_ccs;
+ else {
+ temp_ccs = checkLoop(ccs,cc);
+ if (temp_ccs != NULL) {
+ // This CC is already in the stack somewhere.
+ // This could be recursion, or just calling
+ // another function with the same CC.
+ // A number of policies are possible at this
+ // point, we implement two here:
+ // - truncate the stack to the previous instance
+ // of this CC
+ // - ignore this push, return the same stack.
+ //
+ CostCentreStack *new_ccs;
+#if defined(RECURSION_TRUNCATES)
+ new_ccs = temp_ccs;
+#else // defined(RECURSION_DROPS)
+ new_ccs = ccs;
+#endif
+ ccs->indexTable = addToIndexTable (ccs->indexTable,
+ new_ccs, cc, 1);
+ return new_ccs;
+ } else {
+ return actualPush (ccs,cc);
+ }
+ }
+ }
+ }
}
static CostCentreStack *
-ActualPush ( CostCentreStack *ccs, CostCentre *cc )
+checkLoop (CostCentreStack *ccs, CostCentre *cc)
{
- CostCentreStack *new_ccs;
-
- /* allocate space for a new CostCentreStack */
- new_ccs = (CostCentreStack *) arenaAlloc(prof_arena, sizeof(CostCentreStack));
-
- return ActualPush_(ccs, cc, new_ccs);
+ while (ccs != EMPTY_STACK) {
+ if (ccs->cc == cc)
+ return ccs;
+ ccs = ccs->prevStack;
+ }
+ return NULL;
}
static CostCentreStack *
-ActualPush_ ( CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs )
+actualPush (CostCentreStack *ccs, CostCentre *cc)
{
- /* assign values to each member of the structure */
- new_ccs->ccsID = CCS_ID++;
- new_ccs->cc = cc;
- new_ccs->prevStack = ccs;
-
- new_ccs->indexTable = EMPTY_TABLE;
-
- /* Initialise the various _scc_ counters to zero
- */
- new_ccs->scc_count = 0;
-
- /* Initialize all other stats here. There should be a quick way
- * that's easily used elsewhere too
- */
- new_ccs->time_ticks = 0;
- new_ccs->mem_alloc = 0;
- new_ccs->inherited_ticks = 0;
- new_ccs->inherited_alloc = 0;
-
- new_ccs->root = ccs->root;
+ CostCentreStack *new_ccs;
- // Set the selected field.
- ccsSetSelected(new_ccs);
+ // allocate space for a new CostCentreStack
+ new_ccs = (CostCentreStack *) arenaAlloc(prof_arena, sizeof(CostCentreStack));
- /* update the memoization table for the parent stack */
- if (ccs != EMPTY_STACK)
- ccs->indexTable = AddToIndexTable(ccs->indexTable, new_ccs, cc,
- 0/*not a back edge*/);
-
- /* make sure this CC is declared at the next heap/time sample */
- DecCCS(new_ccs);
-
- /* return a pointer to the new stack */
- return new_ccs;
+ return actualPush_(ccs, cc, new_ccs);
}
-
static CostCentreStack *
-IsInIndexTable(IndexTable *it, CostCentre *cc)
+actualPush_ (CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs)
{
- while (it!=EMPTY_TABLE)
- {
- if (it->cc==cc)
- return it->ccs;
- else
- it = it->next;
+ /* assign values to each member of the structure */
+ new_ccs->ccsID = CCS_ID++;
+ new_ccs->cc = cc;
+ new_ccs->prevStack = ccs;
+ new_ccs->root = ccs->root;
+ new_ccs->depth = ccs->depth + 1;
+
+ new_ccs->indexTable = EMPTY_TABLE;
+
+ /* Initialise the various _scc_ counters to zero
+ */
+ new_ccs->scc_count = 0;
+
+ /* Initialize all other stats here. There should be a quick way
+ * that's easily used elsewhere too
+ */
+ new_ccs->time_ticks = 0;
+ new_ccs->mem_alloc = 0;
+ new_ccs->inherited_ticks = 0;
+ new_ccs->inherited_alloc = 0;
+
+ // Set the selected field.
+ ccsSetSelected(new_ccs);
+
+ /* update the memoization table for the parent stack */
+ if (ccs != EMPTY_STACK) {
+ ccs->indexTable = addToIndexTable(ccs->indexTable, new_ccs, cc,
+ 0/*not a back edge*/);
}
-
- /* otherwise we never found it so return EMPTY_TABLE */
- return EMPTY_TABLE;
+
+ /* return a pointer to the new stack */
+ return new_ccs;
}
-static IndexTable *
-AddToIndexTable(IndexTable *it, CostCentreStack *new_ccs,
- CostCentre *cc, unsigned int back_edge)
+static CostCentreStack *
+isInIndexTable(IndexTable *it, CostCentre *cc)
{
- IndexTable *new_it;
-
- new_it = arenaAlloc(prof_arena, sizeof(IndexTable));
+ while (it!=EMPTY_TABLE)
+ {
+ if (it->cc == cc)
+ return it->ccs;
+ else
+ it = it->next;
+ }
- new_it->cc = cc;
- new_it->ccs = new_ccs;
- new_it->next = it;
- new_it->back_edge = back_edge;
- return new_it;
+ /* otherwise we never found it so return EMPTY_TABLE */
+ return EMPTY_TABLE;
}
-static void
-DecCCS(CostCentreStack *ccs)
+static IndexTable *
+addToIndexTable (IndexTable *it, CostCentreStack *new_ccs,
+ CostCentre *cc, unsigned int back_edge)
{
- if (prof_file && RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
- if (ccs->prevStack == EMPTY_STACK)
- fprintf(prof_file, "%d %ld 1 %ld\n", CCS_UQ,
- ccs->ccsID, ccs->cc->ccID);
- else
- fprintf(prof_file, "%d %ld 2 %ld %ld\n", CCS_UQ,
- ccs->ccsID, ccs->cc->ccID, ccs->prevStack->ccsID);
- }
-}
+ IndexTable *new_it;
-static void
-DecBackEdge( CostCentreStack *ccs, CostCentreStack *oldccs )
-{
- if (prof_file && RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
- if (ccs->prevStack == EMPTY_STACK)
- fprintf(prof_file, "%d %ld 1 %ld\n", CCS_UQ,
- ccs->ccsID, ccs->cc->ccID);
- else
- fprintf(prof_file, "%d %ld 2 %ld %ld\n", CCS_UQ,
- ccs->ccsID, ccs->cc->ccID, oldccs->ccsID);
- }
+ new_it = arenaAlloc(prof_arena, sizeof(IndexTable));
+
+ new_it->cc = cc;
+ new_it->ccs = new_ccs;
+ new_it->next = it;
+ new_it->back_edge = back_edge;
+ return new_it;
}
/* -----------------------------------------------------------------------------
@@ -585,12 +617,13 @@ DecBackEdge( CostCentreStack *ccs, CostCentreStack *oldccs )
* reports, so as not to cause confusion.
*/
static rtsBool
-cc_to_ignore (CostCentre *cc)
+ignoreCC (CostCentre *cc)
{
- if ( cc == CC_OVERHEAD
+ if (RtsFlags.CcFlags.doCostCentres < COST_CENTRES_ALL &&
+ ( cc == CC_OVERHEAD
|| cc == CC_DONT_CARE
|| cc == CC_GC
- || cc == CC_SYSTEM) {
+ || cc == CC_SYSTEM)) {
return rtsTrue;
} else {
return rtsFalse;
@@ -598,13 +631,14 @@ cc_to_ignore (CostCentre *cc)
}
static rtsBool
-ccs_to_ignore (CostCentreStack *ccs)
+ignoreCCS (CostCentreStack *ccs)
{
- if ( ccs == CCS_OVERHEAD
- || ccs == CCS_DONT_CARE
- || ccs == CCS_GC
- || ccs == CCS_SYSTEM) {
- return rtsTrue;
+ if (RtsFlags.CcFlags.doCostCentres < COST_CENTRES_ALL &&
+ ( ccs == CCS_OVERHEAD
+ || ccs == CCS_DONT_CARE
+ || ccs == CCS_GC
+ || ccs == CCS_SYSTEM)) {
+ return rtsTrue;
} else {
return rtsFalse;
}
@@ -617,88 +651,89 @@ ccs_to_ignore (CostCentreStack *ccs)
static CostCentre *sorted_cc_list;
static void
-aggregate_cc_costs( CostCentreStack *ccs )
+aggregateCCCosts( CostCentreStack *ccs )
{
- IndexTable *i;
+ IndexTable *i;
- ccs->cc->mem_alloc += ccs->mem_alloc;
- ccs->cc->time_ticks += ccs->time_ticks;
+ ccs->cc->mem_alloc += ccs->mem_alloc;
+ ccs->cc->time_ticks += ccs->time_ticks;
- for (i = ccs->indexTable; i != 0; i = i->next) {
- if (!i->back_edge) {
- aggregate_cc_costs(i->ccs);
+ for (i = ccs->indexTable; i != 0; i = i->next) {
+ if (!i->back_edge) {
+ aggregateCCCosts(i->ccs);
+ }
}
- }
}
static void
-insert_cc_in_sorted_list( CostCentre *new_cc )
+insertCCInSortedList( CostCentre *new_cc )
{
- CostCentre **prev, *cc;
+ CostCentre **prev, *cc;
- prev = &sorted_cc_list;
- for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
- if (new_cc->time_ticks > cc->time_ticks) {
- new_cc->link = cc;
- *prev = new_cc;
- return;
- } else {
- prev = &(cc->link);
+ prev = &sorted_cc_list;
+ for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
+ if (new_cc->time_ticks > cc->time_ticks) {
+ new_cc->link = cc;
+ *prev = new_cc;
+ return;
+ } else {
+ prev = &(cc->link);
+ }
}
- }
- new_cc->link = NULL;
- *prev = new_cc;
+ new_cc->link = NULL;
+ *prev = new_cc;
}
static void
-report_per_cc_costs( void )
+reportPerCCCosts( void )
{
- CostCentre *cc, *next;
- nat max_label_len, max_module_len;
+ CostCentre *cc, *next;
+ nat max_label_len, max_module_len;
- aggregate_cc_costs(CCS_MAIN);
- sorted_cc_list = NULL;
+ aggregateCCCosts(CCS_MAIN);
+ sorted_cc_list = NULL;
- max_label_len = max_module_len = 0;
+ max_label_len = 11; // no shorter than the "COST CENTRE" header
+ max_module_len = 7; // no shorter than the "MODULE" header
- for (cc = CC_LIST; cc != NULL; cc = next) {
- next = cc->link;
- if (cc->time_ticks > total_prof_ticks/100
- || cc->mem_alloc > total_alloc/100
- || RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL) {
- insert_cc_in_sorted_list(cc);
-
- max_label_len = stg_max(strlen(cc->label), max_label_len);
- max_module_len = stg_max(strlen(cc->module), max_module_len);
+ for (cc = CC_LIST; cc != NULL; cc = next) {
+ next = cc->link;
+ if (cc->time_ticks > total_prof_ticks/100
+ || cc->mem_alloc > total_alloc/100
+ || RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL) {
+ insertCCInSortedList(cc);
+
+ max_label_len = stg_max(strlen(cc->label), max_label_len);
+ max_module_len = stg_max(strlen(cc->module), max_module_len);
+ }
}
- }
-
- fprintf(prof_file, "%-*s %-*s", max_label_len, "COST CENTRE", max_module_len, "MODULE");
- fprintf(prof_file, "%6s %6s", "%time", "%alloc");
- if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
- fprintf(prof_file, " %5s %9s", "ticks", "bytes");
- }
- fprintf(prof_file, "\n\n");
-
- for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
- if (cc_to_ignore(cc)) {
- continue;
- }
- fprintf(prof_file, "%-*s %-*s", max_label_len, cc->label, max_module_len, cc->module);
- fprintf(prof_file, "%6.1f %6.1f",
- total_prof_ticks == 0 ? 0.0 : (cc->time_ticks / (StgFloat) total_prof_ticks * 100),
- total_alloc == 0 ? 0.0 : (cc->mem_alloc / (StgFloat)
- total_alloc * 100)
- );
-
- if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
- fprintf(prof_file, " %5" FMT_Word64 " %9" FMT_Word64,
- (StgWord64)(cc->time_ticks), cc->mem_alloc*sizeof(W_));
- }
- fprintf(prof_file, "\n");
- }
- fprintf(prof_file,"\n\n");
+ fprintf(prof_file, "%-*s %-*s", max_label_len, "COST CENTRE", max_module_len, "MODULE");
+ fprintf(prof_file, "%6s %6s", "%time", "%alloc");
+ if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
+ fprintf(prof_file, " %5s %9s", "ticks", "bytes");
+ }
+ fprintf(prof_file, "\n\n");
+
+ for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
+ if (ignoreCC(cc)) {
+ continue;
+ }
+ fprintf(prof_file, "%-*s %-*s", max_label_len, cc->label, max_module_len, cc->module);
+ fprintf(prof_file, "%6.1f %6.1f",
+ total_prof_ticks == 0 ? 0.0 : (cc->time_ticks / (StgFloat) total_prof_ticks * 100),
+ total_alloc == 0 ? 0.0 : (cc->mem_alloc / (StgFloat)
+ total_alloc * 100)
+ );
+
+ if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
+ fprintf(prof_file, " %5" FMT_Word64 " %9" FMT_Word64,
+ (StgWord64)(cc->time_ticks), cc->mem_alloc*sizeof(W_));
+ }
+ fprintf(prof_file, "\n");
+ }
+
+ fprintf(prof_file,"\n\n");
}
/* -----------------------------------------------------------------------------
@@ -706,22 +741,18 @@ report_per_cc_costs( void )
-------------------------------------------------------------------------- */
static void
-fprint_header( nat max_label_len, nat max_module_len )
+fprintHeader( nat max_label_len, nat max_module_len )
{
- fprintf(prof_file, "%-24s %-10s individual inherited\n", "", "");
+ fprintf(prof_file, "%-*s %-*s%6s %11s %11s %11s\n", max_label_len, "", max_module_len, "", "", "", "individual", "inherited");
- fprintf(prof_file, "%-*s %-*s", max_label_len, "COST CENTRE", max_module_len, "MODULE");
- fprintf(prof_file, "%6s %10s %5s %5s %5s %5s", "no.", "entries", "%time", "%alloc", "%time", "%alloc");
+ fprintf(prof_file, "%-*s %-*s", max_label_len, "COST CENTRE", max_module_len, "MODULE");
+ fprintf(prof_file, "%6s %11s %5s %5s %5s %5s", "no.", "entries", "%time", "%alloc", "%time", "%alloc");
- if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
- fprintf(prof_file, " %5s %9s", "ticks", "bytes");
-#if defined(PROFILING_DETAIL_COUNTS)
- fprintf(prof_file, " %8s %8s %8s %8s %8s %8s %8s",
- "closures", "thunks", "funcs", "PAPs", "subfuns", "subcafs", "cafssub");
-#endif
- }
+ if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
+ fprintf(prof_file, " %5s %9s", "ticks", "bytes");
+ }
- fprintf(prof_file, "\n\n");
+ fprintf(prof_file, "\n\n");
}
void
@@ -734,17 +765,9 @@ reportCCSProfiling( void )
total_prof_ticks = 0;
total_alloc = 0;
- count_ticks(CCS_MAIN);
+ countTickss(CCS_MAIN);
- switch (RtsFlags.CcFlags.doCostCentres) {
- case 0:
- return;
- case COST_CENTRES_XML:
- gen_XML_logfile();
- return;
- default:
- break;
- }
+ if (RtsFlags.CcFlags.doCostCentres == 0) return;
fprintf(prof_file, "\t%s Time and Allocation Profiling Report (%s)\n",
time_str(), "Final");
@@ -769,92 +792,83 @@ reportCCSProfiling( void )
showStgWord64(total_alloc * sizeof(W_),
temp, rtsTrue/*commas*/));
-#if defined(PROFILING_DETAIL_COUNTS)
- fprintf(prof_file, " (%lu closures)", total_allocs);
-#endif
fprintf(prof_file, " (excludes profiling overheads)\n\n");
- report_per_cc_costs();
+ reportPerCCCosts();
- inherit_costs(CCS_MAIN);
+ inheritCosts(CCS_MAIN);
reportCCS(pruneCCSTree(CCS_MAIN));
}
static void
findCCSMaxLens(CostCentreStack *ccs, nat indent, nat *max_label_len, nat *max_module_len) {
- CostCentre *cc;
- IndexTable *i;
-
- cc = ccs->cc;
-
- *max_label_len = stg_max(*max_label_len, indent + strlen(cc->label));
- *max_module_len = stg_max(*max_module_len, strlen(cc->module));
-
- for (i = ccs->indexTable; i != 0; i = i->next) {
- if (!i->back_edge) {
- findCCSMaxLens(i->ccs, indent+1, max_label_len, max_module_len);
+ CostCentre *cc;
+ IndexTable *i;
+
+ cc = ccs->cc;
+
+ *max_label_len = stg_max(*max_label_len, indent + strlen(cc->label));
+ *max_module_len = stg_max(*max_module_len, strlen(cc->module));
+
+ for (i = ccs->indexTable; i != 0; i = i->next) {
+ if (!i->back_edge) {
+ findCCSMaxLens(i->ccs, indent+1, max_label_len, max_module_len);
+ }
}
- }
}
static void
logCCS(CostCentreStack *ccs, nat indent, nat max_label_len, nat max_module_len)
{
- CostCentre *cc;
- IndexTable *i;
+ CostCentre *cc;
+ IndexTable *i;
- cc = ccs->cc;
-
- /* Only print cost centres with non 0 data ! */
-
- if ( RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL ||
- ! ccs_to_ignore(ccs))
- /* force printing of *all* cost centres if -P -P */
+ cc = ccs->cc;
+
+ /* Only print cost centres with non 0 data ! */
+
+ if (!ignoreCCS(ccs))
+ /* force printing of *all* cost centres if -Pa */
{
- fprintf(prof_file, "%-*s%-*s %-*s",
- indent, "", max_label_len-indent, cc->label, max_module_len, cc->module);
+ fprintf(prof_file, "%-*s%-*s %-*s",
+ indent, "", max_label_len-indent, cc->label, max_module_len, cc->module);
- fprintf(prof_file, "%6ld %11.0f %5.1f %5.1f %5.1f %5.1f",
- ccs->ccsID, (double) ccs->scc_count,
- total_prof_ticks == 0 ? 0.0 : ((double)ccs->time_ticks / (double)total_prof_ticks * 100.0),
- total_alloc == 0 ? 0.0 : ((double)ccs->mem_alloc / (double)total_alloc * 100.0),
- total_prof_ticks == 0 ? 0.0 : ((double)ccs->inherited_ticks / (double)total_prof_ticks * 100.0),
- total_alloc == 0 ? 0.0 : ((double)ccs->inherited_alloc / (double)total_alloc * 100.0)
+ fprintf(prof_file, "%6ld %11" FMT_Word64 " %5.1f %5.1f %5.1f %5.1f",
+ ccs->ccsID, ccs->scc_count,
+ total_prof_ticks == 0 ? 0.0 : ((double)ccs->time_ticks / (double)total_prof_ticks * 100.0),
+ total_alloc == 0 ? 0.0 : ((double)ccs->mem_alloc / (double)total_alloc * 100.0),
+ total_prof_ticks == 0 ? 0.0 : ((double)ccs->inherited_ticks / (double)total_prof_ticks * 100.0),
+ total_alloc == 0 ? 0.0 : ((double)ccs->inherited_alloc / (double)total_alloc * 100.0)
);
- if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
- fprintf(prof_file, " %5" FMT_Word64 " %9" FMT_Word64,
- (StgWord64)(ccs->time_ticks), ccs->mem_alloc*sizeof(W_));
-#if defined(PROFILING_DETAIL_COUNTS)
- fprintf(prof_file, " %8ld %8ld %8ld %8ld %8ld %8ld %8ld",
- ccs->mem_allocs, ccs->thunk_count,
- ccs->function_count, ccs->pap_count,
- ccs->subsumed_fun_count, ccs->subsumed_caf_count,
- ccs->caffun_subsumed);
-#endif
+ if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
+ fprintf(prof_file, " %5" FMT_Word64 " %9" FMT_Word64,
+ (StgWord64)(ccs->time_ticks), ccs->mem_alloc*sizeof(W_));
+ }
+ fprintf(prof_file, "\n");
}
- fprintf(prof_file, "\n");
- }
- for (i = ccs->indexTable; i != 0; i = i->next) {
- if (!i->back_edge) {
- logCCS(i->ccs, indent+1, max_label_len, max_module_len);
+ for (i = ccs->indexTable; i != 0; i = i->next) {
+ if (!i->back_edge) {
+ logCCS(i->ccs, indent+1, max_label_len, max_module_len);
+ }
}
- }
}
static void
reportCCS(CostCentreStack *ccs)
{
- nat max_label_len, max_module_len;
- max_label_len = max_module_len = 0;
-
- findCCSMaxLens(ccs, 0, &max_label_len, &max_module_len);
-
- fprint_header(max_label_len, max_module_len);
- logCCS(ccs, 0, max_label_len, max_module_len);
+ nat max_label_len, max_module_len;
+
+ max_label_len = 11; // no shorter than "COST CENTRE" header
+ max_module_len = 7; // no shorter than "MODULE" header
+
+ findCCSMaxLens(ccs, 0, &max_label_len, &max_module_len);
+
+ fprintHeader(max_label_len, max_module_len);
+ logCCS(ccs, 0, max_label_len, max_module_len);
}
@@ -862,138 +876,176 @@ reportCCS(CostCentreStack *ccs)
* ticks/allocations.
*/
static void
-count_ticks(CostCentreStack *ccs)
+countTickss(CostCentreStack *ccs)
{
- IndexTable *i;
-
- if (!ccs_to_ignore(ccs)) {
- total_alloc += ccs->mem_alloc;
- total_prof_ticks += ccs->time_ticks;
- }
- for (i = ccs->indexTable; i != NULL; i = i->next)
- if (!i->back_edge) {
- count_ticks(i->ccs);
+ IndexTable *i;
+
+ if (!ignoreCCS(ccs)) {
+ total_alloc += ccs->mem_alloc;
+ total_prof_ticks += ccs->time_ticks;
}
+ for (i = ccs->indexTable; i != NULL; i = i->next)
+ if (!i->back_edge) {
+ countTickss(i->ccs);
+ }
}
/* Traverse the cost centre stack tree and inherit ticks & allocs.
*/
static void
-inherit_costs(CostCentreStack *ccs)
+inheritCosts(CostCentreStack *ccs)
{
- IndexTable *i;
+ IndexTable *i;
- if (ccs_to_ignore(ccs)) { return; }
+ if (ignoreCCS(ccs)) { return; }
- ccs->inherited_ticks += ccs->time_ticks;
- ccs->inherited_alloc += ccs->mem_alloc;
+ ccs->inherited_ticks += ccs->time_ticks;
+ ccs->inherited_alloc += ccs->mem_alloc;
- for (i = ccs->indexTable; i != NULL; i = i->next)
- if (!i->back_edge) {
- inherit_costs(i->ccs);
- ccs->inherited_ticks += i->ccs->inherited_ticks;
- ccs->inherited_alloc += i->ccs->inherited_alloc;
- }
-
- return;
+ for (i = ccs->indexTable; i != NULL; i = i->next)
+ if (!i->back_edge) {
+ inheritCosts(i->ccs);
+ ccs->inherited_ticks += i->ccs->inherited_ticks;
+ ccs->inherited_alloc += i->ccs->inherited_alloc;
+ }
+
+ return;
}
+//
+// Prune CCSs with zero entries, zero ticks or zero allocation from
+// the tree, unless COST_CENTRES_ALL is on.
+//
static CostCentreStack *
-pruneCCSTree( CostCentreStack *ccs )
+pruneCCSTree (CostCentreStack *ccs)
{
- CostCentreStack *ccs1;
- IndexTable *i, **prev;
-
- prev = &ccs->indexTable;
- for (i = ccs->indexTable; i != 0; i = i->next) {
- if (i->back_edge) { continue; }
+ CostCentreStack *ccs1;
+ IndexTable *i, **prev;
+
+ prev = &ccs->indexTable;
+ for (i = ccs->indexTable; i != 0; i = i->next) {
+ if (i->back_edge) { continue; }
+
+ ccs1 = pruneCCSTree(i->ccs);
+ if (ccs1 == NULL) {
+ *prev = i->next;
+ } else {
+ prev = &(i->next);
+ }
+ }
+
+ if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
+ /* force printing of *all* cost centres if -P -P */ )
- ccs1 = pruneCCSTree(i->ccs);
- if (ccs1 == NULL) {
- *prev = i->next;
+ || ( ccs->indexTable != 0 )
+ || ( ccs->scc_count || ccs->time_ticks || ccs->mem_alloc )
+ ) {
+ return ccs;
} else {
- prev = &(i->next);
+ return NULL;
}
- }
-
- if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
- /* force printing of *all* cost centres if -P -P */ )
-
- || ( ccs->indexTable != 0 )
- || ( ccs->scc_count || ccs->time_ticks || ccs->mem_alloc )
- ) {
- return ccs;
- } else {
- return NULL;
- }
}
-/* -----------------------------------------------------------------------------
- Generate the XML time/allocation profile
- -------------------------------------------------------------------------- */
-
void
-gen_XML_logfile( void )
+fprintCCS( FILE *f, CostCentreStack *ccs )
{
- fprintf(prof_file, "%d %lu", TIME_UPD_UQ, total_prof_ticks);
-
- reportCCS_XML(pruneCCSTree(CCS_MAIN));
-
- fprintf(prof_file, " 0\n");
+ fprintf(f,"<");
+ for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
+ fprintf(f,"%s.%s", ccs->cc->module, ccs->cc->label);
+ if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
+ fprintf(f,",");
+ }
+ }
+ fprintf(f,">");
}
-static void
-reportCCS_XML(CostCentreStack *ccs)
+// Returns: True if the call stack ended with CAF
+static rtsBool fprintCallStack (CostCentreStack *ccs)
{
- CostCentre *cc;
- IndexTable *i;
-
- if (ccs_to_ignore(ccs)) { return; }
-
- cc = ccs->cc;
-
- fprintf(prof_file, " 1 %ld %" FMT_Word64 " %" FMT_Word64 " %" FMT_Word64,
- ccs->ccsID, ccs->scc_count, (StgWord64)(ccs->time_ticks), ccs->mem_alloc);
-
- for (i = ccs->indexTable; i != 0; i = i->next) {
- if (!i->back_edge) {
- reportCCS_XML(i->ccs);
+ CostCentreStack *prev;
+
+ fprintf(stderr,"%s.%s", ccs->cc->module, ccs->cc->label);
+ prev = ccs->prevStack;
+ while (prev && prev != CCS_MAIN) {
+ ccs = prev;
+ fprintf(stderr, ",\n called from %s.%s",
+ ccs->cc->module, ccs->cc->label);
+ prev = ccs->prevStack;
}
- }
-}
+ fprintf(stderr, "\n");
-void
-fprintCCS( FILE *f, CostCentreStack *ccs )
-{
- fprintf(f,"<");
- for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
- fprintf(f,"%s.%s", ccs->cc->module, ccs->cc->label);
- if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
- fprintf(f,",");
- }
- }
- fprintf(f,">");
+ return (!strncmp(ccs->cc->label, "CAF", 3));
}
/* For calling from .cmm code, where we can't reliably refer to stderr */
void
-fprintCCS_stderr( CostCentreStack *ccs )
+fprintCCS_stderr (CostCentreStack *ccs, StgTSO *tso)
{
- fprintCCS(stderr, ccs);
+ rtsBool is_caf;
+ StgPtr frame;
+ StgStack *stack;
+ CostCentreStack *prev_ccs;
+ nat depth = 0;
+ const nat MAX_DEPTH = 10; // don't print gigantic chains of stacks
+
+ fprintf(stderr, "*** Exception raised (reporting due to +RTS -xc), stack trace:\n ");
+ is_caf = fprintCallStack(ccs);
+
+ // traverse the stack down to the enclosing update frame to
+ // find out where this CCS was evaluated from...
+
+ stack = tso->stackobj;
+ frame = stack->sp;
+ prev_ccs = ccs;
+
+ for (; is_caf && depth < MAX_DEPTH; depth++)
+ {
+ switch (get_itbl((StgClosure*)frame)->type)
+ {
+ case UPDATE_FRAME:
+ ccs = ((StgUpdateFrame*)frame)->header.prof.ccs;
+ frame += sizeofW(StgUpdateFrame);
+ if (ccs == CCS_MAIN) {
+ goto done;
+ }
+ if (ccs == prev_ccs) {
+ // ignore if this is the same as the previous stack,
+ // we're probably in library code and haven't
+ // accumulated any more interesting stack items
+ // since the last update frame.
+ break;
+ }
+ prev_ccs = ccs;
+ fprintf(stderr, " --> evaluated by: ");
+ is_caf = fprintCallStack(ccs);
+ break;
+ case UNDERFLOW_FRAME:
+ stack = ((StgUnderflowFrame*)frame)->next_chunk;
+ frame = stack->sp;
+ break;
+ case STOP_FRAME:
+ goto done;
+ default:
+ frame += stack_frame_sizeW((StgClosure*)frame);
+ break;
+ }
+ }
+done:
+ return;
}
#ifdef DEBUG
void
debugCCS( CostCentreStack *ccs )
{
- debugBelch("<");
- for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
- debugBelch("%s.%s", ccs->cc->module, ccs->cc->label);
- if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
- debugBelch(",");
- }
- }
- debugBelch(">");
+ debugBelch("<");
+ for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
+ debugBelch("%s.%s", ccs->cc->module, ccs->cc->label);
+ if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
+ debugBelch(",");
+ }
+ }
+ debugBelch(">");
}
#endif /* DEBUG */
diff --git a/rts/Profiling.h b/rts/Profiling.h
index 3e365fe536..2ee3311c81 100644
--- a/rts/Profiling.h
+++ b/rts/Profiling.h
@@ -30,13 +30,12 @@ extern FILE *hp_file;
#ifdef PROFILING
-void gen_XML_logfile ( void );
void reportCCSProfiling ( void );
void PrintNewStackDecls ( void );
void fprintCCS( FILE *f, CostCentreStack *ccs );
-void fprintCCS_stderr( CostCentreStack *ccs );
+void fprintCCS_stderr (CostCentreStack *ccs, StgTSO *tso);
#ifdef DEBUG
void debugCCS( CostCentreStack *ccs );
diff --git a/rts/Proftimer.c b/rts/Proftimer.c
index dfcc709625..82838184b7 100644
--- a/rts/Proftimer.c
+++ b/rts/Proftimer.c
@@ -65,11 +65,13 @@ initProfTimer( void )
startHeapProfTimer();
}
+nat total_ticks = 0;
void
handleProfTick(void)
{
#ifdef PROFILING
+ total_ticks++;
if (do_prof_ticks) {
CCCS->time_ticks++;
}
diff --git a/rts/RaiseAsync.c b/rts/RaiseAsync.c
index 775505f887..acc87b1938 100644
--- a/rts/RaiseAsync.c
+++ b/rts/RaiseAsync.c
@@ -739,7 +739,7 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception,
*/
if (RtsFlags.ProfFlags.showCCSOnException)
{
- fprintCCS_stderr(tso->prof.CCCS);
+ fprintCCS_stderr(tso->prof.CCCS,tso);
}
#endif
// ASSUMES: the thread is not already complete or dead
diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c
index 7009ea23a6..d2b4945c19 100644
--- a/rts/RtsFlags.c
+++ b/rts/RtsFlags.c
@@ -258,14 +258,12 @@ usage_text[] = {
" -B Sound the bell at the start of each garbage collection",
#if defined(PROFILING)
"",
-" -px Time/allocation profile (XML) (output file <program>.prof)",
" -p Time/allocation profile (output file <program>.prof)",
" -P More detailed Time/Allocation profile",
" -Pa Give information about *all* cost centres",
# if defined(PROFILING)
"",
-" -hx Heap residency profile (XML) (output file <program>.prof)",
" -h<break-down> Heap residency profile (hp2ps) (output file <program>.hp)",
" break-down: c = cost centre stack (default)",
" m = module",
@@ -936,10 +934,7 @@ error = rtsTrue;
OPTION_SAFE;
PROFILING_BUILD_ONLY(
switch (rts_argv[arg][2]) {
- case 'x':
- RtsFlags.CcFlags.doCostCentres = COST_CENTRES_XML;
- break;
- case 'a':
+ case 'a':
RtsFlags.CcFlags.doCostCentres = COST_CENTRES_ALL;
break;
default:
diff --git a/rts/StgMiscClosures.cmm b/rts/StgMiscClosures.cmm
index b4a037d5d6..26f24f6f39 100644
--- a/rts/StgMiscClosures.cmm
+++ b/rts/StgMiscClosures.cmm
@@ -36,6 +36,19 @@ INFO_TABLE_RET (stg_stack_underflow_frame, UNDERFLOW_FRAME, P_ unused)
}
/* ----------------------------------------------------------------------------
+ Restore a saved cost centre
+ ------------------------------------------------------------------------- */
+
+INFO_TABLE_RET (stg_restore_cccs, RET_SMALL, W_ cccs)
+{
+#if defined(PROFILING)
+ W_[CCCS] = Sp(1);
+#endif
+ Sp_adj(2);
+ jump %ENTRY_CODE(Sp(0));
+}
+
+/* ----------------------------------------------------------------------------
Support for the bytecode interpreter.
------------------------------------------------------------------------- */
@@ -226,9 +239,6 @@ INFO_TABLE(stg_IND_PERM,1,0,IND_PERM,"IND_PERM","IND_PERM")
LDV_ENTER(R1);
- /* Enter PAP cost centre */
- ENTER_CCS_PAP_CL(R1);
-
/* For ticky-ticky, change the perm_ind to a normal ind on first
* entry, so the number of ent_perm_inds is the number of *thunks*
* entered again, not the number of subsequent entries.
diff --git a/rts/Updates.cmm b/rts/Updates.cmm
index 789bdd5e53..0b43b9cdf1 100644
--- a/rts/Updates.cmm
+++ b/rts/Updates.cmm
@@ -41,7 +41,7 @@ INFO_TABLE_RET( stg_upd_frame, UPDATE_FRAME, UPD_FRAME_PARAMS)
W_ updatee;
updatee = StgUpdateFrame_updatee(Sp);
-
+
/* remove the update frame from the stack */
Sp = Sp + SIZEOF_StgUpdateFrame;
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 82e89a5470..1dad6c8df0 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -607,6 +607,9 @@ allocate (Capability *cap, lnat n)
bdescr *bd;
StgPtr p;
+ TICK_ALLOC_HEAP_NOCTR(n);
+ CCS_ALLOC(CCCS,n);
+
if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
lnat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
@@ -638,9 +641,6 @@ allocate (Capability *cap, lnat n)
/* small allocation (<LARGE_OBJECT_THRESHOLD) */
- TICK_ALLOC_HEAP_NOCTR(n);
- CCS_ALLOC(CCCS,n);
-
bd = cap->r.rCurrentAlloc;
if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {