summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ghc/includes/ClosureMacros.h38
-rw-r--r--ghc/includes/Closures.h8
-rw-r--r--ghc/includes/Stg.h4
-rw-r--r--ghc/includes/StgLdvProf.h132
-rw-r--r--ghc/includes/StgMacros.h33
-rw-r--r--ghc/includes/StgProf.h5
-rw-r--r--ghc/includes/StgRetainerProf.h75
-rw-r--r--ghc/includes/Updates.h6
-rw-r--r--ghc/rts/Exception.hc18
-rw-r--r--ghc/rts/GC.c89
-rw-r--r--ghc/rts/HeapStackCheck.hc3
-rw-r--r--ghc/rts/Itimer.c10
-rw-r--r--ghc/rts/Itimer.h9
-rw-r--r--ghc/rts/LdvProfile.c857
-rw-r--r--ghc/rts/LdvProfile.h63
-rw-r--r--ghc/rts/PrimOps.hc20
-rw-r--r--ghc/rts/ProfHeap.c449
-rw-r--r--ghc/rts/ProfHeap.h9
-rw-r--r--ghc/rts/Profiling.c74
-rw-r--r--ghc/rts/Profiling.h8
-rw-r--r--ghc/rts/Proftimer.c62
-rw-r--r--ghc/rts/Proftimer.h16
-rw-r--r--ghc/rts/RetainerProfile.c2303
-rw-r--r--ghc/rts/RetainerProfile.h29
-rw-r--r--ghc/rts/RetainerSet.c587
-rw-r--r--ghc/rts/RetainerSet.h139
-rw-r--r--ghc/rts/RtsFlags.c70
-rw-r--r--ghc/rts/RtsStartup.c44
-rw-r--r--ghc/rts/Schedule.c61
-rw-r--r--ghc/rts/Schedule.h11
-rw-r--r--ghc/rts/Stats.c116
-rw-r--r--ghc/rts/Stats.h17
-rw-r--r--ghc/rts/StgMiscClosures.hc84
-rw-r--r--ghc/rts/StgStartup.hc6
-rw-r--r--ghc/rts/StgStdThunks.hc14
-rw-r--r--ghc/rts/Storage.c50
-rw-r--r--ghc/rts/Storage.h55
-rw-r--r--ghc/rts/Updates.hc18
-rw-r--r--ghc/rts/Weak.c23
39 files changed, 5198 insertions, 417 deletions
diff --git a/ghc/includes/ClosureMacros.h b/ghc/includes/ClosureMacros.h
index 0690981adf..70470e93b5 100644
--- a/ghc/includes/ClosureMacros.h
+++ b/ghc/includes/ClosureMacros.h
@@ -1,5 +1,5 @@
/* ----------------------------------------------------------------------------
- * $Id: ClosureMacros.h,v 1.32 2001/02/06 11:41:04 rrt Exp $
+ * $Id: ClosureMacros.h,v 1.33 2001/11/22 14:25:11 simonmar Exp $
*
* (c) The GHC Team, 1998-1999
*
@@ -79,8 +79,39 @@ static __inline__ StgFunPtr get_entry(const StgInfoTable *itbl) {
-------------------------------------------------------------------------- */
#ifdef PROFILING
-#define SET_PROF_HDR(c,ccs_) (c)->header.prof.ccs = ccs_
-#define SET_STATIC_PROF_HDR(ccs_) prof : { ccs : ccs_ },
+#ifdef DEBUG_RETAINER
+/*
+ For the sake of debugging, we take the safest way for the moment. Actually, this
+ is useful to check the sanity of heap before beginning retainer profiling.
+ flip is defined in RetainerProfile.c, and declared as extern in RetainerProfile.h.
+ Note: change those functions building Haskell objects from C datatypes, i.e.,
+ all rts_mk???() functions in RtsAPI.c, as well.
+ */
+extern StgWord flip;
+#define SET_PROF_HDR(c,ccs_) \
+ ((c)->header.prof.ccs = ccs_, (c)->header.prof.hp.rs = (retainerSet *)((StgWord)NULL | flip))
+#else
+/*
+ For retainer profiling only: we do not have to set (c)->header.prof.hp.rs to
+ NULL | flip (flip is defined in RetainerProfile.c) because even when flip
+ is 1, rs is invalid and will be initialized to NULL | flip later when
+ the closure *c is visited.
+ */
+/*
+#define SET_PROF_HDR(c,ccs_) \
+ ((c)->header.prof.ccs = ccs_, (c)->header.prof.hp.rs = NULL)
+ */
+/*
+ The following macro works for both retainer profiling and LDV profiling:
+ for retainer profiling, ldvTime remains 0, so rs fields are initialized to 0.
+ See the invariants on ldvTime.
+ */
+#define SET_PROF_HDR(c,ccs_) \
+ ((c)->header.prof.ccs = ccs_, \
+ LDV_recordCreate((c)))
+#endif // DEBUG_RETAINER
+#define SET_STATIC_PROF_HDR(ccs_) \
+ prof : { ccs : ccs_, hp : { rs : NULL } },
#else
#define SET_PROF_HDR(c,ccs)
#define SET_STATIC_PROF_HDR(ccs)
@@ -109,6 +140,7 @@ static __inline__ StgFunPtr get_entry(const StgInfoTable *itbl) {
#define SET_TICKY_HDR(c,stuff)
#define SET_STATIC_TICKY_HDR(stuff)
#endif
+
#define SET_HDR(c,info,ccs) \
{ \
SET_INFO(c,info); \
diff --git a/ghc/includes/Closures.h b/ghc/includes/Closures.h
index 5f0b570954..0f413b58a1 100644
--- a/ghc/includes/Closures.h
+++ b/ghc/includes/Closures.h
@@ -1,5 +1,5 @@
/* ----------------------------------------------------------------------------
- * $Id: Closures.h,v 1.28 2001/10/03 13:57:42 simonmar Exp $
+ * $Id: Closures.h,v 1.29 2001/11/22 14:25:11 simonmar Exp $
*
* (c) The GHC Team, 1998-1999
*
@@ -20,7 +20,11 @@
-------------------------------------------------------------------------- */
typedef struct {
- CostCentreStack *ccs;
+ CostCentreStack *ccs;
+ union {
+ RetainerSet *rs; // Retainer Set
+ StgWord ldvw; // Lag/Drag/Void Word
+ } hp;
} StgProfHeader;
/* -----------------------------------------------------------------------------
diff --git a/ghc/includes/Stg.h b/ghc/includes/Stg.h
index 9fc9c77317..9d302a7d27 100644
--- a/ghc/includes/Stg.h
+++ b/ghc/includes/Stg.h
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------------------
- * $Id: Stg.h,v 1.39 2001/10/27 21:44:54 sof Exp $
+ * $Id: Stg.h,v 1.40 2001/11/22 14:25:11 simonmar Exp $
*
* (c) The GHC Team, 1998-1999
*
@@ -150,6 +150,8 @@ typedef StgWord64 LW_;
/* Profiling information */
#include "StgProf.h"
+#include "StgRetainerProf.h"
+#include "StgLdvProf.h"
/* Storage format definitions */
#include "Closures.h"
diff --git a/ghc/includes/StgLdvProf.h b/ghc/includes/StgLdvProf.h
new file mode 100644
index 0000000000..7ece7316da
--- /dev/null
+++ b/ghc/includes/StgLdvProf.h
@@ -0,0 +1,132 @@
+/* -----------------------------------------------------------------------------
+ * $Id: StgLdvProf.h,v 1.1 2001/11/22 14:25:11 simonmar Exp $
+ *
+ * (c) The GHC Team, 2001
+ * Author: Sungwoo Park
+ *
+ * Lag/Drag/Void profiling.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef STGLDVPROF_H
+#define STGLDVPROF_H
+
+#ifdef PROFILING
+
+// Engine
+
+// declared in LdvProfile.c
+extern nat ldvTime;
+
+// LdvGenInfo stores the statistics for one specific census.
+typedef struct {
+  double time;    // the time, in MUT time, at which the corresponding census is made
+
+ // We employ int instead of nat, for some values may be negative temporarily,
+ // e.g., dragNew.
+
+ // computed at each census
+ int inherentlyUsed; // total size of 'inherently used' closures
+ int notUsed; // total size of 'never used' closures
+ int used; // total size of 'used at least once' closures
+
+ /*
+ voidNew and dragNew are updated when a closure is destroyed.
+ For instance, when a 'never used' closure of size s and creation time
+ t is destroyed at time u, voidNew of eras t through u - 1 is increased
+ by s.
+    Likewise, when a 'used at least once' closure of size s and last use time
+    t is destroyed at time u, dragNew of eras t + 1 through u - 1 is increased
+    by s.
+ In our implementation, voidNew and dragNew are computed indirectly: instead
+ of updating voidNew or dragNew of all intervening eras, we update that
+ of the end two eras (one is increased and the other is decreased).
+ */
+ int voidNew; // current total size of 'destroyed without being used' closures
+ int dragNew; // current total size of 'used at least once and waiting to die'
+ // closures
+
+ // computed post-mortem
+ int voidTotal; // total size of closures in 'void' state
+ // lagTotal == notUsed - voidTotal // in 'lag' state
+ int dragTotal; // total size of closures in 'drag' state
+ // useTotal == used - dragTotal // in 'use' state
+} LdvGenInfo;
+
+extern LdvGenInfo *gi;
+
+// retrieves the LDV word from closure c
+#define LDVW(c) (((StgClosure *)(c))->header.prof.hp.ldvw)
+
+/*
+ An LDV word is divided into 3 parts: state bits (LDV_STATE_MASK), creation
+ time bits (LDV_CREATE_MASK), and last use time bits (LDV_LAST_MASK).
+ */
+#if SIZEOF_VOID_P == 8
+#define LDV_SHIFT 30
+#define LDV_STATE_MASK 0x1000000000000000
+#define LDV_CREATE_MASK 0x0FFFFFFFC0000000
+#define LDV_LAST_MASK 0x000000003FFFFFFF
+#define LDV_STATE_CREATE 0x0000000000000000
+#define LDV_STATE_USE 0x1000000000000000
+#else
+#define LDV_SHIFT 15
+#define LDV_STATE_MASK 0x40000000
+#define LDV_CREATE_MASK 0x3FFF8000
+#define LDV_LAST_MASK 0x00007FFF
+#define LDV_STATE_CREATE 0x00000000
+#define LDV_STATE_USE 0x40000000
+#endif // SIZEOF_VOID_P
+
+// Stores the creation time for closure c.
+// This macro is called at the very moment of closure creation.
+//
+// NOTE: this initializes LDVW(c) to zero, which ensures that there
+// is no conflict between retainer profiling and LDV profiling,
+// because retainer profiling also expects LDVW(c) to be initialised
+// to zero.
+#define LDV_recordCreate(c) \
+ LDVW((c)) = (ldvTime << LDV_SHIFT) | LDV_STATE_CREATE
+
+// Stores the last use time for closure c.
+// This macro *must* be called whenever a closure is used, that is, it is
+// entered.
+#define LDV_recordUse(c) \
+ { \
+ if (ldvTime > 0) \
+ LDVW((c)) = (LDVW((c)) & LDV_CREATE_MASK) | \
+ ldvTime | \
+ LDV_STATE_USE; \
+ }
+
+// Creates a 0-filled slop of size 'howManyBackwards' backwards from the
+// address 'from'.
+//
+// Invoked when:
+// 1) Hp is incremented and exceeds HpLim (in Updates.hc).
+// 2) copypart() is called (in GC.c).
+#define FILL_SLOP(from, howManyBackwards) \
+ if (ldvTime > 0) { \
+ int i; \
+ for (i = 0;i < (howManyBackwards); i++) \
+ ((StgWord *)(from))[-i] = 0; \
+ }
+
+// Informs the LDV profiler that closure c has just been evacuated.
+// Evacuated objects are no longer needed, so we just store its original size in
+// the LDV field.
+#define SET_EVACUAEE_FOR_LDV(c, size) \
+ LDVW((c)) = (size)
+
+// Macros called when a closure is entered.
+// The closure is not an 'inherently used' one.
+// The closure is not IND or IND_OLDGEN because neither is considered for LDV
+// profiling.
+#define LDV_ENTER(c) LDV_recordUse((c))
+
+#else // !PROFILING
+
+#define LDV_ENTER(c)
+
+#endif // PROFILING
+#endif // STGLDVPROF_H
diff --git a/ghc/includes/StgMacros.h b/ghc/includes/StgMacros.h
index da3a42548b..6bd5887008 100644
--- a/ghc/includes/StgMacros.h
+++ b/ghc/includes/StgMacros.h
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------------------
- * $Id: StgMacros.h,v 1.41 2001/11/08 16:37:54 simonmar Exp $
+ * $Id: StgMacros.h,v 1.42 2001/11/22 14:25:11 simonmar Exp $
*
* (c) The GHC Team, 1998-1999
*
@@ -144,7 +144,7 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; }
tag_assts \
(r) = (P_)ret; \
JMP_(stg_chk_##layout); \
- }
+ }
#define HP_STK_CHK(stk_headroom,hp_headroom,ret,r,layout,tag_assts) \
DO_GRAN_ALLOCATE(hp_headroom) \
@@ -153,7 +153,7 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; }
tag_assts \
(r) = (P_)ret; \
JMP_(stg_chk_##layout); \
- }
+ }
/* -----------------------------------------------------------------------------
A Heap Check in a case alternative are much simpler: everything is
@@ -186,7 +186,7 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; }
HpAlloc = (headroom); \
tag_assts \
JMP_(stg_gc_enter_##ptrs); \
- }
+ }
#define HP_CHK_SEQ_NP(headroom,ptrs,tag_assts) \
DO_GRAN_ALLOCATE(headroom) \
@@ -194,7 +194,7 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; }
HpAlloc = (headroom); \
tag_assts \
JMP_(stg_gc_seq_##ptrs); \
- }
+ }
#define HP_STK_CHK_NP(stk_headroom, hp_headroom, ptrs, tag_assts) \
DO_GRAN_ALLOCATE(hp_headroom) \
@@ -202,7 +202,7 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; }
HpAlloc = (hp_headroom); \
tag_assts \
JMP_(stg_gc_enter_##ptrs); \
- }
+ }
/* Heap checks for branches of a primitive case / unboxed tuple return */
@@ -214,7 +214,7 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; }
HpAlloc = (headroom); \
tag_assts \
JMP_(lbl); \
- }
+ }
#define HP_CHK_NOREGS(headroom,tag_assts) \
GEN_HP_CHK_ALT(headroom,stg_gc_noregs,tag_assts);
@@ -298,7 +298,7 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; }
R9.w = (W_)LIVENESS_MASK(liveness); \
R10.w = (W_)reentry; \
JMP_(stg_gen_chk); \
- }
+ }
#define HP_CHK_GEN_TICKY(headroom,liveness,reentry,tag_assts) \
HP_CHK_GEN(headroom,liveness,reentry,tag_assts); \
@@ -435,12 +435,29 @@ EXTINFO_RTS(stg_gen_chk_info);
} \
SET_INFO(R1.cl,&stg_BLACKHOLE_info)
# else
+# ifndef PROFILING
# define UPD_BH_UPDATABLE(info) \
TICK_UPD_BH_UPDATABLE(); \
SET_INFO(R1.cl,&stg_BLACKHOLE_info)
# define UPD_BH_SINGLE_ENTRY(info) \
TICK_UPD_BH_SINGLE_ENTRY(); \
SET_INFO(R1.cl,&stg_SE_BLACKHOLE_info)
+# else
+// An object is replaced by a blackhole, so we fill the slop with zeros.
+//
+// Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
+//
+# define UPD_BH_UPDATABLE(info) \
+ TICK_UPD_BH_UPDATABLE(); \
+ LDV_recordDead_FILL_SLOP_DYNAMIC(R1.cl); \
+ SET_INFO(R1.cl,&stg_BLACKHOLE_info); \
+ LDV_recordCreate(R1.cl)
+# define UPD_BH_SINGLE_ENTRY(info) \
+ TICK_UPD_BH_SINGLE_ENTRY(); \
+ LDV_recordDead_FILL_SLOP_DYNAMIC(R1.cl); \
+ SET_INFO(R1.cl,&stg_SE_BLACKHOLE_info) \
+ LDV_recordCreate(R1.cl)
+# endif /* PROFILING */
# endif
#else /* !EAGER_BLACKHOLING */
# define UPD_BH_UPDATABLE(thunk) /* nothing */
diff --git a/ghc/includes/StgProf.h b/ghc/includes/StgProf.h
index 2c89d94a0e..825c8461f1 100644
--- a/ghc/includes/StgProf.h
+++ b/ghc/includes/StgProf.h
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------------------
- * $Id: StgProf.h,v 1.13 2001/10/18 13:46:47 simonmar Exp $
+ * $Id: StgProf.h,v 1.14 2001/11/22 14:25:11 simonmar Exp $
*
* (c) The GHC Team, 1998
*
@@ -349,9 +349,6 @@ extern CostCentreStack *CCS_LIST; /* registered CCS list */
#define ENTER_CCS_PAP_CL(closure) \
ENTER_CCS_PAP((closure)->header.prof.ccs)
- /* temp EW */
-#define STATIC_CCS_REF(ccs) (ccs)
-
/* -----------------------------------------------------------------------------
When not profiling, these macros do nothing...
-------------------------------------------------------------------------- */
diff --git a/ghc/includes/StgRetainerProf.h b/ghc/includes/StgRetainerProf.h
new file mode 100644
index 0000000000..2b77772a1b
--- /dev/null
+++ b/ghc/includes/StgRetainerProf.h
@@ -0,0 +1,75 @@
+/* -----------------------------------------------------------------------------
+ * $Id: StgRetainerProf.h,v 1.1 2001/11/22 14:25:12 simonmar Exp $
+ *
+ * (c) The GHC Team, 2001
+ *
+ * Retainer profiling
+ * ---------------------------------------------------------------------------*/
+
+#ifndef STGRETAINERPROF_H
+#define STGRETAINERPROF_H
+
+/*
+ Type 'retainer' defines the retainer identity.
+
+ Invariant:
+ 1. The retainer identity of a given retainer cannot change during
+ program execution, no matter where it is actually stored.
+ For instance, the memory address of a retainer cannot be used as
+ its retainer identity because its location may change during garbage
+ collections.
+  2. Type 'retainer' must come with comparison operations as well as
+     an equality operation. That is, <, >, and == must be supported -
+     this is necessary to store retainers in a sorted order in retainer sets.
+ Therefore, you cannot use a huge structure type as 'retainer', for instance.
+
+ We illustrate three possibilities of defining 'retainer identity'.
+ Choose one of the following three compiler directives:
+
+ Retainer scheme 1 (RETAINER_SCHEME_INFO) : retainer = info table
+ Retainer scheme 2 (RETAINER_SCHEME_CCS) : retainer = cost centre stack
+ Retainer scheme 3 (RETAINER_SCHEME_CC) : retainer = cost centre
+*/
+
+// #define RETAINER_SCHEME_INFO
+#define RETAINER_SCHEME_CCS
+// #define RETAINER_SCHEME_CC
+
+#ifdef RETAINER_SCHEME_INFO
+struct _StgInfoTable;
+typedef struct _StgInfoTable *retainer;
+#endif
+
+#ifdef RETAINER_SCHEME_CCS
+typedef CostCentreStack *retainer;
+#endif
+
+#ifdef RETAINER_SCHEME_CC
+typedef CostCentre *retainer;
+#endif
+
+/*
+ Type 'retainerSet' defines an abstract datatype for sets of retainers.
+
+ Invariants:
+ A retainer set stores its elements in increasing order (in element[] array).
+ */
+
+typedef struct _RetainerSet {
+ nat num; // number of elements
+ nat cost; // cost associated with this retainer set
+ StgWord hashKey; // hash key for this retainer set
+ struct _RetainerSet *link; // link to the next retainer set in the bucket
+ int id; // unique id of this retainer set (used when printing)
+                             // Its absolute value is interpreted as its true id; if id is
+                             // negative, it indicates that this retainer set has had a positive
+                             // cost after some retainer profiling.
+ retainer element[0]; // elements of this retainer set
+ // do not put anything below here!
+} RetainerSet;
+
+//
+// retainerSet - interface: see rts/RetainerSet.h
+//
+
+#endif /* STGRETAINERPROF_H */
diff --git a/ghc/includes/Updates.h b/ghc/includes/Updates.h
index d20332433f..3b2461b39b 100644
--- a/ghc/includes/Updates.h
+++ b/ghc/includes/Updates.h
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------------------
- * $Id: Updates.h,v 1.25 2001/11/08 12:46:31 simonmar Exp $
+ * $Id: Updates.h,v 1.26 2001/11/22 14:25:12 simonmar Exp $
*
* (c) The GHC Team, 1998-1999
*
@@ -178,7 +178,9 @@ extern void awakenBlockedQueue(StgTSO *q);
------------------------------------------------------------------------- */
#if defined(PROFILING)
-#define PUSH_STD_CCCS(frame) frame->header.prof.ccs = CCCS
+// frame->header.prof.hp.rs = NULL (or frame-header.prof.hp.ldvw = 0) is unnecessary
+// because it is not used anyhow.
+#define PUSH_STD_CCCS(frame) (frame->header.prof.ccs = CCCS)
#else
#define PUSH_STD_CCCS(frame)
#endif
diff --git a/ghc/rts/Exception.hc b/ghc/rts/Exception.hc
index f7b58878ce..8cb24e9b75 100644
--- a/ghc/rts/Exception.hc
+++ b/ghc/rts/Exception.hc
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------------------
- * $Id: Exception.hc,v 1.21 2001/08/17 14:44:54 simonmar Exp $
+ * $Id: Exception.hc,v 1.22 2001/11/22 14:25:12 simonmar Exp $
*
* (c) The GHC Team, 1998-2000
*
@@ -260,8 +260,8 @@ CATCH_FRAME_ENTRY_TEMPLATE(stg_catch_frame_5_entry,RET_VEC(Sp[SP_OFF],5));
CATCH_FRAME_ENTRY_TEMPLATE(stg_catch_frame_6_entry,RET_VEC(Sp[SP_OFF],6));
CATCH_FRAME_ENTRY_TEMPLATE(stg_catch_frame_7_entry,RET_VEC(Sp[SP_OFF],7));
-#ifdef PROFILING
-#define CATCH_FRAME_BITMAP 7
+#if defined(PROFILING)
+#define CATCH_FRAME_BITMAP 15
#else
#define CATCH_FRAME_BITMAP 3
#endif
@@ -355,7 +355,7 @@ FN_(raisezh_fast)
* the info was only displayed for an *uncaught* exception.
*/
if (RtsFlags.ProfFlags.showCCSOnException) {
- STGCALL2(print_ccs,stderr,CCCS);
+ STGCALL2(fprintCCS,stderr,CCCS);
}
#endif
@@ -365,8 +365,18 @@ FN_(raisezh_fast)
      * is the exception raise.  It is used to overwrite all the
      * thunks which are currently under evaluation.
      */
+ /*
+ // @LDV profiling
+    // stg_raise_info has THUNK as its closure type. Since a THUNK takes at least
+    // MIN_UPD_SIZE words in its payload, MIN_UPD_SIZE is more appropriate than 1.
+    // It seems that 1 does not cause any problem unless profiling is performed.
+ // However, when LDV profiling goes on, we need to linearly scan small object pool,
+ // where raise_closure is stored, so we should use MIN_UPD_SIZE.
raise_closure = (StgClosure *)RET_STGCALL1(P_,allocate,
sizeofW(StgClosure)+1);
+ */
+ raise_closure = (StgClosure *)RET_STGCALL1(P_,allocate,
+ sizeofW(StgClosure)+MIN_UPD_SIZE);
SET_HDR(raise_closure, &stg_raise_info, CCCS);
raise_closure->payload[0] = R1.cl;
diff --git a/ghc/rts/GC.c b/ghc/rts/GC.c
index 3ecde2b5eb..8a9e2ace2a 100644
--- a/ghc/rts/GC.c
+++ b/ghc/rts/GC.c
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------------------
- * $Id: GC.c,v 1.126 2001/11/08 12:46:31 simonmar Exp $
+ * $Id: GC.c,v 1.127 2001/11/22 14:25:12 simonmar Exp $
*
* (c) The GHC Team 1998-1999
*
@@ -42,6 +42,9 @@
#include "FrontPanel.h"
#endif
+#include "RetainerProfile.h"
+#include "LdvProfile.h"
+
/* STATIC OBJECT LIST.
*
* During GC:
@@ -602,6 +605,13 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc )
}
}
+#ifdef PROFILING
+ // We call processHeapClosureForDead() on every closure destroyed during
+ // the current garbage collection, so we invoke LdvCensusForDead().
+ if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV)
+ LdvCensusForDead(N);
+#endif
+
// NO MORE EVACUATION AFTER THIS POINT!
// Finally: compaction of the oldest generation.
if (major_gc && oldest_gen->steps[0].is_compacted) {
@@ -933,6 +943,12 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc )
if (major_gc) { gcCAFs(); }
#endif
+#ifdef PROFILING
+ // resetStaticObjectForRetainerProfiling() must be called before
+ // zeroing below.
+ resetStaticObjectForRetainerProfiling();
+#endif
+
// zero the scavenged static object list
if (major_gc) {
zero_static_object_list(scavenged_static_objects);
@@ -963,7 +979,6 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc )
// restore enclosing cost centre
#ifdef PROFILING
- heapCensus();
CCCS = prev_CCS;
#endif
@@ -1271,6 +1286,10 @@ static __inline__ StgClosure *
copy(StgClosure *src, nat size, step *stp)
{
P_ to, from, dest;
+#ifdef PROFILING
+ // @LDV profiling
+ nat size_org = size;
+#endif
TICK_GC_WORDS_COPIED(size);
/* Find out where we're going, using the handy "to" pointer in
@@ -1300,6 +1319,12 @@ copy(StgClosure *src, nat size, step *stp)
dest = stp->hp;
stp->hp = to;
upd_evacuee(src,(StgClosure *)dest);
+#ifdef PROFILING
+ // @LDV profiling
+ // We store the size of the just evacuated object in the LDV word so that
+ // the profiler can guess the position of the next object later.
+ SET_EVACUAEE_FOR_LDV(src, size_org);
+#endif
return (StgClosure *)dest;
}
@@ -1309,10 +1334,14 @@ copy(StgClosure *src, nat size, step *stp)
*/
-static __inline__ StgClosure *
+static StgClosure *
copyPart(StgClosure *src, nat size_to_reserve, nat size_to_copy, step *stp)
{
P_ dest, to, from;
+#ifdef PROFILING
+ // @LDV profiling
+ nat size_to_copy_org = size_to_copy;
+#endif
TICK_GC_WORDS_COPIED(size_to_copy);
if (stp->gen_no < evac_gen) {
@@ -1334,6 +1363,17 @@ copyPart(StgClosure *src, nat size_to_reserve, nat size_to_copy, step *stp)
dest = stp->hp;
stp->hp += size_to_reserve;
upd_evacuee(src,(StgClosure *)dest);
+#ifdef PROFILING
+ // @LDV profiling
+ // We store the size of the just evacuated object in the LDV word so that
+ // the profiler can guess the position of the next object later.
+ // size_to_copy_org is wrong because the closure already occupies size_to_reserve
+ // words.
+ SET_EVACUAEE_FOR_LDV(src, size_to_reserve);
+ // fill the slop
+ if (size_to_reserve - size_to_copy_org > 0)
+ FILL_SLOP(stp->hp - 1, (int)(size_to_reserve - size_to_copy_org));
+#endif
return (StgClosure *)dest;
}
@@ -2162,9 +2202,23 @@ scavenge(step *stp)
}
case IND_PERM:
- if (stp->gen_no != 0) {
- SET_INFO(((StgClosure *)p), &stg_IND_OLDGEN_PERM_info);
- }
+ if (stp->gen->no != 0) {
+#ifdef PROFILING
+ // @LDV profiling
+ // No need to call LDV_recordDead_FILL_SLOP_DYNAMIC() because an
+ // IND_OLDGEN_PERM closure is larger than an IND_PERM closure.
+ LDV_recordDead((StgClosure *)p, sizeofW(StgInd));
+#endif
+ //
+ // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
+ //
+ SET_INFO(((StgClosure *)p), &stg_IND_OLDGEN_PERM_info);
+#ifdef PROFILING
+ // @LDV profiling
+ // We pretend that p has just been created.
+ LDV_recordCreate((StgClosure *)p);
+#endif
+ }
// fall through
case IND_OLDGEN_PERM:
((StgIndOldGen *)p)->indirectee =
@@ -3590,7 +3644,17 @@ threadLazyBlackHole(StgTSO *tso)
#if (!defined(LAZY_BLACKHOLING)) && defined(DEBUG)
belch("Unexpected lazy BHing required at 0x%04x",(int)bh);
#endif
+#ifdef PROFILING
+ // @LDV profiling
+ // We pretend that bh is now dead.
+ LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)bh);
+#endif
SET_INFO(bh,&stg_BLACKHOLE_info);
+#ifdef PROFILING
+ // @LDV profiling
+ // We pretend that bh has just been created.
+ LDV_recordCreate(bh);
+#endif
}
update_frame = update_frame->link;
@@ -3832,7 +3896,20 @@ threadSqueezeStack(StgTSO *tso)
}
}
#endif
+#ifdef PROFILING
+ // @LDV profiling
+ // We pretend that bh is now dead.
+ LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)bh);
+#endif
+ //
+ // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
+ //
SET_INFO(bh,&stg_BLACKHOLE_info);
+#ifdef PROFILING
+ // @LDV profiling
+ // We pretend that bh has just been created.
+ LDV_recordCreate(bh);
+#endif
}
}
diff --git a/ghc/rts/HeapStackCheck.hc b/ghc/rts/HeapStackCheck.hc
index 5fa5f100e8..52a998537b 100644
--- a/ghc/rts/HeapStackCheck.hc
+++ b/ghc/rts/HeapStackCheck.hc
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------------------
- * $Id: HeapStackCheck.hc,v 1.18 2001/11/08 12:46:31 simonmar Exp $
+ * $Id: HeapStackCheck.hc,v 1.19 2001/11/22 14:25:12 simonmar Exp $
*
* (c) The GHC Team, 1998-1999
*
@@ -47,7 +47,6 @@
* ThreadRunGHC thread.
*/
-
#define GC_GENERIC \
if (Hp > HpLim) { \
Hp -= HpAlloc; \
diff --git a/ghc/rts/Itimer.c b/ghc/rts/Itimer.c
index 2ec3ea97c8..d1821f63d0 100644
--- a/ghc/rts/Itimer.c
+++ b/ghc/rts/Itimer.c
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------------------
- * $Id: Itimer.c,v 1.25 2001/11/21 20:55:10 sof Exp $
+ * $Id: Itimer.c,v 1.26 2001/11/22 14:25:12 simonmar Exp $
*
* (c) The GHC Team, 1995-1999
*
@@ -142,6 +142,8 @@ initialize_virtual_timer(nat ms)
}
}
+ initProfTimer();
+
return 0;
}
@@ -158,6 +160,10 @@ initialize_virtual_timer(nat ms)
timestamp = getourtimeofday();
+#ifdef PROFILING
+ initProfTimer();
+#endif
+
it.it_value.tv_sec = ms / 1000;
it.it_value.tv_usec = 1000 * (ms - (1000 * it.it_value.tv_sec));
it.it_interval = it.it_value;
@@ -178,6 +184,8 @@ initialize_virtual_timer(nat ms)
timestamp = getourtimeofday();
+ initProfTimer();
+
se.sigev_notify = SIGEV_SIGNAL;
se.sigev_signo = SIGVTALRM;
se.sigev_value.sival_int = SIGVTALRM;
diff --git a/ghc/rts/Itimer.h b/ghc/rts/Itimer.h
index f3a185ad18..9de549cddd 100644
--- a/ghc/rts/Itimer.h
+++ b/ghc/rts/Itimer.h
@@ -1,7 +1,7 @@
/* -----------------------------------------------------------------------------
- * $Id: Itimer.h,v 1.8 2001/11/21 20:55:10 sof Exp $
+ * $Id: Itimer.h,v 1.9 2001/11/22 14:25:12 simonmar Exp $
*
- * (c) The GHC Team 1998-1999
+ * (c) The GHC Team 1998-2001
*
* Interval timer for profiling and pre-emptive scheduling.
*
@@ -15,11 +15,6 @@
*/
#define CS_MIN_MILLISECS TICK_MILLISECS /* milliseconds per slice */
-extern rtsBool do_prof_ticks; /* profiling ticks on/off */
-
-/* Total number of ticks since startup */
-extern lnat total_ticks;
-
int initialize_virtual_timer ( nat ms );
int install_vtalrm_handler ( void );
void block_vtalrm_signal ( void );
diff --git a/ghc/rts/LdvProfile.c b/ghc/rts/LdvProfile.c
new file mode 100644
index 0000000000..59a758ff6b
--- /dev/null
+++ b/ghc/rts/LdvProfile.c
@@ -0,0 +1,857 @@
+/* -----------------------------------------------------------------------------
+ * $Id: LdvProfile.c,v 1.1 2001/11/22 14:25:12 simonmar Exp $
+ *
+ * (c) The GHC Team, 2001
+ * Author: Sungwoo Park
+ *
+ * Lag/Drag/Void profiling.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifdef PROFILING
+
+#include "Stg.h"
+#include "Rts.h"
+#include "LdvProfile.h"
+#include "RtsFlags.h"
+#include "Itimer.h"
+#include "Proftimer.h"
+#include "Profiling.h"
+#include "Stats.h"
+#include "Storage.h"
+#include "RtsUtils.h"
+#include "Schedule.h"
+
+/*
+ ldvTime stores the current LDV time, that is, the current era. It
+ is one larger than the number of times LDV profiling has been
+ performed, i.e.,
+ ldvTime - 1 == the number of times LDV profiling was executed
+ == the number of censuses made so far.
+ RESTRICTION:
+ ldvTime must be no longer than LDV_SHIFT (15 or 30) bits.
+ Invariants:
+ LDV profiling is turned off if ldvTime is 0.
+ LDV profiling is turned on if ldvTime is > 0.
+ ldvTime is initialized to 1 in initLdvProfiling().
+ If LDV profiling is not performed, ldvTime must remain 0 (e.g., when we
+ are doing retainer profiling).
+ ldvTime is set to 1 in initLdvProfiling().
+ ldvTime is set back to 0 in shutdownHaskell().
+ In the meanwhile, ldvTime increments.
+*/
+nat ldvTime = 0;
+#
+// ldvTimeSave is set in LdvCensusKillAll(), and stores the final number of
+// times that LDV profiling was performed.
+static nat ldvTimeSave;
+
+// gi[] stores the statistics obtained at each heap census.
+// gi[0] is not used. See initLdvProfiling().
+LdvGenInfo *gi;
+
+#define giINCREMENT 32 // allocation unit for gi[]
+static nat giLength; // current length of gi[]
+
+// giMax is initialized to 2^LDV_SHIFT in initLdvProfiling().
+// When ldvTime reaches giMax, the profiling stops because a closure can
+// store only up to (giMax - 1) as its creation or last use time.
+static nat giMax;
+
+/* --------------------------------------------------------------------------
+ * Fills in the slop when a *dynamic* closure changes its type.
+ * First calls LDV_recordDead() to declare the closure is dead, and then
+ * fills in the slop.
+ *
+ * Invoked when:
+ * 1) blackholing, UPD_BH_UPDATABLE() and UPD_BH_SINGLE_ENTRY (in
+ * includes/StgMacros.h), threadLazyBlackHole() and
+ * threadSqueezeStack() (in GC.c).
+ * 2) updating with indirection closures, updateWithIndirection()
+ * and updateWithPermIndirection() (in Storage.h).
+ *
+ * LDV_recordDead_FILL_SLOP_DYNAMIC() is not called on 'inherently used'
+ * closures such as TSO. It is not called on PAP because PAP is not updatable.
+ * ----------------------------------------------------------------------- */
+void
+LDV_recordDead_FILL_SLOP_DYNAMIC( StgClosure *p )
+{
+ if (ldvTime > 0) {
+ StgInfoTable *inf = get_itbl((p));
+ nat nw, i;
+ switch (inf->type) {
+ case THUNK_1_0:
+ case THUNK_0_1:
+ case THUNK_2_0:
+ case THUNK_1_1:
+ case THUNK_0_2:
+ case THUNK_SELECTOR:
+ nw = MIN_UPD_SIZE;
+ break;
+ case THUNK:
+ nw = inf->layout.payload.ptrs + inf->layout.payload.nptrs;
+ if (nw < MIN_UPD_SIZE)
+ nw = MIN_UPD_SIZE;
+ break;
+ case AP_UPD:
+ nw = sizeofW(StgPAP) - sizeofW(StgHeader) + ((StgPAP *)p)->n_args;
+ break;
+ case CAF_BLACKHOLE:
+ case BLACKHOLE:
+ case SE_BLACKHOLE:
+ case SE_CAF_BLACKHOLE:
+ nw = inf->layout.payload.ptrs + inf->layout.payload.nptrs;
+ break;
+ default:
+ barf("Unexpected closure type %u in LDV_recordDead_FILL_SLOP_DYNAMIC()", inf->type);
+ break;
+ }
+ LDV_recordDead((StgClosure *)(p), nw + sizeofW(StgHeader));
+ for (i = 0; i < nw; i++) {
+ ((StgClosure *)(p))->payload[i] = 0;
+ }
+ }
+}
+
+/* --------------------------------------------------------------------------
+ * Initialize gi[ldvTime].
+ * ----------------------------------------------------------------------- */
+static inline void
+giInitForCurrentEra(void)
+{
+ gi[ldvTime].notUsed = 0;
+ gi[ldvTime].inherentlyUsed = 0;
+ gi[ldvTime].used = 0;
+
+ gi[ldvTime].voidNew = 0;
+ gi[ldvTime].dragNew = 0;
+}
+
+/* --------------------------------------------------------------------------
+ * Increases ldvTime by 1 and initialize gi[ldvTime].
+ * Reallocates gi[] and increases its size if needed.
+ * ----------------------------------------------------------------------- */
+static void
+incrementLdvTime( void )
+{
+ ldvTime++;
+
+ if (ldvTime == giMax) {
+ fprintf(stderr,
+ "Lag/Drag/Void profiling limit %u reached. "
+ "Please increase the profiling interval with -L option.\n",
+ giLength);
+ barf("Current profiling interval = %f seconds",
+ (float)RtsFlags.ProfFlags.profileInterval / 1000.0 );
+ }
+
+ if (ldvTime % giINCREMENT == 0) {
+ gi = stgReallocBytes(gi, sizeof(LdvGenInfo) * (giLength + giINCREMENT),
+ "incrementLdvTime");
+ giLength += giINCREMENT;
+ }
+
+ // What a stupid bug I struggled against for such a long time! I
+ // placed giInitForCurrentEra() before the above reallocation part,
+ // and it cost me three hours!
+ giInitForCurrentEra();
+}
+
+/* --------------------------------------------------------------------------
+ * Initialization code for LDV profiling.
+ * ----------------------------------------------------------------------- */
+void
+initLdvProfiling( void )
+{
+ nat p;
+
+ gi = stgMallocBytes(sizeof(LdvGenInfo) * giINCREMENT, "initLdvProfiling");
+ giLength = giINCREMENT;
+
+ ldvTime = 1; // turn on LDV profiling.
+ giInitForCurrentEra();
+
+ // giMax = 2^LDV_SHIFT
+ giMax = 1;
+ for (p = 0; p < LDV_SHIFT; p++)
+ giMax *= 2;
+}
+
+/* --------------------------------------------------------------------------
+ * This function must be called before f-closing prof_file.
+ * Still hp_file is open; see endHeapProfiling() in ProfHeap.c.
+ * ----------------------------------------------------------------------- */
+void
+endLdvProfiling( void )
+{
+ nat t;
+ int sumVoidNew, sumDragNew;
+
+ // Now we compute voidTotal and dragTotal of each LdvGenInfo structure.
+ sumVoidNew = 0;
+ sumDragNew = 0;
+ for (t = 0; t < ldvTimeSave; t++) {
+ sumVoidNew += gi[t].voidNew;
+ sumDragNew += gi[t].dragNew;
+ gi[t].voidTotal = sumVoidNew;
+ gi[t].dragTotal = sumDragNew;
+ }
+
+ // t = 0 is wrong (because ldvTime == 0 indicates LDV profiling is
+ // turned off).
+ for (t = 1;t < ldvTimeSave; t++) {
+ fprintf(hp_file, "MARK %f\n", gi[t].time);
+ fprintf(hp_file, "BEGIN_SAMPLE %f\n", gi[t].time);
+ fprintf(hp_file, "VOID\t%u\n", gi[t].voidTotal * sizeof(StgWord));
+ fprintf(hp_file, "LAG\t%u\n", (gi[t].notUsed - gi[t].voidTotal) * sizeof(StgWord));
+ fprintf(hp_file, "USE\t%u\n", (gi[t].used - gi[t].dragTotal) * sizeof(StgWord));
+ fprintf(hp_file, "INHERENT_USE\t%u\n", gi[t].inherentlyUsed * sizeof(StgWord));
+ fprintf(hp_file, "DRAG\t%u\n", gi[t].dragTotal * sizeof(StgWord));
+ fprintf(hp_file, "END_SAMPLE %f\n", gi[t].time);
+ }
+}
+
+/* --------------------------------------------------------------------------
+ * Print the statistics.
+ * This function is called after each retainer profiling.
+ * ----------------------------------------------------------------------- */
+static void
+outputLdvSet( void )
+{
+}
+
+/* --------------------------------------------------------------------------
+ * This function is eventually called on every object in the heap
+ * during a census. Any census is initiated immediately after a major
+ * garbage collection, and we exploit this fact in the implementation.
+ * If c is an 'inherently used' closure, gi[ldvTime].inherentlyUsed is
+ * updated. If c is an ordinary closure, either gi[ldvTime].notUsed or
+ * gi[ldvTime].used is updated.
+ * ----------------------------------------------------------------------- */
+static inline nat
+processHeapClosure(StgClosure *c)
+{
+ nat size;
+ StgInfoTable *info;
+
+ info = get_itbl(c);
+
+ ASSERT(
+ ((LDVW(c) & LDV_CREATE_MASK) >> LDV_SHIFT) <= ldvTime &&
+ ((LDVW(c) & LDV_CREATE_MASK) >> LDV_SHIFT) > 0
+ );
+ ASSERT(
+ ((LDVW(c) & LDV_STATE_MASK) == LDV_STATE_CREATE) ||
+ (
+ (LDVW(c) & LDV_LAST_MASK) <= ldvTime &&
+ (LDVW(c) & LDV_LAST_MASK) > 0
+ )
+ );
+
+ switch (info->type) {
+ /*
+ 'inherently used' cases: add to gi[ldvTime].inherentlyUsed
+ */
+
+ case TSO:
+ size = tso_sizeW((StgTSO *)c);
+ goto inherently_used;
+
+ case MVAR:
+ size = sizeofW(StgMVar);
+ goto inherently_used;
+
+ case MUT_ARR_PTRS:
+ case MUT_ARR_PTRS_FROZEN:
+ size = mut_arr_ptrs_sizeW((StgMutArrPtrs *)c);
+ goto inherently_used;
+
+ case ARR_WORDS:
+ size = arr_words_sizeW((StgArrWords *)c);
+ goto inherently_used;
+
+ case WEAK:
+ case MUT_VAR:
+ case MUT_CONS:
+ case FOREIGN:
+ case BCO:
+ case STABLE_NAME:
+ size = sizeW_fromITBL(info);
+ goto inherently_used;
+
+ /*
+ ordinary cases: add to gi[ldvTime].notUsed if c is not being used.
+ add to gi[ldvTime].used if c is being used.
+ */
+ case THUNK:
+ size = stg_max(sizeW_fromITBL(info), sizeofW(StgHeader) + MIN_UPD_SIZE);
+ break;
+
+ case THUNK_1_0:
+ case THUNK_0_1:
+ case THUNK_2_0:
+ case THUNK_1_1:
+ case THUNK_0_2:
+ case THUNK_SELECTOR:
+ size = sizeofW(StgHeader) + MIN_UPD_SIZE;
+ break;
+
+ case AP_UPD:
+ case PAP:
+ size = pap_sizeW((StgPAP *)c);
+ break;
+
+ case CONSTR:
+ case CONSTR_1_0:
+ case CONSTR_0_1:
+ case CONSTR_2_0:
+ case CONSTR_1_1:
+ case CONSTR_0_2:
+
+ case FUN:
+ case FUN_1_0:
+ case FUN_0_1:
+ case FUN_2_0:
+ case FUN_1_1:
+ case FUN_0_2:
+
+ case BLACKHOLE_BQ:
+ case BLACKHOLE:
+ case SE_BLACKHOLE:
+ case CAF_BLACKHOLE:
+ case SE_CAF_BLACKHOLE:
+ size = sizeW_fromITBL(info);
+ break;
+
+ case IND_PERM:
+ size = sizeofW(StgInd);
+ break;
+
+ case IND_OLDGEN_PERM:
+ size = sizeofW(StgIndOldGen);
+ break;
+
+ /*
+ Error case
+ */
+ case IND: // IND cannot appear after major GCs.
+ case IND_OLDGEN: // IND_OLDGEN cannot appear after major GCs.
+ case EVACUATED: // EVACUATED is encountered only during GCs.
+ // static objects
+ case IND_STATIC:
+ case CONSTR_STATIC:
+ case FUN_STATIC:
+ case THUNK_STATIC:
+ case CONSTR_INTLIKE:
+ case CONSTR_CHARLIKE:
+ case CONSTR_NOCAF_STATIC:
+ // stack objects
+ case UPDATE_FRAME:
+ case CATCH_FRAME:
+ case STOP_FRAME:
+ case SEQ_FRAME:
+ case RET_DYN:
+ case RET_BCO:
+ case RET_SMALL:
+ case RET_VEC_SMALL:
+ case RET_BIG:
+ case RET_VEC_BIG:
+ // others
+ case BLOCKED_FETCH:
+ case FETCH_ME:
+ case FETCH_ME_BQ:
+ case RBH:
+ case REMOTE_REF:
+ case INVALID_OBJECT:
+ default:
+ barf("Invalid object in processHeapClosure(): %d", info->type);
+ return 0;
+ }
+
+ /*
+ ordinary cases:
+ We can compute either gi[ldvTime].notUsed or gi[ldvTime].used; the other
+ can be computed from the total sum of costs.
+ At the moment, we choose to compute gi[ldvTime].notUsed, which seems to
+ be smaller than gi[ldvTime].used.
+ */
+
+ // ignore closures that don't satisfy our constraints.
+ if (closureSatisfiesConstraints(c)) {
+ if ((LDVW(c) & LDV_STATE_MASK) == LDV_STATE_CREATE)
+ gi[ldvTime].notUsed += size - sizeofW(StgProfHeader);
+ else
+ gi[ldvTime].used += size - sizeofW(StgProfHeader);
+ }
+ return size;
+
+inherently_used:
+ // ignore closures that don't satisfy our constraints.
+ if (closureSatisfiesConstraints(c)) {
+ gi[ldvTime].inherentlyUsed += size - sizeofW(StgProfHeader);
+ }
+ return size;
+}
+
+/* --------------------------------------------------------------------------
+ * Calls processHeapClosure() on every closure in the heap blocks
+ * beginning at bd during a census.
+ * ----------------------------------------------------------------------- */
+static void
+processHeap( bdescr *bd )
+{
+ StgPtr p;
+ nat size;
+
+ while (bd != NULL) {
+ p = bd->start;
+ while (p < bd->free) {
+ size = processHeapClosure((StgClosure *)p);
+ p += size;
+ while (p < bd->free && !*p) // skip slop
+ p++;
+ }
+ ASSERT(p == bd->free);
+ bd = bd->link;
+ }
+}
+
+/* --------------------------------------------------------------------------
+ * Calls processHeapClosure() on every closure in the small object pool
+ * during a census.
+ * ----------------------------------------------------------------------- */
+static void
+processSmallObjectPool( void )
+{
+ bdescr *bd;
+ StgPtr p;
+ nat size;
+
+ bd = small_alloc_list;
+
+ // first block
+ if (bd == NULL)
+ return;
+
+ p = bd->start;
+ while (p < alloc_Hp) {
+ size = processHeapClosure((StgClosure *)p);
+ p += size;
+ while (p < alloc_Hp && !*p) // skip slop
+ p++;
+ }
+ ASSERT(p == alloc_Hp);
+
+ bd = bd->link;
+ while (bd != NULL) {
+ p = bd->start;
+ while (p < bd->free) {
+ size = processHeapClosure((StgClosure *)p);
+ p += size;
+ while (p < bd->free && !*p) // skip slop
+ p++;
+ }
+ ASSERT(p == bd->free);
+ bd = bd->link;
+ }
+}
+
+/* --------------------------------------------------------------------------
+ * Calls processHeapClosure() on every (large) closure in the object
+ * chain beginning at bd during a census.
+ * ----------------------------------------------------------------------- */
+static void
+processChain( bdescr *bd )
+{
+ while (bd != NULL) {
+ // bd->free - bd->start is not an accurate measurement of the
+ // object size. Actually it is always zero, so we compute its
+ // size explicitly.
+ processHeapClosure((StgClosure *)bd->start);
+ bd = bd->link;
+ }
+}
+
+/* --------------------------------------------------------------------------
+ * Starts a census for LDV profiling.
+ * Invariants:
+ * Any call to LdvCensus() is preceded by a major garbage collection.
+ * ----------------------------------------------------------------------- */
+void
+LdvCensus( void )
+{
+ nat g, s;
+
+ // ldvTime == 0 means that LDV profiling is currently turned off.
+ if (ldvTime == 0)
+ return;
+
+ stat_startLDV();
+ //
+ // Todo: when we perform LDV profiling, the Haskell mutator time seems to
+ // be affected by -S or -s runtime option. For instance, the
+ // following two options should result in nearly same
+ // profiling outputs, but the second run (without -Sstderr
+ // option) spends almost twice as long in the Haskell
+ // mutator as the first run:
+ //
+ // 1) +RTS -Sstderr -hL -RTS
+ // 2) +RTS -hL -RTS
+ //
+ // This is quite a subtle bug because this weird phenomenon is not
+ // observed in retainer profiling, yet mut_user_time_during_LDV() is
+ // completely orthogonal to mut_user_time_during_RP(). However, the
+ // overall shapes of the resultant graphs are almost the same.
+ //
+ gi[ldvTime].time = mut_user_time_during_LDV();
+ if (RtsFlags.GcFlags.generations == 1) {
+ //
+ // Todo: support LDV for two-space garbage collection.
+ //
+ barf("Lag/Drag/Void profiling not supported with -G1");
+ } else {
+ for (g = 0; g < RtsFlags.GcFlags.generations; g++)
+ for (s = 0; s < generations[g].n_steps; s++) {
+ if (g == 0 && s == 0) {
+ // after a major GC, the nursery must be empty,
+ // and no need to call processNursery().
+ ASSERT(MainCapability.r.rNursery->start ==
+ MainCapability.r.rNursery->free);
+ processSmallObjectPool();
+ processChain(generations[g].steps[s].large_objects);
+ } else{
+ processHeap(generations[g].steps[s].blocks);
+ processChain(generations[g].steps[s].large_objects);
+ }
+ }
+ }
+ outputLdvSet(); // output to hp_file
+ stat_endLDV(); // output to prof_file
+
+ incrementLdvTime();
+}
+
+/* --------------------------------------------------------------------------
+ * This function is called eventually on every object destroyed during
+ * a garbage collection, whether it is a major garbage collection or
+ * not. If c is an 'inherently used' closure, nothing happens. If c
+ * is an ordinary closure, LDV_recordDead() is called on c with its
+ * proper size which excludes the profiling header portion in the
+ * closure. Returns the size of the closure, including the profiling
+ * header portion, so that the caller can find the next closure.
+ * ----------------------------------------------------------------------- */
+static inline nat
+processHeapClosureForDead( StgClosure *c )
+{
+ nat size;
+ StgInfoTable *info;
+
+ info = get_itbl(c);
+
+ if (info->type != EVACUATED) {
+ ASSERT(((LDVW(c) & LDV_CREATE_MASK) >> LDV_SHIFT) <= ldvTime &&
+ ((LDVW(c) & LDV_CREATE_MASK) >> LDV_SHIFT) > 0);
+ ASSERT(((LDVW(c) & LDV_STATE_MASK) == LDV_STATE_CREATE) ||
+ (
+ (LDVW(c) & LDV_LAST_MASK) <= ldvTime &&
+ (LDVW(c) & LDV_LAST_MASK) > 0
+ ));
+ }
+
+ switch (info->type) {
+ /*
+ 'inherently used' cases: do nothing.
+ */
+
+ case TSO:
+ size = tso_sizeW((StgTSO *)c);
+ return size;
+
+ case MVAR:
+ size = sizeofW(StgMVar);
+ return size;
+
+ case MUT_ARR_PTRS:
+ case MUT_ARR_PTRS_FROZEN:
+ size = mut_arr_ptrs_sizeW((StgMutArrPtrs *)c);
+ return size;
+
+ case ARR_WORDS:
+ size = arr_words_sizeW((StgArrWords *)c);
+ return size;
+
+ case WEAK:
+ case MUT_VAR:
+ case MUT_CONS:
+ case FOREIGN:
+ case BCO:
+ case STABLE_NAME:
+ size = sizeW_fromITBL(info);
+ return size;
+
+ /*
+ ordinary cases: call LDV_recordDead().
+ */
+
+ case THUNK:
+ size = stg_max(sizeW_fromITBL(info), sizeofW(StgHeader) + MIN_UPD_SIZE);
+ break;
+
+ case THUNK_1_0:
+ case THUNK_0_1:
+ case THUNK_2_0:
+ case THUNK_1_1:
+ case THUNK_0_2:
+ case THUNK_SELECTOR:
+ size = sizeofW(StgHeader) + MIN_UPD_SIZE;
+ break;
+
+ case AP_UPD:
+ case PAP:
+ size = pap_sizeW((StgPAP *)c);
+ break;
+
+ case CONSTR:
+ case CONSTR_1_0:
+ case CONSTR_0_1:
+ case CONSTR_2_0:
+ case CONSTR_1_1:
+ case CONSTR_0_2:
+
+ case FUN:
+ case FUN_1_0:
+ case FUN_0_1:
+ case FUN_2_0:
+ case FUN_1_1:
+ case FUN_0_2:
+
+ case BLACKHOLE_BQ:
+ case BLACKHOLE:
+ case SE_BLACKHOLE:
+ case CAF_BLACKHOLE:
+ case SE_CAF_BLACKHOLE:
+ size = sizeW_fromITBL(info);
+ break;
+
+ case IND_PERM:
+ size = sizeofW(StgInd);
+ break;
+
+ case IND_OLDGEN_PERM:
+ size = sizeofW(StgIndOldGen);
+ break;
+
+ /*
+ 'Ignore' cases
+ */
+ // Why can we ignore IND/IND_OLDGEN closures? We assume that
+ // any census is preceded by a major garbage collection, which
+ // IND/IND_OLDGEN closures cannot survive. Therefore, it is no
+ // use considering IND/IND_OLDGEN closures in the meanwhile
+ // because they will perish before the next census at any
+ // rate.
+ case IND:
+ size = sizeofW(StgInd);
+ return size;
+
+ case IND_OLDGEN:
+ size = sizeofW(StgIndOldGen);
+ return size;
+
+ case EVACUATED:
+ // The size of the evacuated closure is currently stored in
+ // the LDV field. See SET_EVACUAEE_FOR_LDV() in
+ // includes/StgLdvProf.h.
+ return LDVW(c);
+
+ /*
+ Error case
+ */
+ // static objects
+ case IND_STATIC:
+ case CONSTR_STATIC:
+ case FUN_STATIC:
+ case THUNK_STATIC:
+ case CONSTR_INTLIKE:
+ case CONSTR_CHARLIKE:
+ case CONSTR_NOCAF_STATIC:
+ // stack objects
+ case UPDATE_FRAME:
+ case CATCH_FRAME:
+ case STOP_FRAME:
+ case SEQ_FRAME:
+ case RET_DYN:
+ case RET_BCO:
+ case RET_SMALL:
+ case RET_VEC_SMALL:
+ case RET_BIG:
+ case RET_VEC_BIG:
+ // others
+ case BLOCKED_FETCH:
+ case FETCH_ME:
+ case FETCH_ME_BQ:
+ case RBH:
+ case REMOTE_REF:
+ case INVALID_OBJECT:
+ default:
+ barf("Invalid object in processHeapClosureForDead(): %d", info->type);
+ return 0;
+ }
+
+ // Found a dead closure: record its size
+ LDV_recordDead(c, size);
+ return size;
+}
+
+/* --------------------------------------------------------------------------
+ * Calls processHeapClosureForDead() on every *dead* closure in the
+ * heap blocks starting at bd.
+ * ----------------------------------------------------------------------- */
+static void
+processHeapForDead( bdescr *bd )
+{
+ StgPtr p;
+
+ while (bd != NULL) {
+ p = bd->start;
+ while (p < bd->free) {
+ p += processHeapClosureForDead((StgClosure *)p);
+ while (p < bd->free && !*p) // skip slop
+ p++;
+ }
+ ASSERT(p == bd->free);
+ bd = bd->link;
+ }
+}
+
+/* --------------------------------------------------------------------------
+ * Calls processHeapClosureForDead() on every *dead* closure in the nursery.
+ * ----------------------------------------------------------------------- */
+static void
+processNurseryForDead( void )
+{
+ StgPtr p, bdLimit;
+ bdescr *bd;
+
+ bd = MainCapability.r.rNursery;
+ while (bd->start < bd->free) {
+ p = bd->start;
+ bdLimit = bd->start + BLOCK_SIZE_W;
+ while (p < bd->free && p < bdLimit) {
+ p += processHeapClosureForDead((StgClosure *)p);
+ while (p < bd->free && p < bdLimit && !*p) // skip slop
+ p++;
+ }
+ bd = bd->link;
+ if (bd == NULL)
+ break;
+ }
+}
+
+/* --------------------------------------------------------------------------
+ * Calls processHeapClosureForDead() on every *dead* closure in the
+ * small object pool.
+ * ----------------------------------------------------------------------- */
+static void
+processSmallObjectPoolForDead( void )
+{
+ bdescr *bd;
+ StgPtr p;
+
+ bd = small_alloc_list;
+
+ // first block
+ if (bd == NULL)
+ return;
+
+ p = bd->start;
+ while (p < alloc_Hp) {
+ p += processHeapClosureForDead((StgClosure *)p);
+ while (p < alloc_Hp && !*p) // skip slop
+ p++;
+ }
+ ASSERT(p == alloc_Hp);
+
+ bd = bd->link;
+ while (bd != NULL) {
+ p = bd->start;
+ while (p < bd->free) {
+ p += processHeapClosureForDead((StgClosure *)p);
+ while (p < bd->free && !*p) // skip slop
+ p++;
+ }
+ ASSERT(p == bd->free);
+ bd = bd->link;
+ }
+}
+
+/* --------------------------------------------------------------------------
+ * Calls processHeapClosureForDead() on every *dead* closure in the closure
+ * chain.
+ * ----------------------------------------------------------------------- */
+static void
+processChainForDead( bdescr *bd )
+{
+ // Any object still in the chain is dead!
+ while (bd != NULL) {
+ processHeapClosureForDead((StgClosure *)bd->start);
+ bd = bd->link;
+ }
+}
+
+/* --------------------------------------------------------------------------
+ * Start a census for *dead* closures, and calls
+ * processHeapClosureForDead() on every closure which died in the
+ * current garbage collection. This function is called from a garbage
+ * collector right before tidying up, when all dead closures are still
+ * stored in the heap and easy to identify. Generations 0 through N
+ * have just been garbage collected.
+ * ----------------------------------------------------------------------- */
+void
+LdvCensusForDead( nat N )
+{
+ nat g, s;
+
+ // ldvTime == 0 means that LDV profiling is currently turned off.
+ if (ldvTime == 0)
+ return;
+
+ if (RtsFlags.GcFlags.generations == 1) {
+ //
+ // Todo: support LDV for two-space garbage collection.
+ //
+ barf("Lag/Drag/Void profiling not supported with -G1");
+ } else {
+ for (g = 0; g <= N; g++)
+ for (s = 0; s < generations[g].n_steps; s++) {
+ if (g == 0 && s == 0) {
+ processSmallObjectPoolForDead();
+ processNurseryForDead();
+ processChainForDead(generations[g].steps[s].large_objects);
+ } else{
+ processHeapForDead(generations[g].steps[s].blocks);
+ processChainForDead(generations[g].steps[s].large_objects);
+ }
+ }
+ }
+}
+
+/* --------------------------------------------------------------------------
+ * Regard any closure in the current heap as dead or moribund and update
+ * LDV statistics accordingly.
+ * Called from shutdownHaskell() in RtsStartup.c.
+ * Also, stops LDV profiling by resetting ldvTime to 0.
+ * ----------------------------------------------------------------------- */
+void
+LdvCensusKillAll( void )
+{
+ LdvCensusForDead(RtsFlags.GcFlags.generations - 1);
+
+ // record the time when LDV profiling stops.
+ ldvTimeSave = ldvTime;
+
+ // and, stops LDV profiling.
+ ldvTime = 0;
+}
+
+#endif /* PROFILING */
diff --git a/ghc/rts/LdvProfile.h b/ghc/rts/LdvProfile.h
new file mode 100644
index 0000000000..b722fbc8ae
--- /dev/null
+++ b/ghc/rts/LdvProfile.h
@@ -0,0 +1,63 @@
+/* -----------------------------------------------------------------------------
+ * $Id: LdvProfile.h,v 1.1 2001/11/22 14:25:12 simonmar Exp $
+ *
+ * (c) The GHC Team, 2001
+ * Author: Sungwoo Park
+ *
+ * Lag/Drag/Void profiling.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef LDVPROFILE_H
+#define LDVPROFILE_H
+
+#ifdef PROFILING
+
+#include "ProfHeap.h"
+
+void LDV_recordDead_FILL_SLOP_DYNAMIC( StgClosure *p );
+
+// Processes a closure 'c' being destroyed whose size is 'size'.
+// Make sure that LDV_recordDead() is not invoked on 'inherently used' closures
+// such as TSO; they should not be involved in computing dragNew or voidNew.
+//
+// Note: ldvTime is 0 if LDV profiling is turned off.
+// ldvTime is > 0 if LDV profiling is turned on.
+// size does not include StgProfHeader.
+//
+// Even though ldvTime is checked in both LdvCensusForDead() and
+// LdvCensusKillAll(), we still need to make sure that ldvTime is > 0 because
+// LDV_recordDead() may be called from elsewhere in the runtime system. E.g.,
+// when a thunk is replaced by an indirection object.
+
+static inline void
+LDV_recordDead( StgClosure *c, nat size )
+{
+ if (ldvTime > 0 && closureSatisfiesConstraints(c)) {
+ nat t;
+ size -= sizeofW(StgProfHeader);
+ if ((LDVW((c)) & LDV_STATE_MASK) == LDV_STATE_CREATE) {
+ t = (LDVW((c)) & LDV_CREATE_MASK) >> LDV_SHIFT;
+ if (t < ldvTime) {
+ gi[t].voidNew += (int)size;
+ gi[ldvTime].voidNew -= (int)size;
+ }
+ } else {
+ t = LDVW((c)) & LDV_LAST_MASK;
+ if (t + 1 < ldvTime) {
+ gi[t + 1].dragNew += size;
+ gi[ldvTime].dragNew -= size;
+ }
+ }
+ }
+}
+
+extern void initLdvProfiling ( void );
+extern void endLdvProfiling ( void );
+extern void LdvCensus ( void );
+extern void LdvCensusForDead ( nat );
+extern void LdvCensusKillAll ( void );
+
+#endif /* PROFILING */
+
+#endif /* LDVPROFILE_H */
diff --git a/ghc/rts/PrimOps.hc b/ghc/rts/PrimOps.hc
index d36c18e0af..2036768e85 100644
--- a/ghc/rts/PrimOps.hc
+++ b/ghc/rts/PrimOps.hc
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------------------
- * $Id: PrimOps.hc,v 1.84 2001/11/08 12:46:31 simonmar Exp $
+ * $Id: PrimOps.hc,v 1.85 2001/11/22 14:25:12 simonmar Exp $
*
* (c) The GHC Team, 1998-2000
*
@@ -420,7 +420,25 @@ FN_(finalizzeWeakzh_fast)
}
/* kill it */
+#ifdef PROFILING
+ // @LDV profiling
+ // A weak pointer is inherently used, so we do not need to call
+ // LDV_recordDead_FILL_SLOP_DYNAMIC():
+ // LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
+ // or, LDV_recordDead():
+ // LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
+ // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as
+ // large as weak pointers, so there is no need to fill the slop, either.
+ // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
+#endif
+ //
+ // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
+ //
w->header.info = &stg_DEAD_WEAK_info;
+#ifdef PROFILING
+ // @LDV profiling
+ LDV_recordCreate((StgClosure *)w);
+#endif
f = ((StgWeak *)w)->finalizer;
w->link = ((StgWeak *)w)->link;
diff --git a/ghc/rts/ProfHeap.c b/ghc/rts/ProfHeap.c
index 5597792d77..fc4f421012 100644
--- a/ghc/rts/ProfHeap.c
+++ b/ghc/rts/ProfHeap.c
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------------------
- * $Id: ProfHeap.c,v 1.25 2001/08/14 13:40:09 sewardj Exp $
+ * $Id: ProfHeap.c,v 1.26 2001/11/22 14:25:12 simonmar Exp $
*
* (c) The GHC Team, 1998-2000
*
@@ -25,6 +25,8 @@
#include "Stats.h"
#include "Hash.h"
#include "StrHash.h"
+#include "RetainerProfile.h"
+#include "LdvProfile.h"
#ifdef DEBUG_HEAP_PROF
#include "Printer.h"
@@ -95,7 +97,7 @@ strToCtr(const char *str)
for (; ctr != NULL; prev = ctr, ctr = ctr->next_bucket ) {
if (!strcmp(ctr->str, str)) {
insertHashTable( str_to_ctr, (W_)str, ctr );
-#ifdef DEBUG
+#ifdef DEBUG_CTR
fprintf(stderr,"strToCtr: existing ctr for `%s'\n",str);
#endif
return ctr;
@@ -109,7 +111,7 @@ strToCtr(const char *str)
ctr->next = all_ctrs;
all_ctrs = ctr;
-#ifdef DEBUG
+#ifdef DEBUG_CTR
fprintf(stderr,"strToCtr: new ctr for `%s'\n",str);
#endif
@@ -175,23 +177,17 @@ initHeapProfiling(void)
fprintf(hp_file, "JOB \"%s", prog_argv[0]);
-# ifdef PROFILING
- switch (RtsFlags.ProfFlags.doHeapProfile) {
- case HEAP_BY_CCS: fprintf(hp_file, " -h%c", CCchar); break;
- case HEAP_BY_MOD: fprintf(hp_file, " -h%c", MODchar); break;
- case HEAP_BY_DESCR: fprintf(hp_file, " -h%c", DESCRchar); break;
- case HEAP_BY_TYPE: fprintf(hp_file, " -h%c", TYPEchar); break;
- default: /* nothing */
+#ifdef PROFILING
+ {
+ int count;
+ for(count = 1; count < prog_argc; count++)
+ fprintf(hp_file, " %s", prog_argv[count]);
+ fprintf(hp_file, " +RTS ");
+ for(count = 0; count < rts_argc; count++)
+ fprintf(hp_file, "%s ", rts_argv[count]);
+ fprintf(hp_file, "\n");
}
- if (RtsFlags.ProfFlags.ccSelector)
- fprintf(hp_file, " -hc{%s}", RtsFlags.ProfFlags.ccSelector);
- if (RtsFlags.ProfFlags.modSelector)
- fprintf(hp_file, " -hm{%s}", RtsFlags.ProfFlags.modSelector);
- if (RtsFlags.ProfFlags.descrSelector)
- fprintf(hp_file, " -hd{%s}", RtsFlags.ProfFlags.descrSelector);
- if (RtsFlags.ProfFlags.typeSelector)
- fprintf(hp_file, " -hy{%s}", RtsFlags.ProfFlags.typeSelector);
-# endif /* PROFILING */
+#endif /* PROFILING */
fprintf(hp_file, "\"\n" );
@@ -224,6 +220,17 @@ endHeapProfiling(void)
return;
}
+#ifdef PROFILING
+ switch (RtsFlags.ProfFlags.doHeapProfile) {
+ case HEAP_BY_RETAINER:
+ endRetainerProfiling();
+ break;
+ case HEAP_BY_LDV:
+ endLdvProfiling();
+ break;
+ }
+#endif
+
seconds = mut_user_time();
fprintf(hp_file, "BEGIN_SAMPLE %0.2f\n", seconds);
fprintf(hp_file, "END_SAMPLE %0.2f\n", seconds);
@@ -417,24 +424,48 @@ clearCCSResid(CostCentreStack *ccs)
}
static void
-fprint_ccs(FILE *fp, CostCentreStack *ccs, nat components)
+fprint_ccs(FILE *fp, CostCentreStack *ccs, nat max_length)
{
- CostCentre *cc;
- CostCentreStack *prev;
+ char buf[max_length+1];
+ nat next_offset = 0;
+ nat written;
+ char *template;
+
+ // MAIN on its own gets printed as "MAIN", otherwise we ignore MAIN.
+ if (ccs == CCS_MAIN) {
+ fprintf(fp, "MAIN");
+ return;
+ }
- cc = ccs->cc;
- prev = ccs->prevStack;
+ // keep printing components of the stack until we run out of space
+ // in the buffer. If we run out of space, end with "...".
+ for (; ccs != NULL && ccs != CCS_MAIN; ccs = ccs->prevStack) {
- if (prev == NULL
- || prev->cc->is_caf != CC_IS_BORING
- || components == 1) {
- fprintf(fp,"%s",cc->label);
- return;
+ // CAF cost centres print as M.CAF, but we leave the module
+ // name out of all the others to save space.
+ if (!strcmp(ccs->cc->label,"CAF")) {
+ written = snprintf(buf+next_offset,
+ (int)max_length-3-(int)next_offset,
+ "%s.CAF", ccs->cc->module);
+ } else {
+ if (ccs->prevStack != NULL && ccs->prevStack != CCS_MAIN) {
+ template = "%s/";
+ } else {
+ template = "%s";
+ }
+ written = snprintf(buf+next_offset,
+ (int)max_length-3-(int)next_offset,
+ template, ccs->cc->label);
+ }
- } else {
- fprint_ccs(fp, ccs->prevStack,components-1);
- fprintf(fp,"/%s",cc->label);
- }
+ if (next_offset+written >= max_length-4) {
+ sprintf(buf+max_length-4, "...");
+ break;
+ } else {
+ next_offset += written;
+ }
+ }
+ fprintf(fp, "%s", buf);
}
static void
@@ -444,7 +475,8 @@ reportCCSResid(FILE *fp, CostCentreStack *ccs)
if (ccs->mem_resid != 0) {
fprintf(fp," ");
- fprint_ccs(fp,ccs,2/*print 2 components only*/);
+ // print as much of the CCS as possible in 20 chars, ending with "..."
+ fprint_ccs(fp,ccs,30);
fprintf(fp," %ld\n", ccs->mem_resid * sizeof(W_));
}
@@ -455,75 +487,190 @@ reportCCSResid(FILE *fp, CostCentreStack *ccs)
}
}
-static
-rtsBool str_matches_selector ( char* str, char* sel )
+static rtsBool
+str_matches_selector( char* str, char* sel )
{
char* p;
- /* fprintf(stderr, "str_matches_selector %s %s\n", str, sel); */
+ // fprintf(stderr, "str_matches_selector %s %s\n", str, sel);
while (1) {
- /* Compare str against wherever we've got to in sel. */
- p = str;
- while (*p != '\0' && *sel != ',' && *sel != '\0' && *p == *sel) {
- p++; sel++;
- }
- /* Match if all of str used and have reached the end of a sel
- fragment. */
- if (*p == '\0' && (*sel == ',' || *sel == '\0'))
- return rtsTrue;
-
- /* No match. Advance sel to the start of the next elem. */
- while (*sel != ',' && *sel != '\0') sel++;
- if (*sel == ',') sel++;
-
- /* Run out of sel ?? */
- if (*sel == '\0') return rtsFalse;
+ // Compare str against wherever we've got to in sel.
+ p = str;
+ while (*p != '\0' && *sel != ',' && *sel != '\0' && *p == *sel) {
+ p++; sel++;
+ }
+ // Match if all of str used and have reached the end of a sel fragment.
+ if (*p == '\0' && (*sel == ',' || *sel == '\0'))
+ return rtsTrue;
+
+ // No match. Advance sel to the start of the next elem.
+ while (*sel != ',' && *sel != '\0') sel++;
+ if (*sel == ',') sel++;
+
+ /* Run out of sel ?? */
+ if (*sel == '\0') return rtsFalse;
}
}
-/* Figure out whether a closure should be counted in this census, by
- testing against all the specified constraints. */
-static
-rtsBool satisfies_constraints ( StgClosure* p )
+// Figure out whether a closure should be counted in this census, by
+// testing against all the specified constraints.
+rtsBool
+closureSatisfiesConstraints( StgClosure* p )
{
rtsBool b;
if (RtsFlags.ProfFlags.modSelector) {
- b = str_matches_selector( ((StgClosure *)p)->header.prof.ccs->cc->module,
- RtsFlags.ProfFlags.modSelector );
- if (!b) return rtsFalse;
+ b = str_matches_selector( ((StgClosure *)p)->header.prof.ccs->cc->module,
+ RtsFlags.ProfFlags.modSelector );
+ if (!b) return rtsFalse;
}
if (RtsFlags.ProfFlags.descrSelector) {
- b = str_matches_selector( (get_itbl((StgClosure *)p))->prof.closure_desc,
- RtsFlags.ProfFlags.descrSelector );
- if (!b) return rtsFalse;
+ b = str_matches_selector( (get_itbl((StgClosure *)p))->prof.closure_desc,
+ RtsFlags.ProfFlags.descrSelector );
+ if (!b) return rtsFalse;
}
if (RtsFlags.ProfFlags.typeSelector) {
- b = str_matches_selector( (get_itbl((StgClosure *)p))->prof.closure_type,
+ b = str_matches_selector( (get_itbl((StgClosure *)p))->prof.closure_type,
RtsFlags.ProfFlags.typeSelector );
- if (!b) return rtsFalse;
+ if (!b) return rtsFalse;
}
if (RtsFlags.ProfFlags.ccSelector) {
- b = str_matches_selector( ((StgClosure *)p)->header.prof.ccs->cc->label,
- RtsFlags.ProfFlags.ccSelector );
- if (!b) return rtsFalse;
+ b = str_matches_selector( ((StgClosure *)p)->header.prof.ccs->cc->label,
+ RtsFlags.ProfFlags.ccSelector );
+ if (!b) return rtsFalse;
}
return rtsTrue;
}
#endif /* PROFILING */
+/* -----------------------------------------------------------------------------
+ * Code to perform a heap census.
+ * -------------------------------------------------------------------------- */
+static void
+heapCensusChain( bdescr *bd )
+{
+ StgPtr p;
+ StgInfoTable *info;
+ nat size;
+#ifdef PROFILING
+ nat real_size;
+#endif
-static double time_of_last_heapCensus = 0.0;
+ for (; bd != NULL; bd = bd->link) {
+ p = bd->start;
+ while (p < bd->free) {
+ info = get_itbl((StgClosure *)p);
+
+ switch (info->type) {
+
+ case CONSTR:
+ case BCO:
+ case FUN:
+ case THUNK:
+ case IND_PERM:
+ case IND_OLDGEN_PERM:
+ case CAF_BLACKHOLE:
+ case SE_CAF_BLACKHOLE:
+ case SE_BLACKHOLE:
+ case BLACKHOLE:
+ case BLACKHOLE_BQ:
+ case WEAK:
+ case FOREIGN:
+ case STABLE_NAME:
+ case MVAR:
+ case MUT_VAR:
+ case MUT_CONS:
+ case CONSTR_INTLIKE:
+ case CONSTR_CHARLIKE:
+ case FUN_1_0:
+ case FUN_0_1:
+ case FUN_1_1:
+ case FUN_0_2:
+ case FUN_2_0:
+ case THUNK_1_1:
+ case THUNK_0_2:
+ case THUNK_2_0:
+ case CONSTR_1_0:
+ case CONSTR_0_1:
+ case CONSTR_1_1:
+ case CONSTR_0_2:
+ case CONSTR_2_0:
+ size = sizeW_fromITBL(info);
+ break;
+
+ case THUNK_1_0: /* ToDo - shouldn't be here */
+ case THUNK_0_1: /* " ditto " */
+ case THUNK_SELECTOR:
+ size = sizeofW(StgHeader) + MIN_UPD_SIZE;
+ break;
+
+ case PAP:
+ case AP_UPD:
+ size = pap_sizeW((StgPAP *)p);
+ break;
+
+ case ARR_WORDS:
+ size = arr_words_sizeW(stgCast(StgArrWords*,p));
+ break;
+
+ case MUT_ARR_PTRS:
+ case MUT_ARR_PTRS_FROZEN:
+ size = mut_arr_ptrs_sizeW((StgMutArrPtrs *)p);
+ break;
+
+ case TSO:
+ size = tso_sizeW((StgTSO *)p);
+ break;
+
+ default:
+ barf("heapCensus");
+ }
+
+#ifdef DEBUG_HEAP_PROF
+ switch (RtsFlags.ProfFlags.doHeapProfile) {
+ case HEAP_BY_INFOPTR:
+ add_data((void *)(*p), size * sizeof(W_));
+ break;
+ case HEAP_BY_CLOSURE_TYPE:
+ closure_types[info->type] += size * sizeof(W_);
+ break;
+ }
+#endif
+
+#ifdef PROFILING
+ // subtract the profiling overhead
+ real_size = size - sizeofW(StgProfHeader);
+
+ if (closureSatisfiesConstraints((StgClosure*)p)) {
+ switch (RtsFlags.ProfFlags.doHeapProfile) {
+ case HEAP_BY_CCS:
+ ((StgClosure *)p)->header.prof.ccs->mem_resid += real_size;
+ break;
+ case HEAP_BY_MOD:
+ strToCtr(((StgClosure *)p)->header.prof.ccs->cc->module)
+ ->mem_resid += real_size;
+ break;
+ case HEAP_BY_DESCR:
+ strToCtr(get_itbl(((StgClosure *)p))->prof.closure_desc)->mem_resid
+ += real_size;
+ break;
+ case HEAP_BY_TYPE:
+ strToCtr(get_itbl(((StgClosure *)p))->prof.closure_type)->mem_resid
+ += real_size;
+ break;
+ default:
+ barf("heapCensus; doHeapProfile");
+ }
+ }
+#endif
+ p += size;
+ }
+ }
+}
void
-heapCensus(void)
+heapCensus( void )
{
- bdescr *bd;
- const StgInfoTable *info;
StgDouble time;
- nat size;
- StgPtr p;
-#ifdef PROFILING
- nat elapsed;
-#endif
+ nat g, s;
#ifdef DEBUG_HEAP_PROF
switch (RtsFlags.ProfFlags.doHeapProfile) {
@@ -542,21 +689,6 @@ heapCensus(void)
#endif
#ifdef PROFILING
- /*
- * We only continue iff we've waited long enough,
- * otherwise, we just dont do the census.
- */
-
- time = mut_user_time_during_GC();
- elapsed = (time - time_of_last_heapCensus) * 1000;
- if (elapsed < RtsFlags.ProfFlags.profileFrequency) {
- return;
- }
- time_of_last_heapCensus = time;
-#endif
-
-
-#ifdef PROFILING
switch (RtsFlags.ProfFlags.doHeapProfile) {
case NO_HEAP_PROFILING:
return;
@@ -574,136 +706,27 @@ heapCensus(void)
}
#endif
- /* Only do heap profiling in a two-space heap */
- ASSERT(RtsFlags.GcFlags.generations == 1);
- bd = g0s0->to_blocks;
-
+ time = mut_user_time_during_GC();
fprintf(hp_file, "BEGIN_SAMPLE %0.2f\n", time);
-
- while (bd != NULL) {
- p = bd->start;
- while (p < bd->free) {
- info = get_itbl((StgClosure *)p);
-
- switch (info->type) {
-
- case CONSTR:
- if (((StgClosure *)p)->header.info == &stg_DEAD_WEAK_info
- && !(LOOKS_LIKE_GHC_INFO(*(p + sizeW_fromITBL(info))))) {
- size = sizeofW(StgWeak);
- break;
- }
- /* else, fall through... */
-
- case BCO:
- case FUN:
- case THUNK:
- case IND_PERM:
- case IND_OLDGEN_PERM:
- case CAF_BLACKHOLE:
- case SE_CAF_BLACKHOLE:
- case SE_BLACKHOLE:
- case BLACKHOLE:
- case BLACKHOLE_BQ:
- case WEAK:
- case FOREIGN:
- case STABLE_NAME:
- case MVAR:
- case MUT_VAR:
- case CONSTR_INTLIKE:
- case CONSTR_CHARLIKE:
- case FUN_1_0:
- case FUN_0_1:
- case FUN_1_1:
- case FUN_0_2:
- case FUN_2_0:
- case THUNK_1_1:
- case THUNK_0_2:
- case THUNK_2_0:
- case CONSTR_1_0:
- case CONSTR_0_1:
- case CONSTR_1_1:
- case CONSTR_0_2:
- case CONSTR_2_0:
- size = sizeW_fromITBL(info);
- break;
-
- case THUNK_1_0: /* ToDo - shouldn't be here */
- case THUNK_0_1: /* " ditto " */
- case THUNK_SELECTOR:
- size = sizeofW(StgHeader) + MIN_UPD_SIZE;
- break;
-
- case AP_UPD: /* we can treat this as being the same as a PAP */
- case PAP:
- size = pap_sizeW((StgPAP *)p);
- break;
-
- case ARR_WORDS:
- size = arr_words_sizeW(stgCast(StgArrWords*,p));
- break;
-
- case MUT_ARR_PTRS:
- case MUT_ARR_PTRS_FROZEN:
- size = mut_arr_ptrs_sizeW((StgMutArrPtrs *)p);
- break;
-
- case TSO:
- size = tso_sizeW((StgTSO *)p);
- break;
-
- default:
- barf("heapCensus");
- }
-
-#ifdef DEBUG_HEAP_PROF
- switch (RtsFlags.ProfFlags.doHeapProfile) {
- case HEAP_BY_INFOPTR:
- add_data((void *)(*p), size * sizeof(W_));
- break;
- case HEAP_BY_CLOSURE_TYPE:
- closure_types[info->type] += size * sizeof(W_);
- break;
- }
-#endif
-# ifdef PROFILING
- if (satisfies_constraints((StgClosure*)p)) {
- switch (RtsFlags.ProfFlags.doHeapProfile) {
- case HEAP_BY_CCS:
- ((StgClosure *)p)->header.prof.ccs->mem_resid += size;
- break;
- case HEAP_BY_MOD:
- strToCtr(((StgClosure *)p)->header.prof.ccs->cc->module)
- ->mem_resid += size;
- break;
- case HEAP_BY_DESCR:
- strToCtr(get_itbl(((StgClosure *)p))->prof.closure_desc)->mem_resid
- += size;
- break;
- case HEAP_BY_TYPE:
- strToCtr(get_itbl(((StgClosure *)p))->prof.closure_type)->mem_resid
- += size;
- break;
- default:
- barf("heapCensus; doHeapProfile");
- }
+ if (RtsFlags.GcFlags.generations == 1) {
+ heapCensusChain( g0s0->to_blocks );
+ } else {
+ for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ for (s = 0; s < generations[g].n_steps; s++) {
+ heapCensusChain( generations[g].steps[s].blocks );
+ }
}
-# endif
-
- p += size;
- }
- bd = bd->link;
}
#ifdef DEBUG_HEAP_PROF
switch (RtsFlags.ProfFlags.doHeapProfile) {
case HEAP_BY_INFOPTR:
- fprint_data(hp_file);
- break;
+ fprint_data(hp_file);
+ break;
case HEAP_BY_CLOSURE_TYPE:
- fprint_closure_types(hp_file);
- break;
+ fprint_closure_types(hp_file);
+ break;
}
#endif
diff --git a/ghc/rts/ProfHeap.h b/ghc/rts/ProfHeap.h
index 270dc5573e..852a82878d 100644
--- a/ghc/rts/ProfHeap.h
+++ b/ghc/rts/ProfHeap.h
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------------------
- * $Id: ProfHeap.h,v 1.1 1999/09/15 13:46:29 simonmar Exp $
+ * $Id: ProfHeap.h,v 1.2 2001/11/22 14:25:12 simonmar Exp $
*
* (c) The GHC Team, 1998-1999
*
@@ -8,6 +8,7 @@
* ---------------------------------------------------------------------------*/
-void heapCensus(void);
-extern nat initHeapProfiling(void);
-void endHeapProfiling(void);
+extern void heapCensus( void );
+extern nat initHeapProfiling( void );
+extern void endHeapProfiling( void );
+extern rtsBool closureSatisfiesConstraints( StgClosure* p );
diff --git a/ghc/rts/Profiling.c b/ghc/rts/Profiling.c
index a8cf7a4e1d..fc863e96e8 100644
--- a/ghc/rts/Profiling.c
+++ b/ghc/rts/Profiling.c
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------------------
- * $Id: Profiling.c,v 1.24 2001/10/18 14:41:01 simonmar Exp $
+ * $Id: Profiling.c,v 1.25 2001/11/22 14:25:12 simonmar Exp $
*
* (c) The GHC Team, 1998-2000
*
@@ -19,6 +19,8 @@
#include "Itimer.h"
#include "ProfHeap.h"
#include "Arena.h"
+#include "RetainerProfile.h"
+#include "LdvProfile.h"
/*
* Profiling allocation arena.
@@ -144,9 +146,6 @@ static IndexTable * AddToIndexTable ( IndexTable *, CostCentreStack *,
-#ifdef DEBUG
-static void printCCS ( CostCentreStack *ccs );
-#endif
static void initTimeProfiling ( void );
static void initProfilingLogFile( void );
@@ -195,6 +194,15 @@ initProfiling1 (void)
/* cost centres are registered by the per-module
* initialisation code now...
*/
+
+ switch (RtsFlags.ProfFlags.doHeapProfile) {
+ case HEAP_BY_RETAINER:
+ initRetainerProfiling();
+ break;
+ case HEAP_BY_LDV:
+ initLdvProfiling();
+ break;
+ }
}
void
@@ -242,6 +250,13 @@ initProfilingLogFile(void)
if ((prof_file = fopen(prof_filename, "w")) == NULL) {
fprintf(stderr, "Can't open profiling report file %s\n", prof_filename);
RtsFlags.CcFlags.doCostCentres = 0;
+ // @retainer profiling
+ // @LDV profiling
+ // The following line was added by Sung; retainer/LDV profiling may need
+ // two output files, i.e., <program>.prof/hp.
+ if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER ||
+ RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV)
+ RtsFlags.ProfFlags.doHeapProfile = 0;
return;
}
@@ -328,7 +343,7 @@ PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
{
IF_DEBUG(prof,
fprintf(stderr,"Pushing %s on ", cc->label);
- printCCS(ccs);
+ fprintCCS(stderr,ccs);
fprintf(stderr,"\n"));
return PushCostCentre(ccs,cc);
}
@@ -390,9 +405,9 @@ AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
IF_DEBUG(prof,
if (ccs1 != ccs2) {
fprintf(stderr,"Appending ");
- printCCS(ccs1);
+ fprintCCS(stderr,ccs1);
fprintf(stderr," to ");
- printCCS(ccs2);
+ fprintCCS(stderr,ccs2);
fprintf(stderr,"\n");});
return AppendCCS(ccs1,ccs2);
}
@@ -688,7 +703,11 @@ report_ccs_profiling( void )
fprint_header();
reportCCS(pruneCCSTree(CCS_MAIN), 0);
- fclose(prof_file);
+ // @retainer profiling
+ // @LDV profiling
+ // Now, prof_file is closed in shutdownHaskell() because this file
+ // is also used for retainer/LDV profiling. See shutdownHaskell().
+ // fclose(prof_file);
}
static void
@@ -862,39 +881,16 @@ reportCCS_XML(CostCentreStack *ccs)
}
void
-print_ccs (FILE *fp, CostCentreStack *ccs)
-{
- if (ccs == CCCS) {
- fprintf(fp, "Cost-Centre Stack: ");
- }
-
- if (ccs != CCS_MAIN)
- {
- print_ccs(fp, ccs->prevStack);
- fprintf(fp, "->[%s,%s]", ccs->cc->label, ccs->cc->module);
- } else {
- fprintf(fp, "[%s,%s]", ccs->cc->label, ccs->cc->module);
- }
-
- if (ccs == CCCS) {
- fprintf(fp, "\n");
- }
-}
-
-
-#ifdef DEBUG
-static void
-printCCS ( CostCentreStack *ccs )
+fprintCCS( FILE *f, CostCentreStack *ccs )
{
- fprintf(stderr,"<");
- for (; ccs; ccs = ccs->prevStack ) {
- fprintf(stderr,ccs->cc->label);
- if (ccs->prevStack) {
- fprintf(stderr,",");
- }
+ fprintf(f,"<");
+ for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
+ fprintf(f,"%s.%s", ccs->cc->module, ccs->cc->label);
+ if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
+ fprintf(f,",");
+ }
}
- fprintf(stderr,">");
+ fprintf(f,">");
}
-#endif
#endif /* PROFILING */
diff --git a/ghc/rts/Profiling.h b/ghc/rts/Profiling.h
index d75d6613bb..52db2da05e 100644
--- a/ghc/rts/Profiling.h
+++ b/ghc/rts/Profiling.h
@@ -1,5 +1,6 @@
/* -----------------------------------------------------------------------------
- * $Id: Profiling.h,v 1.2 2000/04/19 12:42:48 simonmar Exp $
+
+ * $Id: Profiling.h,v 1.3 2001/11/22 14:25:12 simonmar Exp $
*
* (c) The GHC Team, 1998-2000
*
@@ -30,4 +31,9 @@ void print_ccs (FILE *, CostCentreStack *);
extern rtsBool time_profiling;
+extern lnat total_prof_ticks;
+
+extern void fprintCCS( FILE *f, CostCentreStack *ccs );
+
+
#endif
diff --git a/ghc/rts/Proftimer.c b/ghc/rts/Proftimer.c
index 42766d3df0..390dd6953b 100644
--- a/ghc/rts/Proftimer.c
+++ b/ghc/rts/Proftimer.c
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------------------
- * $Id: Proftimer.c,v 1.7 2001/08/14 13:40:09 sewardj Exp $
+ * $Id: Proftimer.c,v 1.8 2001/11/22 14:25:12 simonmar Exp $
*
* (c) The GHC Team, 1998-1999
*
@@ -10,28 +10,65 @@
#if defined (PROFILING)
#include "PosixSource.h"
+
#include "Rts.h"
#include "Profiling.h"
#include "Itimer.h"
#include "Proftimer.h"
+#include "RtsFlags.h"
+
+static rtsBool do_prof_ticks = rtsFalse; // enable profiling ticks
+static rtsBool do_heap_prof_ticks = rtsFalse; // enable heap profiling ticks
-rtsBool do_prof_ticks = rtsFalse; /* enable profiling ticks */
+// Number of ticks until next heap census
+static int ticks_to_heap_profile;
+
+// Time for a heap profile on the next context switch
+rtsBool performHeapProfile;
void
-stopProfTimer(void)
-{ /* Stops time profile */
+stopProfTimer( void )
+{
if (time_profiling) {
do_prof_ticks = rtsFalse;
}
-};
+}
void
-startProfTimer(void)
-{ /* Starts time profile */
+startProfTimer( void )
+{
if (time_profiling) {
do_prof_ticks = rtsTrue;
}
-};
+}
+
+void
+stopHeapProfTimer( void )
+{
+ do_heap_prof_ticks = rtsFalse;
+}
+
+void
+startHeapProfTimer( void )
+{
+ if (RtsFlags.ProfFlags.doHeapProfile) {
+ do_heap_prof_ticks = rtsTrue;
+ }
+}
+
+void
+initProfTimer( void )
+{
+ performHeapProfile = rtsFalse;
+
+ RtsFlags.ProfFlags.profileIntervalTicks =
+ RtsFlags.ProfFlags.profileInterval / TICK_MILLISECS;
+
+ ticks_to_heap_profile = RtsFlags.ProfFlags.profileIntervalTicks;
+
+ startHeapProfTimer();
+}
+
void
handleProfTick(void)
@@ -39,5 +76,14 @@ handleProfTick(void)
if (do_prof_ticks) {
CCS_TICK(CCCS);
}
+
+ if (do_heap_prof_ticks) {
+ ticks_to_heap_profile--;
+ if (ticks_to_heap_profile <= 0) {
+ ticks_to_heap_profile = RtsFlags.ProfFlags.profileIntervalTicks;
+ performHeapProfile = rtsTrue;
+ }
+ }
}
+
#endif /* PROFILING */
diff --git a/ghc/rts/Proftimer.h b/ghc/rts/Proftimer.h
index 231f8da4e1..1ddfc50072 100644
--- a/ghc/rts/Proftimer.h
+++ b/ghc/rts/Proftimer.h
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------------------
- * $Id: Proftimer.h,v 1.5 2000/04/03 15:54:50 simonmar Exp $
+ * $Id: Proftimer.h,v 1.6 2001/11/22 14:25:12 simonmar Exp $
*
* (c) The GHC Team, 1998
*
@@ -7,10 +7,12 @@
*
* ---------------------------------------------------------------------------*/
-extern rtsBool do_prof_ticks;
-extern lnat total_prof_ticks;
+extern void initProfTimer ( void );
+extern void handleProfTick ( void );
-extern void initProfTimer(nat ms);
-extern void stopProfTimer(void);
-extern void startProfTimer(void);
-extern void handleProfTick(void);
+extern void stopProfTimer ( void );
+extern void startProfTimer ( void );
+extern void stopHeapProfTimer ( void );
+extern void startHeapProfTimer ( void );
+
+extern rtsBool performHeapProfile;
diff --git a/ghc/rts/RetainerProfile.c b/ghc/rts/RetainerProfile.c
new file mode 100644
index 0000000000..f811d73aab
--- /dev/null
+++ b/ghc/rts/RetainerProfile.c
@@ -0,0 +1,2303 @@
+/* -----------------------------------------------------------------------------
+ * $Id: RetainerProfile.c,v 1.1 2001/11/22 14:25:12 simonmar Exp $
+ *
+ * (c) The GHC Team, 2001
+ * Author: Sungwoo Park
+ *
+ * Retainer profiling.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifdef PROFILING
+
+#include "Rts.h"
+#include "RtsUtils.h"
+#include "RetainerProfile.h"
+#include "RetainerSet.h"
+#include "Schedule.h"
+#include "Printer.h"
+#include "Storage.h"
+#include "StoragePriv.h"
+#include "RtsFlags.h"
+#include "Weak.h"
+#include "Sanity.h"
+#include "Profiling.h"
+#include "Stats.h"
+#include "BlockAlloc.h"
+#include "Itimer.h"
+#include "Proftimer.h"
+#include "ProfHeap.h"
+
+/*
+ Note: what to change in order to plug-in a new retainer profiling scheme?
+ (1) type retainer in ../includes/StgRetainerProf.h
+ (2) retainer function R(), i.e., getRetainerFrom()
+ (3) the two hashing functions, hashKeySingleton() and hashKeyAddElement(),
+ in RetainerSet.h, if needed.
+ (4) printRetainer() and printRetainerSetShort() in RetainerSet.c.
+ */
+
+/* -----------------------------------------------------------------------------
+ * Declarations...
+ * -------------------------------------------------------------------------- */
+
+static nat retainerGeneration; // generation
+
+static nat numObjectVisited; // total number of objects visited
+static nat timesAnyObjectVisited; // number of times any objects are visited
+
+/*
+ The rs field in the profile header of any object points to its retainer
+ set in an indirect way: if flip is 0, it points to the retainer set;
+ if flip is 1, it points to the next byte after the retainer set (even
+ for NULL pointers). Therefore, with flip 1, (rs ^ 1) is the actual
+ pointer. See retainerSetOf().
+ */
+
+// extract the retainer set field from c
+#define RSET(c) ((c)->header.prof.hp.rs)
+
+static StgWord flip = 0; // flip bit
+ // must be 0 if DEBUG_RETAINER is on (for static closures)
+
+#define isRetainerSetFieldValid(c) \
+ ((((StgWord)(c)->header.prof.hp.rs & 1) ^ flip) == 0)
+
+#define setRetainerSetToNull(c) \
+ (c)->header.prof.hp.rs = (RetainerSet *)((StgWord)NULL | flip)
+
+static void retainStack(StgClosure *, StgClosure *, StgClosure *, StgPtr, StgPtr);
+static void retainClosure(StgClosure *, StgClosure *, StgClosure *);
+#ifdef DEBUG_RETAINER
+static void belongToHeap(StgPtr p);
+#endif
+
+#ifdef DEBUG_RETAINER
+/*
+ cStackSize records how many times retainStack() has been invoked recursively,
+ that is, the number of activation records for retainStack() on the C stack.
+ maxCStackSize records its max value.
+ Invariants:
+ cStackSize <= maxCStackSize
+ */
+static nat cStackSize, maxCStackSize;
+
+static nat sumOfNewCost; // sum of the cost of each object, computed
+ // when the object is first visited
+static nat sumOfNewCostExtra; // for those objects not visited during
+ // retainer profiling, e.g., MUT_VAR
+static nat costArray[N_CLOSURE_TYPES];
+
+nat sumOfCostLinear; // sum of the costs of all object, computed
+ // when linearly traversing the heap after
+ // retainer profiling
+nat costArrayLinear[N_CLOSURE_TYPES];
+#endif
+
+/* -----------------------------------------------------------------------------
+ * Retainer stack - header
+ * Note:
+ * Although the retainer stack implementation could be separated
+ * from the retainer profiling engine, there does not seem to be
+ * any advantage in doing that; retainer stack is an integral part
+ * of retainer profiling engine and cannot be used elsewhere at
+ * all.
+ * -------------------------------------------------------------------------- */
+
+typedef enum {
+ posTypeStep,
+ posTypePtrs,
+ posTypeSRT,
+} nextPosType;
+
+typedef union {
+ // fixed layout or layout specified by a field in the closure
+ StgWord step;
+
+ // layout.payload
+ struct {
+ // See StgClosureInfo in InfoTables.h
+#if SIZEOF_VOID_P == 8
+ StgWord32 pos;
+ StgWord32 ptrs;
+#else
+ StgWord16 pos;
+ StgWord16 ptrs;
+#endif
+ StgPtr payload;
+ } ptrs;
+
+ // SRT
+ struct {
+ StgClosure **srt;
+ StgClosure **srt_end;
+ } srt;
+} nextPos;
+
+typedef struct {
+ nextPosType type;
+ nextPos next;
+} stackPos;
+
+typedef struct {
+ StgClosure *c;
+ StgClosure *c_child_r;
+ stackPos info;
+} stackElement;
+
+/*
+ Invariants:
+ firstStack points to the first block group.
+ currentStack points to the block group currently being used.
+ currentStack->free == stackLimit.
+ stackTop points to the topmost byte in the stack of currentStack.
+ Unless the whole stack is empty, stackTop must point to the topmost
+ object (or byte) in the whole stack. Thus, it is only when the whole stack
+ is empty that stackTop == stackLimit (not during the execution of push()
+ and pop()).
+ stackBottom == currentStack->start.
+ stackLimit == currentStack->start + BLOCK_SIZE_W * currentStack->blocks.
+ Note:
+ When a current stack becomes empty, stackTop is set to point to
+ the topmost element on the previous block group so as to satisfy
+ the invariants described above.
+ */
+bdescr *firstStack = NULL;
+static bdescr *currentStack;
+static stackElement *stackBottom, *stackTop, *stackLimit;
+
+/*
+ currentStackBoundary is used to mark the current stack chunk.
+ If stackTop == currentStackBoundary, it means that the current stack chunk
+ is empty. It is the responsibility of the user to keep currentStackBoundary
+ valid all the time if it is to be employed.
+ */
+static stackElement *currentStackBoundary;
+
+/*
+ stackSize records the current size of the stack.
+ maxStackSize records its high water mark.
+ Invariants:
+ stackSize <= maxStackSize
+ Note:
+ stackSize is just an estimate measure of the depth of the graph. The reason
+ is that some heap objects have only a single child and may not result
+ in a new element being pushed onto the stack. Therefore, at the end of
+ retainer profiling, maxStackSize + maxCStackSize is some value no greater
+ than the actual depth of the graph.
+ */
+#ifdef DEBUG_RETAINER
+static int stackSize, maxStackSize;
+#endif
+
+// number of blocks allocated for one stack
+#define BLOCKS_IN_STACK 1
+
+/* -----------------------------------------------------------------------------
+ * Add a new block group to the stack.
+ * Invariants:
+ * currentStack->link == s.
+ * -------------------------------------------------------------------------- */
+static inline void
+newStackBlock( bdescr *bd )
+{
+ currentStack = bd;
+ stackTop = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
+ stackBottom = (stackElement *)bd->start;
+ stackLimit = (stackElement *)stackTop;
+ bd->free = (StgPtr)stackLimit;
+}
+
+/* -----------------------------------------------------------------------------
+ * Return to the previous block group.
+ * Invariants:
+ * s->link == currentStack.
+ * -------------------------------------------------------------------------- */
+static inline void
+returnToOldStack( bdescr *bd )
+{
+ currentStack = bd;
+ stackTop = (stackElement *)bd->free;
+ stackBottom = (stackElement *)bd->start;
+ stackLimit = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
+ bd->free = (StgPtr)stackLimit;
+}
+
+/* -----------------------------------------------------------------------------
+ * Initializes the traverse stack.
+ * -------------------------------------------------------------------------- */
+static void
+initializeTraverseStack( void )
+{
+ if (firstStack != NULL) {
+ freeChain(firstStack);
+ }
+
+ firstStack = allocGroup(BLOCKS_IN_STACK);
+ firstStack->link = NULL;
+ firstStack->u.back = NULL;
+
+ newStackBlock(firstStack);
+}
+
+/* -----------------------------------------------------------------------------
+ * Frees all the block groups in the traverse stack.
+ * Invariants:
+ * firstStack != NULL
+ * -------------------------------------------------------------------------- */
+static void
+closeTraverseStack( void )
+{
+ freeChain(firstStack);
+ firstStack = NULL;
+}
+
+/* -----------------------------------------------------------------------------
+ * Returns rtsTrue if the whole stack is empty.
+ * -------------------------------------------------------------------------- */
+static inline rtsBool
+isEmptyRetainerStack( void )
+{
+ return (firstStack == currentStack) && stackTop == stackLimit;
+}
+
+/* -----------------------------------------------------------------------------
+ * Returns rtsTrue if stackTop is at the stack boundary of the current stack,
+ * i.e., if the current stack chunk is empty.
+ * -------------------------------------------------------------------------- */
+static inline rtsBool
+isOnBoundary( void )
+{
+ return stackTop == currentStackBoundary;
+}
+
+/* -----------------------------------------------------------------------------
+ * Initializes *info from ptrs and payload.
+ * Invariants:
+ * payload[] begins with ptrs pointers followed by non-pointers.
+ * -------------------------------------------------------------------------- */
+static inline void
+init_ptrs( stackPos *info, nat ptrs, StgPtr payload )
+{
+ info->type = posTypePtrs;
+ info->next.ptrs.pos = 0;
+ info->next.ptrs.ptrs = ptrs;
+ info->next.ptrs.payload = payload;
+}
+
+/* -----------------------------------------------------------------------------
+ * Find the next object from *info.
+ * -------------------------------------------------------------------------- */
+static inline StgClosure *
+find_ptrs( stackPos *info )
+{
+ if (info->next.ptrs.pos < info->next.ptrs.ptrs) {
+ return (StgClosure *)info->next.ptrs.payload[info->next.ptrs.pos++];
+ } else {
+ return NULL;
+ }
+}
+
+/* -----------------------------------------------------------------------------
+ * Initializes *info from SRT information stored in *infoTable.
+ * -------------------------------------------------------------------------- */
+static inline void
+init_srt( stackPos *info, StgInfoTable *infoTable )
+{
+ info->type = posTypeSRT;
+ info->next.srt.srt = (StgClosure **)(infoTable->srt);
+ info->next.srt.srt_end = info->next.srt.srt + infoTable->srt_len;
+}
+
+/* -----------------------------------------------------------------------------
+ * Find the next object from *info.
+ * -------------------------------------------------------------------------- */
+static inline StgClosure *
+find_srt( stackPos *info )
+{
+ StgClosure *c;
+
+ if (info->next.srt.srt < info->next.srt.srt_end) {
+ // See scavenge_srt() in GC.c for details.
+#ifdef ENABLE_WIN32_DLL_SUPPORT
+ if ((unsigned long)(*(info->next.srt.srt)) & 0x1)
+ c = (* (StgClosure **)((unsigned long)*(info->next.srt.srt)) & ~0x1);
+ else
+ c = *(info->next.srt.srt);
+#else
+ c = *(info->next.srt.srt);
+#endif
+ info->next.srt.srt++;
+ return c;
+ } else {
+ return NULL;
+ }
+}
+
+/* -----------------------------------------------------------------------------
+ * push() pushes a stackElement representing the next child of *c
+ * onto the traverse stack. If *c has no child, *first_child is set
+ * to NULL and nothing is pushed onto the stack. If *c has only one
+ * child, *c_child is set to that child and nothing is pushed onto
+ * the stack. If *c has more than two children, *first_child is set
+ * to the first child and a stackElement representing the second
+ * child is pushed onto the stack.
+
+ * Invariants:
+ * *c_child_r is the most recent retainer of *c's children.
+ * *c is not any of TSO, PAP, or AP_UPD, which means that
+ * there cannot be any stack objects.
+ * Note: SRTs are considered to be children as well.
+ * -------------------------------------------------------------------------- */
+static inline void
+push( StgClosure *c, StgClosure *c_child_r, StgClosure **first_child )
+{
+ stackElement se;
+ bdescr *nbd; // Next Block Descriptor
+
+#ifdef DEBUG_RETAINER
+ // fprintf(stderr, "push(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
+#endif
+
+ ASSERT(get_itbl(c)->type != TSO);
+ ASSERT(get_itbl(c)->type != PAP);
+ ASSERT(get_itbl(c)->type != AP_UPD);
+
+ //
+ // fill in se
+ //
+
+ se.c = c;
+ se.c_child_r = c_child_r;
+
+ // fill in se.info
+ switch (get_itbl(c)->type) {
+ // no child, no SRT
+ case CONSTR_0_1:
+ case CONSTR_0_2:
+ case CAF_BLACKHOLE:
+ case BLACKHOLE:
+ case SE_BLACKHOLE:
+ case SE_CAF_BLACKHOLE:
+ case ARR_WORDS:
+ *first_child = NULL;
+ return;
+
+ // one child (fixed), no SRT
+ case MUT_VAR:
+ case MUT_CONS:
+ *first_child = ((StgMutVar *)c)->var;
+ return;
+ case BLACKHOLE_BQ:
+ // blocking_queue must be TSO and the head of a linked list of TSOs.
+ // Should it be a child? Seems to be yes.
+ *first_child = (StgClosure *)((StgBlockingQueue *)c)->blocking_queue;
+ return;
+ case THUNK_SELECTOR:
+ *first_child = ((StgSelector *)c)->selectee;
+ return;
+ case IND_PERM:
+ case IND_OLDGEN_PERM:
+ case IND_OLDGEN:
+ *first_child = ((StgIndOldGen *)c)->indirectee;
+ return;
+ case CONSTR_1_0:
+ case CONSTR_1_1:
+ *first_child = c->payload[0];
+ return;
+
+ // For CONSTR_2_0 and MVAR, we use se.info.step to record the position
+ // of the next child. We do not write a separate initialization code.
+ // Also we do not have to initialize info.type;
+
+ // two children (fixed), no SRT
+ // need to push a stackElement, but nothing to store in se.info
+ case CONSTR_2_0:
+ *first_child = c->payload[0]; // return the first pointer
+ // se.info.type = posTypeStep;
+ // se.info.next.step = 2; // 2 = second
+ break;
+
+ // three children (fixed), no SRT
+ // need to push a stackElement
+ case MVAR:
+ // head must be TSO and the head of a linked list of TSOs.
+ // Should it be a child? Seems to be yes.
+ *first_child = (StgClosure *)((StgMVar *)c)->head;
+ // se.info.type = posTypeStep;
+ se.info.next.step = 2; // 2 = second
+ break;
+
+ // three children (fixed), no SRT
+ case WEAK:
+ *first_child = ((StgWeak *)c)->key;
+ // se.info.type = posTypeStep;
+ se.info.next.step = 2;
+ break;
+
+ // layout.payload.ptrs, no SRT
+ case CONSTR:
+ case FOREIGN:
+ case STABLE_NAME:
+ case BCO:
+ case CONSTR_STATIC:
+ init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
+ (StgPtr)c->payload);
+ *first_child = find_ptrs(&se.info);
+ if (*first_child == NULL)
+ return; // no child
+ break;
+
+ // StgMutArrPtr.ptrs, no SRT
+ case MUT_ARR_PTRS:
+ case MUT_ARR_PTRS_FROZEN:
+ init_ptrs(&se.info, ((StgMutArrPtrs *)c)->ptrs,
+ (StgPtr)(((StgMutArrPtrs *)c)->payload));
+ *first_child = find_ptrs(&se.info);
+ if (*first_child == NULL)
+ return;
+ break;
+
+ // layout.payload.ptrs, SRT
+ case FUN: // *c is a heap object.
+ case FUN_2_0:
+ case THUNK:
+ case THUNK_2_0:
+ init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, (StgPtr)c->payload);
+ *first_child = find_ptrs(&se.info);
+ if (*first_child == NULL)
+ // no child from ptrs, so check SRT
+ goto srt_only;
+ break;
+
+ // 1 fixed child, SRT
+ case FUN_1_0:
+ case FUN_1_1:
+ case THUNK_1_0:
+ case THUNK_1_1:
+ *first_child = c->payload[0];
+ ASSERT(*first_child != NULL);
+ init_srt(&se.info, get_itbl(c));
+ break;
+
+ // SRT only
+ case THUNK_STATIC:
+ case FUN_STATIC: // *c is a heap object.
+ ASSERT(get_itbl(c)->srt_len != 0);
+ case FUN_0_1:
+ case FUN_0_2:
+ case THUNK_0_1:
+ case THUNK_0_2:
+ srt_only:
+ init_srt(&se.info, get_itbl(c));
+ *first_child = find_srt(&se.info);
+ if (*first_child == NULL)
+ return; // no child
+ break;
+
+ // cannot appear
+ case PAP:
+ case AP_UPD:
+ case TSO:
+ case IND_STATIC:
+ case CONSTR_INTLIKE:
+ case CONSTR_CHARLIKE:
+ case CONSTR_NOCAF_STATIC:
+ // stack objects
+ case UPDATE_FRAME:
+ case CATCH_FRAME:
+ case STOP_FRAME:
+ case SEQ_FRAME:
+ case RET_DYN:
+ case RET_BCO:
+ case RET_SMALL:
+ case RET_VEC_SMALL:
+ case RET_BIG:
+ case RET_VEC_BIG:
+ // invalid objects
+ case IND:
+ case BLOCKED_FETCH:
+ case FETCH_ME:
+ case FETCH_ME_BQ:
+ case RBH:
+ case REMOTE_REF:
+ case EVACUATED:
+ case INVALID_OBJECT:
+ default:
+ barf("Invalid object *c in push()");
+ return;
+ }
+
+ if (stackTop - 1 < stackBottom) {
+#ifdef DEBUG_RETAINER
+ // fprintf(stderr, "push() to the next stack.\n");
+#endif
+ // currentStack->free is updated when the active stack is switched
+ // to the next stack.
+ currentStack->free = (StgPtr)stackTop;
+
+ if (currentStack->link == NULL) {
+ nbd = allocGroup(BLOCKS_IN_STACK);
+ nbd->link = NULL;
+ nbd->u.back = currentStack;
+ currentStack->link = nbd;
+ } else
+ nbd = currentStack->link;
+
+ newStackBlock(nbd);
+ }
+
+ // adjust stackTop (actual push)
+ stackTop--;
+ // If the size of stackElement was huge, we would better replace the
+ // following statement by either a memcpy() call or a switch statement
+ // on the type of the element. Currently, the size of stackElement is
+ // small enough (5 words) that this direct assignment seems to be enough.
+ *stackTop = se;
+
+#ifdef DEBUG_RETAINER
+ stackSize++;
+ if (stackSize > maxStackSize) maxStackSize = stackSize;
+ // ASSERT(stackSize >= 0);
+ // fprintf(stderr, "stackSize = %d\n", stackSize);
+#endif
+}
+
+/* -----------------------------------------------------------------------------
+ * popOff() and popOffReal(): Pop a stackElement off the traverse stack.
+ * Invariants:
+ * stackTop cannot be equal to stackLimit unless the whole stack is
+ * empty, in which case popOff() is not allowed.
+ * Note:
+ * You can think of popOffReal() as a part of popOff() which is
+ * executed at the end of popOff() if necessary. Since popOff() is
+ * likely to be executed quite often while popOffReal() is not, we
+ * separate popOffReal() from popOff(), which is declared as an
+ * inline function (for the sake of execution speed). popOffReal()
+ * is called only within popOff() and nowhere else.
+ * -------------------------------------------------------------------------- */
+static void
+popOffReal(void)
+{
+ bdescr *pbd; // Previous Block Descriptor
+
+#ifdef DEBUG_RETAINER
+ // fprintf(stderr, "pop() to the previous stack.\n");
+#endif
+
+ ASSERT(stackTop + 1 == stackLimit);
+ ASSERT(stackBottom == (stackElement *)currentStack->start);
+
+ if (firstStack == currentStack) {
+ // The stack is completely empty.
+ stackTop++;
+ ASSERT(stackTop == stackLimit);
+#ifdef DEBUG_RETAINER
+ stackSize--;
+ if (stackSize > maxStackSize) maxStackSize = stackSize;
+ /*
+ ASSERT(stackSize >= 0);
+ fprintf(stderr, "stackSize = %d\n", stackSize);
+ */
+#endif
+ return;
+ }
+
+ // currentStack->free is updated when the active stack is switched back
+ // to the previous stack.
+ currentStack->free = (StgPtr)stackLimit;
+
+ // find the previous block descriptor
+ pbd = currentStack->u.back;
+ ASSERT(pbd != NULL);
+
+ returnToOldStack(pbd);
+
+#ifdef DEBUG_RETAINER
+ stackSize--;
+ if (stackSize > maxStackSize) maxStackSize = stackSize;
+ /*
+ ASSERT(stackSize >= 0);
+ fprintf(stderr, "stackSize = %d\n", stackSize);
+ */
+#endif
+}
+
+static inline void
+popOff(void) {
+#ifdef DEBUG_RETAINER
+ // fprintf(stderr, "\tpopOff(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
+#endif
+
+ ASSERT(stackTop != stackLimit);
+ ASSERT(!isEmptyRetainerStack());
+
+ // <= (instead of <) is wrong!
+ if (stackTop + 1 < stackLimit) {
+ stackTop++;
+#ifdef DEBUG_RETAINER
+ stackSize--;
+ if (stackSize > maxStackSize) maxStackSize = stackSize;
+ /*
+ ASSERT(stackSize >= 0);
+ fprintf(stderr, "stackSize = %d\n", stackSize);
+ */
+#endif
+ return;
+ }
+
+ popOffReal();
+}
+
+/* -----------------------------------------------------------------------------
+ * Finds the next object to be considered for retainer profiling and stores
+ * its pointer to *c.
+ * Test if the topmost stack element indicates that more objects are left,
+ * and if so, retrieve the first object and store its pointer to *c. Also,
+ * set *cp and *r appropriately, both of which are stored in the stack element.
+ * The topmost stack element then is overwritten so as for it to now denote
+ * the next object.
+ * If the topmost stack element indicates no more objects are left, pop
+ * off the stack element until either an object can be retrieved or
+ * the current stack chunk becomes empty, indicated by rtsTrue returned by
+ * isOnBoundary(), in which case *c is set to NULL.
+ * Note:
+ * It is okay to call this function even when the current stack chunk
+ * is empty.
+ * -------------------------------------------------------------------------- */
+static inline void
+pop( StgClosure **c, StgClosure **cp, StgClosure **r )
+{
+ stackElement *se;
+
+#ifdef DEBUG_RETAINER
+ // fprintf(stderr, "pop(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
+#endif
+
+ do {
+ if (isOnBoundary()) { // if the current stack chunk is depleted
+ *c = NULL;
+ return;
+ }
+
+ se = stackTop;
+
+ switch (get_itbl(se->c)->type) {
+ // two children (fixed), no SRT
+ // nothing in se.info
+ case CONSTR_2_0:
+ *c = se->c->payload[1];
+ *cp = se->c;
+ *r = se->c_child_r;
+ popOff();
+ return;
+
+ // three children (fixed), no SRT
+ // need to push a stackElement
+ case MVAR:
+ if (se->info.next.step == 2) {
+ *c = (StgClosure *)((StgMVar *)se->c)->tail;
+ se->info.next.step++; // move to the next step
+ // no popOff
+ } else {
+ *c = ((StgMVar *)se->c)->value;
+ popOff();
+ }
+ *cp = se->c;
+ *r = se->c_child_r;
+ return;
+
+ // three children (fixed), no SRT
+ case WEAK:
+ if (se->info.next.step == 2) {
+ *c = ((StgWeak *)se->c)->value;
+ se->info.next.step++;
+ // no popOff
+ } else {
+ *c = ((StgWeak *)se->c)->finalizer;
+ popOff();
+ }
+ *cp = se->c;
+ *r = se->c_child_r;
+ return;
+
+ case CONSTR:
+ case FOREIGN:
+ case STABLE_NAME:
+ case BCO:
+ case CONSTR_STATIC:
+ // StgMutArrPtr.ptrs, no SRT
+ case MUT_ARR_PTRS:
+ case MUT_ARR_PTRS_FROZEN:
+ *c = find_ptrs(&se->info);
+ if (*c == NULL) {
+ popOff();
+ break;
+ }
+ *cp = se->c;
+ *r = se->c_child_r;
+ return;
+
+ // layout.payload.ptrs, SRT
+ case FUN: // always a heap object
+ case FUN_2_0:
+ case THUNK:
+ case THUNK_2_0:
+ if (se->info.type == posTypePtrs) {
+ *c = find_ptrs(&se->info);
+ if (*c != NULL) {
+ *cp = se->c;
+ *r = se->c_child_r;
+ return;
+ }
+ init_srt(&se->info, get_itbl(se->c));
+ }
+ // fall through
+
+ // SRT
+ case THUNK_STATIC:
+ case FUN_STATIC:
+ case FUN_0_1:
+ case FUN_0_2:
+ case THUNK_0_1:
+ case THUNK_0_2:
+ case FUN_1_0:
+ case FUN_1_1:
+ case THUNK_1_0:
+ case THUNK_1_1:
+ *c = find_srt(&se->info);
+ if (*c != NULL) {
+ *cp = se->c;
+ *r = se->c_child_r;
+ return;
+ }
+ popOff();
+ break;
+
+ // no child (fixed), no SRT
+ case CONSTR_0_1:
+ case CONSTR_0_2:
+ case CAF_BLACKHOLE:
+ case BLACKHOLE:
+ case SE_BLACKHOLE:
+ case SE_CAF_BLACKHOLE:
+ case ARR_WORDS:
+ // one child (fixed), no SRT
+ case MUT_VAR:
+ case MUT_CONS:
+ case BLACKHOLE_BQ:
+ case THUNK_SELECTOR:
+ case IND_PERM:
+ case IND_OLDGEN_PERM:
+ case IND_OLDGEN:
+ case CONSTR_1_1:
+ // cannot appear
+ case PAP:
+ case AP_UPD:
+ case TSO:
+ case IND_STATIC:
+ case CONSTR_INTLIKE:
+ case CONSTR_CHARLIKE:
+ case CONSTR_NOCAF_STATIC:
+ // stack objects
+ case RET_DYN:
+ case UPDATE_FRAME:
+ case CATCH_FRAME:
+ case STOP_FRAME:
+ case SEQ_FRAME:
+ case RET_BCO:
+ case RET_SMALL:
+ case RET_VEC_SMALL:
+ case RET_BIG:
+ case RET_VEC_BIG:
+ // invalid objects
+ case IND:
+ case BLOCKED_FETCH:
+ case FETCH_ME:
+ case FETCH_ME_BQ:
+ case RBH:
+ case REMOTE_REF:
+ case EVACUATED:
+ case INVALID_OBJECT:
+ default:
+ barf("Invalid object *c in pop()");
+ return;
+ }
+ } while (rtsTrue);
+}
+
+/* -----------------------------------------------------------------------------
+ * RETAINER PROFILING ENGINE
+ * -------------------------------------------------------------------------- */
+
+void
+initRetainerProfiling( void )
+{
+ initializeAllRetainerSet();
+ retainerGeneration = 0;
+}
+
+/* -----------------------------------------------------------------------------
+ * This function must be called before f-closing prof_file.
+ * -------------------------------------------------------------------------- */
+void
+endRetainerProfiling( void )
+{
+#ifdef SECOND_APPROACH
+ outputAllRetainerSet(prof_file);
+#endif
+}
+
+/* -----------------------------------------------------------------------------
+ * Returns the actual pointer to the retainer set of the closure *c.
+ * It may adjust RSET(c) subject to flip.
+ * Side effects:
+ * RSET(c) is initialized to NULL if its current value does not
+ * conform to flip.
+ * Note:
+ * Even though this function has side effects, they CAN be ignored because
+ * subsequent calls to retainerSetOf() always result in the same return value
+ * and retainerSetOf() is the only way to retrieve retainerSet of a given
+ * closure.
+ * We have to perform an XOR (^) operation each time a closure is examined.
+ * The reason is that we do not know when a closure is visited last.
+ * -------------------------------------------------------------------------- */
+static inline void
+maybeInitRetainerSet( StgClosure *c )
+{
+ if (!isRetainerSetFieldValid(c)) {
+ setRetainerSetToNull(c);
+ }
+}
+
+static inline RetainerSet *
+retainerSetOf( StgClosure *c )
+{
+ ASSERT( isRetainerSetFieldValid(c) );
+ // StgWord has the same size as pointers, so the following type
+ // casting is okay.
+ return (RetainerSet *)((StgWord)RSET(c) ^ flip);
+}
+
+/* -----------------------------------------------------------------------------
+ * Returns the cost of the closure *c, e.g., the amount of heap memory
+ * allocated to *c. Static objects cost 0.
+ * The cost includes even the words allocated for profiling purpose.
+ * Cf. costPure().
+ * -------------------------------------------------------------------------- */
+static inline nat
+cost( StgClosure *c )
+{
+ StgInfoTable *info;
+
+ info = get_itbl(c);
+ switch (info->type) {
+ case TSO:
+ return tso_sizeW((StgTSO *)c);
+
+ case THUNK:
+ case THUNK_1_0:
+ case THUNK_0_1:
+ case THUNK_2_0:
+ case THUNK_1_1:
+ case THUNK_0_2:
+ return stg_max(sizeW_fromITBL(info), sizeofW(StgHeader) + MIN_UPD_SIZE);
+
+ // static objects
+ case CONSTR_STATIC:
+ case FUN_STATIC:
+ case THUNK_STATIC:
+ return 0;
+
+ case MVAR:
+ return sizeofW(StgMVar);
+
+ case MUT_ARR_PTRS:
+ case MUT_ARR_PTRS_FROZEN:
+ return mut_arr_ptrs_sizeW((StgMutArrPtrs *)c);
+
+ case AP_UPD:
+ case PAP:
+ return pap_sizeW((StgPAP *)c);
+
+ case ARR_WORDS:
+ return arr_words_sizeW((StgArrWords *)c);
+
+ case CONSTR:
+ case CONSTR_1_0:
+ case CONSTR_0_1:
+ case CONSTR_2_0:
+ case CONSTR_1_1:
+ case CONSTR_0_2:
+ case FUN:
+ case FUN_1_0:
+ case FUN_0_1:
+ case FUN_2_0:
+ case FUN_1_1:
+ case FUN_0_2:
+ case WEAK:
+ case MUT_VAR:
+ case MUT_CONS:
+ case CAF_BLACKHOLE:
+ case BLACKHOLE:
+ case SE_BLACKHOLE:
+ case SE_CAF_BLACKHOLE:
+ case BLACKHOLE_BQ:
+ case IND_PERM:
+ case IND_OLDGEN:
+ case IND_OLDGEN_PERM:
+ case FOREIGN:
+ case BCO:
+ case STABLE_NAME:
+ return sizeW_fromITBL(info);
+
+ case THUNK_SELECTOR:
+ return sizeofW(StgHeader) + MIN_UPD_SIZE;
+
+ /*
+ Error case
+ */
+ // IND_STATIC cannot be *c, *cp, *r in the retainer profiling loop.
+ case IND_STATIC:
+ // CONSTR_INTLIKE, CONSTR_CHARLIKE, and CONSTR_NOCAF_STATIC
+ // cannot be *c, *cp, *r in the retainer profiling loop.
+ case CONSTR_INTLIKE:
+ case CONSTR_CHARLIKE:
+ case CONSTR_NOCAF_STATIC:
+ // Stack objects are invalid because they are never treated as
+ // legal objects during retainer profiling.
+ case UPDATE_FRAME:
+ case CATCH_FRAME:
+ case STOP_FRAME:
+ case SEQ_FRAME:
+ case RET_DYN:
+ case RET_BCO:
+ case RET_SMALL:
+ case RET_VEC_SMALL:
+ case RET_BIG:
+ case RET_VEC_BIG:
+ // other cases
+ case IND:
+ case BLOCKED_FETCH:
+ case FETCH_ME:
+ case FETCH_ME_BQ:
+ case RBH:
+ case REMOTE_REF:
+ case EVACUATED:
+ case INVALID_OBJECT:
+ default:
+ barf("Invalid object in cost(): %d", get_itbl(c)->type);
+ }
+}
+
+/* -----------------------------------------------------------------------------
+ * Returns the pure cost of the closure *c, i.e., the size of memory
+ * allocated for this object without profiling.
+ * Note & Todo:
+ * costPure() subtracts the overhead incurred by profiling for all types
+ * of objects except TSO. Even though the overhead in the TSO object
+ * itself is taken into account, the additional costs due to larger
+ * stack objects (with unnecessary retainer profiling fields) is not
+ * considered. Still, costPure() should result in an accurate estimate
+ * of heap use because stacks in TSO objects are allocated in large blocks.
+ * If we get rid of the (currently unused) retainer profiling field in
+ * all stack objects, the result will be accurate.
+ * ------------------------------------------------------------------------- */
+static inline nat
+costPure( StgClosure *c )
+{
+ nat cst;
+
+ if (!closureSatisfiesConstraints(c)) {
+ return 0;
+ }
+
+ cst = cost(c);
+
+ ASSERT(cst == 0 || cst - sizeofW(StgProfHeader) > 0);
+
+ if (cst > 0) {
+ return cst - sizeofW(StgProfHeader);
+ } else {
+ return 0;
+ }
+}
+
+/* -----------------------------------------------------------------------------
+ * Returns rtsTrue if *c is a retainer.
+ * -------------------------------------------------------------------------- */
+static inline rtsBool
+isRetainer( StgClosure *c )
+{
+ if (get_itbl(c)->prof.closure_desc != NULL && !strcmp(get_itbl(c)->prof.closure_desc,"PCS")) { return rtsTrue; }
+
+ switch (get_itbl(c)->type) {
+ //
+ // True case
+ //
+ // TSOs MUST be retainers: they constitute the set of roots.
+ case TSO:
+
+ // mutable objects
+ case MVAR:
+ case MUT_VAR:
+ case MUT_CONS:
+ case MUT_ARR_PTRS:
+ case MUT_ARR_PTRS_FROZEN:
+
+ // thunks are retainers.
+ case THUNK:
+ case THUNK_1_0:
+ case THUNK_0_1:
+ case THUNK_2_0:
+ case THUNK_1_1:
+ case THUNK_0_2:
+ case THUNK_SELECTOR:
+ case AP_UPD:
+
+ // Static thunks, or CAFS, are obviously retainers.
+ case THUNK_STATIC:
+
+ // WEAK objects are roots; there is separate code in which traversing
+ // begins from WEAK objects.
+ case WEAK:
+ return rtsTrue;
+
+ //
+ // False case
+ //
+
+ // constructors
+ case CONSTR:
+ case CONSTR_1_0:
+ case CONSTR_0_1:
+ case CONSTR_2_0:
+ case CONSTR_1_1:
+ case CONSTR_0_2:
+ // functions
+ case FUN:
+ case FUN_1_0:
+ case FUN_0_1:
+ case FUN_2_0:
+ case FUN_1_1:
+ case FUN_0_2:
+ // partial applications
+ case PAP:
+ // blackholes
+ case CAF_BLACKHOLE:
+ case BLACKHOLE:
+ case SE_BLACKHOLE:
+ case SE_CAF_BLACKHOLE:
+ case BLACKHOLE_BQ:
+ // indirection
+ case IND_PERM:
+ case IND_OLDGEN_PERM:
+ case IND_OLDGEN:
+ // static objects
+ case CONSTR_STATIC:
+ case FUN_STATIC:
+ // misc
+ case FOREIGN:
+ case STABLE_NAME:
+ case BCO:
+ case ARR_WORDS:
+ return rtsFalse;
+
+ //
+ // Error case
+ //
+ // IND_STATIC cannot be *c, *cp, *r in the retainer profiling loop.
+ case IND_STATIC:
+ // CONSTR_INTLIKE, CONSTR_CHARLIKE, and CONSTR_NOCAF_STATIC
+ // cannot be *c, *cp, *r in the retainer profiling loop.
+ case CONSTR_INTLIKE:
+ case CONSTR_CHARLIKE:
+ case CONSTR_NOCAF_STATIC:
+ // Stack objects are invalid because they are never treated as
+ // legal objects during retainer profiling.
+ case UPDATE_FRAME:
+ case CATCH_FRAME:
+ case STOP_FRAME:
+ case SEQ_FRAME:
+ case RET_DYN:
+ case RET_BCO:
+ case RET_SMALL:
+ case RET_VEC_SMALL:
+ case RET_BIG:
+ case RET_VEC_BIG:
+ // other cases
+ case IND:
+ case BLOCKED_FETCH:
+ case FETCH_ME:
+ case FETCH_ME_BQ:
+ case RBH:
+ case REMOTE_REF:
+ case EVACUATED:
+ case INVALID_OBJECT:
+ default:
+ barf("Invalid object in isRetainer(): %d", get_itbl(c)->type);
+ return rtsFalse;
+ }
+}
+
+/* -----------------------------------------------------------------------------
+ * Returns the retainer function value for the closure *c, i.e., R(*c).
+ * This function does NOT return the retainer(s) of *c.
+ * Invariants:
+ * *c must be a retainer.
+ * Note:
+ * Depending on the definition of this function, the maintenance of retainer
+ * sets can be made easier. If most retainer sets are likely to be created
+ * again across garbage collections, refreshAllRetainerSet() in
+ * RetainerSet.c can simply set the cost field of each retainer set.
+ * If this is not the case, we can free all the retainer sets and
+ * re-initialize the hash table.
+ * See refreshAllRetainerSet() in RetainerSet.c.
+ * -------------------------------------------------------------------------- */
+static inline retainer
+getRetainerFrom( StgClosure *c )
+{
+ ASSERT(isRetainer(c));
+
+#if defined(RETAINER_SCHEME_INFO)
+ // Retainer scheme 1: retainer = info table
+ return get_itbl(c);
+#elif defined(RETAINER_SCHEME_CCS)
+ // Retainer scheme 2: retainer = cost centre stack
+ return c->header.prof.ccs;
+#elif defined(RETAINER_SCHEME_CC)
+ // Retainer scheme 3: retainer = cost centre
+ return c->header.prof.ccs->cc;
+#endif
+}
+
+/* -----------------------------------------------------------------------------
+ * Associates the retainer set *s with the closure *c, that is, *s becomes
+ * the retainer set of *c.
+ * Invariants:
+ * c != NULL
+ * s != NULL
+ * -------------------------------------------------------------------------- */
+static inline void
+associate( StgClosure *c, RetainerSet *rsOfc, RetainerSet *s )
+{
+ nat cost_c;
+
+ cost_c = costPure(c); // not cost(c)
+ if (rsOfc != NULL) {
+ ASSERT(rsOfc->cost >= cost_c);
+ rsOfc->cost -= cost_c;
+ }
+ // StgWord has the same size as pointers, so the following type
+ // casting is okay.
+ RSET(c) = (RetainerSet *)((StgWord)s | flip);
+ s->cost += cost_c;
+}
+
+/* -----------------------------------------------------------------------------
+ * Process all the objects in the stack chunk from stackStart to stackEnd
+ * with *c and *c_child_r being their parent and their most recent retainer,
+ * respectively. Treat stackOptionalFun as another child of *c if it is
+ * not NULL.
+ * Invariants:
+ * *c is one of the following: TSO, PAP, and AP_UPD.
+ * If *c is AP_UPD or PAP, stackOptionalFun is not NULL. Otherwise,
+ * it is NULL.
+ * If *c is TSO, c == c_child_r.
+ * stackStart < stackEnd.
+ * RSET(c) and RSET(c_child_r) are valid, i.e., their
+ * interpretation conforms to the current value of flip (even when they
+ * are interpreted to be NULL).
+ * If *c is TSO, its state is not any of ThreadRelocated, ThreadComplete,
+ * or ThreadKilled, which means that its stack is ready to process.
+ * Note:
+ * This code was almost plagiarized from GC.c! For each pointer,
+ * retainClosure() is invoked instead of evacuate().
+ * -------------------------------------------------------------------------- */
+static void
+retainStack( StgClosure *c, StgClosure *c_child_r,
+ StgClosure *stackOptionalFun, StgPtr stackStart,
+ StgPtr stackEnd )
+{
+ stackElement *oldStackBoundary;
+ StgPtr p, q;
+ StgInfoTable *info;
+ StgWord32 bitmap;
+
+#ifdef DEBUG_RETAINER
+ cStackSize++;
+ if (cStackSize > maxCStackSize) maxCStackSize = cStackSize;
+#endif
+
+ /*
+ Each invocation of retainStack() creates a new virtual
+ stack. Since all such stacks share a single common stack, we
+ record the current currentStackBoundary, which will be restored
+ at the exit.
+ */
+ oldStackBoundary = currentStackBoundary;
+ currentStackBoundary = stackTop;
+
+#ifdef DEBUG_RETAINER
+ // fprintf(stderr, "retainStack() called: oldStackBoundary = 0x%x, currentStackBoundary = 0x%x\n", oldStackBoundary, currentStackBoundary);
+#endif
+
+ if (stackOptionalFun != NULL) {
+ ASSERT(get_itbl(c)->type == AP_UPD || get_itbl(c)->type == PAP);
+ retainClosure(stackOptionalFun, c, c_child_r);
+ } else {
+ ASSERT(get_itbl(c)->type == TSO);
+ ASSERT(((StgTSO *)c)->what_next != ThreadRelocated &&
+ ((StgTSO *)c)->what_next != ThreadComplete &&
+ ((StgTSO *)c)->what_next != ThreadKilled);
+ }
+
+ p = stackStart;
+ while (p < stackEnd) {
+ q = *(StgPtr *)p;
+
+ //
+ // Note & Todo:
+ // The correctness of retainer profiling is subject to the
+ // correctness of the two macros IS_ARG_TAG() and
+ // LOOKS_LIKE_GHC_INFO(). Since LOOKS_LIKE_GHC_INFO() is a bit
+ // precarious macro, so I believe that the current
+ // implementation may not be quite safe. Also, scavenge_stack()
+ // in GC.c also exploits this macro in order to identify shallow
+ // pointers. I am not sure whether scavenge_stack() takes
+ // further measurements to discern real shallow pointers.
+ //
+ // I think this can be a serious problem if a stack chunk
+ // contains some word which looks like a pointer but is
+ // actually, say, a word constituting a floating number.
+ //
+
+ // skip tagged words
+ if (IS_ARG_TAG((StgWord)q)) {
+ p += 1 + ARG_SIZE(q);
+ continue;
+ }
+
+ // check if *p is a shallow closure pointer
+ if (!LOOKS_LIKE_GHC_INFO(q)) {
+ retainClosure((StgClosure *)q, c, c_child_r);
+ p++;
+ continue;
+ }
+
+ // regular stack objects
+ info = get_itbl((StgClosure *)p);
+ switch(info->type) {
+ case RET_DYN:
+ bitmap = ((StgRetDyn *)p)->liveness;
+ p = ((StgRetDyn *)p)->payload;
+ goto small_bitmap;
+
+ // FUN and FUN_STATIC keep only their info pointer.
+ case FUN:
+ case FUN_STATIC:
+ p++;
+ goto follow_srt;
+
+ case UPDATE_FRAME:
+ retainClosure(((StgUpdateFrame *)p)->updatee, c, c_child_r);
+ p += sizeofW(StgUpdateFrame);
+ continue;
+
+ case STOP_FRAME:
+ case CATCH_FRAME:
+ case SEQ_FRAME:
+ case RET_BCO:
+ case RET_SMALL:
+ case RET_VEC_SMALL:
+ bitmap = info->layout.bitmap;
+ p++;
+ small_bitmap:
+ while (bitmap != 0) {
+ if ((bitmap & 1) == 0)
+ retainClosure((StgClosure *)*p, c, c_child_r);
+ p++;
+ bitmap = bitmap >> 1;
+ }
+ follow_srt:
+ {
+ StgClosure **srt, **srt_end;
+
+ srt = (StgClosure **)(info->srt);
+ srt_end = srt + info->srt_len;
+ for (; srt < srt_end; srt++) {
+ // See scavenge_srt() in GC.c for details.
+#ifdef ENABLE_WIN32_DLL_SUPPORT
+ if ((unsigned long)(*srt) & 0x1)
+ retainClosure(*(StgClosure **)(((unsigned long)*srt & ~0x1)), c, c_child_r);
+ else
+ retainClosure(*srt, c, c_child_r);
+#else
+ retainClosure(*srt, c, c_child_r);
+#endif
+ }
+ }
+ continue;
+
+ case RET_BIG:
+ case RET_VEC_BIG:
+ {
+ StgPtr q;
+ StgLargeBitmap *large_bitmap;
+ nat i;
+
+ large_bitmap = info->layout.large_bitmap;
+ p++;
+
+ for (i = 0; i < large_bitmap->size; i++) {
+ bitmap = large_bitmap->bitmap[i];
+ q = p + sizeofW(StgWord) * 8;
+ while (bitmap != 0) {
+ if ((bitmap & 1) == 0)
+ retainClosure((StgClosure *)*p, c, c_child_r);
+ p++;
+ bitmap = bitmap >> 1;
+ }
+ if (i + 1 < large_bitmap->size) {
+ while (p < q) {
+ retainClosure((StgClosure *)*p, c, c_child_r);
+ p++;
+ }
+ }
+ }
+ }
+ goto follow_srt;
+ default:
+ barf("Invalid object found in retainStack(): %d",
+ (int)(info->type));
+ }
+ }
+
+ // restore currentStackBoundary
+ currentStackBoundary = oldStackBoundary;
+#ifdef DEBUG_RETAINER
+ // fprintf(stderr, "retainStack() finished: currentStackBoundary = 0x%x\n", currentStackBoundary);
+#endif
+
+#ifdef DEBUG_RETAINER
+ cStackSize--;
+#endif
+}
+
+/* -----------------------------------------------------------------------------
+ * Compute the retainer set of *c0 and all its descendants by traversing.
+ * *cp0 is the parent of *c0, and *r0 is the most recent retainer of *c0.
+ * Invariants:
+ * c0 = cp0 = r0 holds only for root objects.
+ * RSET(cp0) and RSET(r0) are valid, i.e., their
+ * interpretation conforms to the current value of flip (even when they
+ * are interpreted to be NULL).
+ * However, RSET(c0) may be corrupt, i.e., it may not conform to
+ * the current value of flip. If it does not, during the execution
+ * of this function, RSET(c0) must be initialized as well as all
+ * its descendants.
+ * Note:
+ * stackTop must be the same at the beginning and the exit of this function.
+ * *c0 can be TSO (as well as PAP and AP_UPD).
+ * -------------------------------------------------------------------------- */
+static void
+retainClosure( StgClosure *c0, StgClosure *cp0, StgClosure *r0 )
+{
+ // c = Current closure
+ // cp = Current closure's Parent
+ // r = current closures' most recent Retainer
+ // c_child_r = current closure's children's most recent retainer
+ // first_child = first child of c
+ StgClosure *c, *cp, *r, *c_child_r, *first_child;
+ RetainerSet *s, *retainerSetOfc;
+ retainer R_r;
+ StgWord typeOfc;
+
+#ifdef DEBUG_RETAINER
+ // StgPtr oldStackTop;
+#endif
+
+#ifdef DEBUG_RETAINER
+ // oldStackTop = stackTop;
+ // fprintf(stderr, "retainClosure() called: c0 = 0x%x, cp0 = 0x%x, r0 = 0x%x\n", c0, cp0, r0);
+#endif
+
+ // (c, cp, r) = (c0, cp0, r0)
+ c = c0;
+ cp = cp0;
+ r = r0;
+ goto inner_loop;
+
+loop:
+ //fprintf(stderr, "loop");
+ // pop to (c, cp, r);
+ pop(&c, &cp, &r);
+
+ if (c == NULL) {
+#ifdef DEBUG_RETAINER
+ // fprintf(stderr, "retainClosure() ends: oldStackTop = 0x%x, stackTop = 0x%x\n", oldStackTop, stackTop);
+#endif
+ return;
+ }
+
+ //fprintf(stderr, "inner_loop");
+
+inner_loop:
+ // c = current closure under consideration,
+ // cp = current closure's parent,
+ // r = current closure's most recent retainer
+ //
+ // Loop invariants (on the meaning of c, cp, r, and their retainer sets):
+ // RSET(cp) and RSET(r) are valid.
+ // RSET(c) is valid only if c has been visited before.
+ //
+ // Loop invariants (on the relation between c, cp, and r)
+ // if cp is not a retainer, r belongs to RSET(cp).
+ // if cp is a retainer, r == cp.
+
+ typeOfc = get_itbl(c)->type;
+
+#ifdef DEBUG_RETAINER
+ switch (typeOfc) {
+ case IND_STATIC:
+ case CONSTR_INTLIKE:
+ case CONSTR_CHARLIKE:
+ case CONSTR_NOCAF_STATIC:
+ case CONSTR_STATIC:
+ case THUNK_STATIC:
+ case FUN_STATIC:
+ break;
+ default:
+ if (retainerSetOf(c) == NULL) { // first visit?
+ costArray[typeOfc] += cost(c);
+ sumOfNewCost += cost(c);
+ }
+ break;
+ }
+#endif
+
+ // special cases
+ switch (typeOfc) {
+ case TSO:
+ if (((StgTSO *)c)->what_next == ThreadComplete ||
+ ((StgTSO *)c)->what_next == ThreadKilled) {
+#ifdef DEBUG_RETAINER
+ fprintf(stderr, "ThreadComplete or ThreadKilled encountered in retainClosure()\n");
+#endif
+ goto loop;
+ }
+ if (((StgTSO *)c)->what_next == ThreadRelocated) {
+#ifdef DEBUG_RETAINER
+ fprintf(stderr, "ThreadRelocated encountered in retainClosure()\n");
+#endif
+ c = (StgClosure *)((StgTSO *)c)->link;
+ goto inner_loop;
+ }
+ break;
+
+ case IND_STATIC:
+ // We just skip IND_STATIC, so its retainer set is never computed.
+ c = ((StgIndStatic *)c)->indirectee;
+ goto inner_loop;
+ case CONSTR_INTLIKE:
+ case CONSTR_CHARLIKE:
+ // static objects with no pointers out, so goto loop.
+ case CONSTR_NOCAF_STATIC:
+ // It is not just enough not to compute the retainer set for *c; it is
+ // mandatory because CONSTR_NOCAF_STATIC are not reachable from
+ // scavenged_static_objects, the list through which we are assumed to traverse
+ // all static objects after major garbage collections.
+ goto loop;
+ case THUNK_STATIC:
+ case FUN_STATIC:
+ if (get_itbl(c)->srt_len == 0) {
+ // No need to compute the retainer set; no dynamic objects
+ // are reachable from *c.
+ //
+ // Static objects: if we traverse all the live closures,
+ // including static closures, during each heap census then
+ // we will observe that some static closures appear and
+ // disappear. eg. a closure may contain a pointer to a
+ // static function 'f' which is not otherwise reachable
+ // (it doesn't indirectly point to any CAFs, so it doesn't
+ // appear in any SRTs), so we would find 'f' during
+ // traversal. However on the next sweep there may be no
+ // closures pointing to 'f'.
+ //
+ // We must therefore ignore static closures whose SRT is
+ // empty, because these are exactly the closures that may
+ // "appear". A closure with a non-empty SRT, and which is
+ // still required, will always be reachable.
+ //
+ // But what about CONSTR_STATIC? Surely these may be able
+ // to appear, and they don't have SRTs, so we can't
+ // check. So for now, we're calling
+ // resetStaticObjectForRetainerProfiling() from the
+ // garbage collector to reset the retainer sets in all the
+ // reachable static objects.
+ goto loop;
+ }
+ default:
+ break;
+ }
+
+ // The above objects are ignored in computing the average number of times
+ // an object is visited.
+ timesAnyObjectVisited++;
+
+ // If this is the first visit to c, initialize its retainer set.
+ maybeInitRetainerSet(c);
+ retainerSetOfc = retainerSetOf(c);
+
+ // Now compute s:
+ // isRetainer(cp) == rtsTrue => s == NULL
+ // isRetainer(cp) == rtsFalse => s == cp.retainer
+ if (isRetainer(cp))
+ s = NULL;
+ else
+ s = retainerSetOf(cp);
+
+ // (c, cp, r, s) is available.
+ R_r = getRetainerFrom(r);
+
+ // (c, cp, r, s, R_r) is available, so compute the retainer set for *c.
+ if (retainerSetOfc == NULL) {
+ // This is the first visit to *c.
+ numObjectVisited++;
+
+ if (s == NULL)
+ associate(c, NULL, singleton(R_r));
+ else
+ // s is actually the retainer set of *c!
+ associate(c, NULL, s);
+
+ // compute c_child_r
+ c_child_r = isRetainer(c) ? c : r;
+ } else {
+ // This is not the first visit to *c.
+ if (isMember(R_r, retainerSetOfc))
+ goto loop; // no need to process child
+
+ if (s == NULL)
+ associate(c, retainerSetOfc, addElement(R_r, retainerSetOfc));
+ else {
+ // s is not NULL and cp is not a retainer. This means that
+ // each time *cp is visited, so is *c. Thus, if s has
+ // exactly one more element in its retainer set than c, s
+ // is also the new retainer set for *c.
+ if (s->num == retainerSetOfc->num + 1) {
+ associate(c, retainerSetOfc, s);
+ }
+ // Otherwise, just add R_r to the current retainer set of *c.
+ else {
+ associate(c, retainerSetOfc, addElement(R_r, retainerSetOfc));
+ }
+ }
+
+ if (isRetainer(c))
+ goto loop; // no need to process child
+
+ // compute c_child_r
+ c_child_r = r;
+ }
+
+ // now, RSET() of all of *c, *cp, and *r is valid.
+ // (c, c_child_r) are available.
+
+ // process child
+
+ if (typeOfc == TSO) {
+ retainStack(c, c_child_r,
+ NULL,
+ ((StgTSO *)c)->sp,
+ ((StgTSO *)c)->stack + ((StgTSO *)c)->stack_size);
+ // no more children
+ goto loop;
+ } else if (typeOfc == PAP) {
+ retainStack(c, c_child_r,
+ ((StgPAP *)c)->fun,
+ (StgPtr)((StgPAP *)c)->payload,
+ (StgPtr)((StgPAP *)c)->payload + ((StgPAP *)c)->n_args);
+ // no more children
+ goto loop;
+ } else if (typeOfc == AP_UPD) {
+ retainStack(c, c_child_r,
+ ((StgAP_UPD *)c)->fun,
+ (StgPtr)((StgAP_UPD *)c)->payload,
+ (StgPtr)((StgAP_UPD *)c)->payload +
+ ((StgAP_UPD *)c)->n_args);
+ // no more children
+ goto loop;
+ }
+
+ push(c, c_child_r, &first_child);
+
+ // If first_child is null, c has no child.
+ // If first_child is not null, the top stack element points to the next
+ // object. push() may or may not push a stackElement on the stack.
+ if (first_child == NULL)
+ goto loop;
+
+ // (c, cp, r) = (first_child, c, c_child_r)
+ r = c_child_r;
+ cp = c;
+ c = first_child;
+ goto inner_loop;
+}
+
+/* -----------------------------------------------------------------------------
+ * Compute the retainer set for every object reachable from *tl.
+ * Each root acts as its own retainer: closure, parent, and retainer
+ * are all *tl in the initial call to retainClosure().
+ * -------------------------------------------------------------------------- */
+static void
+retainRoot( StgClosure **tl )
+{
+    // We no longer assume that only TSOs and WEAKs are roots; any closure can
+    // be a root.
+
+    ASSERT(isEmptyRetainerStack());
+    currentStackBoundary = stackTop;
+
+    // Note: only *tl is read here; the root slot itself is never updated,
+    // which is why callers may pass the address of a local variable.
+    retainClosure(*tl, *tl, *tl);
+
+    // NOT TRUE: ASSERT(isMember(getRetainerFrom(*tl), retainerSetOf(*tl)));
+    // *tl might be a TSO which is ThreadComplete, in which
+    // case we ignore it for the purposes of retainer profiling.
+}
+
+/* -----------------------------------------------------------------------------
+ * Compute the retainer set for each of the objects in the heap.
+ * Roots are: scheduler roots (via GetRoots()), and every object on the
+ * weak pointer list.  Finally, unvisited mutable-list objects get their
+ * rs field reset (and are charged to the debug cost counters).
+ * -------------------------------------------------------------------------- */
+static void
+computeRetainerSet( void )
+{
+    StgWeak *weak;
+    RetainerSet *rtl;
+    nat g;
+    StgMutClosure *ml;
+#ifdef DEBUG_RETAINER
+    RetainerSet tmpRetainerSet;
+#endif
+
+    GetRoots(retainRoot);	// for scheduler roots
+
+    // This function is called after a major GC, when key, value, and finalizer
+    // all are guaranteed to be valid, or reachable.
+    //
+    // The following code assumes that WEAK objects are considered to be roots
+    // for retainer profiling.
+    for (weak = weak_ptr_list; weak != NULL; weak = weak->link)
+	// retainRoot((StgClosure *)weak);
+	// Note: &weak is the address of the local list cursor, not of a heap
+	// slot.  This is safe only because retainRoot() just dereferences its
+	// argument and never writes through it.
+	retainRoot((StgClosure **)&weak);
+
+    // The following code resets the rs field of each unvisited mutable
+    // object (computing sumOfNewCostExtra and updating costArray[] when
+    // debugging retainer profiler).
+    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+	ASSERT(g != 0 ||
+	       (generations[g].mut_list == END_MUT_LIST &&
+		generations[g].mut_once_list == END_MUT_LIST));
+
+	// Todo:
+	// I think traversing through mut_list is unnecessary.
+	// Think about removing this part.
+	for (ml = generations[g].mut_list; ml != END_MUT_LIST;
+	     ml = ml->mut_link) {
+
+	    maybeInitRetainerSet((StgClosure *)ml);
+	    rtl = retainerSetOf((StgClosure *)ml);
+
+#ifdef DEBUG_RETAINER
+	    if (rtl == NULL) {
+		// first visit to *ml
+		// This is a violation of the interface rule!
+		RSET(ml) = (RetainerSet *)((StgWord)(&tmpRetainerSet) | flip);
+
+		switch (get_itbl((StgClosure *)ml)->type) {
+		case IND_STATIC:
+		    // no cost involved
+		    break;
+		case CONSTR_INTLIKE:
+		case CONSTR_CHARLIKE:
+		case CONSTR_NOCAF_STATIC:
+		case CONSTR_STATIC:
+		case THUNK_STATIC:
+		case FUN_STATIC:
+		    barf("Invalid object in computeRetainerSet(): %d", get_itbl((StgClosure*)ml)->type);
+		    break;
+		default:
+		    // dynamic objects
+		    costArray[get_itbl((StgClosure *)ml)->type] += cost((StgClosure *)ml);
+		    sumOfNewCostExtra += cost((StgClosure *)ml);
+		    break;
+		}
+	    }
+#endif
+	}
+
+	// Traversing through mut_once_list is, in contrast, necessary
+	// because we can find MUT_VAR objects which have not been
+	// visited during retainer profiling.
+	// NOTE(review): this #ifdef DEBUG_RETAINER block is byte-identical to
+	// the one above; a shared static helper would avoid the two drifting.
+	for (ml = generations[g].mut_once_list; ml != END_MUT_LIST;
+	     ml = ml->mut_link) {
+
+	    maybeInitRetainerSet((StgClosure *)ml);
+	    rtl = retainerSetOf((StgClosure *)ml);
+#ifdef DEBUG_RETAINER
+	    if (rtl == NULL) {
+		// first visit to *ml
+		// This is a violation of the interface rule!
+		RSET(ml) = (RetainerSet *)((StgWord)(&tmpRetainerSet) | flip);
+
+		switch (get_itbl((StgClosure *)ml)->type) {
+		case IND_STATIC:
+		    // no cost involved
+		    break;
+		case CONSTR_INTLIKE:
+		case CONSTR_CHARLIKE:
+		case CONSTR_NOCAF_STATIC:
+		case CONSTR_STATIC:
+		case THUNK_STATIC:
+		case FUN_STATIC:
+		    barf("Invalid object in computeRetainerSet(): %d", get_itbl((StgClosure*)ml)->type);
+		    break;
+		default:
+		    // dynamic objects
+		    costArray[get_itbl((StgClosure *)ml)->type] += cost((StgClosure *)ml);
+		    sumOfNewCostExtra += cost((StgClosure *)ml);
+		    break;
+		}
+	    }
+#endif
+	}
+    }
+}
+
+/* -----------------------------------------------------------------------------
+ * Traverse all static objects for which we compute retainer sets,
+ * and reset their rs fields to NULL, which is accomplished by
+ * invoking maybeInitRetainerSet(). This function must be called
+ * before zeroing all objects reachable from scavenged_static_objects
+ * in the case of major garbage collections. See GarbageCollect() in
+ * GC.c.
+ * Note:
+ *   The mut_once_list of the oldest generation must also be traversed?
+ *   Why? Because if the evacuation of an object pointed to by a static
+ *   indirection object fails, it is put back to the mut_once_list of
+ *   the oldest generation.
+ *   However, this is not necessary because any static indirection objects
+ *   are just traversed through to reach dynamic objects. In other words,
+ *   they are not taken into consideration in computing retainer sets.
+ * -------------------------------------------------------------------------- */
+void
+resetStaticObjectForRetainerProfiling( void )
+{
+#ifdef DEBUG_RETAINER
+    nat count;
+#endif
+    StgClosure *p;
+
+#ifdef DEBUG_RETAINER
+    count = 0;
+#endif
+    p = scavenged_static_objects;
+    while (p != END_OF_STATIC_LIST) {
+#ifdef DEBUG_RETAINER
+	count++;
+#endif
+	switch (get_itbl(p)->type) {
+	case IND_STATIC:
+	    // Since we do not compute the retainer set of any
+	    // IND_STATIC object, we don't have to reset its retainer
+	    // field.
+	    p = IND_STATIC_LINK(p);
+	    break;
+	case THUNK_STATIC:
+	    maybeInitRetainerSet(p);
+	    p = THUNK_STATIC_LINK(p);
+	    break;
+	case FUN_STATIC:
+	    maybeInitRetainerSet(p);
+	    p = FUN_STATIC_LINK(p);
+	    break;
+	case CONSTR_STATIC:
+	    maybeInitRetainerSet(p);
+	    p = STATIC_LINK(get_itbl(p), p);
+	    break;
+	default:
+	    // get_itbl(p)->type is a number, not a string, so it must be
+	    // printed with %d (the previous %s would crash inside barf()).
+	    barf("resetStaticObjectForRetainerProfiling: %p (%d)",
+		 p, get_itbl(p)->type);
+	    break;
+	}
+    }
+#ifdef DEBUG_RETAINER
+    // fprintf(stderr, "count in scavenged_static_objects = %d\n", count);
+#endif
+}
+
+/* -----------------------------------------------------------------------------
+ * Perform retainer profiling.
+ * N is the oldest generation being profiled, where the generations are
+ * numbered starting at 0.
+ * Invariants:
+ * Note:
+ *   This function should be called only immediately after major garbage
+ *   collection.
+ * ------------------------------------------------------------------------- */
+void
+retainerProfile(void)
+{
+    nat allCost, numSet;
+#ifdef DEBUG_RETAINER
+    nat i;
+    nat totalHeapSize; // total raw heap size (computed by linear scanning)
+#endif
+
+#ifdef DEBUG_RETAINER
+    fprintf(stderr, " < retainerProfile() invoked : %d>\n", retainerGeneration);
+#endif
+
+    stat_startRP();
+
+    // We haven't flipped the bit yet.
+#ifdef DEBUG_RETAINER
+    fprintf(stderr, "Before traversing:\n");
+    sumOfCostLinear = 0;
+    for (i = 0;i < N_CLOSURE_TYPES; i++)
+	costArrayLinear[i] = 0;
+    totalHeapSize = checkHeapSanityForRetainerProfiling();
+
+    fprintf(stderr, "\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
+    /*
+    fprintf(stderr, "costArrayLinear[] = ");
+    for (i = 0;i < N_CLOSURE_TYPES; i++)
+	fprintf(stderr, "[%u:%u] ", i, costArrayLinear[i]);
+    fprintf(stderr, "\n");
+    */
+
+    ASSERT(sumOfCostLinear == totalHeapSize);
+
+/*
+#define pcostArrayLinear(index) \
+    if (costArrayLinear[index] > 0) \
+	fprintf(stderr, "costArrayLinear[" #index "] = %u\n", costArrayLinear[index])
+    pcostArrayLinear(THUNK_STATIC);
+    pcostArrayLinear(FUN_STATIC);
+    pcostArrayLinear(CONSTR_STATIC);
+    pcostArrayLinear(CONSTR_NOCAF_STATIC);
+    pcostArrayLinear(CONSTR_INTLIKE);
+    pcostArrayLinear(CONSTR_CHARLIKE);
+*/
+#endif
+
+    // Now we flip the 'flip' bit: it distinguishes retainer-set marks made
+    // in this profiling pass from those made in the previous pass.
+    flip = flip ^ 1;
+
+#ifdef DEBUG_RETAINER
+    stackSize = 0;
+    maxStackSize = 0;
+    cStackSize = 0;
+    maxCStackSize = 0;
+#endif
+    numObjectVisited = 0;
+    timesAnyObjectVisited = 0;
+
+#ifdef DEBUG_RETAINER
+    fprintf(stderr, "During traversing:\n");
+    sumOfNewCost = 0;
+    sumOfNewCostExtra = 0;
+    for (i = 0;i < N_CLOSURE_TYPES; i++)
+	costArray[i] = 0;
+#endif
+
+    /*
+      We initialize the traverse stack each time the retainer profiling is
+      performed (because the traverse stack size varies on each retainer profiling
+      and this operation is not costly anyhow). However, we just refresh the
+      retainer sets.
+     */
+    initializeTraverseStack();
+#ifdef DEBUG_RETAINER
+    initializeAllRetainerSet();
+#else
+    refreshAllRetainerSet();
+#endif
+    computeRetainerSet();
+
+    outputRetainerSet(hp_file, &allCost, &numSet);
+
+#ifdef DEBUG_RETAINER
+    fprintf(stderr, "After traversing:\n");
+    sumOfCostLinear = 0;
+    for (i = 0;i < N_CLOSURE_TYPES; i++)
+	costArrayLinear[i] = 0;
+    totalHeapSize = checkHeapSanityForRetainerProfiling();
+
+    fprintf(stderr, "\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
+    ASSERT(sumOfCostLinear == totalHeapSize);
+
+    // now, compare the two results
+    /*
+      Note:
+        costArray[] must be exactly the same as costArrayLinear[].
+        Known exceptions:
+          1) Dead weak pointers, whose type is CONSTR. These objects are not
+             reachable from any roots.
+    */
+    fprintf(stderr, "Comparison:\n");
+    fprintf(stderr, "\tcostArrayLinear[] (must be empty) = ");
+    for (i = 0;i < N_CLOSURE_TYPES; i++)
+	if (costArray[i] != costArrayLinear[i])
+	    // nothing should be printed except MUT_VAR after major GCs
+	    fprintf(stderr, "[%u:%u] ", i, costArrayLinear[i]);
+    fprintf(stderr, "\n");
+
+    fprintf(stderr, "\tsumOfNewCost = %u\n", sumOfNewCost);
+    fprintf(stderr, "\tsumOfNewCostExtra = %u\n", sumOfNewCostExtra);
+    fprintf(stderr, "\tcostArray[] (must be empty) = ");
+    for (i = 0;i < N_CLOSURE_TYPES; i++)
+	if (costArray[i] != costArrayLinear[i])
+	    // nothing should be printed except MUT_VAR after major GCs
+	    fprintf(stderr, "[%u:%u] ", i, costArray[i]);
+    fprintf(stderr, "\n");
+
+    // only for major garbage collection
+    ASSERT(sumOfNewCost + sumOfNewCostExtra == sumOfCostLinear);
+#endif
+
+    // post-processing
+    closeTraverseStack();
+#ifdef DEBUG_RETAINER
+    closeAllRetainerSet();
+#else
+    // Note that there is no post-processing for the retainer sets.
+#endif
+    retainerGeneration++;
+
+    stat_endRP(
+	retainerGeneration - 1,   // retainerGeneration has just been incremented!
+#ifdef DEBUG_RETAINER
+	maxCStackSize, maxStackSize,
+#endif
+	(double)timesAnyObjectVisited / numObjectVisited,
+	allCost, numSet);
+}
+
+/* -----------------------------------------------------------------------------
+ * DEBUGGING CODE
+ * -------------------------------------------------------------------------- */
+
+#ifdef DEBUG_RETAINER
+
+// Heuristic check that r looks like a closure pointer: a static closure, or
+// heap-allocated in a block whose 'free' field is valid, and not pointing at
+// 0xaaaaaaaa (presumably the fill pattern for dead memory — TODO confirm).
+#define LOOKS_LIKE_PTR(r) ((LOOKS_LIKE_STATIC_CLOSURE(r) || \
+                           ((HEAP_ALLOCED(r) && Bdescr((P_)r)->free != (void *)-1))) && \
+                           ((StgWord)(*(StgPtr)r)!=0xaaaaaaaa))
+
+// Checks that *c is a plausible dynamic heap closure, reports to stderr any
+// closure not visited in the current pass (dead weak pointers are tolerated
+// and charged to costArray/sumOfNewCost), and returns the closure size in
+// words.  barf()s on closure types that should never appear in the heap scan.
+static nat
+sanityCheckHeapClosure( StgClosure *c )
+{
+    StgInfoTable *info;
+
+    ASSERT(LOOKS_LIKE_GHC_INFO(c->header.info));
+    ASSERT(!closure_STATIC(c));
+    ASSERT(LOOKS_LIKE_PTR(c));
+
+    // Low bit of RSET(c) disagreeing with 'flip' means *c was not visited
+    // during this profiling pass.
+    if ((((StgWord)RSET(c) & 1) ^ flip) != 0) {
+	if (get_itbl(c)->type == CONSTR &&
+	    !strcmp(get_itbl(c)->prof.closure_type, "DEAD_WEAK") &&
+	    !strcmp(get_itbl(c)->prof.closure_desc, "DEAD_WEAK")) {
+	    fprintf(stderr, "\tUnvisited dead weak pointer object found: c = %p\n", c);
+	    costArray[get_itbl(c)->type] += cost(c);
+	    sumOfNewCost += cost(c);
+	} else
+	    fprintf(stderr,
+		    "Unvisited object: flip = %d, c = %p(%d, %s, %s), rs = %p\n",
+		    flip, c, get_itbl(c)->type,
+		    get_itbl(c)->prof.closure_type, get_itbl(c)->prof.closure_desc,
+		    RSET(c));
+    } else {
+	// fprintf(stderr, "sanityCheckHeapClosure) S: flip = %d, c = %p(%d), rs = %p\n", flip, c, get_itbl(c)->type, RSET(c));
+    }
+
+    info = get_itbl(c);
+    switch (info->type) {
+    case TSO:
+	return tso_sizeW((StgTSO *)c);
+
+    case THUNK:
+    case THUNK_1_0:
+    case THUNK_0_1:
+    case THUNK_2_0:
+    case THUNK_1_1:
+    case THUNK_0_2:
+	return stg_max(sizeW_fromITBL(info), sizeofW(StgHeader) + MIN_UPD_SIZE);
+
+    case MVAR:
+	return sizeofW(StgMVar);
+
+    case MUT_ARR_PTRS:
+    case MUT_ARR_PTRS_FROZEN:
+	return mut_arr_ptrs_sizeW((StgMutArrPtrs *)c);
+
+    case AP_UPD:
+    case PAP:
+	return pap_sizeW((StgPAP *)c);
+
+    case ARR_WORDS:
+	return arr_words_sizeW((StgArrWords *)c);
+
+    case CONSTR:
+    case CONSTR_1_0:
+    case CONSTR_0_1:
+    case CONSTR_2_0:
+    case CONSTR_1_1:
+    case CONSTR_0_2:
+    case FUN:
+    case FUN_1_0:
+    case FUN_0_1:
+    case FUN_2_0:
+    case FUN_1_1:
+    case FUN_0_2:
+    case WEAK:
+    case MUT_VAR:
+    case MUT_CONS:
+    case CAF_BLACKHOLE:
+    case BLACKHOLE:
+    case SE_BLACKHOLE:
+    case SE_CAF_BLACKHOLE:
+    case BLACKHOLE_BQ:
+    case IND_PERM:
+    case IND_OLDGEN:
+    case IND_OLDGEN_PERM:
+    case FOREIGN:
+    case BCO:
+    case STABLE_NAME:
+	return sizeW_fromITBL(info);
+
+    case THUNK_SELECTOR:
+	return sizeofW(StgHeader) + MIN_UPD_SIZE;
+
+	/*
+	  Error case
+	*/
+    case IND_STATIC:
+    case CONSTR_STATIC:
+    case FUN_STATIC:
+    case THUNK_STATIC:
+    case CONSTR_INTLIKE:
+    case CONSTR_CHARLIKE:
+    case CONSTR_NOCAF_STATIC:
+    case UPDATE_FRAME:
+    case CATCH_FRAME:
+    case STOP_FRAME:
+    case SEQ_FRAME:
+    case RET_DYN:
+    case RET_BCO:
+    case RET_SMALL:
+    case RET_VEC_SMALL:
+    case RET_BIG:
+    case RET_VEC_BIG:
+    case IND:
+    case BLOCKED_FETCH:
+    case FETCH_ME:
+    case FETCH_ME_BQ:
+    case RBH:
+    case REMOTE_REF:
+    case EVACUATED:
+    case INVALID_OBJECT:
+    default:
+	barf("Invalid object in sanityCheckHeapClosure(): %d",
+	     get_itbl(c)->type);
+	return 0;
+    }
+}
+
+// Linearly scan the block chain 'bd', sanity-checking every closure and
+// accumulating sizes into sumOfCostLinear / costArrayLinear[].
+// Returns the total number of words occupied in the chain.
+static nat
+heapCheck( bdescr *bd )
+{
+    StgPtr p;
+    // Previously declared 'static' for no reason: both are fully
+    // initialized before use, and plain locals keep the helper reentrant.
+    nat costSum, size;
+
+    costSum = 0;
+    while (bd != NULL) {
+	p = bd->start;
+	while (p < bd->free) {
+	    size = sanityCheckHeapClosure((StgClosure *)p);
+	    sumOfCostLinear += size;
+	    costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
+	    p += size;
+	    // no need for slop check; I think slops are not used currently.
+	}
+	ASSERT(p == bd->free);
+	costSum += bd->free - bd->start;
+	bd = bd->link;
+    }
+
+    return costSum;
+}
+
+// Linearly scan the small-object allocation pool (small_alloc_list): the
+// first block is only filled up to alloc_Hp, subsequent blocks up to 'free'.
+// Accumulates into sumOfCostLinear / costArrayLinear[] and returns the
+// total number of words scanned.
+static nat
+smallObjectPoolCheck(void)
+{
+    bdescr *bd;
+    StgPtr p;
+    // Previously declared 'static' for no reason: both are fully
+    // initialized before use, and plain locals keep the helper reentrant.
+    nat costSum, size;
+
+    bd = small_alloc_list;
+    costSum = 0;
+
+    // first block
+    if (bd == NULL)
+	return costSum;
+
+    p = bd->start;
+    while (p < alloc_Hp) {
+	size = sanityCheckHeapClosure((StgClosure *)p);
+	sumOfCostLinear += size;
+	costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
+	p += size;
+    }
+    ASSERT(p == alloc_Hp);
+    costSum += alloc_Hp - bd->start;
+
+    bd = bd->link;
+    while (bd != NULL) {
+	p = bd->start;
+	while (p < bd->free) {
+	    size = sanityCheckHeapClosure((StgClosure *)p);
+	    sumOfCostLinear += size;
+	    costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
+	    p += size;
+	}
+	ASSERT(p == bd->free);
+	costSum += bd->free - bd->start;
+	bd = bd->link;
+    }
+
+    return costSum;
+}
+
+// Sanity-check a chain of large-object blocks, one closure per block,
+// accumulating sizes into sumOfCostLinear / costArrayLinear[].
+// Returns the total closure size (in words) over the whole chain.
+static nat
+chainCheck(bdescr *bd)
+{
+    nat totalCost = 0;
+    nat closureSize;
+
+    for (; bd != NULL; bd = bd->link) {
+	// bd->free - bd->start is not an accurate measurement of the
+	// object size here (it is always zero for these blocks), so the
+	// closure size is computed explicitly.
+	closureSize = sanityCheckHeapClosure((StgClosure *)bd->start);
+	sumOfCostLinear += closureSize;
+	costArrayLinear[get_itbl((StgClosure *)bd->start)->type] += closureSize;
+	totalCost += closureSize;
+    }
+
+    return totalCost;
+}
+
+// Walks every generation/step (or g0s0 for a one-generation heap) and
+// sanity-checks the whole heap, returning the total heap size in words.
+// Progress is logged to stderr after each partial scan.
+static nat
+checkHeapSanityForRetainerProfiling( void )
+{
+    nat costSum, g, s;
+
+    costSum = 0;
+    fprintf(stderr, "START: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
+    if (RtsFlags.GcFlags.generations == 1) {
+	costSum += heapCheck(g0s0->to_blocks);
+	fprintf(stderr, "heapCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
+	costSum += chainCheck(g0s0->large_objects);
+	fprintf(stderr, "chainCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
+    } else {
+	for (g = 0; g < RtsFlags.GcFlags.generations; g++)
+	for (s = 0; s < generations[g].n_steps; s++) {
+	    /*
+	      After all live objects have been scavenged, the garbage
+	      collector may create some objects in
+	      scheduleFinalizers(). These objects are created through
+	      allocate(), so the small object pool or the large object
+	      pool of the g0s0 may not be empty.
+	    */
+	    if (g == 0 && s == 0) {
+		costSum += smallObjectPoolCheck();
+		fprintf(stderr, "smallObjectPoolCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
+		costSum += chainCheck(generations[g].steps[s].large_objects);
+		fprintf(stderr, "chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
+	    } else {
+		costSum += heapCheck(generations[g].steps[s].blocks);
+		fprintf(stderr, "heapCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
+		costSum += chainCheck(generations[g].steps[s].large_objects);
+		fprintf(stderr, "chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
+	    }
+	}
+    }
+
+    return costSum;
+}
+
+// Debug aid: scan the whole heap (ordinary blocks and large objects) for
+// any word equal to p, and report each hit together with the start of the
+// enclosing closure, found by scanning backwards to an info pointer.
+void
+findPointer(StgPtr p)
+{
+    StgPtr q, r, e;
+    bdescr *bd;
+    nat g, s;
+
+    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+	for (s = 0; s < generations[g].n_steps; s++) {
+	    // if (g == 0 && s == 0) continue;
+	    bd = generations[g].steps[s].blocks;
+	    for (; bd; bd = bd->link) {
+		for (q = bd->start; q < bd->free; q++) {
+		    if (*q == (StgWord)p) {
+			r = q;
+			// walk back to the closure's info pointer
+			while (!LOOKS_LIKE_GHC_INFO(*r)) r--;
+			fprintf(stderr, "Found in gen[%d], step[%d]: q = %p, r = %p\n", g, s, q, r);
+			// return;
+		    }
+		}
+	    }
+	    bd = generations[g].steps[s].large_objects;
+	    for (; bd; bd = bd->link) {
+		e = bd->start + cost((StgClosure *)bd->start);
+		for (q = bd->start; q < e; q++) {
+		    if (*q == (StgWord)p) {
+			r = q;
+			while (*r == 0 || !LOOKS_LIKE_GHC_INFO(*r)) r--;
+			fprintf(stderr, "Found in gen[%d], large_objects: %p\n", g, r);
+			// return;
+		    }
+		}
+	    }
+	}
+    }
+}
+
+// Debug aid: report which generation/step (or large-object chain) the
+// address p falls into, if any; silent when p is not in the heap.
+static void
+belongToHeap(StgPtr p)
+{
+    bdescr *bd;
+    nat g, s;
+
+    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+	for (s = 0; s < generations[g].n_steps; s++) {
+	    // if (g == 0 && s == 0) continue;
+	    bd = generations[g].steps[s].blocks;
+	    for (; bd; bd = bd->link) {
+		if (bd->start <= p && p < bd->free) {
+		    fprintf(stderr, "Belongs to gen[%d], step[%d]", g, s);
+		    return;
+		}
+	    }
+	    bd = generations[g].steps[s].large_objects;
+	    for (; bd; bd = bd->link) {
+		if (bd->start <= p && p < bd->start + getHeapClosureSize((StgClosure *)bd->start)) {
+		    fprintf(stderr, "Found in gen[%d], large_objects: %p\n", g, bd->start);
+		    return;
+		}
+	    }
+	}
+    }
+}
+#endif // DEBUG_RETAINER
+
+#endif /* PROFILING */
diff --git a/ghc/rts/RetainerProfile.h b/ghc/rts/RetainerProfile.h
new file mode 100644
index 0000000000..7a2f0fb16e
--- /dev/null
+++ b/ghc/rts/RetainerProfile.h
@@ -0,0 +1,29 @@
+/* -----------------------------------------------------------------------------
+ * $Id: RetainerProfile.h,v 1.1 2001/11/22 14:25:12 simonmar Exp $
+ *
+ * (c) The GHC Team, 2001
+ * Author: Sungwoo Park
+ *
+ * Retainer profiling interface.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef RETAINERPROFILE_H
+#define RETAINERPROFILE_H
+
+#ifdef PROFILING
+
+extern void initRetainerProfiling ( void );
+extern void endRetainerProfiling ( void );
+// Print a single retainer (representation depends on the retainer scheme).
+extern void printRetainer ( FILE *, retainer );
+// Run one retainer-profiling pass; call only right after a major GC.
+extern void retainerProfile ( void );
+extern void resetStaticObjectForRetainerProfiling ( void );
+
+// firstStack is exported because memInventory() in Schedule.c uses it.
+#ifdef DEBUG
+extern bdescr *firstStack;
+#endif
+
+#endif /* PROFILING */
+
+#endif /* RETAINERPROFILE_H */
diff --git a/ghc/rts/RetainerSet.c b/ghc/rts/RetainerSet.c
new file mode 100644
index 0000000000..709555a82e
--- /dev/null
+++ b/ghc/rts/RetainerSet.c
@@ -0,0 +1,587 @@
+/* -----------------------------------------------------------------------------
+ * $Id: RetainerSet.c,v 1.1 2001/11/22 14:25:12 simonmar Exp $
+ *
+ * (c) The GHC Team, 2001
+ * Author: Sungwoo Park
+ *
+ * Retainer set implementation for retainer profiling (see RetainerProfile.c)
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifdef PROFILING
+
+#include "Rts.h"
+#include "Stats.h"
+#include "RtsUtils.h"
+#include "RetainerSet.h"
+#include "Arena.h"
+#include "Profiling.h"
+
+#include <string.h>
+
+// All retainer sets are interned in this chained hash table.
+// The macro argument is parenthesized so hash() stays correct even if an
+// expression is ever passed (the previous definition relied on callers
+// passing a plain variable).
+#define HASH_TABLE_SIZE 255
+#define hash(hk)  ((hk) % HASH_TABLE_SIZE)
+static RetainerSet *hashTable[HASH_TABLE_SIZE];
+
+static Arena *arena; // arena in which we store retainer sets
+
+static int nextId; // id of next retainer set
+
+/* -----------------------------------------------------------------------------
+ * rs_MANY is a distinguished retainer set, such that
+ *
+ *        isMember(e, rs_MANY)   = True
+ *
+ *        addElement(e, rs)      = rs_MANY,   if rs->num >= maxRetainerSetSize
+ *        addElement(e, rs_MANY) = rs_MANY
+ *
+ * The point of rs_MANY is to keep the total number of retainer sets
+ * from growing too large.
+ * -------------------------------------------------------------------------- */
+// NOTE(review): 'field : value' is the obsolete GNU initializer syntax;
+// C99 designated initializers ('.field = value') are the portable spelling.
+// 'element : {}' (empty initializer for the trailing array) is likewise a
+// GNU extension.
+RetainerSet rs_MANY = {
+    num : 0,
+    cost : 0,
+    hashKey : 0,
+    link : NULL,
+    id : 1,
+    element : {}
+};
+
+// Sets larger than this collapse into rs_MANY (see addElement()).
+nat maxRetainerSetSize = 16;
+
+/* -----------------------------------------------------------------------------
+ * Number of bytes needed for a RetainerSet holding 'elems' retainers:
+ * the fixed header plus the trailing element array.
+ * -------------------------------------------------------------------------- */
+static inline size_t
+sizeofRetainerSet( int elems )
+{
+    size_t header  = sizeof(RetainerSet);
+    size_t payload = elems * sizeof(retainer);
+    return header + payload;
+}
+
+/* -----------------------------------------------------------------------------
+ * Creates the first pool and initializes hashTable[].
+ * Frees all pools if any.
+ * -------------------------------------------------------------------------- */
+void
+initializeAllRetainerSet(void)
+{
+    int i;
+
+    arena = newArena();
+
+    for (i = 0; i < HASH_TABLE_SIZE; i++)
+	hashTable[i] = NULL;
+    // Ids must stay positive; id 1 is reserved for rs_MANY (see its
+    // definition above), so ordinary sets are numbered from 2 upwards.
+    nextId = 2;
+}
+
+/* -----------------------------------------------------------------------------
+ * Refreshes all pools for reuse and initializes hashTable[].
+ * FIRST_APPROACH throws everything away and starts over;
+ * SECOND_APPROACH keeps all interned sets (and their ids) and merely
+ * zeroes their cost fields for the next profiling pass.
+ * -------------------------------------------------------------------------- */
+void
+refreshAllRetainerSet(void)
+{
+    int i;
+
+    // Choose one of the following two approaches.
+
+#ifdef FIRST_APPROACH
+    // first approach: completely refresh
+    arenaFree(arena);
+    arena = newArena();
+
+    for (i = 0; i < HASH_TABLE_SIZE; i++)
+	hashTable[i] = NULL;
+    nextId = 2;
+#endif // FIRST_APPROACH
+
+#ifdef SECOND_APPROACH
+    // second approach: leave all the retainer sets for reuse
+    RetainerSet *rs;
+    for (i = 0;i < HASH_TABLE_SIZE; i++) {
+	rs = hashTable[i];
+	while (rs != NULL) {
+	    rs->cost = 0;
+	    rs = rs->link;
+	}
+    }
+    rs_MANY.cost = 0;
+#endif // SECOND_APPROACH
+}
+
+/* -----------------------------------------------------------------------------
+ * Frees all pools.  Every retainer set lives in 'arena', so releasing the
+ * arena releases them all at once; hashTable[] is left dangling and must
+ * not be used until initializeAllRetainerSet() is called again.
+ * -------------------------------------------------------------------------- */
+void
+closeAllRetainerSet(void)
+{
+    arenaFree(arena);
+}
+
+/* -----------------------------------------------------------------------------
+ * Finds or creates if needed a singleton retainer set {r}.
+ * Interned: two calls with the same retainer return the same pointer.
+ * -------------------------------------------------------------------------- */
+RetainerSet *
+singleton(retainer r)
+{
+    RetainerSet *rs;
+    StgWord hk;
+
+    hk = hashKeySingleton(r);
+    for (rs = hashTable[hash(hk)]; rs != NULL; rs = rs->link)
+	if (rs->num == 1 && rs->element[0] == r) return rs; // found it
+
+    // create it
+    rs = arenaAlloc( arena, sizeofRetainerSet(1) );
+    rs->num = 1;
+    rs->cost = 0;
+    rs->hashKey = hk;
+    rs->link = hashTable[hash(hk)];
+    rs->id = nextId++;
+    rs->element[0] = r;
+
+    // The new retainer set is placed at the head of the linked list.
+    hashTable[hash(hk)] = rs;
+
+    return rs;
+}
+
+/* -----------------------------------------------------------------------------
+ * Finds or creates a retainer set *rs augmented with r.
+ * Element arrays are kept sorted by retainer value, which makes
+ * set-equality a positional comparison.
+ * Invariants:
+ *   r is not a member of rs, i.e., isMember(r, rs) returns rtsFalse.
+ *   rs is not NULL.
+ * Note:
+ *   We could check if rs is NULL, in which case this function call
+ *   reverts to singleton(). We do not choose this strategy because
+ *   in most cases addElement() is invoked with non-NULL rs.
+ * -------------------------------------------------------------------------- */
+RetainerSet *
+addElement(retainer r, RetainerSet *rs)
+{
+    nat i;
+    nat nl;	// Number of retainers in *rs Less than r
+    RetainerSet *nrs;	// New Retainer Set
+    StgWord hk;	// Hash Key
+
+#ifdef DEBUG_RETAINER
+    // fprintf(stderr, "addElement(%p, %p) = ", r, rs);
+#endif
+
+    ASSERT(rs != NULL);
+    ASSERT(rs->num <= maxRetainerSetSize);
+
+    // Saturate: oversized sets all collapse into the shared rs_MANY.
+    if (rs == &rs_MANY || rs->num == maxRetainerSetSize) {
+	return &rs_MANY;
+    }
+
+    ASSERT(!isMember(r, rs));
+
+    for (nl = 0; nl < rs->num; nl++)
+	if (r < rs->element[nl]) break;
+    // Now nl is the index for r into the new set.
+    // Also it denotes the number of retainers less than r in *rs.
+    // Thus, compare the first nl retainers, then r itself, and finally the
+    // remaining (rs->num - nl) retainers.
+
+    hk = hashKeyAddElement(r, rs);
+    for (nrs = hashTable[hash(hk)]; nrs != NULL; nrs = nrs->link) {
+	// test *rs and *nrs for equality
+
+	// check their size
+	if (rs->num + 1 != nrs->num) continue;
+
+	// compare the first nl retainers and find the first non-matching one.
+	for (i = 0; i < nl; i++)
+	    if (rs->element[i] != nrs->element[i]) break;
+	if (i < nl) continue;
+
+	// compare r itself
+	if (r != nrs->element[i]) continue;       // i == nl
+
+	// compare the remaining retainers (shifted by one in *nrs)
+	for (; i < rs->num; i++)
+	    if (rs->element[i] != nrs->element[i + 1]) break;
+	if (i < rs->num) continue;
+
+#ifdef DEBUG_RETAINER
+	// fprintf(stderr, "%p\n", nrs);
+#endif
+	// The set we are seeking already exists!
+	return nrs;
+    }
+
+    // create a new retainer set
+    nrs = arenaAlloc( arena, sizeofRetainerSet(rs->num + 1) );
+    nrs->num = rs->num + 1;
+    nrs->cost = 0;
+    nrs->hashKey = hk;
+    nrs->link = hashTable[hash(hk)];
+    nrs->id = nextId++;
+    for (i = 0; i < nl; i++) {              // copy the first nl retainers
+	nrs->element[i] = rs->element[i];
+    }
+    nrs->element[i] = r;                    // copy r
+    for (; i < rs->num; i++) {              // copy the remaining retainers
+	nrs->element[i + 1] = rs->element[i];
+    }
+
+    hashTable[hash(hk)] = nrs;
+
+#ifdef DEBUG_RETAINER
+    // fprintf(stderr, "%p\n", nrs);
+#endif
+    return nrs;
+}
+
+/* -----------------------------------------------------------------------------
+ * Call f() once for every retainer set: first the shared rs_MANY, then
+ * every set interned in the hash table, in bucket order.
+ * -------------------------------------------------------------------------- */
+void
+traverseAllRetainerSet(void (*f)(RetainerSet *))
+{
+    int bucket;
+    RetainerSet *rs;
+
+    f(&rs_MANY);
+    for (bucket = 0; bucket < HASH_TABLE_SIZE; bucket++) {
+	for (rs = hashTable[bucket]; rs != NULL; rs = rs->link) {
+	    f(rs);
+	}
+    }
+}
+
+
+/* -----------------------------------------------------------------------------
+ * printRetainer() prints the full information on a given retainer,
+ * not a retainer set.
+ * -------------------------------------------------------------------------- */
+#if defined(RETAINER_SCHEME_INFO)
+// Retainer scheme 1: retainer = info table
+void
+printRetainer(FILE *f, retainer itbl)
+{
+ fprintf(f, "%s[%s]", itbl->prof.closure_desc, itbl->prof.closure_type);
+}
+#elif defined(RETAINER_SCHEME_CCS)
+// Retainer scheme 2: retainer = cost centre stack
+void
+printRetainer(FILE *f, retainer ccs)
+{
+ fprintCCS(f, ccs);
+}
+#elif defined(RETAINER_SCHEME_CC)
+// Retainer scheme 3: retainer = cost centre
+void
+printRetainer(FILE *f, retainer cc)
+{
+ fprintf(f,"%s.%s", cc->module, cc->label);
+}
+#endif
+
+/* -----------------------------------------------------------------------------
+ * printRetainerSetShort() should always display the same output for
+ * a given retainer set regardless of the time of invocation: the set's
+ * (negated) id followed by a comma-separated list of retainer names,
+ * truncated at MAX_RETAINER_SET_SPACE characters.
+ *
+ * Exactly one variant is compiled, selected by the retainer scheme.
+ * (Previously the RETAINER_SCHEME_CC branch was an empty stub and the
+ * real CC implementation sat in a second, unreachable
+ * "#elif defined(RETAINER_SCHEME_CC)" of the same chain; the chain now
+ * has one branch per scheme.  The output is also printed via "%s" so a
+ * '%' inside a closure description or cost-centre label cannot be
+ * misinterpreted as a conversion specification.)
+ * -------------------------------------------------------------------------- */
+#ifdef SECOND_APPROACH
+#if defined(RETAINER_SCHEME_INFO)
+// Retainer scheme 1: retainer = info table
+void
+printRetainerSetShort(FILE *f, RetainerSet *rs)
+{
+#define MAX_RETAINER_SET_SPACE 24
+    char tmp[MAX_RETAINER_SET_SPACE + 1];
+    int size;
+    nat j;
+
+    ASSERT(rs->id < 0);
+
+    tmp[MAX_RETAINER_SET_SPACE] = '\0';
+
+    // No blank characters are allowed.
+    sprintf(tmp + 0, "(%d)", -(rs->id));
+    size = strlen(tmp);
+    ASSERT(size < MAX_RETAINER_SET_SPACE);
+
+    for (j = 0; j < rs->num; j++) {
+	if (j < rs->num - 1) {
+	    strncpy(tmp + size, rs->element[j]->prof.closure_desc, MAX_RETAINER_SET_SPACE - size);
+	    size = strlen(tmp);
+	    if (size == MAX_RETAINER_SET_SPACE)
+		break;
+	    strncpy(tmp + size, ",", MAX_RETAINER_SET_SPACE - size);
+	    size = strlen(tmp);
+	    if (size == MAX_RETAINER_SET_SPACE)
+		break;
+	}
+	else {
+	    strncpy(tmp + size, rs->element[j]->prof.closure_desc, MAX_RETAINER_SET_SPACE - size);
+	    // size = strlen(tmp);
+	}
+    }
+    // tmp may contain '%': never use it as a format string.
+    fprintf(f, "%s", tmp);
+}
+#elif defined(RETAINER_SCHEME_CCS)
+// Retainer scheme 2: retainer = cost centre stack
+void
+printRetainerSetShort(FILE *f, RetainerSet *rs)
+{
+#define MAX_RETAINER_SET_SPACE 24
+    char tmp[MAX_RETAINER_SET_SPACE + 1];
+    int size;
+    nat j;
+
+    ASSERT(rs->id < 0);
+
+    tmp[MAX_RETAINER_SET_SPACE] = '\0';
+
+    // No blank characters are allowed.
+    sprintf(tmp + 0, "(%d)", -(rs->id));
+    size = strlen(tmp);
+    ASSERT(size < MAX_RETAINER_SET_SPACE);
+
+    for (j = 0; j < rs->num; j++) {
+	if (j < rs->num - 1) {
+	    strncpy(tmp + size, rs->element[j]->cc->label, MAX_RETAINER_SET_SPACE - size);
+	    size = strlen(tmp);
+	    if (size == MAX_RETAINER_SET_SPACE)
+		break;
+	    strncpy(tmp + size, ",", MAX_RETAINER_SET_SPACE - size);
+	    size = strlen(tmp);
+	    if (size == MAX_RETAINER_SET_SPACE)
+		break;
+	}
+	else {
+	    strncpy(tmp + size, rs->element[j]->cc->label, MAX_RETAINER_SET_SPACE - size);
+	    // size = strlen(tmp);
+	}
+    }
+    // tmp may contain '%': never use it as a format string.
+    fprintf(f, "%s", tmp);
+}
+#elif defined(RETAINER_SCHEME_CC)
+// Retainer scheme 3: retainer = cost centre
+void
+printRetainerSetShort(FILE *f, RetainerSet *rs)
+{
+#define MAX_RETAINER_SET_SPACE 24
+    char tmp[MAX_RETAINER_SET_SPACE + 1];
+    int size;
+    nat j;
+
+    ASSERT(rs->id < 0);
+
+    tmp[MAX_RETAINER_SET_SPACE] = '\0';
+
+    // No blank characters are allowed.
+    sprintf(tmp + 0, "(%d)", -(rs->id));
+    size = strlen(tmp);
+    ASSERT(size < MAX_RETAINER_SET_SPACE);
+
+    for (j = 0; j < rs->num; j++) {
+	if (j < rs->num - 1) {
+	    strncpy(tmp + size, rs->element[j]->label,
+		    MAX_RETAINER_SET_SPACE - size);
+	    size = strlen(tmp);
+	    if (size == MAX_RETAINER_SET_SPACE)
+		break;
+	    strncpy(tmp + size, ",", MAX_RETAINER_SET_SPACE - size);
+	    size = strlen(tmp);
+	    if (size == MAX_RETAINER_SET_SPACE)
+		break;
+	}
+	else {
+	    strncpy(tmp + size, rs->element[j]->label,
+		    MAX_RETAINER_SET_SPACE - size);
+	    // size = strlen(tmp);
+	}
+    }
+    // tmp may contain '%': never use it as a format string.
+    fprintf(f, "%s", tmp);
+}
+#endif /* RETAINER_SCHEME_CC */
+#endif /* SECOND_APPROACH */
+
+/* -----------------------------------------------------------------------------
+ * Print the statistics.  This function is called after each
+ * retainer profiling.  *allCost is set to the sum of all costs retained
+ * by any retainer sets.  *numSet is set to the number of all
+ * retainer sets (including those with 0 cost).
+ * -------------------------------------------------------------------------- */
+void
+outputRetainerSet( FILE *hp_file, nat *allCost, nat *numSet )
+{
+    nat i;
+#ifdef FIRST_APPROACH
+    nat j;
+#endif
+    RetainerSet *rs;
+    double duration;
+
+    *allCost = 0;
+    *numSet = 0;
+    duration = mut_user_time_during_RP();
+
+    fprintf(hp_file, "MARK %f\n", duration);
+    fprintf(hp_file, "BEGIN_SAMPLE %f\n", duration);
+
+    if (rs_MANY.cost > 0) {
+	// cost * sizeof(StgWord) has type size_t; cast so that it matches
+	// the %u conversion on platforms where size_t is wider than int.
+	fprintf(hp_file, "MANY\t%u\n",
+		(unsigned int)(rs_MANY.cost * sizeof(StgWord)));
+    }
+
+    for (i = 0; i < HASH_TABLE_SIZE; i++) {
+	for (rs = hashTable[i]; rs != NULL; rs = rs->link) {
+	    (*numSet)++;
+	    /*
+	      Note: If rs->cost is 0, it means that there exists at
+	      least one object which is retained by this retainer set
+	      *rs temporarily.  Since its new retainer set of this
+	      object (replacing *rs) is at least larger than *rs, if
+	      the cost of every object was a positive quantity, the
+	      following invariants would hold: If rs->cost == 0, there
+	      exists a retainer set rs' such that rs'->cost > 0 and
+	      rs'->num > rs->num. However, static objects cost zero,
+	      this does not hold. If we set the cost of each static
+	      object to a positive quantity, it should hold, which is
+	      actually the case.
+	    */
+	    if (rs->cost == 0)
+		continue;
+
+	    *allCost += rs->cost;
+
+#ifdef SECOND_APPROACH
+	    if (rs->id > 0)           // if having a positive cost for the first time?
+		rs->id = -(rs->id);   // mark as having a positive cost
+	    // Now, this retainer set has a permanent negative id.
+
+	    // report in the unit of bytes: * sizeof(StgWord)
+	    printRetainerSetShort(hp_file, rs);
+	    fprintf(hp_file, "\t%u\n",
+		    (unsigned int)(rs->cost * sizeof(StgWord)));
+#endif
+
+#ifdef FIRST_APPROACH
+	    fprintf(hp_file, "{");
+	    for (j = 0; j < rs->num - 1; j++) {
+		printRetainer(hp_file, rs->element[j]);
+		fprintf(hp_file, ",");
+	    }
+	    printRetainer(hp_file, rs->element[j]);
+	    fprintf(hp_file, "}\t%u\n",
+		    (unsigned int)(rs->cost * sizeof(StgWord)));
+#endif
+	}
+    }
+    fprintf(hp_file, "END_SAMPLE %f\n", duration);
+}
+
+/*
+  This function is called at the exit of the program.  It dumps, into the
+  .prof file, every retainer set that ever had a positive cost (those whose
+  id was flipped negative by outputRetainerSet()), sorted by id.
+ */
+#ifdef SECOND_APPROACH
+void
+outputAllRetainerSet(FILE *prof_file)
+{
+    nat i, j;
+    nat numSet;
+    RetainerSet *rs, **rsArray, *tmp;
+
+    // find out the number of retainer sets which have had a non-zero cost at
+    // least once during retainer profiling
+    numSet = 0;
+    for (i = 0; i < HASH_TABLE_SIZE; i++)
+	for (rs = hashTable[i]; rs != NULL; rs = rs->link) {
+	    if (rs->id < 0)
+		numSet++;
+	}
+
+    if (numSet == 0)      // retainer profiling was not done at all.
+	return;
+
+    // allocate memory
+    rsArray = stgMallocBytes(numSet * sizeof(RetainerSet *),
+			     "outputAllRetainerSet()");
+
+    // prepare for sorting
+    j = 0;
+    for (i = 0; i < HASH_TABLE_SIZE; i++)
+	for (rs = hashTable[i]; rs != NULL; rs = rs->link) {
+	    if (rs->id < 0) {
+		rsArray[j] = rs;
+		j++;
+	    }
+	}
+
+    ASSERT(j == numSet);
+
+    // sort rsArray[] according to the id of each retainer set
+    // (bubble sort; fine for a one-shot report at program exit)
+    for (i = numSet - 1; i > 0; i--) {
+	for (j = 0; j <= i - 1; j++) {
+	    // if (-(rsArray[j]->id) < -(rsArray[j + 1]->id))
+	    if (rsArray[j]->id < rsArray[j + 1]->id) {
+		tmp = rsArray[j];
+		rsArray[j] = rsArray[j + 1];
+		rsArray[j + 1] = tmp;
+	    }
+	}
+    }
+
+    fprintf(prof_file, "\nRetainer sets created during profiling:\n");
+    for (i = 0;i < numSet; i++) {
+	// NOTE(review): -(rsArray[i]->id) is an int passed to %u; the value
+	// is positive here (id < 0), so the int->unsigned conversion is
+	// benign, but a cast to (unsigned) would be cleaner.
+	fprintf(prof_file, "SET %u = {", -(rsArray[i]->id));
+	for (j = 0; j < rsArray[i]->num - 1; j++) {
+	    printRetainer(prof_file, rsArray[i]->element[j]);
+	    fprintf(prof_file, ", ");
+	}
+	printRetainer(prof_file, rsArray[i]->element[j]);
+	fprintf(prof_file, "}\n");
+    }
+
+    free(rsArray);
+}
+#endif // SECOND_APPROACH
+
+#endif /* PROFILING */
diff --git a/ghc/rts/RetainerSet.h b/ghc/rts/RetainerSet.h
new file mode 100644
index 0000000000..feed43e74a
--- /dev/null
+++ b/ghc/rts/RetainerSet.h
@@ -0,0 +1,139 @@
+/* -----------------------------------------------------------------------------
+ * $Id: RetainerSet.h,v 1.1 2001/11/22 14:25:12 simonmar Exp $
+ *
+ * (c) The GHC Team, 2001
+ * Author: Sungwoo Park
+ *
+ * Retainer set interface for retainer profiling.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifdef PROFILING
+
+/*
+ Note:
+ There are two ways of maintaining all retainer sets. The first is simply by
+ freeing all the retainer sets and re-initialize the hash table at each
+ retainer profiling. The second is by setting the cost field of each
+ retainer set. The second is preferred to the first if most retainer sets
+ are likely to be observed again during the next retainer profiling. Note
+ that in the first approach, we do not free the memory allocated for
+ retainer sets; we just invalidate all retainer sets.
+ */
+#ifdef DEBUG_RETAINER
+// In this case, FIRST_APPROACH must be turned on because the memory pool
+// for retainer sets is freed each time.
+#define FIRST_APPROACH
+#else
+// #define FIRST_APPROACH
+#define SECOND_APPROACH
+#endif
+
+// Creates the first pool and initializes a hash table. Frees all pools if any.
+void initializeAllRetainerSet(void);
+
+// Refreshes all pools for reuse and initializes a hash table.
+void refreshAllRetainerSet(void);
+
+// Frees all pools.
+void closeAllRetainerSet(void);
+
+// Finds or creates if needed a singleton retainer set.
+RetainerSet *singleton(retainer r);
+
+extern RetainerSet rs_MANY;
+
+// Checks if a given retainer is a member of the retainer set.
+//
+// Note & (maybe) Todo:
+// This function needs to be declared as an inline function, so it is declared
+// as an inline static function here.
+// This makes the interface really bad, but isMember() returns a value, so
+// it is not easy either to write it as a macro (due to my lack of C
+// programming experience). Sungwoo
+//
+// rtsBool isMember(retainer, retainerSet *);
+/*
+ Returns rtsTrue if r is a member of *rs.
+ Invariants:
+ rs is not NULL.
+ Note:
+ The efficiency of this function is subject to the typical size of
+ retainer sets. If it is small, linear scan is better. If it
+ is large in most cases, binary scan is better.
+ The current implementation mixes the two search strategies.
+ */
+
+#define BINARY_SEARCH_THRESHOLD 8
+static inline rtsBool
+isMember(retainer r, RetainerSet *rs)
+{
+ int i, left, right; // must be int, not nat (because -1 can appear)
+ retainer ri;
+
+ if (rs == &rs_MANY) { return rtsTrue; }
+
+ if (rs->num < BINARY_SEARCH_THRESHOLD) {
+ for (i = 0; i < (int)rs->num; i++) {
+ ri = rs->element[i];
+ if (r == ri) return rtsTrue;
+ else if (r < ri) return rtsFalse;
+ }
+ } else {
+ left = 0;
+ right = rs->num - 1;
+ while (left <= right) {
+ i = (left + right) / 2;
+ ri = rs->element[i];
+ if (r == ri) return rtsTrue;
+ else if (r < ri) right = i - 1;
+ else left = i + 1;
+ }
+ }
+ return rtsFalse;
+}
+
+// Finds or creates a retainer set augmented with a new retainer.
+RetainerSet *addElement(retainer, RetainerSet *);
+
+// Call f() for each retainer set.
+void traverseAllRetainerSet(void (*f)(RetainerSet *));
+
+#ifdef SECOND_APPROACH
+// Prints a single retainer set.
+void printRetainerSetShort(FILE *, RetainerSet *);
+#endif
+
+// Print the statistics on all the retainer sets.
+// store the sum of all costs and the number of all retainer sets.
+void outputRetainerSet(FILE *, nat *, nat *);
+
+#ifdef SECOND_APPROACH
+// Print all retainer sets at the exit of the program.
+void outputAllRetainerSet(FILE *);
+#endif
+
+// Hashing functions
+/*
+ Invariants:
+ Once either initializeAllRetainerSet() or refreshAllRetainerSet()
+ is called, there exists only one copy of any retainer set created
+ through singleton() and addElement(). The pool (the storage for
+ retainer sets) is consumed linearly. All the retainer sets of the
+ same hash function value are linked together from an element in
+ hashTable[]. See the invariants of allocateInPool() for the
+ maximum size of retainer sets. The hashing function is defined by
+ hashKeySingleton() and hashKeyAddElement(). The hash key for a set
+ must be unique regardless of the order its elements are inserted,
+ i.e., the hashing function must be additive(?).
+*/
+#define hashKeySingleton(r) ((StgWord)(r))
+#define hashKeyAddElement(r, s) (hashKeySingleton((r)) + (s)->hashKey)
+
+// Prints the full information on a given retainer.
+// Note: This function is not part of retainerSet interface, but this is
+// the best place to define it.
+void printRetainer(FILE *, retainer);
+
+#endif /* PROFILING */
+
diff --git a/ghc/rts/RtsFlags.c b/ghc/rts/RtsFlags.c
index d070c130fe..e23346b7b3 100644
--- a/ghc/rts/RtsFlags.c
+++ b/ghc/rts/RtsFlags.c
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------------------
- * $Id: RtsFlags.c,v 1.51 2001/10/01 11:36:28 simonmar Exp $
+ * $Id: RtsFlags.c,v 1.52 2001/11/22 14:25:12 simonmar Exp $
*
* (c) The AQUA Project, Glasgow University, 1994-1997
* (c) The GHC Team, 1998-1999
@@ -250,12 +250,13 @@ void initRtsFlagsDefaults(void)
#ifdef PROFILING
RtsFlags.ProfFlags.doHeapProfile = rtsFalse;
- RtsFlags.ProfFlags.profileFrequency = 20;
+ RtsFlags.ProfFlags.profileInterval = 20;
RtsFlags.ProfFlags.showCCSOnException = rtsFalse;
RtsFlags.ProfFlags.modSelector = NULL;
RtsFlags.ProfFlags.descrSelector = NULL;
RtsFlags.ProfFlags.typeSelector = NULL;
RtsFlags.ProfFlags.ccSelector = NULL;
+
#elif defined(DEBUG)
RtsFlags.ProfFlags.doHeapProfile = rtsFalse;
#endif
@@ -417,6 +418,8 @@ usage_text[] = {
" -h<break-down> Heap residency profile (text) (output file <program>.prof)",
" break-down: C = cost centre stack (default), M = module",
" D = closure description, Y = type description",
+" -hR Retainer profile (output files <program>.hp)",
+" -hL Lag/Drag/Void/Use profile (output files <program>.hp)",
" A subset of closures may be selected thusly:",
" -hc{cc, cc ...} specific cost centre(s) (NOT STACKS!)",
" -hm{mod,mod...} all cost centres from the specified modules(s)",
@@ -838,18 +841,53 @@ error = rtsTrue;
PROFILING_BUILD_ONLY(
switch (rts_argv[arg][2]) {
case '\0':
- case CCchar:
- RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_CCS;
- break;
- case MODchar:
- RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_MOD;
- break;
- case DESCRchar:
- RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_DESCR;
- break;
- case TYPEchar:
- RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_TYPE;
- break;
+ case 'C':
+ if (RtsFlags.ProfFlags.doHeapProfile == 0) {
+ RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_CCS;
+ break;
+ } else {
+ goto many_hps;
+ }
+ case 'M':
+ if (RtsFlags.ProfFlags.doHeapProfile == 0) {
+ RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_MOD;
+ break;
+ } else {
+ goto many_hps;
+ }
+ case 'D':
+ if (RtsFlags.ProfFlags.doHeapProfile == 0) {
+ RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_DESCR;
+ break;
+ } else {
+ goto many_hps;
+ }
+ case 'Y':
+ if (RtsFlags.ProfFlags.doHeapProfile == 0) {
+ RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_TYPE;
+ break;
+ } else {
+ goto many_hps;
+ }
+ case 'R':
+ if (RtsFlags.ProfFlags.doHeapProfile == 0) {
+ RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_RETAINER;
+ break;
+ } else {
+ goto many_hps;
+ }
+ case 'L':
+ if (RtsFlags.ProfFlags.doHeapProfile == 0) {
+ RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_LDV;
+ break;
+ } else {
+ goto many_hps;
+ }
+ many_hps:
+ prog_belch("multiple heap profile options");
+ error = rtsTrue;
+ break;
+
case 'c': /* cost centre label select */
case 'm': /* cost centre module select */
case 'd': /* closure descr select */
@@ -904,11 +942,9 @@ error = rtsTrue;
if (cst != 0 && cst < CS_MIN_MILLISECS)
cst = CS_MIN_MILLISECS;
- RtsFlags.ProfFlags.profileFrequency = cst;
+ RtsFlags.ProfFlags.profileInterval = cst;
}
-
break;
-
#endif
/* =========== CONCURRENT ========================= */
diff --git a/ghc/rts/RtsStartup.c b/ghc/rts/RtsStartup.c
index 87c804fd17..69de672294 100644
--- a/ghc/rts/RtsStartup.c
+++ b/ghc/rts/RtsStartup.c
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------------------
- * $Id: RtsStartup.c,v 1.55 2001/11/08 12:46:31 simonmar Exp $
+ * $Id: RtsStartup.c,v 1.56 2001/11/22 14:25:12 simonmar Exp $
*
* (c) The GHC Team, 1998-2000
*
@@ -33,6 +33,7 @@
#if defined(PROFILING) || defined(DEBUG)
# include "Profiling.h"
# include "ProfHeap.h"
+# include "RetainerProfile.h"
#endif
#if defined(GRAN)
@@ -84,7 +85,7 @@ getProgArgv(int *argc, char **argv[])
void
startupHaskell(int argc, char *argv[], void (*init_root)(void))
{
- /* To avoid repeated initialisations of the RTS */
+ /* To avoid repeated initialisations of the RTS */
if (rts_has_started_up)
return;
else
@@ -218,13 +219,14 @@ startupHaskell(int argc, char *argv[], void (*init_root)(void))
/* The init functions use an explicit stack...
*/
-#define INIT_STACK_SIZE (BLOCK_SIZE * 4)
+#define INIT_STACK_BLOCKS 4
F_ *init_stack = NULL;
nat init_sp = 0;
static void
initModules ( void (*init_root)(void) )
{
+ bdescr *bd;
#ifdef SMP
Capability cap;
#else
@@ -232,7 +234,8 @@ initModules ( void (*init_root)(void) )
#endif
init_sp = 0;
- init_stack = (F_ *)allocate(INIT_STACK_SIZE / sizeof(W_));
+ bd = allocGroup(4);
+ init_stack = (F_ *)bd->start;
init_stack[init_sp++] = (F_)stg_init_ret;
init_stack[init_sp++] = (F_)__stginit_Prelude;
if (init_root != NULL) {
@@ -241,6 +244,8 @@ initModules ( void (*init_root)(void) )
cap.r.rSp = (P_)(init_stack + init_sp);
StgRun((StgFunPtr)stg_init, &cap.r);
+
+ freeGroup(bd);
}
/* -----------------------------------------------------------------------------
@@ -272,6 +277,26 @@ shutdownHaskell(void)
/* start timing the shutdown */
stat_startExit();
+#ifdef PROFILING
+ // @LDV profiling
+ //
+ // Note:
+ // We do not need to perform a major garbage collection because all the
+ // closures created since the last census will not affect the profiling
+ // statistics anyhow.
+ //
+ // Note:
+ // We ignore any object created afterwards.
+ // finalizeWeakPointersNow() may corrupt the heap (because it executes
+ // rts_evalIO(), which adds an initial evaluation stack again).
+ // Thus, we call LdvCensusKillAll() here, and prohibit LDV profiling
+ // afterwards.
+ // Actually, it is pointless to call LdvCensusKillAll() any later because
+ // no object created later will be taken into account for profiling.
+ if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV)
+ LdvCensusKillAll();
+#endif
+
#if !defined(GRAN)
/* Finalize any remaining weak pointers */
finalizeWeakPointersNow();
@@ -316,12 +341,19 @@ shutdownHaskell(void)
}
#endif
+#if defined(PROFILING)
+ report_ccs_profiling();
+#endif
+
#if defined(PROFILING) || defined(DEBUG)
endProfiling();
#endif
-#if defined(PROFILING)
- report_ccs_profiling();
+#ifdef PROFILING
+ // Originally, this was in report_ccs_profiling(). Now, retainer
+ // profiling might tack some extra stuff on to the end of this file
+ // during endProfiling().
+ fclose(prof_file);
#endif
#if defined(TICKY_TICKY)
diff --git a/ghc/rts/Schedule.c b/ghc/rts/Schedule.c
index ec0ac22b9f..256aab93a2 100644
--- a/ghc/rts/Schedule.c
+++ b/ghc/rts/Schedule.c
@@ -1,5 +1,5 @@
/* ---------------------------------------------------------------------------
- * $Id: Schedule.c,v 1.106 2001/11/08 16:17:35 simonmar Exp $
+ * $Id: Schedule.c,v 1.107 2001/11/22 14:25:12 simonmar Exp $
*
* (c) The GHC Team, 1998-2000
*
@@ -95,6 +95,12 @@
#include "Stats.h"
#include "Itimer.h"
#include "Prelude.h"
+#ifdef PROFILING
+#include "Proftimer.h"
+#include "ProfHeap.h"
+#include "RetainerProfile.h"
+#include "LdvProfile.h"
+#endif
#if defined(GRAN) || defined(PAR)
# include "GranSimRts.h"
# include "GranSim.h"
@@ -181,7 +187,6 @@ StgTSO *all_threads;
*/
static StgTSO *suspended_ccalling_threads;
-static void GetRoots(evac_fn);
static StgTSO *threadStackOverflow(StgTSO *tso);
/* KH: The following two flags are shared memory locations. There is no need
@@ -923,10 +928,14 @@ schedule( void )
* the user specified "context switch as often as possible", with
* +RTS -C0
*/
- if (RtsFlags.ConcFlags.ctxtSwitchTicks == 0
- && (run_queue_hd != END_TSO_QUEUE
- || blocked_queue_hd != END_TSO_QUEUE
- || sleeping_queue != END_TSO_QUEUE))
+ if (
+#ifdef PROFILING
+ RtsFlags.ProfFlags.profileInterval == 0 ||
+#endif
+ (RtsFlags.ConcFlags.ctxtSwitchTicks == 0
+ && (run_queue_hd != END_TSO_QUEUE
+ || blocked_queue_hd != END_TSO_QUEUE
+ || sleeping_queue != END_TSO_QUEUE)))
context_switch = 1;
else
context_switch = 0;
@@ -936,6 +945,10 @@ schedule( void )
IF_DEBUG(scheduler, sched_belch("-->> Running TSO %ld (%p) %s ...",
t->id, t, whatNext_strs[t->what_next]));
+#ifdef PROFILING
+ startHeapProfTimer();
+#endif
+
/* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
/* Run the current thread
*/
@@ -961,6 +974,7 @@ schedule( void )
/* Costs for the scheduler are assigned to CCS_SYSTEM */
#ifdef PROFILING
+ stopHeapProfTimer();
CCCS = CCS_SYSTEM;
#endif
@@ -1262,6 +1276,39 @@ schedule( void )
n_free_capabilities++;
#endif
+#ifdef PROFILING
+ if (RtsFlags.ProfFlags.profileInterval==0 || performHeapProfile) {
+ if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
+ //
+ // Note: currently retainer profiling is performed after
+ // a major garbage collection.
+ //
+ GarbageCollect(GetRoots, rtsTrue);
+ retainerProfile();
+ } else if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) {
+ //
+ // We have LdvCensus() preceded by a major garbage
+ // collection because we don't want *genuinely* dead
+ // closures to be involved in LDV profiling. Another good
+ // reason is to produce consistent profiling results
+ // regardless of the interval at which GCs are performed.
+ // In other words, we want LDV profiling results to be
+ // completely independent of the GC interval.
+ //
+ GarbageCollect(GetRoots, rtsTrue);
+ LdvCensus();
+ } else {
+ //
+ // Normal creator-based heap profile
+ //
+ GarbageCollect(GetRoots, rtsTrue);
+ heapCensus();
+ }
+ performHeapProfile = rtsFalse;
+ ready_to_gc = rtsFalse; // we already GC'd
+ }
+#endif
+
#ifdef SMP
if (ready_to_gc && n_free_capabilities == RtsFlags.ParFlags.nNodes)
#else
@@ -2170,7 +2217,7 @@ take_off_run_queue(StgTSO *tso) {
KH @ 25/10/99
*/
-static void
+void
GetRoots(evac_fn evac)
{
StgMainThread *m;
diff --git a/ghc/rts/Schedule.h b/ghc/rts/Schedule.h
index f8976ebb00..b81087d9a2 100644
--- a/ghc/rts/Schedule.h
+++ b/ghc/rts/Schedule.h
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------------------
- * $Id: Schedule.h,v 1.24 2001/11/13 13:38:02 simonmar Exp $
+ * $Id: Schedule.h,v 1.25 2001/11/22 14:25:12 simonmar Exp $
*
* (c) The GHC Team 1998-1999
*
@@ -93,6 +93,15 @@ void awaitEvent(rtsBool wait); /* In Select.c */
*/
rtsBool wakeUpSleepingThreads(nat); /* In Select.c */
+/* GetRoots(evac_fn f)
+ *
+ * Call f() for each root known to the scheduler.
+ *
+ * Called from STG : NO
+ * Locks assumed : ????
+ */
+void GetRoots(evac_fn);
+
// ToDo: check whether all fcts below are used in the SMP version, too
//@cindex awaken_blocked_queue
#if defined(GRAN)
diff --git a/ghc/rts/Stats.c b/ghc/rts/Stats.c
index 1ebd5a7bf7..a82f395094 100644
--- a/ghc/rts/Stats.c
+++ b/ghc/rts/Stats.c
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------------------
- * $Id: Stats.c,v 1.35 2001/11/20 21:39:12 sof Exp $
+ * $Id: Stats.c,v 1.36 2001/11/22 14:25:12 simonmar Exp $
*
* (c) The GHC Team, 1998-1999
*
@@ -19,6 +19,7 @@
#include "Schedule.h"
#include "Stats.h"
#include "ParTicky.h" // ToDo: move into Rts.h
+#include "Profiling.h"
#ifdef HAVE_UNISTD_H
#include <unistd.h>
@@ -93,8 +94,14 @@ static TICK_TYPE ExitElapsedTime = 0;
static ullong GC_tot_alloc = 0;
static ullong GC_tot_copied = 0;
-static TICK_TYPE GC_start_time, GC_tot_time = 0; /* User GC Time */
-static TICK_TYPE GCe_start_time, GCe_tot_time = 0; /* Elapsed GC time */
+static TICK_TYPE GC_start_time, GC_tot_time = 0; // User GC Time
+static TICK_TYPE GCe_start_time, GCe_tot_time = 0; // Elapsed GC time
+
+static TICK_TYPE RP_start_time, RP_tot_time = 0; // retainer prof user time
+static TICK_TYPE RPe_start_time, RPe_tot_time = 0; // retainer prof elap time
+
+static TICK_TYPE LDV_start_time, LDV_tot_time = 0; // LDV prof user time
+static TICK_TYPE LDVe_start_time, LDVe_tot_time = 0; // LDV prof elap time
lnat MaxResidency = 0; /* in words; for stats only */
lnat AvgResidency = 0;
@@ -210,16 +217,33 @@ getTimes(void)
* stat_startGC() for details)
*/
double
-mut_user_time_during_GC(void)
+mut_user_time_during_GC( void )
{
- return TICK_TO_DBL(GC_start_time - GC_tot_time);
+ return TICK_TO_DBL(GC_start_time - GC_tot_time - RP_tot_time - LDV_tot_time);
}
double
-mut_user_time(void)
+mut_user_time( void )
{
getTimes();
- return TICK_TO_DBL(CurrentUserTime - GC_tot_time);
+ return TICK_TO_DBL(CurrentUserTime - GC_tot_time - RP_tot_time - LDV_tot_time);
+}
+
+/*
+ mut_user_time_during_RP() is similar to mut_user_time_during_GC();
+ it returns the MUT time during retainer profiling.
+ The same is for mut_user_time_during_LDV();
+ */
+double
+mut_user_time_during_RP( void )
+{
+ return TICK_TO_DBL(RP_start_time - GC_tot_time - RP_tot_time - LDV_tot_time);
+}
+
+double
+mut_user_time_during_LDV( void )
+{
+ return TICK_TO_DBL(LDV_start_time - GC_tot_time - RP_tot_time - LDV_tot_time);
}
static nat
@@ -332,7 +356,7 @@ stat_startExit(void)
#ifdef SMP
MutUserTime = CurrentUserTime;
#else
- MutUserTime = CurrentUserTime - GC_tot_time - InitUserTime;
+ MutUserTime = CurrentUserTime - GC_tot_time - RP_tot_time - LDV_tot_time - InitUserTime;
if (MutUserTime < 0) { MutUserTime = 0; }
#endif
}
@@ -344,7 +368,7 @@ stat_endExit(void)
#ifdef SMP
ExitUserTime = CurrentUserTime - MutUserTime;
#else
- ExitUserTime = CurrentUserTime - MutUserTime - GC_tot_time - InitUserTime;
+ ExitUserTime = CurrentUserTime - MutUserTime - GC_tot_time - RP_tot_time - LDV_tot_time - InitUserTime;
#endif
ExitElapsedTime = CurrentElapsedTime - MutElapsedStamp;
if (ExitUserTime < 0) {
@@ -471,6 +495,64 @@ stat_endGC(lnat alloc, lnat collect, lnat live, lnat copied, lnat gen)
}
/* -----------------------------------------------------------------------------
 Called at the beginning of each Retainer Profiling
+ -------------------------------------------------------------------------- */
+void stat_startRP(void)
+{
+ getTimes();
+ RP_start_time = CurrentUserTime;
+ RPe_start_time = CurrentElapsedTime;
+}
+
+/* -----------------------------------------------------------------------------
+ Called at the end of each Retainer Profiling
+ -------------------------------------------------------------------------- */
+void stat_endRP(
+ nat retainerGeneration,
+#ifdef DEBUG_RETAINER
+ nat maxCStackSize,
+ int maxStackSize,
+#endif
+ double averageNumVisit,
+ nat allCost,
+ nat numSet)
+{
+ getTimes();
+ RP_tot_time += CurrentUserTime - RP_start_time;
+ RPe_tot_time += CurrentElapsedTime - RPe_start_time;
+
+ fprintf(prof_file, "Retainer Profiling: %d, at %f seconds\n",
+ retainerGeneration, mut_user_time_during_RP());
+#ifdef DEBUG_RETAINER
+ fprintf(prof_file, "\tMax C stack size = %u\n", maxCStackSize);
+ fprintf(prof_file, "\tMax auxiliary stack size = %u\n", maxStackSize);
+#endif
+ fprintf(prof_file, "\tAverage number of visits per object = %f\n", averageNumVisit);
+ fprintf(prof_file, "\tCurrent total costs in bytes = %u\n", allCost * sizeof(StgWord));
+ fprintf(prof_file, "\tNumber of retainer sets = %u\n\n", numSet);
+}
+
+/* -----------------------------------------------------------------------------
+ Called at the beginning of each LDV Profiling
+ -------------------------------------------------------------------------- */
+void stat_startLDV(void)
+{
+ getTimes();
+ LDV_start_time = CurrentUserTime;
+ LDVe_start_time = CurrentElapsedTime;
+}
+
+/* -----------------------------------------------------------------------------
+ Called at the end of each LDV Profiling
+ -------------------------------------------------------------------------- */
+void stat_endLDV(void)
+{
+ getTimes();
+ LDV_tot_time += CurrentUserTime - LDV_start_time;
+ LDVe_tot_time += CurrentElapsedTime - LDVe_start_time;
+}
+
+/* -----------------------------------------------------------------------------
stat_workerStop
Called under SMP when a worker thread finishes. We drop the timing
@@ -598,6 +680,14 @@ stat_exit(int alloc)
TICK_TO_DBL(MutUserTime), TICK_TO_DBL(MutElapsedTime));
fprintf(sf, " GC time %6.2fs (%6.2fs elapsed)\n",
TICK_TO_DBL(GC_tot_time), TICK_TO_DBL(GCe_tot_time));
+#ifdef PROFILING
+ if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER)
+ fprintf(sf, " RP time %6.2fs (%6.2fs elapsed)\n",
+ TICK_TO_DBL(RP_tot_time), TICK_TO_DBL(RPe_tot_time));
+ if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV)
+ fprintf(sf, " LDV time %6.2fs (%6.2fs elapsed)\n",
+ TICK_TO_DBL(LDV_tot_time), TICK_TO_DBL(LDVe_tot_time));
+#endif
fprintf(sf, " EXIT time %6.2fs (%6.2fs elapsed)\n",
TICK_TO_DBL(ExitUserTime), TICK_TO_DBL(ExitElapsedTime));
fprintf(sf, " Total time %6.2fs (%6.2fs elapsed)\n\n",
@@ -606,20 +696,20 @@ stat_exit(int alloc)
TICK_TO_DBL(GC_tot_time)*100/TICK_TO_DBL(time),
TICK_TO_DBL(GCe_tot_time)*100/TICK_TO_DBL(etime));
- if (time - GC_tot_time == 0)
+ if (time - GC_tot_time - RP_tot_time - LDV_tot_time == 0)
ullong_format_string(0, temp, rtsTrue/*commas*/);
else
ullong_format_string(
(ullong)((GC_tot_alloc*sizeof(W_))/
- TICK_TO_DBL(time - GC_tot_time)),
+ TICK_TO_DBL(time - GC_tot_time - RP_tot_time - LDV_tot_time)),
temp, rtsTrue/*commas*/);
fprintf(sf, " Alloc rate %s bytes per MUT second\n\n", temp);
fprintf(sf, " Productivity %5.1f%% of total user, %.1f%% of total elapsed\n\n",
- TICK_TO_DBL(time - GC_tot_time - InitUserTime) * 100
+ TICK_TO_DBL(time - GC_tot_time - RP_tot_time - LDV_tot_time - InitUserTime) * 100
/ TICK_TO_DBL(time),
- TICK_TO_DBL(time - GC_tot_time - InitUserTime) * 100
+ TICK_TO_DBL(time - GC_tot_time - RP_tot_time - LDV_tot_time - InitUserTime) * 100
/ TICK_TO_DBL(etime));
}
diff --git a/ghc/rts/Stats.h b/ghc/rts/Stats.h
index b5c9826a8b..535bef3a08 100644
--- a/ghc/rts/Stats.h
+++ b/ghc/rts/Stats.h
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------------------
- * $Id: Stats.h,v 1.11 2001/07/23 17:23:20 simonmar Exp $
+ * $Id: Stats.h,v 1.12 2001/11/22 14:25:12 simonmar Exp $
*
* (c) The GHC Team, 1998-1999
*
@@ -14,6 +14,16 @@ extern void stat_startGC(void);
extern void stat_endGC(lnat alloc, lnat collect, lnat live,
lnat copied, lnat gen);
+extern void stat_startRP(void);
+extern void stat_endRP(nat,
+#ifdef DEBUG_RETAINER
+ nat, int,
+#endif
+ double, nat, nat);
+
+extern void stat_startLDV(void);
+extern void stat_endLDV(void);
+
extern void stat_startExit(void);
extern void stat_endExit(void);
@@ -23,6 +33,11 @@ extern void stat_workerStop(void);
extern void initStats(void);
extern double mut_user_time_during_GC(void);
+#ifdef PROFILING
+// @retainer profiling
+extern double mut_user_time_during_RP(void);
+extern double mut_user_time_during_LDV(void);
+#endif
extern double mut_user_time(void);
extern void statDescribeGens( void );
diff --git a/ghc/rts/StgMiscClosures.hc b/ghc/rts/StgMiscClosures.hc
index de36bea1ff..63da5b16d5 100644
--- a/ghc/rts/StgMiscClosures.hc
+++ b/ghc/rts/StgMiscClosures.hc
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------------------
- * $Id: StgMiscClosures.hc,v 1.69 2001/11/08 12:46:31 simonmar Exp $
+ * $Id: StgMiscClosures.hc,v 1.70 2001/11/22 14:25:12 simonmar Exp $
*
* (c) The GHC Team, 1998-2000
*
@@ -288,19 +288,18 @@ STGFUN(stg_BCO_entry) {
Entry code for an indirection.
-------------------------------------------------------------------------- */
-INFO_TABLE(stg_IND_info,stg_IND_entry,1,0,IND,,EF_,0,0);
+INFO_TABLE(stg_IND_info,stg_IND_entry,1,0,IND,,EF_,"IND","IND");
STGFUN(stg_IND_entry)
{
FB_
TICK_ENT_IND(Node); /* tick */
-
R1.p = (P_) ((StgInd*)R1.p)->indirectee;
TICK_ENT_VIA_NODE();
JMP_(ENTRY_CODE(*R1.p));
FE_
}
-INFO_TABLE(stg_IND_STATIC_info,stg_IND_STATIC_entry,1,0,IND_STATIC,,EF_,0,0);
+INFO_TABLE(stg_IND_STATIC_info,stg_IND_STATIC_entry,1,0,IND_STATIC,,EF_,"IND_STATIC","IND_STATIC");
STGFUN(stg_IND_STATIC_entry)
{
FB_
@@ -323,6 +322,8 @@ STGFUN(stg_IND_PERM_entry)
TICK_ENT_PERM_IND(R1.p); /* tick */
#endif
+ LDV_ENTER((StgInd *)R1.p);
+
/* Enter PAP cost centre -- lexical scoping only */
ENTER_CCS_PAP_CL(R1.cl);
@@ -353,19 +354,18 @@ STGFUN(stg_IND_PERM_entry)
FE_
}
-INFO_TABLE(stg_IND_OLDGEN_info,stg_IND_OLDGEN_entry,1,1,IND_OLDGEN,,EF_,0,0);
+INFO_TABLE(stg_IND_OLDGEN_info,stg_IND_OLDGEN_entry,1,1,IND_OLDGEN,,EF_,"IND_OLDGEN","IND_OLDGEN");
STGFUN(stg_IND_OLDGEN_entry)
{
FB_
TICK_ENT_IND(Node); /* tick */
-
R1.p = (P_) ((StgInd*)R1.p)->indirectee;
TICK_ENT_VIA_NODE();
JMP_(ENTRY_CODE(*R1.p));
FE_
}
-INFO_TABLE(stg_IND_OLDGEN_PERM_info,stg_IND_OLDGEN_PERM_entry,1,1,IND_OLDGEN_PERM,,EF_,0,0);
+INFO_TABLE(stg_IND_OLDGEN_PERM_info,stg_IND_OLDGEN_PERM_entry,1,1,IND_OLDGEN_PERM,,EF_,"IND_OLDGEN_PERM","IND_OLDGEN_PERM");
STGFUN(stg_IND_OLDGEN_PERM_entry)
{
FB_
@@ -375,7 +375,9 @@ STGFUN(stg_IND_OLDGEN_PERM_entry)
/* TICKY_TICKY && !PROFILING means PERM_IND *replaces* an IND, rather than being extra */
TICK_ENT_PERM_IND(R1.p); /* tick */
#endif
-
+
+ LDV_ENTER((StgInd *)R1.p);
+
/* Enter PAP cost centre -- lexical scoping only */
ENTER_CCS_PAP_CL(R1.cl);
@@ -433,7 +435,10 @@ STGFUN(stg_BLACKHOLE_entry)
#endif
TICK_ENT_BH();
- // Put ourselves on the blocking queue for this black hole
+ // Actually this is not necessary because R1.p is about to be destroyed.
+ LDV_ENTER((StgClosure *)R1.p);
+
+ /* Put ourselves on the blocking queue for this black hole */
#if defined(GRAN) || defined(PAR)
// in fact, only difference is the type of the end-of-queue marker!
CurrentTSO->link = END_BQ_QUEUE;
@@ -446,8 +451,19 @@ STGFUN(stg_BLACKHOLE_entry)
CurrentTSO->why_blocked = BlockedOnBlackHole;
CurrentTSO->block_info.closure = R1.cl;
- // Change the CAF_BLACKHOLE into a BLACKHOLE_BQ_STATIC
+ /* Change the BLACKHOLE into a BLACKHOLE_BQ */
+#ifdef PROFILING
+
+ // The size remains the same, so we call LDV_recordDead() - no need to fill slop.
+ LDV_recordDead((StgClosure *)R1.p, BLACKHOLE_sizeW());
+#endif
+ //
+ // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
+ //
((StgBlockingQueue *)R1.p)->header.info = &stg_BLACKHOLE_BQ_info;
+#ifdef PROFILING
+ LDV_recordCreate((StgClosure *)R1.p);
+#endif
// closure is mutable since something has just been added to its BQ
recordMutable((StgMutClosure *)R1.cl);
@@ -483,6 +499,7 @@ STGFUN(stg_BLACKHOLE_BQ_entry)
#endif
TICK_ENT_BH();
+ LDV_ENTER((StgClosure *)R1.p);
/* Put ourselves on the blocking queue for this black hole */
CurrentTSO->link = ((StgBlockingQueue *)R1.p)->blocking_queue;
@@ -515,7 +532,7 @@ STGFUN(stg_BLACKHOLE_BQ_entry)
#if defined(PAR) || defined(GRAN)
-INFO_TABLE(stg_RBH_info, stg_RBH_entry,1,1,RBH,,EF_,0,0);
+INFO_TABLE(stg_RBH_info, stg_RBH_entry,1,1,RBH,,EF_,"RBH","RBH");
STGFUN(stg_RBH_entry)
{
FB_
@@ -539,13 +556,13 @@ STGFUN(stg_RBH_entry)
FE_
}
-INFO_TABLE(stg_RBH_Save_0_info, stg_RBH_Save_0_entry,0,2,CONSTR,,EF_,0,0);
+INFO_TABLE(stg_RBH_Save_0_info, stg_RBH_Save_0_entry,0,2,CONSTR,,EF_,"RBH_Save_0","RBH_Save_0");
NON_ENTERABLE_ENTRY_CODE(RBH_Save_0);
-INFO_TABLE(stg_RBH_Save_1_info, stg_RBH_Save_1_entry,1,1,CONSTR,,EF_,0,0);
+INFO_TABLE(stg_RBH_Save_1_info, stg_RBH_Save_1_entry,1,1,CONSTR,,EF_,"RBH_Save_1","RBH_Save_1");
NON_ENTERABLE_ENTRY_CODE(RBH_Save_1);
-INFO_TABLE(stg_RBH_Save_2_info, stg_RBH_Save_2_entry,2,0,CONSTR,,EF_,0,0);
+INFO_TABLE(stg_RBH_Save_2_info, stg_RBH_Save_2_entry,2,0,CONSTR,,EF_,"RBH_Save_2","RBH_Save_2");
NON_ENTERABLE_ENTRY_CODE(RBH_Save_2);
#endif /* defined(PAR) || defined(GRAN) */
@@ -574,6 +591,7 @@ STGFUN(stg_CAF_BLACKHOLE_entry)
#endif
TICK_ENT_BH();
+ LDV_ENTER((StgClosure *)R1.p);
// Put ourselves on the blocking queue for this black hole
#if defined(GRAN) || defined(PAR)
@@ -602,7 +620,7 @@ STGFUN(stg_CAF_BLACKHOLE_entry)
}
#ifdef TICKY_TICKY
-INFO_TABLE(stg_SE_BLACKHOLE_info, stg_SE_BLACKHOLE_entry,0,2,SE_BLACKHOLE,,EF_,0,0);
+INFO_TABLE(stg_SE_BLACKHOLE_info, stg_SE_BLACKHOLE_entry,0,2,SE_BLACKHOLE,,EF_,"SE_BLACKHOLE","SE_BLACKHOLE");
STGFUN(stg_SE_BLACKHOLE_entry)
{
FB_
@@ -611,7 +629,7 @@ STGFUN(stg_SE_BLACKHOLE_entry)
FE_
}
-INFO_TABLE(SE_CAF_BLACKHOLE_info, SE_CAF_BLACKHOLE_entry,0,2,SE_CAF_BLACKHOLE,,EF_,0,0);
+INFO_TABLE(SE_CAF_BLACKHOLE_info, SE_CAF_BLACKHOLE_entry,0,2,SE_CAF_BLACKHOLE,,EF_,"CAF_BLACKHOLE","CAF_BLACKHOLE");
STGFUN(stg_SE_CAF_BLACKHOLE_entry)
{
FB_
@@ -622,7 +640,7 @@ STGFUN(stg_SE_CAF_BLACKHOLE_entry)
#endif
#ifdef SMP
-INFO_TABLE(stg_WHITEHOLE_info, stg_WHITEHOLE_entry,0,2,CONSTR_NOCAF_STATIC,,EF_,0,0);
+INFO_TABLE(stg_WHITEHOLE_info, stg_WHITEHOLE_entry,0,2,CONSTR_NOCAF_STATIC,,EF_,"WHITEHOLE","WHITEHOLE");
STGFUN(stg_WHITEHOLE_entry)
{
FB_
@@ -645,7 +663,7 @@ NON_ENTERABLE_ENTRY_CODE(TSO);
one is a real bug.
-------------------------------------------------------------------------- */
-INFO_TABLE(stg_EVACUATED_info,stg_EVACUATED_entry,1,0,EVACUATED,,EF_,0,0);
+INFO_TABLE(stg_EVACUATED_info,stg_EVACUATED_entry,1,0,EVACUATED,,EF_,"EVACUATED","EVACUATED");
NON_ENTERABLE_ENTRY_CODE(EVACUATED);
/* -----------------------------------------------------------------------------
@@ -659,7 +677,19 @@ NON_ENTERABLE_ENTRY_CODE(EVACUATED);
INFO_TABLE(stg_WEAK_info,stg_WEAK_entry,0,4,WEAK,,EF_,"WEAK","WEAK");
NON_ENTERABLE_ENTRY_CODE(WEAK);
-INFO_TABLE_CONSTR(stg_DEAD_WEAK_info,stg_DEAD_WEAK_entry,0,1,0,CONSTR,,EF_,"DEAD_WEAK","DEAD_WEAK");
+// XXX! The garbage collector replaces a WEAK with a DEAD_WEAK
+// in-place, which causes problems if the heap is scanned linearly
+// after GC (certain kinds of profiling do this). So when profiling,
+// we set the size of a DEAD_WEAK to 4 non-pointers, rather than its
+// usual 1.
+
+#ifdef PROFILING
+#define DEAD_WEAK_PAYLOAD_WORDS 4
+#else
+#define DEAD_WEAK_PAYLOAD_WORDS 1
+#endif
+
+INFO_TABLE_CONSTR(stg_DEAD_WEAK_info,stg_DEAD_WEAK_entry,0,DEAD_WEAK_PAYLOAD_WORDS,0,CONSTR,,EF_,"DEAD_WEAK","DEAD_WEAK");
NON_ENTERABLE_ENTRY_CODE(DEAD_WEAK);
/* -----------------------------------------------------------------------------
@@ -669,7 +699,7 @@ NON_ENTERABLE_ENTRY_CODE(DEAD_WEAK);
finalizer in a weak pointer object.
-------------------------------------------------------------------------- */
-INFO_TABLE_CONSTR(stg_NO_FINALIZER_info,stg_NO_FINALIZER_entry,0,0,0,CONSTR_NOCAF_STATIC,,EF_,0,0);
+INFO_TABLE_CONSTR(stg_NO_FINALIZER_info,stg_NO_FINALIZER_entry,0,0,0,CONSTR_NOCAF_STATIC,,EF_,"NO_FINALIZER","NO_FINALIZER");
NON_ENTERABLE_ENTRY_CODE(NO_FINALIZER);
SET_STATIC_HDR(stg_NO_FINALIZER_closure,stg_NO_FINALIZER_info,0/*CC*/,,EI_)
@@ -709,7 +739,7 @@ NON_ENTERABLE_ENTRY_CODE(EMPTY_MVAR);
end of a linked TSO queue.
-------------------------------------------------------------------------- */
-INFO_TABLE_CONSTR(stg_END_TSO_QUEUE_info,stg_END_TSO_QUEUE_entry,0,0,0,CONSTR_NOCAF_STATIC,,EF_,0,0);
+INFO_TABLE_CONSTR(stg_END_TSO_QUEUE_info,stg_END_TSO_QUEUE_entry,0,0,0,CONSTR_NOCAF_STATIC,,EF_,"END_TSO_QUEUE","END_TSO_QUEUE");
NON_ENTERABLE_ENTRY_CODE(END_TSO_QUEUE);
SET_STATIC_HDR(stg_END_TSO_QUEUE_closure,stg_END_TSO_QUEUE_info,0/*CC*/,,EI_)
@@ -723,26 +753,26 @@ SET_STATIC_HDR(stg_END_TSO_QUEUE_closure,stg_END_TSO_QUEUE_info,0/*CC*/,,EI_)
an END_MUT_LIST closure.
-------------------------------------------------------------------------- */
-INFO_TABLE_CONSTR(stg_END_MUT_LIST_info,stg_END_MUT_LIST_entry,0,0,0,CONSTR_NOCAF_STATIC,,EF_,0,0);
+INFO_TABLE_CONSTR(stg_END_MUT_LIST_info,stg_END_MUT_LIST_entry,0,0,0,CONSTR_NOCAF_STATIC,,EF_,"END_MUT_LIST","END_MUT_LIST");
NON_ENTERABLE_ENTRY_CODE(END_MUT_LIST);
SET_STATIC_HDR(stg_END_MUT_LIST_closure,stg_END_MUT_LIST_info,0/*CC*/,,EI_)
, /*payload*/{} };
-INFO_TABLE(stg_MUT_CONS_info, stg_MUT_CONS_entry, 1, 1, MUT_CONS, , EF_, 0, 0);
+INFO_TABLE(stg_MUT_CONS_info, stg_MUT_CONS_entry, 1, 1, MUT_CONS, , EF_, "MUT_CONS", "MUT_CONS");
NON_ENTERABLE_ENTRY_CODE(MUT_CONS);
/* -----------------------------------------------------------------------------
Exception lists
-------------------------------------------------------------------------- */
-INFO_TABLE_CONSTR(stg_END_EXCEPTION_LIST_info,stg_END_EXCEPTION_LIST_entry,0,0,0,CONSTR_NOCAF_STATIC,,EF_,0,0);
+INFO_TABLE_CONSTR(stg_END_EXCEPTION_LIST_info,stg_END_EXCEPTION_LIST_entry,0,0,0,CONSTR_NOCAF_STATIC,,EF_,"END_EXCEPTION_LIST","END_EXCEPTION_LIST");
NON_ENTERABLE_ENTRY_CODE(END_EXCEPTION_LIST);
SET_STATIC_HDR(stg_END_EXCEPTION_LIST_closure,stg_END_EXCEPTION_LIST_info,0/*CC*/,,EI_)
, /*payload*/{} };
-INFO_TABLE(stg_EXCEPTION_CONS_info, stg_EXCEPTION_CONS_entry, 1, 1, CONSTR, , EF_, 0, 0);
+INFO_TABLE(stg_EXCEPTION_CONS_info, stg_EXCEPTION_CONS_entry, 1, 1, CONSTR, , EF_, "EXCEPTION_CONS", "EXCEPTION_CONS");
NON_ENTERABLE_ENTRY_CODE(EXCEPTION_CONS);
/* -----------------------------------------------------------------------------
@@ -804,7 +834,7 @@ STGFUN(stg_error_entry) \
just enter the top stack word to start the thread. (see deleteThread)
* -------------------------------------------------------------------------- */
-INFO_TABLE(stg_dummy_ret_info, stg_dummy_ret_entry, 0, 0, CONSTR_NOCAF_STATIC, , EF_, 0, 0);
+INFO_TABLE(stg_dummy_ret_info, stg_dummy_ret_entry, 0, 0, CONSTR_NOCAF_STATIC, , EF_, "DUMMY_RET", "DUMMY_RET");
STGFUN(stg_dummy_ret_entry)
{
W_ ret_addr;
@@ -855,7 +885,7 @@ STGFUN(stg_forceIO_ret_entry)
}
#endif
-INFO_TABLE(stg_forceIO_info,stg_forceIO_entry,1,0,FUN_STATIC,,EF_,0,0);
+INFO_TABLE(stg_forceIO_info,stg_forceIO_entry,1,0,FUN_STATIC,,EF_,"FORCE_IO","FORCE_IO");
FN_(stg_forceIO_entry)
{
FB_
diff --git a/ghc/rts/StgStartup.hc b/ghc/rts/StgStartup.hc
index 92dc7018db..5920e5ec08 100644
--- a/ghc/rts/StgStartup.hc
+++ b/ghc/rts/StgStartup.hc
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------------------
- * $Id: StgStartup.hc,v 1.16 2001/09/04 18:29:21 ken Exp $
+ * $Id: StgStartup.hc,v 1.17 2001/11/22 14:25:12 simonmar Exp $
*
* (c) The GHC Team, 1998-1999
*
@@ -45,8 +45,8 @@
EXTFUN(stg_stop_thread_entry);
-#ifdef PROFILING
-#define STOP_THREAD_BITMAP 1
+#if defined(PROFILING)
+#define STOP_THREAD_BITMAP 3
#else
#define STOP_THREAD_BITMAP 0
#endif
diff --git a/ghc/rts/StgStdThunks.hc b/ghc/rts/StgStdThunks.hc
index 9373dab228..c3d77ac59a 100644
--- a/ghc/rts/StgStdThunks.hc
+++ b/ghc/rts/StgStdThunks.hc
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------------------
- * $Id: StgStdThunks.hc,v 1.17 2001/11/08 12:46:31 simonmar Exp $
+ * $Id: StgStdThunks.hc,v 1.18 2001/11/22 14:25:12 simonmar Exp $
*
* (c) The GHC Team, 1998-2000
*
@@ -32,7 +32,7 @@
#define SAVE_CCCS(fs) CCS_HDR(Sp-fs)=CCCS
#define GET_SAVED_CCCS RESTORE_CCCS(CCS_HDR(Sp))
#define ENTER_CCS(p) ENTER_CCS_TCL(p)
-#define RET_BITMAP 1
+#define RET_BITMAP 3
#else
#define SAVE_CCCS(fs) /* empty */
#define GET_SAVED_CCCS /* empty */
@@ -58,6 +58,7 @@
FB_ \
STK_CHK_NP(UPD_FRAME_SIZE,1,); \
UPD_BH_UPDATABLE(&stg_sel_##offset##_upd_info); \
+ LDV_ENTER(R1.cl); \
PUSH_UPD_FRAME(R1.p,0); \
ENTER_CCS(R1.p); \
SAVE_CCCS(UPD_FRAME_SIZE); \
@@ -103,6 +104,7 @@ SELECTOR_CODE_UPD(15);
FB_ \
STK_CHK_NP(NOUPD_FRAME_SIZE,1,) \
UPD_BH_SINGLE_ENTRY(&stg_sel_##offset##_noupd_info); \
+ LDV_ENTER(R1.cl); \
ENTER_CCS(R1.p); \
SAVE_CCCS(NOUPD_FRAME_SIZE); \
Sp[-NOUPD_FRAME_SIZE]=(W_)&stg_sel_ret_##offset##_noupd_info; \
@@ -163,6 +165,7 @@ FN_(stg_ap_1_upd_entry) {
FB_
STK_CHK_NP(sizeofW(StgUpdateFrame),1,);
UPD_BH_UPDATABLE(&stg_ap_1_upd_info);
+ LDV_ENTER(R1.cl);
ENTER_CCS(R1.p);
PUSH_UPD_FRAME(R1.p,0);
R1.p=(P_)(R1.cl->payload[0]);
@@ -176,6 +179,7 @@ FN_(stg_ap_2_upd_entry) {
FB_
STK_CHK_NP(sizeofW(StgUpdateFrame)+1,1,);
UPD_BH_UPDATABLE(&stg_ap_2_upd_info);
+ LDV_ENTER(R1.cl);
ENTER_CCS(R1.p);
PUSH_UPD_FRAME(R1.p,0);
Sp[-UF_SIZE-1]=(W_)(R1.cl->payload[1]);
@@ -190,6 +194,7 @@ FN_(stg_ap_3_upd_entry) {
FB_
STK_CHK_NP(sizeofW(StgUpdateFrame)+2,1,);
UPD_BH_UPDATABLE(&stg_ap_3_upd_info);
+ LDV_ENTER(R1.cl);
ENTER_CCS(R1.p);
PUSH_UPD_FRAME(R1.p,0);
Sp[-UF_SIZE-1]=(W_)(R1.cl->payload[2]);
@@ -205,6 +210,7 @@ FN_(stg_ap_4_upd_entry) {
FB_
STK_CHK_NP(sizeofW(StgUpdateFrame)+3,1,);
UPD_BH_UPDATABLE(&stg_ap_4_upd_info);
+ LDV_ENTER(R1.cl);
ENTER_CCS(R1.p);
PUSH_UPD_FRAME(R1.p,0);
Sp[-UF_SIZE-1]=(W_)(R1.cl->payload[3]);
@@ -221,6 +227,7 @@ FN_(stg_ap_5_upd_entry) {
FB_
STK_CHK_NP(sizeofW(StgUpdateFrame)+4,1,);
UPD_BH_UPDATABLE(&stg_ap_5_upd_info);
+ LDV_ENTER(R1.cl);
ENTER_CCS(R1.p);
PUSH_UPD_FRAME(R1.p,0);
Sp[-UF_SIZE-1]=(W_)(R1.cl->payload[4]);
@@ -238,6 +245,7 @@ FN_(stg_ap_6_upd_entry) {
FB_
STK_CHK_NP(sizeofW(StgUpdateFrame)+5,1,);
UPD_BH_UPDATABLE(&stg_ap_6_upd_info);
+ LDV_ENTER(R1.cl);
ENTER_CCS(R1.p);
PUSH_UPD_FRAME(R1.p,0);
Sp[-UF_SIZE-1]=(W_)(R1.cl->payload[5]);
@@ -256,6 +264,7 @@ FN_(stg_ap_7_upd_entry) {
FB_
STK_CHK_NP(sizeofW(StgUpdateFrame)+6,1,);
UPD_BH_UPDATABLE(&stg_ap_7_upd_info);
+ LDV_ENTER(R1.cl);
ENTER_CCS(R1.p);
PUSH_UPD_FRAME(R1.p,0);
Sp[-UF_SIZE-1]=(W_)(R1.cl->payload[6]);
@@ -275,6 +284,7 @@ FN_(stg_ap_8_upd_entry) {
FB_
STK_CHK_NP(sizeofW(StgUpdateFrame)+7,1,);
UPD_BH_UPDATABLE(&stg_ap_8_upd_info);
+ LDV_ENTER(R1.cl);
ENTER_CCS(R1.p);
PUSH_UPD_FRAME(R1.p,0);
Sp[-UF_SIZE-1]=(W_)(R1.cl->payload[7]);
diff --git a/ghc/rts/Storage.c b/ghc/rts/Storage.c
index 9080bf624d..ee8cfd85c6 100644
--- a/ghc/rts/Storage.c
+++ b/ghc/rts/Storage.c
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------------------
- * $Id: Storage.c,v 1.53 2001/11/08 12:46:31 simonmar Exp $
+ * $Id: Storage.c,v 1.54 2001/11/22 14:25:12 simonmar Exp $
*
* (c) The GHC Team, 1998-1999
*
@@ -23,6 +23,8 @@
#include "Schedule.h"
#include "StoragePriv.h"
+#include "RetainerProfile.h" // for counting memory blocks (memInventory)
+
StgClosure *caf_list = NULL;
bdescr *small_alloc_list; /* allocate()d small objects */
@@ -63,23 +65,6 @@ initStorage( void )
step *stp;
generation *gen;
- /* If we're doing heap profiling, we want a two-space heap with a
- * fixed-size allocation area so that we get roughly even-spaced
- * samples.
- */
-
- /* As an experiment, try a 2 generation collector
- */
-
-#if defined(PROFILING) || defined(DEBUG)
- if (RtsFlags.ProfFlags.doHeapProfile) {
- RtsFlags.GcFlags.generations = 1;
- RtsFlags.GcFlags.steps = 1;
- RtsFlags.GcFlags.oldGenFactor = 0;
- RtsFlags.GcFlags.heapSizeSuggestion = 0;
- }
-#endif
-
if (RtsFlags.GcFlags.maxHeapSize != 0 &&
RtsFlags.GcFlags.heapSizeSuggestion >
RtsFlags.GcFlags.maxHeapSize) {
@@ -350,6 +335,20 @@ resetNurseries( void )
}
#else
for (bd = g0s0->blocks; bd; bd = bd->link) {
+#ifdef PROFILING
+ // @LDV profiling
+ // Reset every word in the nursery to zero when doing LDV profiling.
+ // This relieves the mutator of the burden of zeroing every new closure,
+ // which is stored in the nursery.
+ //
+ // Todo: make it more efficient, e.g. memset()
+ //
+ StgPtr p;
+ if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) {
+ for (p = bd->start; p < bd->start + BLOCK_SIZE_W; p++)
+ *p = 0;
+ }
+#endif
bd->free = bd->start;
ASSERT(bd->gen_no == 0);
ASSERT(bd->step == g0s0);
@@ -370,6 +369,12 @@ allocNursery (bdescr *tail, nat blocks)
// cons them on to the front of the list, not forgetting to update
// the back pointer on the tail of the list to point to the new block.
for (i=0; i < blocks; i++) {
+ // @LDV profiling
+ /*
+ processNursery() in LdvProfile.c assumes that every block group in
+ the nursery contains only a single block. So, if a block group is
+ given multiple blocks, change processNursery() accordingly.
+ */
bd = allocBlock();
bd->link = tail;
// double-link the nursery: we might need to insert blocks
@@ -786,7 +791,14 @@ memInventory(void)
for (bd = large_alloc_list; bd; bd = bd->link) {
total_blocks += bd->blocks;
}
-
+
+#ifdef PROFILING
+ if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
+ for (bd = firstStack; bd != NULL; bd = bd->link)
+ total_blocks += bd->blocks;
+ }
+#endif
+
// count the blocks allocated by the arena allocator
total_blocks += arenaBlocks();
diff --git a/ghc/rts/Storage.h b/ghc/rts/Storage.h
index 1156dd4da4..f746d1f0ca 100644
--- a/ghc/rts/Storage.h
+++ b/ghc/rts/Storage.h
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------------------
- * $Id: Storage.h,v 1.36 2001/08/08 10:50:37 simonmar Exp $
+ * $Id: Storage.h,v 1.37 2001/11/22 14:25:12 simonmar Exp $
*
* (c) The GHC Team, 1998-1999
*
@@ -13,6 +13,9 @@
#include "Block.h"
#include "BlockAlloc.h"
#include "StoragePriv.h"
+#ifdef PROFILING
+#include "LdvProfile.h"
+#endif
/* -----------------------------------------------------------------------------
Initialisation / De-initialisation
@@ -152,7 +155,10 @@ recordOldToNewPtrs(StgMutClosure *p)
}
}
-#ifndef DEBUG
+// @LDV profiling
+// We zero out the slop when PROFILING is on.
+// #ifndef DEBUG
+#if !defined(DEBUG) && !defined(PROFILING)
#define updateWithIndirection(info, p1, p2) \
{ \
bdescr *bd; \
@@ -174,6 +180,41 @@ recordOldToNewPtrs(StgMutClosure *p)
TICK_UPD_OLD_IND(); \
} \
}
+#elif defined(PROFILING)
+// @LDV profiling
+// We call LDV_recordDead_FILL_SLOP_DYNAMIC(p1) regardless of the generation in
+// which p1 resides.
+//
+// Note:
+// After all, we do *NOT* need to call LDV_recordCreate() for both IND and
+// IND_OLDGEN closures because they are inherently used. But, it corrupts
+// the invariants that every closure keeps its creation time in the profiling
+// field. So, we call LDV_recordCreate().
+
+#define updateWithIndirection(info, p1, p2) \
+ { \
+ bdescr *bd; \
+ \
+ LDV_recordDead_FILL_SLOP_DYNAMIC((p1)); \
+ bd = Bdescr((P_)p1); \
+ if (bd->gen_no == 0) { \
+ ((StgInd *)p1)->indirectee = p2; \
+ SET_INFO(p1,&stg_IND_info); \
+ LDV_recordCreate((p1)); \
+ TICK_UPD_NEW_IND(); \
+ } else { \
+ ((StgIndOldGen *)p1)->indirectee = p2; \
+ if (info != &stg_BLACKHOLE_BQ_info) { \
+ ACQUIRE_LOCK(&sm_mutex); \
+ ((StgIndOldGen *)p1)->mut_link = generations[bd->gen_no].mut_once_list; \
+ generations[bd->gen_no].mut_once_list = (StgMutClosure *)p1; \
+ RELEASE_LOCK(&sm_mutex); \
+ } \
+ SET_INFO(p1,&stg_IND_OLDGEN_info); \
+ LDV_recordCreate((p1)); \
+ } \
+ }
+
#else
/* In the DEBUG case, we also zero out the slop of the old closure,
@@ -242,10 +283,17 @@ updateWithPermIndirection(const StgInfoTable *info, StgClosure *p1, StgClosure *
bdescr *bd;
ASSERT( p1 != p2 && !closure_IND(p1) );
+
+ // @LDV profiling
+ // Destroy the old closure.
+ LDV_recordDead_FILL_SLOP_DYNAMIC(p1);
bd = Bdescr((P_)p1);
if (bd->gen_no == 0) {
((StgInd *)p1)->indirectee = p2;
SET_INFO(p1,&stg_IND_PERM_info);
+ // @LDV profiling
+ // We have just created a new closure.
+ LDV_recordCreate(p1);
TICK_UPD_NEW_PERM_IND(p1);
} else {
((StgIndOldGen *)p1)->indirectee = p2;
@@ -256,6 +304,9 @@ updateWithPermIndirection(const StgInfoTable *info, StgClosure *p1, StgClosure *
RELEASE_LOCK(&sm_mutex);
}
SET_INFO(p1,&stg_IND_OLDGEN_PERM_info);
+ // @LDV profiling
+ // We have just created a new closure.
+ LDV_recordCreate(p1);
TICK_UPD_OLD_PERM_IND();
}
}
diff --git a/ghc/rts/Updates.hc b/ghc/rts/Updates.hc
index 989ce2f90e..81f49de86f 100644
--- a/ghc/rts/Updates.hc
+++ b/ghc/rts/Updates.hc
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------------------
- * $Id: Updates.hc,v 1.35 2001/11/08 12:46:31 simonmar Exp $
+ * $Id: Updates.hc,v 1.36 2001/11/22 14:25:12 simonmar Exp $
*
* (c) The GHC Team, 1998-1999
*
@@ -117,8 +117,8 @@ UPD_FRAME_ENTRY_TEMPLATE(stg_upd_frame_7_entry,RET_VEC(Sp[0],7));
return size!
*/
-#ifdef PROFILING
-#define UPD_FRAME_BITMAP 3
+#if defined(PROFILING)
+#define UPD_FRAME_BITMAP 7
#else
#define UPD_FRAME_BITMAP 1
#endif
@@ -207,6 +207,7 @@ STGFUN(stg_PAP_entry)
Sp -= Words;
TICK_ENT_PAP(pap);
+ LDV_ENTER(pap);
/* Enter PAP cost centre -- lexical scoping only */
ENTER_CCS_PAP_CL(pap);
@@ -286,6 +287,14 @@ EXTFUN(__stg_update_PAP)
* such as removing the update frame.
*/
if ((Hp += PapSize) > HpLim) {
+#ifdef PROFILING
+ // @LDV profiling
+ // Not filling the slop for the object (because there is none), but
+ // filling in the trailing words in the current block.
+ // This is unnecessary because we fills the entire nursery with
+ // zeroes after each garbage collection.
+ // FILL_SLOP(HpLim, PapSize - (Hp - HpLim));
+#endif
Sp -= 1;
Sp[0] = (W_)Fun;
JMP_(stg_gc_entertop);
@@ -351,7 +360,7 @@ EXTFUN(__stg_update_PAP)
Updatee = Su->updatee;
-#if defined(PROFILING)
+#if defined(PROFILING)
if (Words != 0) {
UPD_IND(Updatee,PapClosure);
TICK_UPD_PAP_IN_NEW(Words+1);
@@ -436,6 +445,7 @@ STGFUN(stg_AP_UPD_entry)
Sp -= sizeofW(StgUpdateFrame) + Words;
TICK_ENT_AP_UPD(ap);
+ LDV_ENTER(ap);
/* Enter PAP cost centre -- lexical scoping only */
ENTER_CCS_PAP_CL(ap); /* ToDo: ENTER_CC_AP_UPD_CL */
diff --git a/ghc/rts/Weak.c b/ghc/rts/Weak.c
index 1c03e69cdc..c80cf8c3c1 100644
--- a/ghc/rts/Weak.c
+++ b/ghc/rts/Weak.c
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------------------
- * $Id: Weak.c,v 1.18 2001/08/14 13:40:09 sewardj Exp $
+ * $Id: Weak.c,v 1.19 2001/11/22 14:25:13 simonmar Exp $
*
* (c) The GHC Team, 1998-1999
*
@@ -36,6 +36,15 @@ finalizeWeakPointersNow(void)
while ((w = weak_ptr_list)) {
weak_ptr_list = w->link;
if (w->header.info != &stg_DEAD_WEAK_info) {
+ // @LDV profiling
+ // Even though the info type of w changes, we DO NOT perform any
+ // LDV profiling because at this moment, LDV profiling must already
+ // have been terminated. See the comments in shutdownHaskell().
+ // At any rate, there is no need to call LDV_recordDead() because
+ // weak pointers are inherently used.
+#ifdef PROFILING
+ ASSERT(ldvTime == 0); // LDV profiling is turned off.
+#endif
w->header.info = &stg_DEAD_WEAK_info;
IF_DEBUG(weak,fprintf(stderr,"Finalising weak pointer at %p -> %p\n", w, w->key));
if (w->finalizer != &stg_NO_FINALIZER_closure) {
@@ -85,7 +94,17 @@ scheduleFinalizers(StgWeak *list)
arr->payload[n] = w->finalizer;
n++;
}
- w->header.info = &stg_DEAD_WEAK_info;
+
+#ifdef PROFILING
+ // A weak pointer is inherently used, so we do not need to call
+ // LDV_recordDead().
+ //
+ // Furthermore, when PROFILING is turned on, dead weak
+ // pointers are exactly as large as weak pointers, so there is
+ // no need to fill the slop, either. See stg_DEAD_WEAK_info
+ // in StgMiscClosures.hc.
+#endif
+ SET_HDR(w, &stg_DEAD_WEAK_info, w->header.prof.ccs);
}
t = createIOThread(RtsFlags.GcFlags.initialStkSize,