diff options
39 files changed, 5198 insertions, 417 deletions
diff --git a/ghc/includes/ClosureMacros.h b/ghc/includes/ClosureMacros.h index 0690981adf..70470e93b5 100644 --- a/ghc/includes/ClosureMacros.h +++ b/ghc/includes/ClosureMacros.h @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- - * $Id: ClosureMacros.h,v 1.32 2001/02/06 11:41:04 rrt Exp $ + * $Id: ClosureMacros.h,v 1.33 2001/11/22 14:25:11 simonmar Exp $ * * (c) The GHC Team, 1998-1999 * @@ -79,8 +79,39 @@ static __inline__ StgFunPtr get_entry(const StgInfoTable *itbl) { -------------------------------------------------------------------------- */ #ifdef PROFILING -#define SET_PROF_HDR(c,ccs_) (c)->header.prof.ccs = ccs_ -#define SET_STATIC_PROF_HDR(ccs_) prof : { ccs : ccs_ }, +#ifdef DEBUG_RETAINER +/* + For the sake of debugging, we take the safest way for the moment. Actually, this + is useful to check the sanity of heap before beginning retainer profiling. + flip is defined in RetainerProfile.c, and declared as extern in RetainerProfile.h. + Note: change those functions building Haskell objects from C datatypes, i.e., + all rts_mk???() functions in RtsAPI.c, as well. + */ +extern StgWord flip; +#define SET_PROF_HDR(c,ccs_) \ + ((c)->header.prof.ccs = ccs_, (c)->header.prof.hp.rs = (retainerSet *)((StgWord)NULL | flip)) +#else +/* + For retainer profiling only: we do not have to set (c)->header.prof.hp.rs to + NULL | flip (flip is defined in RetainerProfile.c) because even when flip + is 1, rs is invalid and will be initialized to NULL | flip later when + the closure *c is visited. + */ +/* +#define SET_PROF_HDR(c,ccs_) \ + ((c)->header.prof.ccs = ccs_, (c)->header.prof.hp.rs = NULL) + */ +/* + The following macro works for both retainer profiling and LDV profiling: + for retainer profiling, ldvTime remains 0, so rs fields are initialized to 0. + See the invariants on ldvTime. 
+ */ +#define SET_PROF_HDR(c,ccs_) \ + ((c)->header.prof.ccs = ccs_, \ + LDV_recordCreate((c))) +#endif // DEBUG_RETAINER +#define SET_STATIC_PROF_HDR(ccs_) \ + prof : { ccs : ccs_, hp : { rs : NULL } }, #else #define SET_PROF_HDR(c,ccs) #define SET_STATIC_PROF_HDR(ccs) @@ -109,6 +140,7 @@ static __inline__ StgFunPtr get_entry(const StgInfoTable *itbl) { #define SET_TICKY_HDR(c,stuff) #define SET_STATIC_TICKY_HDR(stuff) #endif + #define SET_HDR(c,info,ccs) \ { \ SET_INFO(c,info); \ diff --git a/ghc/includes/Closures.h b/ghc/includes/Closures.h index 5f0b570954..0f413b58a1 100644 --- a/ghc/includes/Closures.h +++ b/ghc/includes/Closures.h @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- - * $Id: Closures.h,v 1.28 2001/10/03 13:57:42 simonmar Exp $ + * $Id: Closures.h,v 1.29 2001/11/22 14:25:11 simonmar Exp $ * * (c) The GHC Team, 1998-1999 * @@ -20,7 +20,11 @@ -------------------------------------------------------------------------- */ typedef struct { - CostCentreStack *ccs; + CostCentreStack *ccs; + union { + RetainerSet *rs; // Retainer Set + StgWord ldvw; // Lag/Drag/Void Word + } hp; } StgProfHeader; /* ----------------------------------------------------------------------------- diff --git a/ghc/includes/Stg.h b/ghc/includes/Stg.h index 9fc9c77317..9d302a7d27 100644 --- a/ghc/includes/Stg.h +++ b/ghc/includes/Stg.h @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: Stg.h,v 1.39 2001/10/27 21:44:54 sof Exp $ + * $Id: Stg.h,v 1.40 2001/11/22 14:25:11 simonmar Exp $ * * (c) The GHC Team, 1998-1999 * @@ -150,6 +150,8 @@ typedef StgWord64 LW_; /* Profiling information */ #include "StgProf.h" +#include "StgRetainerProf.h" +#include "StgLdvProf.h" /* Storage format definitions */ #include "Closures.h" diff --git a/ghc/includes/StgLdvProf.h b/ghc/includes/StgLdvProf.h new file mode 100644 index 0000000000..7ece7316da --- /dev/null +++ b/ghc/includes/StgLdvProf.h @@ 
-0,0 +1,132 @@ +/* ----------------------------------------------------------------------------- + * $Id: StgLdvProf.h,v 1.1 2001/11/22 14:25:11 simonmar Exp $ + * + * (c) The GHC Team, 2001 + * Author: Sungwoo Park + * + * Lag/Drag/Void profiling. + * + * ---------------------------------------------------------------------------*/ + +#ifndef STGLDVPROF_H +#define STGLDVPROF_H + +#ifdef PROFILING + +// Engine + +// declared in LdvProfile.c +extern nat ldvTime; + +// LdvGenInfo stores the statistics for one specific census. +typedef struct { + double time; // the time in MUT time at the corresponding census is made + + // We employ int instead of nat, for some values may be negative temporarily, + // e.g., dragNew. + + // computed at each census + int inherentlyUsed; // total size of 'inherently used' closures + int notUsed; // total size of 'never used' closures + int used; // total size of 'used at least once' closures + + /* + voidNew and dragNew are updated when a closure is destroyed. + For instance, when a 'never used' closure of size s and creation time + t is destroyed at time u, voidNew of eras t through u - 1 is increased + by s. + Likewise, when a 'used at least once' closure of size s and last use time + t is destroyed at time u, dragNew of eras t + 1 through u - 1 is increase + by s. + In our implementation, voidNew and dragNew are computed indirectly: instead + of updating voidNew or dragNew of all intervening eras, we update that + of the end two eras (one is increased and the other is decreased). 
+ */ + int voidNew; // current total size of 'destroyed without being used' closures + int dragNew; // current total size of 'used at least once and waiting to die' + // closures + + // computed post-mortem + int voidTotal; // total size of closures in 'void' state + // lagTotal == notUsed - voidTotal // in 'lag' state + int dragTotal; // total size of closures in 'drag' state + // useTotal == used - dragTotal // in 'use' state +} LdvGenInfo; + +extern LdvGenInfo *gi; + +// retrieves the LDV word from closure c +#define LDVW(c) (((StgClosure *)(c))->header.prof.hp.ldvw) + +/* + An LDV word is divided into 3 parts: state bits (LDV_STATE_MASK), creation + time bits (LDV_CREATE_MASK), and last use time bits (LDV_LAST_MASK). + */ +#if SIZEOF_VOID_P == 8 +#define LDV_SHIFT 30 +#define LDV_STATE_MASK 0x1000000000000000 +#define LDV_CREATE_MASK 0x0FFFFFFFC0000000 +#define LDV_LAST_MASK 0x000000003FFFFFFF +#define LDV_STATE_CREATE 0x0000000000000000 +#define LDV_STATE_USE 0x1000000000000000 +#else +#define LDV_SHIFT 15 +#define LDV_STATE_MASK 0x40000000 +#define LDV_CREATE_MASK 0x3FFF8000 +#define LDV_LAST_MASK 0x00007FFF +#define LDV_STATE_CREATE 0x00000000 +#define LDV_STATE_USE 0x40000000 +#endif // SIZEOF_VOID_P + +// Stores the creation time for closure c. +// This macro is called at the very moment of closure creation. +// +// NOTE: this initializes LDVW(c) to zero, which ensures that there +// is no conflict between retainer profiling and LDV profiling, +// because retainer profiling also expects LDVW(c) to be initialised +// to zero. +#define LDV_recordCreate(c) \ + LDVW((c)) = (ldvTime << LDV_SHIFT) | LDV_STATE_CREATE + +// Stores the last use time for closure c. +// This macro *must* be called whenever a closure is used, that is, it is +// entered. 
+#define LDV_recordUse(c) \ + { \ + if (ldvTime > 0) \ + LDVW((c)) = (LDVW((c)) & LDV_CREATE_MASK) | \ + ldvTime | \ + LDV_STATE_USE; \ + } + +// Creates a 0-filled slop of size 'howManyBackwards' backwards from the +// address 'from'. +// +// Invoked when: +// 1) Hp is incremented and exceeds HpLim (in Updates.hc). +// 2) copypart() is called (in GC.c). +#define FILL_SLOP(from, howManyBackwards) \ + if (ldvTime > 0) { \ + int i; \ + for (i = 0;i < (howManyBackwards); i++) \ + ((StgWord *)(from))[-i] = 0; \ + } + +// Informs the LDV profiler that closure c has just been evacuated. +// Evacuated objects are no longer needed, so we just store its original size in +// the LDV field. +#define SET_EVACUAEE_FOR_LDV(c, size) \ + LDVW((c)) = (size) + +// Macros called when a closure is entered. +// The closure is not an 'inherently used' one. +// The closure is not IND or IND_OLDGEN because neither is considered for LDV +// profiling. +#define LDV_ENTER(c) LDV_recordUse((c)) + +#else // !PROFILING + +#define LDV_ENTER(c) + +#endif // PROFILING +#endif // STGLDVPROF_H diff --git a/ghc/includes/StgMacros.h b/ghc/includes/StgMacros.h index da3a42548b..6bd5887008 100644 --- a/ghc/includes/StgMacros.h +++ b/ghc/includes/StgMacros.h @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: StgMacros.h,v 1.41 2001/11/08 16:37:54 simonmar Exp $ + * $Id: StgMacros.h,v 1.42 2001/11/22 14:25:11 simonmar Exp $ * * (c) The GHC Team, 1998-1999 * @@ -144,7 +144,7 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; } tag_assts \ (r) = (P_)ret; \ JMP_(stg_chk_##layout); \ - } + } #define HP_STK_CHK(stk_headroom,hp_headroom,ret,r,layout,tag_assts) \ DO_GRAN_ALLOCATE(hp_headroom) \ @@ -153,7 +153,7 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; } tag_assts \ (r) = (P_)ret; \ JMP_(stg_chk_##layout); \ - } + } /* ----------------------------------------------------------------------------- A Heap Check 
in a case alternative are much simpler: everything is @@ -186,7 +186,7 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; } HpAlloc = (headroom); \ tag_assts \ JMP_(stg_gc_enter_##ptrs); \ - } + } #define HP_CHK_SEQ_NP(headroom,ptrs,tag_assts) \ DO_GRAN_ALLOCATE(headroom) \ @@ -194,7 +194,7 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; } HpAlloc = (headroom); \ tag_assts \ JMP_(stg_gc_seq_##ptrs); \ - } + } #define HP_STK_CHK_NP(stk_headroom, hp_headroom, ptrs, tag_assts) \ DO_GRAN_ALLOCATE(hp_headroom) \ @@ -202,7 +202,7 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; } HpAlloc = (hp_headroom); \ tag_assts \ JMP_(stg_gc_enter_##ptrs); \ - } + } /* Heap checks for branches of a primitive case / unboxed tuple return */ @@ -214,7 +214,7 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; } HpAlloc = (headroom); \ tag_assts \ JMP_(lbl); \ - } + } #define HP_CHK_NOREGS(headroom,tag_assts) \ GEN_HP_CHK_ALT(headroom,stg_gc_noregs,tag_assts); @@ -298,7 +298,7 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; } R9.w = (W_)LIVENESS_MASK(liveness); \ R10.w = (W_)reentry; \ JMP_(stg_gen_chk); \ - } + } #define HP_CHK_GEN_TICKY(headroom,liveness,reentry,tag_assts) \ HP_CHK_GEN(headroom,liveness,reentry,tag_assts); \ @@ -435,12 +435,29 @@ EXTINFO_RTS(stg_gen_chk_info); } \ SET_INFO(R1.cl,&stg_BLACKHOLE_info) # else +# ifndef PROFILING # define UPD_BH_UPDATABLE(info) \ TICK_UPD_BH_UPDATABLE(); \ SET_INFO(R1.cl,&stg_BLACKHOLE_info) # define UPD_BH_SINGLE_ENTRY(info) \ TICK_UPD_BH_SINGLE_ENTRY(); \ SET_INFO(R1.cl,&stg_SE_BLACKHOLE_info) +# else +// An object is replaced by a blackhole, so we fill the slop with zeros. +// +// Todo: maybe use SET_HDR() and remove LDV_recordCreate()? 
+// +# define UPD_BH_UPDATABLE(info) \ + TICK_UPD_BH_UPDATABLE(); \ + LDV_recordDead_FILL_SLOP_DYNAMIC(R1.cl); \ + SET_INFO(R1.cl,&stg_BLACKHOLE_info); \ + LDV_recordCreate(R1.cl) +# define UPD_BH_SINGLE_ENTRY(info) \ + TICK_UPD_BH_SINGLE_ENTRY(); \ + LDV_recordDead_FILL_SLOP_DYNAMIC(R1.cl); \ + SET_INFO(R1.cl,&stg_SE_BLACKHOLE_info) \ + LDV_recordCreate(R1.cl) +# endif /* PROFILING */ # endif #else /* !EAGER_BLACKHOLING */ # define UPD_BH_UPDATABLE(thunk) /* nothing */ diff --git a/ghc/includes/StgProf.h b/ghc/includes/StgProf.h index 2c89d94a0e..825c8461f1 100644 --- a/ghc/includes/StgProf.h +++ b/ghc/includes/StgProf.h @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: StgProf.h,v 1.13 2001/10/18 13:46:47 simonmar Exp $ + * $Id: StgProf.h,v 1.14 2001/11/22 14:25:11 simonmar Exp $ * * (c) The GHC Team, 1998 * @@ -349,9 +349,6 @@ extern CostCentreStack *CCS_LIST; /* registered CCS list */ #define ENTER_CCS_PAP_CL(closure) \ ENTER_CCS_PAP((closure)->header.prof.ccs) - /* temp EW */ -#define STATIC_CCS_REF(ccs) (ccs) - /* ----------------------------------------------------------------------------- When not profiling, these macros do nothing... -------------------------------------------------------------------------- */ diff --git a/ghc/includes/StgRetainerProf.h b/ghc/includes/StgRetainerProf.h new file mode 100644 index 0000000000..2b77772a1b --- /dev/null +++ b/ghc/includes/StgRetainerProf.h @@ -0,0 +1,75 @@ +/* ----------------------------------------------------------------------------- + * $Id: StgRetainerProf.h,v 1.1 2001/11/22 14:25:12 simonmar Exp $ + * + * (c) The GHC Team, 2001 + * + * Retainer profiling + * ---------------------------------------------------------------------------*/ + +#ifndef STGRETAINERPROF_H +#define STGRETAINERPROF_H + +/* + Type 'retainer' defines the retainer identity. + + Invariant: + 1. 
The retainer identity of a given retainer cannot change during + program execution, no matter where it is actually stored. + For instance, the memory address of a retainer cannot be used as + its retainer identity because its location may change during garbage + collections. + 2. Type 'retainer' must come with comparison operations as well as + an equality operation. That it, <, >, and == must be supported - + this is necessary to store retainers in a sorted order in retainer sets. + Therefore, you cannot use a huge structure type as 'retainer', for instance. + + We illustrate three possibilities of defining 'retainer identity'. + Choose one of the following three compiler directives: + + Retainer scheme 1 (RETAINER_SCHEME_INFO) : retainer = info table + Retainer scheme 2 (RETAINER_SCHEME_CCS) : retainer = cost centre stack + Retainer scheme 3 (RETAINER_SCHEME_CC) : retainer = cost centre +*/ + +// #define RETAINER_SCHEME_INFO +#define RETAINER_SCHEME_CCS +// #define RETAINER_SCHEME_CC + +#ifdef RETAINER_SCHEME_INFO +struct _StgInfoTable; +typedef struct _StgInfoTable *retainer; +#endif + +#ifdef RETAINER_SCHEME_CCS +typedef CostCentreStack *retainer; +#endif + +#ifdef RETAINER_SCHEME_CC +typedef CostCentre *retainer; +#endif + +/* + Type 'retainerSet' defines an abstract datatype for sets of retainers. + + Invariants: + A retainer set stores its elements in increasing order (in element[] array). + */ + +typedef struct _RetainerSet { + nat num; // number of elements + nat cost; // cost associated with this retainer set + StgWord hashKey; // hash key for this retainer set + struct _RetainerSet *link; // link to the next retainer set in the bucket + int id; // unique id of this retainer set (used when printing) + // Its absolute value is interpreted as its true id; if id is + // negative, it indicates that this retainer set has had a postive + // cost after some retainer profiling. 
+ retainer element[0]; // elements of this retainer set + // do not put anything below here! +} RetainerSet; + +// +// retainerSet - interface: see rts/RetainerSet.h +// + +#endif /* STGRETAINERPROF_H */ diff --git a/ghc/includes/Updates.h b/ghc/includes/Updates.h index d20332433f..3b2461b39b 100644 --- a/ghc/includes/Updates.h +++ b/ghc/includes/Updates.h @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: Updates.h,v 1.25 2001/11/08 12:46:31 simonmar Exp $ + * $Id: Updates.h,v 1.26 2001/11/22 14:25:12 simonmar Exp $ * * (c) The GHC Team, 1998-1999 * @@ -178,7 +178,9 @@ extern void awakenBlockedQueue(StgTSO *q); ------------------------------------------------------------------------- */ #if defined(PROFILING) -#define PUSH_STD_CCCS(frame) frame->header.prof.ccs = CCCS +// frame->header.prof.hp.rs = NULL (or frame-header.prof.hp.ldvw = 0) is unnecessary +// because it is not used anyhow. +#define PUSH_STD_CCCS(frame) (frame->header.prof.ccs = CCCS) #else #define PUSH_STD_CCCS(frame) #endif diff --git a/ghc/rts/Exception.hc b/ghc/rts/Exception.hc index f7b58878ce..8cb24e9b75 100644 --- a/ghc/rts/Exception.hc +++ b/ghc/rts/Exception.hc @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: Exception.hc,v 1.21 2001/08/17 14:44:54 simonmar Exp $ + * $Id: Exception.hc,v 1.22 2001/11/22 14:25:12 simonmar Exp $ * * (c) The GHC Team, 1998-2000 * @@ -260,8 +260,8 @@ CATCH_FRAME_ENTRY_TEMPLATE(stg_catch_frame_5_entry,RET_VEC(Sp[SP_OFF],5)); CATCH_FRAME_ENTRY_TEMPLATE(stg_catch_frame_6_entry,RET_VEC(Sp[SP_OFF],6)); CATCH_FRAME_ENTRY_TEMPLATE(stg_catch_frame_7_entry,RET_VEC(Sp[SP_OFF],7)); -#ifdef PROFILING -#define CATCH_FRAME_BITMAP 7 +#if defined(PROFILING) +#define CATCH_FRAME_BITMAP 15 #else #define CATCH_FRAME_BITMAP 3 #endif @@ -355,7 +355,7 @@ FN_(raisezh_fast) * the info was only displayed for an *uncaught* exception. 
*/ if (RtsFlags.ProfFlags.showCCSOnException) { - STGCALL2(print_ccs,stderr,CCCS); + STGCALL2(fprintCCS,stderr,CCCS); } #endif @@ -365,8 +365,18 @@ FN_(raisezh_fast) * is the exception raise. It is used to overwrite all the * thunks which are currently under evaluataion. */ + /* + // @LDV profiling + // stg_raise_info has THUNK as its closure type. Since a THUNK takes at least + // MIN_UPD_SIZE words in its payload, MIN_UPD_SIZE is more approprate than 1. + // It seems that 1 does not cause any problem unless profiling is performed. + // However, when LDV profiling goes on, we need to linearly scan small object pool, + // where raise_closure is stored, so we should use MIN_UPD_SIZE. raise_closure = (StgClosure *)RET_STGCALL1(P_,allocate, sizeofW(StgClosure)+1); + */ + raise_closure = (StgClosure *)RET_STGCALL1(P_,allocate, + sizeofW(StgClosure)+MIN_UPD_SIZE); SET_HDR(raise_closure, &stg_raise_info, CCCS); raise_closure->payload[0] = R1.cl; diff --git a/ghc/rts/GC.c b/ghc/rts/GC.c index 3ecde2b5eb..8a9e2ace2a 100644 --- a/ghc/rts/GC.c +++ b/ghc/rts/GC.c @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: GC.c,v 1.126 2001/11/08 12:46:31 simonmar Exp $ + * $Id: GC.c,v 1.127 2001/11/22 14:25:12 simonmar Exp $ * * (c) The GHC Team 1998-1999 * @@ -42,6 +42,9 @@ #include "FrontPanel.h" #endif +#include "RetainerProfile.h" +#include "LdvProfile.h" + /* STATIC OBJECT LIST. * * During GC: @@ -602,6 +605,13 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) } } +#ifdef PROFILING + // We call processHeapClosureForDead() on every closure destroyed during + // the current garbage collection, so we invoke LdvCensusForDead(). + if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) + LdvCensusForDead(N); +#endif + // NO MORE EVACUATION AFTER THIS POINT! // Finally: compaction of the oldest generation. 
if (major_gc && oldest_gen->steps[0].is_compacted) { @@ -933,6 +943,12 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) if (major_gc) { gcCAFs(); } #endif +#ifdef PROFILING + // resetStaticObjectForRetainerProfiling() must be called before + // zeroing below. + resetStaticObjectForRetainerProfiling(); +#endif + // zero the scavenged static object list if (major_gc) { zero_static_object_list(scavenged_static_objects); @@ -963,7 +979,6 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) // restore enclosing cost centre #ifdef PROFILING - heapCensus(); CCCS = prev_CCS; #endif @@ -1271,6 +1286,10 @@ static __inline__ StgClosure * copy(StgClosure *src, nat size, step *stp) { P_ to, from, dest; +#ifdef PROFILING + // @LDV profiling + nat size_org = size; +#endif TICK_GC_WORDS_COPIED(size); /* Find out where we're going, using the handy "to" pointer in @@ -1300,6 +1319,12 @@ copy(StgClosure *src, nat size, step *stp) dest = stp->hp; stp->hp = to; upd_evacuee(src,(StgClosure *)dest); +#ifdef PROFILING + // @LDV profiling + // We store the size of the just evacuated object in the LDV word so that + // the profiler can guess the position of the next object later. 
+ SET_EVACUAEE_FOR_LDV(src, size_org); +#endif return (StgClosure *)dest; } @@ -1309,10 +1334,14 @@ copy(StgClosure *src, nat size, step *stp) */ -static __inline__ StgClosure * +static StgClosure * copyPart(StgClosure *src, nat size_to_reserve, nat size_to_copy, step *stp) { P_ dest, to, from; +#ifdef PROFILING + // @LDV profiling + nat size_to_copy_org = size_to_copy; +#endif TICK_GC_WORDS_COPIED(size_to_copy); if (stp->gen_no < evac_gen) { @@ -1334,6 +1363,17 @@ copyPart(StgClosure *src, nat size_to_reserve, nat size_to_copy, step *stp) dest = stp->hp; stp->hp += size_to_reserve; upd_evacuee(src,(StgClosure *)dest); +#ifdef PROFILING + // @LDV profiling + // We store the size of the just evacuated object in the LDV word so that + // the profiler can guess the position of the next object later. + // size_to_copy_org is wrong because the closure already occupies size_to_reserve + // words. + SET_EVACUAEE_FOR_LDV(src, size_to_reserve); + // fill the slop + if (size_to_reserve - size_to_copy_org > 0) + FILL_SLOP(stp->hp - 1, (int)(size_to_reserve - size_to_copy_org)); +#endif return (StgClosure *)dest; } @@ -2162,9 +2202,23 @@ scavenge(step *stp) } case IND_PERM: - if (stp->gen_no != 0) { - SET_INFO(((StgClosure *)p), &stg_IND_OLDGEN_PERM_info); - } + if (stp->gen->no != 0) { +#ifdef PROFILING + // @LDV profiling + // No need to call LDV_recordDead_FILL_SLOP_DYNAMIC() because an + // IND_OLDGEN_PERM closure is larger than an IND_PERM closure. + LDV_recordDead((StgClosure *)p, sizeofW(StgInd)); +#endif + // + // Todo: maybe use SET_HDR() and remove LDV_recordCreate()? + // + SET_INFO(((StgClosure *)p), &stg_IND_OLDGEN_PERM_info); +#ifdef PROFILING + // @LDV profiling + // We pretend that p has just been created. 
+ LDV_recordCreate((StgClosure *)p); +#endif + } // fall through case IND_OLDGEN_PERM: ((StgIndOldGen *)p)->indirectee = @@ -3590,7 +3644,17 @@ threadLazyBlackHole(StgTSO *tso) #if (!defined(LAZY_BLACKHOLING)) && defined(DEBUG) belch("Unexpected lazy BHing required at 0x%04x",(int)bh); #endif +#ifdef PROFILING + // @LDV profiling + // We pretend that bh is now dead. + LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)bh); +#endif SET_INFO(bh,&stg_BLACKHOLE_info); +#ifdef PROFILING + // @LDV profiling + // We pretend that bh has just been created. + LDV_recordCreate(bh); +#endif } update_frame = update_frame->link; @@ -3832,7 +3896,20 @@ threadSqueezeStack(StgTSO *tso) } } #endif +#ifdef PROFILING + // @LDV profiling + // We pretend that bh is now dead. + LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)bh); +#endif + // + // Todo: maybe use SET_HDR() and remove LDV_recordCreate()? + // SET_INFO(bh,&stg_BLACKHOLE_info); +#ifdef PROFILING + // @LDV profiling + // We pretend that bh has just been created. + LDV_recordCreate(bh); +#endif } } diff --git a/ghc/rts/HeapStackCheck.hc b/ghc/rts/HeapStackCheck.hc index 5fa5f100e8..52a998537b 100644 --- a/ghc/rts/HeapStackCheck.hc +++ b/ghc/rts/HeapStackCheck.hc @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: HeapStackCheck.hc,v 1.18 2001/11/08 12:46:31 simonmar Exp $ + * $Id: HeapStackCheck.hc,v 1.19 2001/11/22 14:25:12 simonmar Exp $ * * (c) The GHC Team, 1998-1999 * @@ -47,7 +47,6 @@ * ThreadRunGHC thread. 
*/ - #define GC_GENERIC \ if (Hp > HpLim) { \ Hp -= HpAlloc; \ diff --git a/ghc/rts/Itimer.c b/ghc/rts/Itimer.c index 2ec3ea97c8..d1821f63d0 100644 --- a/ghc/rts/Itimer.c +++ b/ghc/rts/Itimer.c @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: Itimer.c,v 1.25 2001/11/21 20:55:10 sof Exp $ + * $Id: Itimer.c,v 1.26 2001/11/22 14:25:12 simonmar Exp $ * * (c) The GHC Team, 1995-1999 * @@ -142,6 +142,8 @@ initialize_virtual_timer(nat ms) } } + initProfTimer(); + return 0; } @@ -158,6 +160,10 @@ initialize_virtual_timer(nat ms) timestamp = getourtimeofday(); +#ifdef PROFILING + initProfTimer(); +#endif + it.it_value.tv_sec = ms / 1000; it.it_value.tv_usec = 1000 * (ms - (1000 * it.it_value.tv_sec)); it.it_interval = it.it_value; @@ -178,6 +184,8 @@ initialize_virtual_timer(nat ms) timestamp = getourtimeofday(); + initProfTimer(); + se.sigev_notify = SIGEV_SIGNAL; se.sigev_signo = SIGVTALRM; se.sigev_value.sival_int = SIGVTALRM; diff --git a/ghc/rts/Itimer.h b/ghc/rts/Itimer.h index f3a185ad18..9de549cddd 100644 --- a/ghc/rts/Itimer.h +++ b/ghc/rts/Itimer.h @@ -1,7 +1,7 @@ /* ----------------------------------------------------------------------------- - * $Id: Itimer.h,v 1.8 2001/11/21 20:55:10 sof Exp $ + * $Id: Itimer.h,v 1.9 2001/11/22 14:25:12 simonmar Exp $ * - * (c) The GHC Team 1998-1999 + * (c) The GHC Team 1998-2001 * * Interval timer for profiling and pre-emptive scheduling. 
* @@ -15,11 +15,6 @@ */ #define CS_MIN_MILLISECS TICK_MILLISECS /* milliseconds per slice */ -extern rtsBool do_prof_ticks; /* profiling ticks on/off */ - -/* Total number of ticks since startup */ -extern lnat total_ticks; - int initialize_virtual_timer ( nat ms ); int install_vtalrm_handler ( void ); void block_vtalrm_signal ( void ); diff --git a/ghc/rts/LdvProfile.c b/ghc/rts/LdvProfile.c new file mode 100644 index 0000000000..59a758ff6b --- /dev/null +++ b/ghc/rts/LdvProfile.c @@ -0,0 +1,857 @@ +/* ----------------------------------------------------------------------------- + * $Id: LdvProfile.c,v 1.1 2001/11/22 14:25:12 simonmar Exp $ + * + * (c) The GHC Team, 2001 + * Author: Sungwoo Park + * + * Lag/Drag/Void profiling. + * + * ---------------------------------------------------------------------------*/ + +#ifdef PROFILING + +#include "Stg.h" +#include "Rts.h" +#include "LdvProfile.h" +#include "RtsFlags.h" +#include "Itimer.h" +#include "Proftimer.h" +#include "Profiling.h" +#include "Stats.h" +#include "Storage.h" +#include "RtsUtils.h" +#include "Schedule.h" + +/* + ldvTime stores the current LDV time, that is, the current era. It + is one larger than the number of times LDV profiling has been + performed, i.e., + ldvTime - 1 == the number of time LDV profiling was executed + == the number of censuses made so far. + RESTRICTION: + ldvTime must be no longer than LDV_SHIFT (15 or 30) bits. + Invariants: + LDV profiling is turned off if ldvTime is 0. + LDV profiling is turned on if ldvTime is > 0. + ldvTime is initialized to 1 in initLdvProfiling(). + If LDV profiling is not performed, ldvTime must remain 0 (e.g., when we + are doing retainer profiling). + ldvTime is set to 1 in initLdvProfiling(). + ldvTime is set back to 0 in shutdownHaskell(). + In the meanwhile, ldvTime increments. +*/ +nat ldvTime = 0; +# +// ldvTimeSave is set in LdvCensusKillAll(), and stores the final number of +// times that LDV profiling was proformed. 
+static nat ldvTimeSave; + +// gi[] stores the statistics obtained at each heap census. +// gi[0] is not used. See initLdvProfiling(). +LdvGenInfo *gi; + +#define giINCREMENT 32 // allocation unit for gi[] +static nat giLength; // current length of gi[] + +// giMax is initialized to 2^LDV_SHIFT in initLdvProfiling(). +// When ldvTime reaches giMax, the profiling stops because a closure can +// store only up to (giMax - 1) as its creation or last use time. +static nat giMax; + +/* -------------------------------------------------------------------------- + * Fills in the slop when a *dynamic* closure changes its type. + * First calls LDV_recordDead() to declare the closure is dead, and then + * fills in the slop. + * + * Invoked when: + * 1) blackholing, UPD_BH_UPDATABLE() and UPD_BH_SINGLE_ENTRY (in + * includes/StgMacros.h), threadLazyBlackHole() and + * threadSqueezeStack() (in GC.c). + * 2) updating with indirection closures, updateWithIndirection() + * and updateWithPermIndirection() (in Storage.h). + * + * LDV_recordDead_FILL_SLOP_DYNAMIC() is not called on 'inherently used' + * closures such as TSO. It is not called on PAP because PAP is not updatable. 
+ * ----------------------------------------------------------------------- */ +void +LDV_recordDead_FILL_SLOP_DYNAMIC( StgClosure *p ) +{ + if (ldvTime > 0) { + StgInfoTable *inf = get_itbl((p)); + nat nw, i; + switch (inf->type) { + case THUNK_1_0: + case THUNK_0_1: + case THUNK_2_0: + case THUNK_1_1: + case THUNK_0_2: + case THUNK_SELECTOR: + nw = MIN_UPD_SIZE; + break; + case THUNK: + nw = inf->layout.payload.ptrs + inf->layout.payload.nptrs; + if (nw < MIN_UPD_SIZE) + nw = MIN_UPD_SIZE; + break; + case AP_UPD: + nw = sizeofW(StgPAP) - sizeofW(StgHeader) + ((StgPAP *)p)->n_args; + break; + case CAF_BLACKHOLE: + case BLACKHOLE: + case SE_BLACKHOLE: + case SE_CAF_BLACKHOLE: + nw = inf->layout.payload.ptrs + inf->layout.payload.nptrs; + break; + default: + barf("Unexpected closure type %u in LDV_recordDead_FILL_SLOP_DYNAMIC()", inf->type); + break; + } + LDV_recordDead((StgClosure *)(p), nw + sizeofW(StgHeader)); + for (i = 0; i < nw; i++) { + ((StgClosure *)(p))->payload[i] = 0; + } + } +} + +/* -------------------------------------------------------------------------- + * Initialize gi[ldvTime]. + * ----------------------------------------------------------------------- */ +static inline void +giInitForCurrentEra(void) +{ + gi[ldvTime].notUsed = 0; + gi[ldvTime].inherentlyUsed = 0; + gi[ldvTime].used = 0; + + gi[ldvTime].voidNew = 0; + gi[ldvTime].dragNew = 0; +} + +/* -------------------------------------------------------------------------- + * Increases ldvTime by 1 and initialize gi[ldvTime]. + * Reallocates gi[] and increases its size if needed. + * ----------------------------------------------------------------------- */ +static void +incrementLdvTime( void ) +{ + ldvTime++; + + if (ldvTime == giMax) { + fprintf(stderr, + "Lag/Drag/Void profiling limit %u reached. 
" + "Please increase the profiling interval with -L option.\n", + giLength); + barf("Current profiling interval = %f seconds", + (float)RtsFlags.ProfFlags.profileInterval / 1000.0 ); + } + + if (ldvTime % giINCREMENT == 0) { + gi = stgReallocBytes(gi, sizeof(LdvGenInfo) * (giLength + giINCREMENT), + "incrementLdvTime"); + giLength += giINCREMENT; + } + + // What a stupid bug I struggled against for such a long time! I + // placed giInitForCurrentEra() before the above rellocation part, + // and it cost me three hours! + giInitForCurrentEra(); +} + +/* -------------------------------------------------------------------------- + * Initialization code for LDV profiling. + * ----------------------------------------------------------------------- */ +void +initLdvProfiling( void ) +{ + nat p; + + gi = stgMallocBytes(sizeof(LdvGenInfo) * giINCREMENT, "initLdvProfiling"); + giLength = giINCREMENT; + + ldvTime = 1; // turn on LDV profiling. + giInitForCurrentEra(); + + // giMax = 2^LDV_SHIFT + giMax = 1; + for (p = 0; p < LDV_SHIFT; p++) + giMax *= 2; +} + +/* -------------------------------------------------------------------------- + * This function must be called before f-closing prof_file. + * Still hp_file is open; see endHeapProfiling() in ProfHeap.c. + * ----------------------------------------------------------------------- */ +void +endLdvProfiling( void ) +{ + nat t; + int sumVoidNew, sumDragNew; + + // Now we compute voidTotal and dragTotal of each LdvGenInfo structure. + sumVoidNew = 0; + sumDragNew = 0; + for (t = 0; t < ldvTimeSave; t++) { + sumVoidNew += gi[t].voidNew; + sumDragNew += gi[t].dragNew; + gi[t].voidTotal = sumVoidNew; + gi[t].dragTotal = sumDragNew; + } + + // t = 0 is wrong (because ldvTime == 0 indicates LDV profiling is + // turned off. 
+ for (t = 1;t < ldvTimeSave; t++) { + fprintf(hp_file, "MARK %f\n", gi[t].time); + fprintf(hp_file, "BEGIN_SAMPLE %f\n", gi[t].time); + fprintf(hp_file, "VOID\t%u\n", gi[t].voidTotal * sizeof(StgWord)); + fprintf(hp_file, "LAG\t%u\n", (gi[t].notUsed - gi[t].voidTotal) * sizeof(StgWord)); + fprintf(hp_file, "USE\t%u\n", (gi[t].used - gi[t].dragTotal) * sizeof(StgWord)); + fprintf(hp_file, "INHERENT_USE\t%u\n", gi[t].inherentlyUsed * sizeof(StgWord)); + fprintf(hp_file, "DRAG\t%u\n", gi[t].dragTotal * sizeof(StgWord)); + fprintf(hp_file, "END_SAMPLE %f\n", gi[t].time); + } +} + +/* -------------------------------------------------------------------------- + * Print the statistics. + * This function is called after each retainer profiling. + * ----------------------------------------------------------------------- */ +static void +outputLdvSet( void ) +{ +} + +/* -------------------------------------------------------------------------- + * This function is eventually called on every object in the heap + * during a census. Any census is initiated immediately after a major + * garbage collection, and we exploit this fact in the implementation. + * If c is an 'inherently used' closure, gi[ldvTime].inherentlyUsed is + * updated. If c is an ordinary closure, either gi[ldvTime].notUsed or + * gi[ldvTime].used is updated. 
+ * ----------------------------------------------------------------------- */ +static inline nat +processHeapClosure(StgClosure *c) +{ + nat size; + StgInfoTable *info; + + info = get_itbl(c); + + ASSERT( + ((LDVW(c) & LDV_CREATE_MASK) >> LDV_SHIFT) <= ldvTime && + ((LDVW(c) & LDV_CREATE_MASK) >> LDV_SHIFT) > 0 + ); + ASSERT( + ((LDVW(c) & LDV_STATE_MASK) == LDV_STATE_CREATE) || + ( + (LDVW(c) & LDV_LAST_MASK) <= ldvTime && + (LDVW(c) & LDV_LAST_MASK) > 0 + ) + ); + + switch (info->type) { + /* + 'inherently used' cases: add to gi[ldvTime].inherentlyUsed + */ + + case TSO: + size = tso_sizeW((StgTSO *)c); + goto inherently_used; + + case MVAR: + size = sizeofW(StgMVar); + goto inherently_used; + + case MUT_ARR_PTRS: + case MUT_ARR_PTRS_FROZEN: + size = mut_arr_ptrs_sizeW((StgMutArrPtrs *)c); + goto inherently_used; + + case ARR_WORDS: + size = arr_words_sizeW((StgArrWords *)c); + goto inherently_used; + + case WEAK: + case MUT_VAR: + case MUT_CONS: + case FOREIGN: + case BCO: + case STABLE_NAME: + size = sizeW_fromITBL(info); + goto inherently_used; + + /* + ordinary cases: add to gi[ldvTime].notUsed if c is not being used. + add to gi[ldvTime].used if c is being used. 
+ */ + case THUNK: + size = stg_max(sizeW_fromITBL(info), sizeofW(StgHeader) + MIN_UPD_SIZE); + break; + + case THUNK_1_0: + case THUNK_0_1: + case THUNK_2_0: + case THUNK_1_1: + case THUNK_0_2: + case THUNK_SELECTOR: + size = sizeofW(StgHeader) + MIN_UPD_SIZE; + break; + + case AP_UPD: + case PAP: + size = pap_sizeW((StgPAP *)c); + break; + + case CONSTR: + case CONSTR_1_0: + case CONSTR_0_1: + case CONSTR_2_0: + case CONSTR_1_1: + case CONSTR_0_2: + + case FUN: + case FUN_1_0: + case FUN_0_1: + case FUN_2_0: + case FUN_1_1: + case FUN_0_2: + + case BLACKHOLE_BQ: + case BLACKHOLE: + case SE_BLACKHOLE: + case CAF_BLACKHOLE: + case SE_CAF_BLACKHOLE: + size = sizeW_fromITBL(info); + break; + + case IND_PERM: + size = sizeofW(StgInd); + break; + + case IND_OLDGEN_PERM: + size = sizeofW(StgIndOldGen); + break; + + /* + Error case + */ + case IND: // IND cannot appear after major GCs. + case IND_OLDGEN: // IND_OLDGEN cannot appear major GCs. + case EVACUATED: // EVACUATED is encountered only during GCs. + // static objects + case IND_STATIC: + case CONSTR_STATIC: + case FUN_STATIC: + case THUNK_STATIC: + case CONSTR_INTLIKE: + case CONSTR_CHARLIKE: + case CONSTR_NOCAF_STATIC: + // stack objects + case UPDATE_FRAME: + case CATCH_FRAME: + case STOP_FRAME: + case SEQ_FRAME: + case RET_DYN: + case RET_BCO: + case RET_SMALL: + case RET_VEC_SMALL: + case RET_BIG: + case RET_VEC_BIG: + // others + case BLOCKED_FETCH: + case FETCH_ME: + case FETCH_ME_BQ: + case RBH: + case REMOTE_REF: + case INVALID_OBJECT: + default: + barf("Invalid object in processHeapClosure(): %d", info->type); + return 0; + } + + /* + ordinary cases: + We can compute either gi[ldvTime].notUsed or gi[ldvTime].used; the other + can be computed from the total sum of costs. + At the moment, we choose to compute gi[ldvTime].notUsed, which seems to + be smaller than gi[ldvTime].used. + */ + + // ignore closures that don't satisfy our constraints. 
+ if (closureSatisfiesConstraints(c)) { + if ((LDVW(c) & LDV_STATE_MASK) == LDV_STATE_CREATE) + gi[ldvTime].notUsed += size - sizeofW(StgProfHeader); + else + gi[ldvTime].used += size - sizeofW(StgProfHeader); + } + return size; + +inherently_used: + // ignore closures that don't satisfy our constraints. + if (closureSatisfiesConstraints(c)) { + gi[ldvTime].inherentlyUsed += size - sizeofW(StgProfHeader); + } + return size; +} + +/* -------------------------------------------------------------------------- + * Calls processHeapClosure() on every closure in the heap blocks + * begining at bd during a census. + * ----------------------------------------------------------------------- */ +static void +processHeap( bdescr *bd ) +{ + StgPtr p; + nat size; + + while (bd != NULL) { + p = bd->start; + while (p < bd->free) { + size = processHeapClosure((StgClosure *)p); + p += size; + while (p < bd->free && !*p) // skip slop + p++; + } + ASSERT(p == bd->free); + bd = bd->link; + } +} + +/* -------------------------------------------------------------------------- + * Calls processHeapClosure() on every closure in the small object pool + * during a census. 
+ * ----------------------------------------------------------------------- */ +static void +processSmallObjectPool( void ) +{ + bdescr *bd; + StgPtr p; + nat size; + + bd = small_alloc_list; + + // first block + if (bd == NULL) + return; + + p = bd->start; + while (p < alloc_Hp) { + size = processHeapClosure((StgClosure *)p); + p += size; + while (p < alloc_Hp && !*p) // skip slop + p++; + } + ASSERT(p == alloc_Hp); + + bd = bd->link; + while (bd != NULL) { + p = bd->start; + while (p < bd->free) { + size = processHeapClosure((StgClosure *)p); + p += size; + while (p < bd->free && !*p) // skip slop + p++; + } + ASSERT(p == bd->free); + bd = bd->link; + } +} + +/* -------------------------------------------------------------------------- + * Calls processHeapClosure() on every (large) closure in the object + * chain beginning at bd during a census. + * ----------------------------------------------------------------------- */ +static void +processChain( bdescr *bd ) +{ + while (bd != NULL) { + // bd->free - bd->start is not an accurate measurement of the + // object size. Actually it is always zero, so we compute its + // size explicitly. + processHeapClosure((StgClosure *)bd->start); + bd = bd->link; + } +} + +/* -------------------------------------------------------------------------- + * Starts a census for LDV profiling. + * Invariants: + * Any call to LdvCensus() is preceded by a major garbage collection. + * ----------------------------------------------------------------------- */ +void +LdvCensus( void ) +{ + nat g, s; + + // ldvTime == 0 means that LDV profiling is currently turned off. + if (ldvTime == 0) + return; + + stat_startLDV(); + // + // Todo: when we perform LDV profiling, the Haskell mutator time seems to + // be affected by -S or -s runtime option. 
For instance, the + // following two options should result in nearly same + // profiling outputs, but the second run (without -Sstderr + // option) spends almost twice as long in the Haskell + // mutator as the first run: + // + // 1) +RTS -Sstderr -hL -RTS + // 2) +RTS -hL -RTS + // + // This is quite a subtle bug because this wierd phenomenon is not + // observed in retainer profiling, yet mut_user_time_during_LDV() is + // completely orthogonal to mut_user_time_during_RP(). However, the + // overall shapes of the resultant graphs are almost the same. + // + gi[ldvTime].time = mut_user_time_during_LDV(); + if (RtsFlags.GcFlags.generations == 1) { + // + // Todo: support LDV for two-space garbage collection. + // + barf("Lag/Drag/Void profiling not supported with -G1"); + } else { + for (g = 0; g < RtsFlags.GcFlags.generations; g++) + for (s = 0; s < generations[g].n_steps; s++) { + if (g == 0 && s == 0) { + // after a major GC, the nursery must be empty, + // and no need to call processNursery(). + ASSERT(MainCapability.r.rNursery->start == + MainCapability.r.rNursery->free); + processSmallObjectPool(); + processChain(generations[g].steps[s].large_objects); + } else{ + processHeap(generations[g].steps[s].blocks); + processChain(generations[g].steps[s].large_objects); + } + } + } + outputLdvSet(); // output to hp_file + stat_endLDV(); // output to prof_file + + incrementLdvTime(); +} + +/* -------------------------------------------------------------------------- + * This function is called eventually on every object destroyed during + * a garbage collection, whether it is a major garbage collection or + * not. If c is an 'inherently used' closure, nothing happens. If c + * is an ordinary closure, LDV_recordDead() is called on c with its + * proper size which excludes the profiling header portion in the + * closure. Returns the size of the closure, including the profiling + * header portion, so that the caller can find the next closure. 
+ * ----------------------------------------------------------------------- */ +static inline nat +processHeapClosureForDead( StgClosure *c ) +{ + nat size; + StgInfoTable *info; + + info = get_itbl(c); + + if (info->type != EVACUATED) { + ASSERT(((LDVW(c) & LDV_CREATE_MASK) >> LDV_SHIFT) <= ldvTime && + ((LDVW(c) & LDV_CREATE_MASK) >> LDV_SHIFT) > 0); + ASSERT(((LDVW(c) & LDV_STATE_MASK) == LDV_STATE_CREATE) || + ( + (LDVW(c) & LDV_LAST_MASK) <= ldvTime && + (LDVW(c) & LDV_LAST_MASK) > 0 + )); + } + + switch (info->type) { + /* + 'inherently used' cases: do nothing. + */ + + case TSO: + size = tso_sizeW((StgTSO *)c); + return size; + + case MVAR: + size = sizeofW(StgMVar); + return size; + + case MUT_ARR_PTRS: + case MUT_ARR_PTRS_FROZEN: + size = mut_arr_ptrs_sizeW((StgMutArrPtrs *)c); + return size; + + case ARR_WORDS: + size = arr_words_sizeW((StgArrWords *)c); + return size; + + case WEAK: + case MUT_VAR: + case MUT_CONS: + case FOREIGN: + case BCO: + case STABLE_NAME: + size = sizeW_fromITBL(info); + return size; + + /* + ordinary cases: call LDV_recordDead(). + */ + + case THUNK: + size = stg_max(sizeW_fromITBL(info), sizeofW(StgHeader) + MIN_UPD_SIZE); + break; + + case THUNK_1_0: + case THUNK_0_1: + case THUNK_2_0: + case THUNK_1_1: + case THUNK_0_2: + case THUNK_SELECTOR: + size = sizeofW(StgHeader) + MIN_UPD_SIZE; + break; + + case AP_UPD: + case PAP: + size = pap_sizeW((StgPAP *)c); + break; + + case CONSTR: + case CONSTR_1_0: + case CONSTR_0_1: + case CONSTR_2_0: + case CONSTR_1_1: + case CONSTR_0_2: + + case FUN: + case FUN_1_0: + case FUN_0_1: + case FUN_2_0: + case FUN_1_1: + case FUN_0_2: + + case BLACKHOLE_BQ: + case BLACKHOLE: + case SE_BLACKHOLE: + case CAF_BLACKHOLE: + case SE_CAF_BLACKHOLE: + size = sizeW_fromITBL(info); + break; + + case IND_PERM: + size = sizeofW(StgInd); + break; + + case IND_OLDGEN_PERM: + size = sizeofW(StgIndOldGen); + break; + + /* + 'Ingore' cases + */ + // Why can we ignore IND/IND_OLDGEN closures? 
We assume that + // any census is preceded by a major garbage collection, which + // IND/IND_OLDGEN closures cannot survive. Therefore, it is no + // use considering IND/IND_OLDGEN closures in the meanwhile + // because they will perish before the next census at any + // rate. + case IND: + size = sizeofW(StgInd); + return size; + + case IND_OLDGEN: + size = sizeofW(StgIndOldGen); + return size; + + case EVACUATED: + // The size of the evacuated closure is currently stored in + // the LDV field. See SET_EVACUAEE_FOR_LDV() in + // includes/StgLdvProf.h. + return LDVW(c); + + /* + Error case + */ + // static objects + case IND_STATIC: + case CONSTR_STATIC: + case FUN_STATIC: + case THUNK_STATIC: + case CONSTR_INTLIKE: + case CONSTR_CHARLIKE: + case CONSTR_NOCAF_STATIC: + // stack objects + case UPDATE_FRAME: + case CATCH_FRAME: + case STOP_FRAME: + case SEQ_FRAME: + case RET_DYN: + case RET_BCO: + case RET_SMALL: + case RET_VEC_SMALL: + case RET_BIG: + case RET_VEC_BIG: + // others + case BLOCKED_FETCH: + case FETCH_ME: + case FETCH_ME_BQ: + case RBH: + case REMOTE_REF: + case INVALID_OBJECT: + default: + barf("Invalid object in processHeapClosureForDead(): %d", info->type); + return 0; + } + + // Found a dead closure: record its size + LDV_recordDead(c, size); + return size; +} + +/* -------------------------------------------------------------------------- + * Calls processHeapClosureForDead() on every *dead* closures in the + * heap blocks starting at bd. 
+ * ----------------------------------------------------------------------- */ +static void +processHeapForDead( bdescr *bd ) +{ + StgPtr p; + + while (bd != NULL) { + p = bd->start; + while (p < bd->free) { + p += processHeapClosureForDead((StgClosure *)p); + while (p < bd->free && !*p) // skip slop + p++; + } + ASSERT(p == bd->free); + bd = bd->link; + } +} + +/* -------------------------------------------------------------------------- + * Calls processHeapClosureForDead() on every *dead* closures in the nursery. + * ----------------------------------------------------------------------- */ +static void +processNurseryForDead( void ) +{ + StgPtr p, bdLimit; + bdescr *bd; + + bd = MainCapability.r.rNursery; + while (bd->start < bd->free) { + p = bd->start; + bdLimit = bd->start + BLOCK_SIZE_W; + while (p < bd->free && p < bdLimit) { + p += processHeapClosureForDead((StgClosure *)p); + while (p < bd->free && p < bdLimit && !*p) // skip slop + p++; + } + bd = bd->link; + if (bd == NULL) + break; + } +} + +/* -------------------------------------------------------------------------- + * Calls processHeapClosureForDead() on every *dead* closures in the + * small object pool. 
+ * ----------------------------------------------------------------------- */ +static void +processSmallObjectPoolForDead( void ) +{ + bdescr *bd; + StgPtr p; + + bd = small_alloc_list; + + // first block + if (bd == NULL) + return; + + p = bd->start; + while (p < alloc_Hp) { + p += processHeapClosureForDead((StgClosure *)p); + while (p < alloc_Hp && !*p) // skip slop + p++; + } + ASSERT(p == alloc_Hp); + + bd = bd->link; + while (bd != NULL) { + p = bd->start; + while (p < bd->free) { + p += processHeapClosureForDead((StgClosure *)p); + while (p < bd->free && !*p) // skip slop + p++; + } + ASSERT(p == bd->free); + bd = bd->link; + } +} + +/* -------------------------------------------------------------------------- + * Calls processHeapClosureForDead() on every *dead* closures in the closure + * chain. + * ----------------------------------------------------------------------- */ +static void +processChainForDead( bdescr *bd ) +{ + // Any object still in the chain is dead! + while (bd != NULL) { + processHeapClosureForDead((StgClosure *)bd->start); + bd = bd->link; + } +} + +/* -------------------------------------------------------------------------- + * Start a census for *dead* closures, and calls + * processHeapClosureForDead() on every closure which died in the + * current garbage collection. This function is called from a garbage + * collector right before tidying up, when all dead closures are still + * stored in the heap and easy to identify. Generations 0 through N + * have just beed garbage collected. + * ----------------------------------------------------------------------- */ +void +LdvCensusForDead( nat N ) +{ + nat g, s; + + // ldvTime == 0 means that LDV profiling is currently turned off. + if (ldvTime == 0) + return; + + if (RtsFlags.GcFlags.generations == 1) { + // + // Todo: support LDV for two-space garbage collection. 
+ // + barf("Lag/Drag/Void profiling not supported with -G1"); + } else { + for (g = 0; g <= N; g++) + for (s = 0; s < generations[g].n_steps; s++) { + if (g == 0 && s == 0) { + processSmallObjectPoolForDead(); + processNurseryForDead(); + processChainForDead(generations[g].steps[s].large_objects); + } else{ + processHeapForDead(generations[g].steps[s].blocks); + processChainForDead(generations[g].steps[s].large_objects); + } + } + } +} + +/* -------------------------------------------------------------------------- + * Regard any closure in the current heap as dead or moribund and update + * LDV statistics accordingly. + * Called from shutdownHaskell() in RtsStartup.c. + * Also, stops LDV profiling by resetting ldvTime to 0. + * ----------------------------------------------------------------------- */ +void +LdvCensusKillAll( void ) +{ + LdvCensusForDead(RtsFlags.GcFlags.generations - 1); + + // record the time when LDV profiling stops. + ldvTimeSave = ldvTime; + + // and, stops LDV profiling. + ldvTime = 0; +} + +#endif /* PROFILING */ diff --git a/ghc/rts/LdvProfile.h b/ghc/rts/LdvProfile.h new file mode 100644 index 0000000000..b722fbc8ae --- /dev/null +++ b/ghc/rts/LdvProfile.h @@ -0,0 +1,63 @@ +/* ----------------------------------------------------------------------------- + * $Id: LdvProfile.h,v 1.1 2001/11/22 14:25:12 simonmar Exp $ + * + * (c) The GHC Team, 2001 + * Author: Sungwoo Park + * + * Lag/Drag/Void profiling. + * + * ---------------------------------------------------------------------------*/ + +#ifndef LDVPROFILE_H +#define LDVPROFILE_H + +#ifdef PROFILING + +#include "ProfHeap.h" + +void LDV_recordDead_FILL_SLOP_DYNAMIC( StgClosure *p ); + +// Precesses a closure 'c' being destroyed whose size is 'size'. +// Make sure that LDV_recordDead() is not invoked on 'inherently used' closures +// such as TSO; they should not be involved in computing dragNew or voidNew. +// +// Note: ldvTime is 0 if LDV profiling is turned off. 
+// ldvTime is > 0 if LDV profiling is turned on. +// size does not include StgProfHeader. +// +// Even though ldvTime is checked in both LdvCensusForDead() and +// LdvCensusKillAll(), we still need to make sure that ldvTime is > 0 because +// LDV_recordDead() may be called from elsewhere in the runtime system. E.g., +// when a thunk is replaced by an indirection object. + +static inline void +LDV_recordDead( StgClosure *c, nat size ) +{ + if (ldvTime > 0 && closureSatisfiesConstraints(c)) { + nat t; + size -= sizeofW(StgProfHeader); + if ((LDVW((c)) & LDV_STATE_MASK) == LDV_STATE_CREATE) { + t = (LDVW((c)) & LDV_CREATE_MASK) >> LDV_SHIFT; + if (t < ldvTime) { + gi[t].voidNew += (int)size; + gi[ldvTime].voidNew -= (int)size; + } + } else { + t = LDVW((c)) & LDV_LAST_MASK; + if (t + 1 < ldvTime) { + gi[t + 1].dragNew += size; + gi[ldvTime].dragNew -= size; + } + } + } +} + +extern void initLdvProfiling ( void ); +extern void endLdvProfiling ( void ); +extern void LdvCensus ( void ); +extern void LdvCensusForDead ( nat ); +extern void LdvCensusKillAll ( void ); + +#endif /* PROFILING */ + +#endif /* LDVPROFILE_H */ diff --git a/ghc/rts/PrimOps.hc b/ghc/rts/PrimOps.hc index d36c18e0af..2036768e85 100644 --- a/ghc/rts/PrimOps.hc +++ b/ghc/rts/PrimOps.hc @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: PrimOps.hc,v 1.84 2001/11/08 12:46:31 simonmar Exp $ + * $Id: PrimOps.hc,v 1.85 2001/11/22 14:25:12 simonmar Exp $ * * (c) The GHC Team, 1998-2000 * @@ -420,7 +420,25 @@ FN_(finalizzeWeakzh_fast) } /* kill it */ +#ifdef PROFILING + // @LDV profiling + // A weak pointer is inherently used, so we do not need to call + // LDV_recordDead_FILL_SLOP_DYNAMIC(): + // LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w); + // or, LDV_recordDead(): + // LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader)); + // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as + // large as weak 
pointers, so there is no need to fill the slop, either. + // See stg_DEAD_WEAK_info in StgMiscClosures.hc. +#endif + // + // Todo: maybe use SET_HDR() and remove LDV_recordCreate()? + // w->header.info = &stg_DEAD_WEAK_info; +#ifdef PROFILING + // @LDV profiling + LDV_recordCreate((StgClosure *)w); +#endif f = ((StgWeak *)w)->finalizer; w->link = ((StgWeak *)w)->link; diff --git a/ghc/rts/ProfHeap.c b/ghc/rts/ProfHeap.c index 5597792d77..fc4f421012 100644 --- a/ghc/rts/ProfHeap.c +++ b/ghc/rts/ProfHeap.c @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: ProfHeap.c,v 1.25 2001/08/14 13:40:09 sewardj Exp $ + * $Id: ProfHeap.c,v 1.26 2001/11/22 14:25:12 simonmar Exp $ * * (c) The GHC Team, 1998-2000 * @@ -25,6 +25,8 @@ #include "Stats.h" #include "Hash.h" #include "StrHash.h" +#include "RetainerProfile.h" +#include "LdvProfile.h" #ifdef DEBUG_HEAP_PROF #include "Printer.h" @@ -95,7 +97,7 @@ strToCtr(const char *str) for (; ctr != NULL; prev = ctr, ctr = ctr->next_bucket ) { if (!strcmp(ctr->str, str)) { insertHashTable( str_to_ctr, (W_)str, ctr ); -#ifdef DEBUG +#ifdef DEBUG_CTR fprintf(stderr,"strToCtr: existing ctr for `%s'\n",str); #endif return ctr; @@ -109,7 +111,7 @@ strToCtr(const char *str) ctr->next = all_ctrs; all_ctrs = ctr; -#ifdef DEBUG +#ifdef DEBUG_CTR fprintf(stderr,"strToCtr: new ctr for `%s'\n",str); #endif @@ -175,23 +177,17 @@ initHeapProfiling(void) fprintf(hp_file, "JOB \"%s", prog_argv[0]); -# ifdef PROFILING - switch (RtsFlags.ProfFlags.doHeapProfile) { - case HEAP_BY_CCS: fprintf(hp_file, " -h%c", CCchar); break; - case HEAP_BY_MOD: fprintf(hp_file, " -h%c", MODchar); break; - case HEAP_BY_DESCR: fprintf(hp_file, " -h%c", DESCRchar); break; - case HEAP_BY_TYPE: fprintf(hp_file, " -h%c", TYPEchar); break; - default: /* nothing */ +#ifdef PROFILING + { + int count; + for(count = 1; count < prog_argc; count++) + fprintf(hp_file, " %s", prog_argv[count]); + fprintf(hp_file, " +RTS "); + 
for(count = 0; count < rts_argc; count++) + fprintf(hp_file, "%s ", rts_argv[count]); + fprintf(hp_file, "\n"); } - if (RtsFlags.ProfFlags.ccSelector) - fprintf(hp_file, " -hc{%s}", RtsFlags.ProfFlags.ccSelector); - if (RtsFlags.ProfFlags.modSelector) - fprintf(hp_file, " -hm{%s}", RtsFlags.ProfFlags.modSelector); - if (RtsFlags.ProfFlags.descrSelector) - fprintf(hp_file, " -hd{%s}", RtsFlags.ProfFlags.descrSelector); - if (RtsFlags.ProfFlags.typeSelector) - fprintf(hp_file, " -hy{%s}", RtsFlags.ProfFlags.typeSelector); -# endif /* PROFILING */ +#endif /* PROFILING */ fprintf(hp_file, "\"\n" ); @@ -224,6 +220,17 @@ endHeapProfiling(void) return; } +#ifdef PROFILING + switch (RtsFlags.ProfFlags.doHeapProfile) { + case HEAP_BY_RETAINER: + endRetainerProfiling(); + break; + case HEAP_BY_LDV: + endLdvProfiling(); + break; + } +#endif + seconds = mut_user_time(); fprintf(hp_file, "BEGIN_SAMPLE %0.2f\n", seconds); fprintf(hp_file, "END_SAMPLE %0.2f\n", seconds); @@ -417,24 +424,48 @@ clearCCSResid(CostCentreStack *ccs) } static void -fprint_ccs(FILE *fp, CostCentreStack *ccs, nat components) +fprint_ccs(FILE *fp, CostCentreStack *ccs, nat max_length) { - CostCentre *cc; - CostCentreStack *prev; + char buf[max_length+1]; + nat next_offset = 0; + nat written; + char *template; + + // MAIN on its own gets printed as "MAIN", otherwise we ignore MAIN. + if (ccs == CCS_MAIN) { + fprintf(fp, "MAIN"); + return; + } - cc = ccs->cc; - prev = ccs->prevStack; + // keep printing components of the stack until we run out of space + // in the buffer. If we run out of space, end with "...". + for (; ccs != NULL && ccs != CCS_MAIN; ccs = ccs->prevStack) { - if (prev == NULL - || prev->cc->is_caf != CC_IS_BORING - || components == 1) { - fprintf(fp,"%s",cc->label); - return; + // CAF cost centres print as M.CAF, but we leave the module + // name out of all the others to save space. 
+ if (!strcmp(ccs->cc->label,"CAF")) { + written = snprintf(buf+next_offset, + (int)max_length-3-(int)next_offset, + "%s.CAF", ccs->cc->module); + } else { + if (ccs->prevStack != NULL && ccs->prevStack != CCS_MAIN) { + template = "%s/"; + } else { + template = "%s"; + } + written = snprintf(buf+next_offset, + (int)max_length-3-(int)next_offset, + template, ccs->cc->label); + } - } else { - fprint_ccs(fp, ccs->prevStack,components-1); - fprintf(fp,"/%s",cc->label); - } + if (next_offset+written >= max_length-4) { + sprintf(buf+max_length-4, "..."); + break; + } else { + next_offset += written; + } + } + fprintf(fp, "%s", buf); } static void @@ -444,7 +475,8 @@ reportCCSResid(FILE *fp, CostCentreStack *ccs) if (ccs->mem_resid != 0) { fprintf(fp," "); - fprint_ccs(fp,ccs,2/*print 2 components only*/); + // print as much of the CCS as possible in 20 chars, ending with "..." + fprint_ccs(fp,ccs,30); fprintf(fp," %ld\n", ccs->mem_resid * sizeof(W_)); } @@ -455,75 +487,190 @@ reportCCSResid(FILE *fp, CostCentreStack *ccs) } } -static -rtsBool str_matches_selector ( char* str, char* sel ) +static rtsBool +str_matches_selector( char* str, char* sel ) { char* p; - /* fprintf(stderr, "str_matches_selector %s %s\n", str, sel); */ + // fprintf(stderr, "str_matches_selector %s %s\n", str, sel); while (1) { - /* Compare str against wherever we've got to in sel. */ - p = str; - while (*p != '\0' && *sel != ',' && *sel != '\0' && *p == *sel) { - p++; sel++; - } - /* Match if all of str used and have reached the end of a sel - fragment. */ - if (*p == '\0' && (*sel == ',' || *sel == '\0')) - return rtsTrue; - - /* No match. Advance sel to the start of the next elem. */ - while (*sel != ',' && *sel != '\0') sel++; - if (*sel == ',') sel++; - - /* Run out of sel ?? */ - if (*sel == '\0') return rtsFalse; + // Compare str against wherever we've got to in sel. 
+ p = str; + while (*p != '\0' && *sel != ',' && *sel != '\0' && *p == *sel) { + p++; sel++; + } + // Match if all of str used and have reached the end of a sel fragment. + if (*p == '\0' && (*sel == ',' || *sel == '\0')) + return rtsTrue; + + // No match. Advance sel to the start of the next elem. + while (*sel != ',' && *sel != '\0') sel++; + if (*sel == ',') sel++; + + /* Run out of sel ?? */ + if (*sel == '\0') return rtsFalse; } } -/* Figure out whether a closure should be counted in this census, by - testing against all the specified constraints. */ -static -rtsBool satisfies_constraints ( StgClosure* p ) +// Figure out whether a closure should be counted in this census, by +// testing against all the specified constraints. +rtsBool +closureSatisfiesConstraints( StgClosure* p ) { rtsBool b; if (RtsFlags.ProfFlags.modSelector) { - b = str_matches_selector( ((StgClosure *)p)->header.prof.ccs->cc->module, - RtsFlags.ProfFlags.modSelector ); - if (!b) return rtsFalse; + b = str_matches_selector( ((StgClosure *)p)->header.prof.ccs->cc->module, + RtsFlags.ProfFlags.modSelector ); + if (!b) return rtsFalse; } if (RtsFlags.ProfFlags.descrSelector) { - b = str_matches_selector( (get_itbl((StgClosure *)p))->prof.closure_desc, - RtsFlags.ProfFlags.descrSelector ); - if (!b) return rtsFalse; + b = str_matches_selector( (get_itbl((StgClosure *)p))->prof.closure_desc, + RtsFlags.ProfFlags.descrSelector ); + if (!b) return rtsFalse; } if (RtsFlags.ProfFlags.typeSelector) { - b = str_matches_selector( (get_itbl((StgClosure *)p))->prof.closure_type, + b = str_matches_selector( (get_itbl((StgClosure *)p))->prof.closure_type, RtsFlags.ProfFlags.typeSelector ); - if (!b) return rtsFalse; + if (!b) return rtsFalse; } if (RtsFlags.ProfFlags.ccSelector) { - b = str_matches_selector( ((StgClosure *)p)->header.prof.ccs->cc->label, - RtsFlags.ProfFlags.ccSelector ); - if (!b) return rtsFalse; + b = str_matches_selector( ((StgClosure *)p)->header.prof.ccs->cc->label, + 
RtsFlags.ProfFlags.ccSelector ); + if (!b) return rtsFalse; } return rtsTrue; } #endif /* PROFILING */ +/* ----------------------------------------------------------------------------- + * Code to perform a heap census. + * -------------------------------------------------------------------------- */ +static void +heapCensusChain( bdescr *bd ) +{ + StgPtr p; + StgInfoTable *info; + nat size; +#ifdef PROFILING + nat real_size; +#endif -static double time_of_last_heapCensus = 0.0; + for (; bd != NULL; bd = bd->link) { + p = bd->start; + while (p < bd->free) { + info = get_itbl((StgClosure *)p); + + switch (info->type) { + + case CONSTR: + case BCO: + case FUN: + case THUNK: + case IND_PERM: + case IND_OLDGEN_PERM: + case CAF_BLACKHOLE: + case SE_CAF_BLACKHOLE: + case SE_BLACKHOLE: + case BLACKHOLE: + case BLACKHOLE_BQ: + case WEAK: + case FOREIGN: + case STABLE_NAME: + case MVAR: + case MUT_VAR: + case MUT_CONS: + case CONSTR_INTLIKE: + case CONSTR_CHARLIKE: + case FUN_1_0: + case FUN_0_1: + case FUN_1_1: + case FUN_0_2: + case FUN_2_0: + case THUNK_1_1: + case THUNK_0_2: + case THUNK_2_0: + case CONSTR_1_0: + case CONSTR_0_1: + case CONSTR_1_1: + case CONSTR_0_2: + case CONSTR_2_0: + size = sizeW_fromITBL(info); + break; + + case THUNK_1_0: /* ToDo - shouldn't be here */ + case THUNK_0_1: /* " ditto " */ + case THUNK_SELECTOR: + size = sizeofW(StgHeader) + MIN_UPD_SIZE; + break; + + case PAP: + case AP_UPD: + size = pap_sizeW((StgPAP *)p); + break; + + case ARR_WORDS: + size = arr_words_sizeW(stgCast(StgArrWords*,p)); + break; + + case MUT_ARR_PTRS: + case MUT_ARR_PTRS_FROZEN: + size = mut_arr_ptrs_sizeW((StgMutArrPtrs *)p); + break; + + case TSO: + size = tso_sizeW((StgTSO *)p); + break; + + default: + barf("heapCensus"); + } + +#ifdef DEBUG_HEAP_PROF + switch (RtsFlags.ProfFlags.doHeapProfile) { + case HEAP_BY_INFOPTR: + add_data((void *)(*p), size * sizeof(W_)); + break; + case HEAP_BY_CLOSURE_TYPE: + closure_types[info->type] += size * sizeof(W_); + break; + } 
+#endif + +#ifdef PROFILING + // subtract the profiling overhead + real_size = size - sizeofW(StgProfHeader); + + if (closureSatisfiesConstraints((StgClosure*)p)) { + switch (RtsFlags.ProfFlags.doHeapProfile) { + case HEAP_BY_CCS: + ((StgClosure *)p)->header.prof.ccs->mem_resid += real_size; + break; + case HEAP_BY_MOD: + strToCtr(((StgClosure *)p)->header.prof.ccs->cc->module) + ->mem_resid += real_size; + break; + case HEAP_BY_DESCR: + strToCtr(get_itbl(((StgClosure *)p))->prof.closure_desc)->mem_resid + += real_size; + break; + case HEAP_BY_TYPE: + strToCtr(get_itbl(((StgClosure *)p))->prof.closure_type)->mem_resid + += real_size; + break; + default: + barf("heapCensus; doHeapProfile"); + } + } +#endif + p += size; + } + } +} void -heapCensus(void) +heapCensus( void ) { - bdescr *bd; - const StgInfoTable *info; StgDouble time; - nat size; - StgPtr p; -#ifdef PROFILING - nat elapsed; -#endif + nat g, s; #ifdef DEBUG_HEAP_PROF switch (RtsFlags.ProfFlags.doHeapProfile) { @@ -542,21 +689,6 @@ heapCensus(void) #endif #ifdef PROFILING - /* - * We only continue iff we've waited long enough, - * otherwise, we just dont do the census. 
- */ - - time = mut_user_time_during_GC(); - elapsed = (time - time_of_last_heapCensus) * 1000; - if (elapsed < RtsFlags.ProfFlags.profileFrequency) { - return; - } - time_of_last_heapCensus = time; -#endif - - -#ifdef PROFILING switch (RtsFlags.ProfFlags.doHeapProfile) { case NO_HEAP_PROFILING: return; @@ -574,136 +706,27 @@ heapCensus(void) } #endif - /* Only do heap profiling in a two-space heap */ - ASSERT(RtsFlags.GcFlags.generations == 1); - bd = g0s0->to_blocks; - + time = mut_user_time_during_GC(); fprintf(hp_file, "BEGIN_SAMPLE %0.2f\n", time); - - while (bd != NULL) { - p = bd->start; - while (p < bd->free) { - info = get_itbl((StgClosure *)p); - - switch (info->type) { - - case CONSTR: - if (((StgClosure *)p)->header.info == &stg_DEAD_WEAK_info - && !(LOOKS_LIKE_GHC_INFO(*(p + sizeW_fromITBL(info))))) { - size = sizeofW(StgWeak); - break; - } - /* else, fall through... */ - - case BCO: - case FUN: - case THUNK: - case IND_PERM: - case IND_OLDGEN_PERM: - case CAF_BLACKHOLE: - case SE_CAF_BLACKHOLE: - case SE_BLACKHOLE: - case BLACKHOLE: - case BLACKHOLE_BQ: - case WEAK: - case FOREIGN: - case STABLE_NAME: - case MVAR: - case MUT_VAR: - case CONSTR_INTLIKE: - case CONSTR_CHARLIKE: - case FUN_1_0: - case FUN_0_1: - case FUN_1_1: - case FUN_0_2: - case FUN_2_0: - case THUNK_1_1: - case THUNK_0_2: - case THUNK_2_0: - case CONSTR_1_0: - case CONSTR_0_1: - case CONSTR_1_1: - case CONSTR_0_2: - case CONSTR_2_0: - size = sizeW_fromITBL(info); - break; - - case THUNK_1_0: /* ToDo - shouldn't be here */ - case THUNK_0_1: /* " ditto " */ - case THUNK_SELECTOR: - size = sizeofW(StgHeader) + MIN_UPD_SIZE; - break; - - case AP_UPD: /* we can treat this as being the same as a PAP */ - case PAP: - size = pap_sizeW((StgPAP *)p); - break; - - case ARR_WORDS: - size = arr_words_sizeW(stgCast(StgArrWords*,p)); - break; - - case MUT_ARR_PTRS: - case MUT_ARR_PTRS_FROZEN: - size = mut_arr_ptrs_sizeW((StgMutArrPtrs *)p); - break; - - case TSO: - size = tso_sizeW((StgTSO *)p); - 
break; - - default: - barf("heapCensus"); - } - -#ifdef DEBUG_HEAP_PROF - switch (RtsFlags.ProfFlags.doHeapProfile) { - case HEAP_BY_INFOPTR: - add_data((void *)(*p), size * sizeof(W_)); - break; - case HEAP_BY_CLOSURE_TYPE: - closure_types[info->type] += size * sizeof(W_); - break; - } -#endif -# ifdef PROFILING - if (satisfies_constraints((StgClosure*)p)) { - switch (RtsFlags.ProfFlags.doHeapProfile) { - case HEAP_BY_CCS: - ((StgClosure *)p)->header.prof.ccs->mem_resid += size; - break; - case HEAP_BY_MOD: - strToCtr(((StgClosure *)p)->header.prof.ccs->cc->module) - ->mem_resid += size; - break; - case HEAP_BY_DESCR: - strToCtr(get_itbl(((StgClosure *)p))->prof.closure_desc)->mem_resid - += size; - break; - case HEAP_BY_TYPE: - strToCtr(get_itbl(((StgClosure *)p))->prof.closure_type)->mem_resid - += size; - break; - default: - barf("heapCensus; doHeapProfile"); - } + if (RtsFlags.GcFlags.generations == 1) { + heapCensusChain( g0s0->to_blocks ); + } else { + for (g = 0; g < RtsFlags.GcFlags.generations; g++) { + for (s = 0; s < generations[g].n_steps; s++) { + heapCensusChain( generations[g].steps[s].blocks ); + } } -# endif - - p += size; - } - bd = bd->link; } #ifdef DEBUG_HEAP_PROF switch (RtsFlags.ProfFlags.doHeapProfile) { case HEAP_BY_INFOPTR: - fprint_data(hp_file); - break; + fprint_data(hp_file); + break; case HEAP_BY_CLOSURE_TYPE: - fprint_closure_types(hp_file); - break; + fprint_closure_types(hp_file); + break; } #endif diff --git a/ghc/rts/ProfHeap.h b/ghc/rts/ProfHeap.h index 270dc5573e..852a82878d 100644 --- a/ghc/rts/ProfHeap.h +++ b/ghc/rts/ProfHeap.h @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: ProfHeap.h,v 1.1 1999/09/15 13:46:29 simonmar Exp $ + * $Id: ProfHeap.h,v 1.2 2001/11/22 14:25:12 simonmar Exp $ * * (c) The GHC Team, 1998-1999 * @@ -8,6 +8,7 @@ * ---------------------------------------------------------------------------*/ -void heapCensus(void); -extern nat 
initHeapProfiling(void); -void endHeapProfiling(void); +extern void heapCensus( void ); +extern nat initHeapProfiling( void ); +extern void endHeapProfiling( void ); +extern rtsBool closureSatisfiesConstraints( StgClosure* p ); diff --git a/ghc/rts/Profiling.c b/ghc/rts/Profiling.c index a8cf7a4e1d..fc863e96e8 100644 --- a/ghc/rts/Profiling.c +++ b/ghc/rts/Profiling.c @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: Profiling.c,v 1.24 2001/10/18 14:41:01 simonmar Exp $ + * $Id: Profiling.c,v 1.25 2001/11/22 14:25:12 simonmar Exp $ * * (c) The GHC Team, 1998-2000 * @@ -19,6 +19,8 @@ #include "Itimer.h" #include "ProfHeap.h" #include "Arena.h" +#include "RetainerProfile.h" +#include "LdvProfile.h" /* * Profiling allocation arena. @@ -144,9 +146,6 @@ static IndexTable * AddToIndexTable ( IndexTable *, CostCentreStack *, -#ifdef DEBUG -static void printCCS ( CostCentreStack *ccs ); -#endif static void initTimeProfiling ( void ); static void initProfilingLogFile( void ); @@ -195,6 +194,15 @@ initProfiling1 (void) /* cost centres are registered by the per-module * initialisation code now... */ + + switch (RtsFlags.ProfFlags.doHeapProfile) { + case HEAP_BY_RETAINER: + initRetainerProfiling(); + break; + case HEAP_BY_LDV: + initLdvProfiling(); + break; + } } void @@ -242,6 +250,13 @@ initProfilingLogFile(void) if ((prof_file = fopen(prof_filename, "w")) == NULL) { fprintf(stderr, "Can't open profiling report file %s\n", prof_filename); RtsFlags.CcFlags.doCostCentres = 0; + // @retainer profiling + // @LDV profiling + // The following line was added by Sung; retainer/LDV profiling may need + // two output files, i.e., <program>.prof/hp. 
+ if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER || + RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) + RtsFlags.ProfFlags.doHeapProfile = 0; return; } @@ -328,7 +343,7 @@ PushCostCentre ( CostCentreStack *ccs, CostCentre *cc ) { IF_DEBUG(prof, fprintf(stderr,"Pushing %s on ", cc->label); - printCCS(ccs); + fprintCCS(stderr,ccs); fprintf(stderr,"\n")); return PushCostCentre(ccs,cc); } @@ -390,9 +405,9 @@ AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 ) IF_DEBUG(prof, if (ccs1 != ccs2) { fprintf(stderr,"Appending "); - printCCS(ccs1); + fprintCCS(stderr,ccs1); fprintf(stderr," to "); - printCCS(ccs2); + fprintCCS(stderr,ccs2); fprintf(stderr,"\n");}); return AppendCCS(ccs1,ccs2); } @@ -688,7 +703,11 @@ report_ccs_profiling( void ) fprint_header(); reportCCS(pruneCCSTree(CCS_MAIN), 0); - fclose(prof_file); + // @retainer profiling + // @LDV profiling + // Now, prof_file is closed in shutdownHaskell() because this file + // is also used for retainer/LDV profiling. See shutdownHaskell(). 
+ // fclose(prof_file); } static void @@ -862,39 +881,16 @@ reportCCS_XML(CostCentreStack *ccs) } void -print_ccs (FILE *fp, CostCentreStack *ccs) -{ - if (ccs == CCCS) { - fprintf(fp, "Cost-Centre Stack: "); - } - - if (ccs != CCS_MAIN) - { - print_ccs(fp, ccs->prevStack); - fprintf(fp, "->[%s,%s]", ccs->cc->label, ccs->cc->module); - } else { - fprintf(fp, "[%s,%s]", ccs->cc->label, ccs->cc->module); - } - - if (ccs == CCCS) { - fprintf(fp, "\n"); - } -} - - -#ifdef DEBUG -static void -printCCS ( CostCentreStack *ccs ) +fprintCCS( FILE *f, CostCentreStack *ccs ) { - fprintf(stderr,"<"); - for (; ccs; ccs = ccs->prevStack ) { - fprintf(stderr,ccs->cc->label); - if (ccs->prevStack) { - fprintf(stderr,","); - } + fprintf(f,"<"); + for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) { + fprintf(f,"%s.%s", ccs->cc->module, ccs->cc->label); + if (ccs->prevStack && ccs->prevStack != CCS_MAIN) { + fprintf(f,","); + } } - fprintf(stderr,">"); + fprintf(f,">"); } -#endif #endif /* PROFILING */ diff --git a/ghc/rts/Profiling.h b/ghc/rts/Profiling.h index d75d6613bb..52db2da05e 100644 --- a/ghc/rts/Profiling.h +++ b/ghc/rts/Profiling.h @@ -1,5 +1,6 @@ /* ----------------------------------------------------------------------------- - * $Id: Profiling.h,v 1.2 2000/04/19 12:42:48 simonmar Exp $ + + * $Id: Profiling.h,v 1.3 2001/11/22 14:25:12 simonmar Exp $ * * (c) The GHC Team, 1998-2000 * @@ -30,4 +31,9 @@ void print_ccs (FILE *, CostCentreStack *); extern rtsBool time_profiling; +extern lnat total_prof_ticks; + +extern void fprintCCS( FILE *f, CostCentreStack *ccs ); + + #endif diff --git a/ghc/rts/Proftimer.c b/ghc/rts/Proftimer.c index 42766d3df0..390dd6953b 100644 --- a/ghc/rts/Proftimer.c +++ b/ghc/rts/Proftimer.c @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: Proftimer.c,v 1.7 2001/08/14 13:40:09 sewardj Exp $ + * $Id: Proftimer.c,v 1.8 2001/11/22 14:25:12 simonmar Exp $ * * (c) The GHC Team, 1998-1999 * @@ 
-10,28 +10,65 @@ #if defined (PROFILING) #include "PosixSource.h" + #include "Rts.h" #include "Profiling.h" #include "Itimer.h" #include "Proftimer.h" +#include "RtsFlags.h" + +static rtsBool do_prof_ticks = rtsFalse; // enable profiling ticks +static rtsBool do_heap_prof_ticks = rtsFalse; // enable heap profiling ticks -rtsBool do_prof_ticks = rtsFalse; /* enable profiling ticks */ +// Number of ticks until next heap census +static int ticks_to_heap_profile; + +// Time for a heap profile on the next context switch +rtsBool performHeapProfile; void -stopProfTimer(void) -{ /* Stops time profile */ +stopProfTimer( void ) +{ if (time_profiling) { do_prof_ticks = rtsFalse; } -}; +} void -startProfTimer(void) -{ /* Starts time profile */ +startProfTimer( void ) +{ if (time_profiling) { do_prof_ticks = rtsTrue; } -}; +} + +void +stopHeapProfTimer( void ) +{ + do_heap_prof_ticks = rtsFalse; +} + +void +startHeapProfTimer( void ) +{ + if (RtsFlags.ProfFlags.doHeapProfile) { + do_heap_prof_ticks = rtsTrue; + } +} + +void +initProfTimer( void ) +{ + performHeapProfile = rtsFalse; + + RtsFlags.ProfFlags.profileIntervalTicks = + RtsFlags.ProfFlags.profileInterval / TICK_MILLISECS; + + ticks_to_heap_profile = RtsFlags.ProfFlags.profileIntervalTicks; + + startHeapProfTimer(); +} + void handleProfTick(void) @@ -39,5 +76,14 @@ handleProfTick(void) if (do_prof_ticks) { CCS_TICK(CCCS); } + + if (do_heap_prof_ticks) { + ticks_to_heap_profile--; + if (ticks_to_heap_profile <= 0) { + ticks_to_heap_profile = RtsFlags.ProfFlags.profileIntervalTicks; + performHeapProfile = rtsTrue; + } + } } + #endif /* PROFILING */ diff --git a/ghc/rts/Proftimer.h b/ghc/rts/Proftimer.h index 231f8da4e1..1ddfc50072 100644 --- a/ghc/rts/Proftimer.h +++ b/ghc/rts/Proftimer.h @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: Proftimer.h,v 1.5 2000/04/03 15:54:50 simonmar Exp $ + * $Id: Proftimer.h,v 1.6 2001/11/22 14:25:12 simonmar Exp $ * * (c) The GHC 
Team, 1998 * @@ -7,10 +7,12 @@ * * ---------------------------------------------------------------------------*/ -extern rtsBool do_prof_ticks; -extern lnat total_prof_ticks; +extern void initProfTimer ( void ); +extern void handleProfTick ( void ); -extern void initProfTimer(nat ms); -extern void stopProfTimer(void); -extern void startProfTimer(void); -extern void handleProfTick(void); +extern void stopProfTimer ( void ); +extern void startProfTimer ( void ); +extern void stopHeapProfTimer ( void ); +extern void startHeapProfTimer ( void ); + +extern rtsBool performHeapProfile; diff --git a/ghc/rts/RetainerProfile.c b/ghc/rts/RetainerProfile.c new file mode 100644 index 0000000000..f811d73aab --- /dev/null +++ b/ghc/rts/RetainerProfile.c @@ -0,0 +1,2303 @@ +/* ----------------------------------------------------------------------------- + * $Id: RetainerProfile.c,v 1.1 2001/11/22 14:25:12 simonmar Exp $ + * + * (c) The GHC Team, 2001 + * Author: Sungwoo Park + * + * Retainer profiling. + * + * ---------------------------------------------------------------------------*/ + +#ifdef PROFILING + +#include "Rts.h" +#include "RtsUtils.h" +#include "RetainerProfile.h" +#include "RetainerSet.h" +#include "Schedule.h" +#include "Printer.h" +#include "Storage.h" +#include "StoragePriv.h" +#include "RtsFlags.h" +#include "Weak.h" +#include "Sanity.h" +#include "Profiling.h" +#include "Stats.h" +#include "BlockAlloc.h" +#include "Itimer.h" +#include "Proftimer.h" +#include "ProfHeap.h" + +/* + Note: what to change in order to plug-in a new retainer profiling scheme? + (1) type retainer in ../includes/StgRetainerProf.h + (2) retainer function R(), i.e., getRetainerFrom() + (3) the two hashing functions, hashKeySingleton() and hashKeyAddElement(), + in RetainerSet.h, if needed. + (4) printRetainer() and printRetainerSetShort() in RetainerSet.c. + */ + +/* ----------------------------------------------------------------------------- + * Declarations... 
+ * -------------------------------------------------------------------------- */ + +static nat retainerGeneration; // generation + +static nat numObjectVisited; // total number of objects visited +static nat timesAnyObjectVisited; // number of times any objects are visited + +/* + The rs field in the profile header of any object points to its retainer + set in an indirect way: if flip is 0, it points to the retainer set; + if flip is 1, it points to the next byte after the retainer set (even + for NULL pointers). Therefore, with flip 1, (rs ^ 1) is the actual + pointer. See retainerSetOf(). + */ + +// extract the retainer set field from c +#define RSET(c) ((c)->header.prof.hp.rs) + +static StgWord flip = 0; // flip bit + // must be 0 if DEBUG_RETAINER is on (for static closures) + +#define isRetainerSetFieldValid(c) \ + ((((StgWord)(c)->header.prof.hp.rs & 1) ^ flip) == 0) + +#define setRetainerSetToNull(c) \ + (c)->header.prof.hp.rs = (RetainerSet *)((StgWord)NULL | flip) + +static void retainStack(StgClosure *, StgClosure *, StgClosure *, StgPtr, StgPtr); +static void retainClosure(StgClosure *, StgClosure *, StgClosure *); +#ifdef DEBUG_RETAINER +static void belongToHeap(StgPtr p); +#endif + +#ifdef DEBUG_RETAINER +/* + cStackSize records how many times retainStack() has been invoked recursively, + that is, the number of activation records for retainStack() on the C stack. + maxCStackSize records its max value. 
+ Invariants: + cStackSize <= maxCStackSize + */ +static nat cStackSize, maxCStackSize; + +static nat sumOfNewCost; // sum of the cost of each object, computed + // when the object is first visited +static nat sumOfNewCostExtra; // for those objects not visited during + // retainer profiling, e.g., MUT_VAR +static nat costArray[N_CLOSURE_TYPES]; + +nat sumOfCostLinear; // sum of the costs of all object, computed + // when linearly traversing the heap after + // retainer profiling +nat costArrayLinear[N_CLOSURE_TYPES]; +#endif + +/* ----------------------------------------------------------------------------- + * Retainer stack - header + * Note: + * Although the retainer stack implementation could be separated * + * from the retainer profiling engine, there does not seem to be + * any advantage in doing that; retainer stack is an integral part + * of retainer profiling engine and cannot be use elsewhere at + * all. + * -------------------------------------------------------------------------- */ + +typedef enum { + posTypeStep, + posTypePtrs, + posTypeSRT, +} nextPosType; + +typedef union { + // fixed layout or layout specified by a field in the closure + StgWord step; + + // layout.payload + struct { + // See StgClosureInfo in InfoTables.h +#if SIZEOF_VOID_P == 8 + StgWord32 pos; + StgWord32 ptrs; +#else + StgWord16 pos; + StgWord16 ptrs; +#endif + StgPtr payload; + } ptrs; + + // SRT + struct { + StgClosure **srt; + StgClosure **srt_end; + } srt; +} nextPos; + +typedef struct { + nextPosType type; + nextPos next; +} stackPos; + +typedef struct { + StgClosure *c; + StgClosure *c_child_r; + stackPos info; +} stackElement; + +/* + Invariants: + firstStack points to the first block group. + currentStack points to the block group currently being used. + currentStack->free == stackLimit. + stackTop points to the topmost byte in the stack of currentStack. + Unless the whole stack is empty, stackTop must point to the topmost + object (or byte) in the whole stack. 
Thus, it is only when the whole stack + is empty that stackTop == stackLimit (not during the execution of push() + and pop()). + stackBottom == currentStack->start. + stackLimit == currentStack->start + BLOCK_SIZE_W * currentStack->blocks. + Note: + When a current stack becomes empty, stackTop is set to point to + the topmost element on the previous block group so as to satisfy + the invariants described above. + */ +bdescr *firstStack = NULL; +static bdescr *currentStack; +static stackElement *stackBottom, *stackTop, *stackLimit; + +/* + currentStackBoundary is used to mark the current stack chunk. + If stackTop == currentStackBoundary, it means that the current stack chunk + is empty. It is the responsibility of the user to keep currentStackBoundary + valid all the time if it is to be employed. + */ +static stackElement *currentStackBoundary; + +/* + stackSize records the current size of the stack. + maxStackSize records its high water mark. + Invariants: + stackSize <= maxStackSize + Note: + stackSize is just an estimate measure of the depth of the graph. The reason + is that some heap objects have only a single child and may not result + in a new element being pushed onto the stack. Therefore, at the end of + retainer profiling, maxStackSize + maxCStackSize is some value no greater + than the actual depth of the graph. + */ +#ifdef DEBUG_RETAINER +static int stackSize, maxStackSize; +#endif + +// number of blocks allocated for one stack +#define BLOCKS_IN_STACK 1 + +/* ----------------------------------------------------------------------------- + * Add a new block group to the stack. + * Invariants: + * currentStack->link == s. 
+ * -------------------------------------------------------------------------- */ +static inline void +newStackBlock( bdescr *bd ) +{ + currentStack = bd; + stackTop = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks); + stackBottom = (stackElement *)bd->start; + stackLimit = (stackElement *)stackTop; + bd->free = (StgPtr)stackLimit; +} + +/* ----------------------------------------------------------------------------- + * Return to the previous block group. + * Invariants: + * s->link == currentStack. + * -------------------------------------------------------------------------- */ +static inline void +returnToOldStack( bdescr *bd ) +{ + currentStack = bd; + stackTop = (stackElement *)bd->free; + stackBottom = (stackElement *)bd->start; + stackLimit = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks); + bd->free = (StgPtr)stackLimit; +} + +/* ----------------------------------------------------------------------------- + * Initializes the traverse stack. + * -------------------------------------------------------------------------- */ +static void +initializeTraverseStack( void ) +{ + if (firstStack != NULL) { + freeChain(firstStack); + } + + firstStack = allocGroup(BLOCKS_IN_STACK); + firstStack->link = NULL; + firstStack->u.back = NULL; + + newStackBlock(firstStack); +} + +/* ----------------------------------------------------------------------------- + * Frees all the block groups in the traverse stack. + * Invariants: + * firstStack != NULL + * -------------------------------------------------------------------------- */ +static void +closeTraverseStack( void ) +{ + freeChain(firstStack); + firstStack = NULL; +} + +/* ----------------------------------------------------------------------------- + * Returns rtsTrue if the whole stack is empty. 
+ * -------------------------------------------------------------------------- */ +static inline rtsBool +isEmptyRetainerStack( void ) +{ + return (firstStack == currentStack) && stackTop == stackLimit; +} + +/* ----------------------------------------------------------------------------- + * Returns rtsTrue if stackTop is at the stack boundary of the current stack, + * i.e., if the current stack chunk is empty. + * -------------------------------------------------------------------------- */ +static inline rtsBool +isOnBoundary( void ) +{ + return stackTop == currentStackBoundary; +} + +/* ----------------------------------------------------------------------------- + * Initializes *info from ptrs and payload. + * Invariants: + * payload[] begins with ptrs pointers followed by non-pointers. + * -------------------------------------------------------------------------- */ +static inline void +init_ptrs( stackPos *info, nat ptrs, StgPtr payload ) +{ + info->type = posTypePtrs; + info->next.ptrs.pos = 0; + info->next.ptrs.ptrs = ptrs; + info->next.ptrs.payload = payload; +} + +/* ----------------------------------------------------------------------------- + * Find the next object from *info. + * -------------------------------------------------------------------------- */ +static inline StgClosure * +find_ptrs( stackPos *info ) +{ + if (info->next.ptrs.pos < info->next.ptrs.ptrs) { + return (StgClosure *)info->next.ptrs.payload[info->next.ptrs.pos++]; + } else { + return NULL; + } +} + +/* ----------------------------------------------------------------------------- + * Initializes *info from SRT information stored in *infoTable. 
+ * -------------------------------------------------------------------------- */ +static inline void +init_srt( stackPos *info, StgInfoTable *infoTable ) +{ + info->type = posTypeSRT; + info->next.srt.srt = (StgClosure **)(infoTable->srt); + info->next.srt.srt_end = info->next.srt.srt + infoTable->srt_len; +} + +/* ----------------------------------------------------------------------------- + * Find the next object from *info. + * -------------------------------------------------------------------------- */ +static inline StgClosure * +find_srt( stackPos *info ) +{ + StgClosure *c; + + if (info->next.srt.srt < info->next.srt.srt_end) { + // See scavenge_srt() in GC.c for details. +#ifdef ENABLE_WIN32_DLL_SUPPORT + if ((unsigned long)(*(info->next.srt.srt)) & 0x1) + c = (* (StgClosure **)((unsigned long)*(info->next.srt.srt)) & ~0x1); + else + c = *(info->next.srt.srt); +#else + c = *(info->next.srt.srt); +#endif + info->next.srt.srt++; + return c; + } else { + return NULL; + } +} + +/* ----------------------------------------------------------------------------- + * push() pushes a stackElement representing the next child of *c + * onto the traverse stack. If *c has no child, *first_child is set + * to NULL and nothing is pushed onto the stack. If *c has only one + * child, *c_chlid is set to that child and nothing is pushed onto + * the stack. If *c has more than two children, *first_child is set + * to the first child and a stackElement representing the second + * child is pushed onto the stack. + + * Invariants: + * *c_child_r is the most recent retainer of *c's children. + * *c is not any of TSO, PAP, or AP_UPD, which means that + * there cannot be any stack objects. + * Note: SRTs are considered to be children as well. 
+ * -------------------------------------------------------------------------- */ +static inline void +push( StgClosure *c, StgClosure *c_child_r, StgClosure **first_child ) +{ + stackElement se; + bdescr *nbd; // Next Block Descriptor + +#ifdef DEBUG_RETAINER + // fprintf(stderr, "push(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary); +#endif + + ASSERT(get_itbl(c)->type != TSO); + ASSERT(get_itbl(c)->type != PAP); + ASSERT(get_itbl(c)->type != AP_UPD); + + // + // fill in se + // + + se.c = c; + se.c_child_r = c_child_r; + + // fill in se.info + switch (get_itbl(c)->type) { + // no child, no SRT + case CONSTR_0_1: + case CONSTR_0_2: + case CAF_BLACKHOLE: + case BLACKHOLE: + case SE_BLACKHOLE: + case SE_CAF_BLACKHOLE: + case ARR_WORDS: + *first_child = NULL; + return; + + // one child (fixed), no SRT + case MUT_VAR: + case MUT_CONS: + *first_child = ((StgMutVar *)c)->var; + return; + case BLACKHOLE_BQ: + // blocking_queue must be TSO and the head of a linked list of TSOs. + // Shoule it be a child? Seems to be yes. + *first_child = (StgClosure *)((StgBlockingQueue *)c)->blocking_queue; + return; + case THUNK_SELECTOR: + *first_child = ((StgSelector *)c)->selectee; + return; + case IND_PERM: + case IND_OLDGEN_PERM: + case IND_OLDGEN: + *first_child = ((StgIndOldGen *)c)->indirectee; + return; + case CONSTR_1_0: + case CONSTR_1_1: + *first_child = c->payload[0]; + return; + + // For CONSTR_2_0 and MVAR, we use se.info.step to record the position + // of the next child. We do not write a separate initialization code. 
+ // Also we do not have to initialize info.type; + + // two children (fixed), no SRT + // need to push a stackElement, but nothing to store in se.info + case CONSTR_2_0: + *first_child = c->payload[0]; // return the first pointer + // se.info.type = posTypeStep; + // se.info.next.step = 2; // 2 = second + break; + + // three children (fixed), no SRT + // need to push a stackElement + case MVAR: + // head must be TSO and the head of a linked list of TSOs. + // Shoule it be a child? Seems to be yes. + *first_child = (StgClosure *)((StgMVar *)c)->head; + // se.info.type = posTypeStep; + se.info.next.step = 2; // 2 = second + break; + + // three children (fixed), no SRT + case WEAK: + *first_child = ((StgWeak *)c)->key; + // se.info.type = posTypeStep; + se.info.next.step = 2; + break; + + // layout.payload.ptrs, no SRT + case CONSTR: + case FOREIGN: + case STABLE_NAME: + case BCO: + case CONSTR_STATIC: + init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, + (StgPtr)c->payload); + *first_child = find_ptrs(&se.info); + if (*first_child == NULL) + return; // no child + break; + + // StgMutArrPtr.ptrs, no SRT + case MUT_ARR_PTRS: + case MUT_ARR_PTRS_FROZEN: + init_ptrs(&se.info, ((StgMutArrPtrs *)c)->ptrs, + (StgPtr)(((StgMutArrPtrs *)c)->payload)); + *first_child = find_ptrs(&se.info); + if (*first_child == NULL) + return; + break; + + // layout.payload.ptrs, SRT + case FUN: // *c is a heap object. + case FUN_2_0: + case THUNK: + case THUNK_2_0: + init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, (StgPtr)c->payload); + *first_child = find_ptrs(&se.info); + if (*first_child == NULL) + // no child from ptrs, so check SRT + goto srt_only; + break; + + // 1 fixed child, SRT + case FUN_1_0: + case FUN_1_1: + case THUNK_1_0: + case THUNK_1_1: + *first_child = c->payload[0]; + ASSERT(*first_child != NULL); + init_srt(&se.info, get_itbl(c)); + break; + + // SRT only + case THUNK_STATIC: + case FUN_STATIC: // *c is a heap object. 
+ ASSERT(get_itbl(c)->srt_len != 0); + case FUN_0_1: + case FUN_0_2: + case THUNK_0_1: + case THUNK_0_2: + srt_only: + init_srt(&se.info, get_itbl(c)); + *first_child = find_srt(&se.info); + if (*first_child == NULL) + return; // no child + break; + + // cannot appear + case PAP: + case AP_UPD: + case TSO: + case IND_STATIC: + case CONSTR_INTLIKE: + case CONSTR_CHARLIKE: + case CONSTR_NOCAF_STATIC: + // stack objects + case UPDATE_FRAME: + case CATCH_FRAME: + case STOP_FRAME: + case SEQ_FRAME: + case RET_DYN: + case RET_BCO: + case RET_SMALL: + case RET_VEC_SMALL: + case RET_BIG: + case RET_VEC_BIG: + // invalid objects + case IND: + case BLOCKED_FETCH: + case FETCH_ME: + case FETCH_ME_BQ: + case RBH: + case REMOTE_REF: + case EVACUATED: + case INVALID_OBJECT: + default: + barf("Invalid object *c in push()"); + return; + } + + if (stackTop - 1 < stackBottom) { +#ifdef DEBUG_RETAINER + // fprintf(stderr, "push() to the next stack.\n"); +#endif + // currentStack->free is updated when the active stack is switched + // to the next stack. + currentStack->free = (StgPtr)stackTop; + + if (currentStack->link == NULL) { + nbd = allocGroup(BLOCKS_IN_STACK); + nbd->link = NULL; + nbd->u.back = currentStack; + currentStack->link = nbd; + } else + nbd = currentStack->link; + + newStackBlock(nbd); + } + + // adjust stackTop (acutal push) + stackTop--; + // If the size of stackElement was huge, we would better replace the + // following statement by either a memcpy() call or a switch statement + // on the type of the element. Currently, the size of stackElement is + // small enough (5 words) that this direct assignment seems to be enough. 
+ *stackTop = se; + +#ifdef DEBUG_RETAINER + stackSize++; + if (stackSize > maxStackSize) maxStackSize = stackSize; + // ASSERT(stackSize >= 0); + // fprintf(stderr, "stackSize = %d\n", stackSize); +#endif +} + +/* ----------------------------------------------------------------------------- + * popOff() and popOffReal(): Pop a stackElement off the traverse stack. + * Invariants: + * stackTop cannot be equal to stackLimit unless the whole stack is + * empty, in which case popOff() is not allowed. + * Note: + * You can think of popOffReal() as a part of popOff() which is + * executed at the end of popOff() in necessary. Since popOff() is + * likely to be executed quite often while popOffReal() is not, we + * separate popOffReal() from popOff(), which is declared as an + * inline function (for the sake of execution speed). popOffReal() + * is called only within popOff() and nowhere else. + * -------------------------------------------------------------------------- */ +static void +popOffReal(void) +{ + bdescr *pbd; // Previous Block Descriptor + +#ifdef DEBUG_RETAINER + // fprintf(stderr, "pop() to the previous stack.\n"); +#endif + + ASSERT(stackTop + 1 == stackLimit); + ASSERT(stackBottom == (stackElement *)currentStack->start); + + if (firstStack == currentStack) { + // The stack is completely empty. + stackTop++; + ASSERT(stackTop == stackLimit); +#ifdef DEBUG_RETAINER + stackSize--; + if (stackSize > maxStackSize) maxStackSize = stackSize; + /* + ASSERT(stackSize >= 0); + fprintf(stderr, "stackSize = %d\n", stackSize); + */ +#endif + return; + } + + // currentStack->free is updated when the active stack is switched back + // to the previous stack. 
+ currentStack->free = (StgPtr)stackLimit; + + // find the previous block descriptor + pbd = currentStack->u.back; + ASSERT(pbd != NULL); + + returnToOldStack(pbd); + +#ifdef DEBUG_RETAINER + stackSize--; + if (stackSize > maxStackSize) maxStackSize = stackSize; + /* + ASSERT(stackSize >= 0); + fprintf(stderr, "stackSize = %d\n", stackSize); + */ +#endif +} + +static inline void +popOff(void) { +#ifdef DEBUG_RETAINER + // fprintf(stderr, "\tpopOff(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary); +#endif + + ASSERT(stackTop != stackLimit); + ASSERT(!isEmptyRetainerStack()); + + // <= (instead of <) is wrong! + if (stackTop + 1 < stackLimit) { + stackTop++; +#ifdef DEBUG_RETAINER + stackSize--; + if (stackSize > maxStackSize) maxStackSize = stackSize; + /* + ASSERT(stackSize >= 0); + fprintf(stderr, "stackSize = %d\n", stackSize); + */ +#endif + return; + } + + popOffReal(); +} + +/* ----------------------------------------------------------------------------- + * Finds the next object to be considered for retainer profiling and store + * its pointer to *c. + * Test if the topmost stack element indicates that more objects are left, + * and if so, retrieve the first object and store its pointer to *c. Also, + * set *cp and *r appropriately, both of which are stored in the stack element. + * The topmost stack element then is overwritten so as for it to now denote + * the next object. + * If the topmost stack element indicates no more objects are left, pop + * off the stack element until either an object can be retrieved or + * the current stack chunk becomes empty, indicated by rtsTrue returned by + * isOnBoundary(), in which case *c is set to NULL. + * Note: + * It is okay to call this function even when the current stack chunk + * is empty. 
+ * -------------------------------------------------------------------------- */ +static inline void +pop( StgClosure **c, StgClosure **cp, StgClosure **r ) +{ + stackElement *se; + +#ifdef DEBUG_RETAINER + // fprintf(stderr, "pop(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary); +#endif + + do { + if (isOnBoundary()) { // if the current stack chunk is depleted + *c = NULL; + return; + } + + se = stackTop; + + switch (get_itbl(se->c)->type) { + // two children (fixed), no SRT + // nothing in se.info + case CONSTR_2_0: + *c = se->c->payload[1]; + *cp = se->c; + *r = se->c_child_r; + popOff(); + return; + + // three children (fixed), no SRT + // need to push a stackElement + case MVAR: + if (se->info.next.step == 2) { + *c = (StgClosure *)((StgMVar *)se->c)->tail; + se->info.next.step++; // move to the next step + // no popOff + } else { + *c = ((StgMVar *)se->c)->value; + popOff(); + } + *cp = se->c; + *r = se->c_child_r; + return; + + // three children (fixed), no SRT + case WEAK: + if (se->info.next.step == 2) { + *c = ((StgWeak *)se->c)->value; + se->info.next.step++; + // no popOff + } else { + *c = ((StgWeak *)se->c)->finalizer; + popOff(); + } + *cp = se->c; + *r = se->c_child_r; + return; + + case CONSTR: + case FOREIGN: + case STABLE_NAME: + case BCO: + case CONSTR_STATIC: + // StgMutArrPtr.ptrs, no SRT + case MUT_ARR_PTRS: + case MUT_ARR_PTRS_FROZEN: + *c = find_ptrs(&se->info); + if (*c == NULL) { + popOff(); + break; + } + *cp = se->c; + *r = se->c_child_r; + return; + + // layout.payload.ptrs, SRT + case FUN: // always a heap object + case FUN_2_0: + case THUNK: + case THUNK_2_0: + if (se->info.type == posTypePtrs) { + *c = find_ptrs(&se->info); + if (*c != NULL) { + *cp = se->c; + *r = se->c_child_r; + return; + } + init_srt(&se->info, get_itbl(se->c)); + } + // fall through + + // SRT + case THUNK_STATIC: + case FUN_STATIC: + case FUN_0_1: + case FUN_0_2: + case THUNK_0_1: + case THUNK_0_2: + case FUN_1_0: + case 
FUN_1_1: + case THUNK_1_0: + case THUNK_1_1: + *c = find_srt(&se->info); + if (*c != NULL) { + *cp = se->c; + *r = se->c_child_r; + return; + } + popOff(); + break; + + // no child (fixed), no SRT + case CONSTR_0_1: + case CONSTR_0_2: + case CAF_BLACKHOLE: + case BLACKHOLE: + case SE_BLACKHOLE: + case SE_CAF_BLACKHOLE: + case ARR_WORDS: + // one child (fixed), no SRT + case MUT_VAR: + case MUT_CONS: + case BLACKHOLE_BQ: + case THUNK_SELECTOR: + case IND_PERM: + case IND_OLDGEN_PERM: + case IND_OLDGEN: + case CONSTR_1_1: + // cannot appear + case PAP: + case AP_UPD: + case TSO: + case IND_STATIC: + case CONSTR_INTLIKE: + case CONSTR_CHARLIKE: + case CONSTR_NOCAF_STATIC: + // stack objects + case RET_DYN: + case UPDATE_FRAME: + case CATCH_FRAME: + case STOP_FRAME: + case SEQ_FRAME: + case RET_BCO: + case RET_SMALL: + case RET_VEC_SMALL: + case RET_BIG: + case RET_VEC_BIG: + // invalid objects + case IND: + case BLOCKED_FETCH: + case FETCH_ME: + case FETCH_ME_BQ: + case RBH: + case REMOTE_REF: + case EVACUATED: + case INVALID_OBJECT: + default: + barf("Invalid object *c in pop()"); + return; + } + } while (rtsTrue); +} + +/* ----------------------------------------------------------------------------- + * RETAINER PROFILING ENGINE + * -------------------------------------------------------------------------- */ + +void +initRetainerProfiling( void ) +{ + initializeAllRetainerSet(); + retainerGeneration = 0; +} + +/* ----------------------------------------------------------------------------- + * This function must be called before f-closing prof_file. + * -------------------------------------------------------------------------- */ +void +endRetainerProfiling( void ) +{ +#ifdef SECOND_APPROACH + outputAllRetainerSet(prof_file); +#endif +} + +/* ----------------------------------------------------------------------------- + * Returns the actual pointer to the retainer set of the closure *c. + * It may adjust RSET(c) subject to flip. 
+ * Side effects: + * RSET(c) is initialized to NULL if its current value does not + * conform to flip. + * Note: + * Even though this function has side effects, they CAN be ignored because + * subsequent calls to retainerSetOf() always result in the same return value + * and retainerSetOf() is the only way to retrieve retainerSet of a given + * closure. + * We have to perform an XOR (^) operation each time a closure is examined. + * The reason is that we do not know when a closure is visited last. + * -------------------------------------------------------------------------- */ +static inline void +maybeInitRetainerSet( StgClosure *c ) +{ + if (!isRetainerSetFieldValid(c)) { + setRetainerSetToNull(c); + } +} + +static inline RetainerSet * +retainerSetOf( StgClosure *c ) +{ + ASSERT( isRetainerSetFieldValid(c) ); + // StgWord has the same size as pointers, so the following type + // casting is okay. + return (RetainerSet *)((StgWord)RSET(c) ^ flip); +} + +/* ----------------------------------------------------------------------------- + * Returns the cost of the closure *c, e.g., the amount of heap memory + * allocated to *c. Static objects cost 0. + * The cost includes even the words allocated for profiling purpose. + * Cf. costPure(). 
 * -------------------------------------------------------------------------- */
// Size (in words) of the closure c as laid out in the heap, dispatched on
// its info-table closure type.  Static objects report 0.
static inline nat
cost( StgClosure *c )
{
    StgInfoTable *info;

    info = get_itbl(c);
    switch (info->type) {
    case TSO:
        return tso_sizeW((StgTSO *)c);

    case THUNK:
    case THUNK_1_0:
    case THUNK_0_1:
    case THUNK_2_0:
    case THUNK_1_1:
    case THUNK_0_2:
        // thunks always occupy at least header + MIN_UPD_SIZE words, so that
        // they can later be overwritten by an update
        return stg_max(sizeW_fromITBL(info), sizeofW(StgHeader) + MIN_UPD_SIZE);

    // static objects
    case CONSTR_STATIC:
    case FUN_STATIC:
    case THUNK_STATIC:
        return 0;

    case MVAR:
        return sizeofW(StgMVar);

    case MUT_ARR_PTRS:
    case MUT_ARR_PTRS_FROZEN:
        return mut_arr_ptrs_sizeW((StgMutArrPtrs *)c);

    case AP_UPD:
    case PAP:
        return pap_sizeW((StgPAP *)c);

    case ARR_WORDS:
        return arr_words_sizeW((StgArrWords *)c);

    case CONSTR:
    case CONSTR_1_0:
    case CONSTR_0_1:
    case CONSTR_2_0:
    case CONSTR_1_1:
    case CONSTR_0_2:
    case FUN:
    case FUN_1_0:
    case FUN_0_1:
    case FUN_2_0:
    case FUN_1_1:
    case FUN_0_2:
    case WEAK:
    case MUT_VAR:
    case MUT_CONS:
    case CAF_BLACKHOLE:
    case BLACKHOLE:
    case SE_BLACKHOLE:
    case SE_CAF_BLACKHOLE:
    case BLACKHOLE_BQ:
    case IND_PERM:
    case IND_OLDGEN:
    case IND_OLDGEN_PERM:
    case FOREIGN:
    case BCO:
    case STABLE_NAME:
        // all of these have a size fully determined by their info table
        return sizeW_fromITBL(info);

    case THUNK_SELECTOR:
        return sizeofW(StgHeader) + MIN_UPD_SIZE;

    /*
      Error case
    */
    // IND_STATIC cannot be *c, *cp, *r in the retainer profiling loop.
    case IND_STATIC:
    // CONSTR_INTLIKE, CONSTR_CHARLIKE, and CONSTR_NOCAF_STATIC
    // cannot be *c, *cp, *r in the retainer profiling loop.
    case CONSTR_INTLIKE:
    case CONSTR_CHARLIKE:
    case CONSTR_NOCAF_STATIC:
    // Stack objects are invalid because they are never treated as
    // legal objects during retainer profiling.
    case UPDATE_FRAME:
    case CATCH_FRAME:
    case STOP_FRAME:
    case SEQ_FRAME:
    case RET_DYN:
    case RET_BCO:
    case RET_SMALL:
    case RET_VEC_SMALL:
    case RET_BIG:
    case RET_VEC_BIG:
    // other cases
    case IND:
    case BLOCKED_FETCH:
    case FETCH_ME:
    case FETCH_ME_BQ:
    case RBH:
    case REMOTE_REF:
    case EVACUATED:
    case INVALID_OBJECT:
    default:
        barf("Invalid object in cost(): %d", get_itbl(c)->type);
    }
}

/* -----------------------------------------------------------------------------
 * Returns the pure cost of the closure *c, i.e., the size of memory
 * allocated for this object without profiling.
 * Note & Todo:
 *   costPure() subtracts the overhead incurred by profiling for all types
 *   of objects except TSO.  Even though the overhead in the TSO object
 *   itself is taken into account, the additional costs due to larger
 *   stack objects (with unnecessary retainer profiling fields) is not
 *   considered.  Still, costPure() should result in an accurate estimate
 *   of heap use because stacks in TSO objects are allocated in large blocks.
 *   If we get rid of the (currently unused) retainer profiling field in
 *   all stack objects, the result will be accurate.
 * ------------------------------------------------------------------------- */
static inline nat
costPure( StgClosure *c )
{
    nat cst;

    // closures filtered out by the active profiling constraints contribute 0
    if (!closureSatisfiesConstraints(c)) {
        return 0;
    }

    cst = cost(c);

    // any non-static closure must be strictly larger than its profiling header
    ASSERT(cst == 0 || cst - sizeofW(StgProfHeader) > 0);

    if (cst > 0) {
        // discount the profiling header so we report "pure" heap use
        return cst - sizeofW(StgProfHeader);
    } else {
        return 0;
    }
}

/* -----------------------------------------------------------------------------
 * Returns rtsTrue if *c is a retainer.
 * -------------------------------------------------------------------------- */
// Classify the closure c as a retainer or not, by closure type.  Retainers
// are the nodes at which retainer sets are extended during traversal.
static inline rtsBool
isRetainer( StgClosure *c )
{
    // NOTE(review): closures whose profiling description string is "PCS" are
    // forced to be retainers regardless of closure type — presumably a local
    // extension to the standard retainer schemes; confirm its intent.
    if (get_itbl(c)->prof.closure_desc != NULL && !strcmp(get_itbl(c)->prof.closure_desc,"PCS")) { return rtsTrue; }

    switch (get_itbl(c)->type) {
        //
        //  True case
        //
        // TSOs MUST be retainers: they constitute the set of roots.
    case TSO:

        // mutable objects
    case MVAR:
    case MUT_VAR:
    case MUT_CONS:
    case MUT_ARR_PTRS:
    case MUT_ARR_PTRS_FROZEN:

        // thunks are retainers.
    case THUNK:
    case THUNK_1_0:
    case THUNK_0_1:
    case THUNK_2_0:
    case THUNK_1_1:
    case THUNK_0_2:
    case THUNK_SELECTOR:
    case AP_UPD:

        // Static thunks, or CAFS, are obviously retainers.
    case THUNK_STATIC:

        // WEAK objects are roots; there is separate code in which traversing
        // begins from WEAK objects.
    case WEAK:
        return rtsTrue;

        //
        // False case
        //

        // constructors
    case CONSTR:
    case CONSTR_1_0:
    case CONSTR_0_1:
    case CONSTR_2_0:
    case CONSTR_1_1:
    case CONSTR_0_2:
        // functions
    case FUN:
    case FUN_1_0:
    case FUN_0_1:
    case FUN_2_0:
    case FUN_1_1:
    case FUN_0_2:
        // partial applications
    case PAP:
        // blackholes
    case CAF_BLACKHOLE:
    case BLACKHOLE:
    case SE_BLACKHOLE:
    case SE_CAF_BLACKHOLE:
    case BLACKHOLE_BQ:
        // indirection
    case IND_PERM:
    case IND_OLDGEN_PERM:
    case IND_OLDGEN:
        // static objects
    case CONSTR_STATIC:
    case FUN_STATIC:
        // misc
    case FOREIGN:
    case STABLE_NAME:
    case BCO:
    case ARR_WORDS:
        return rtsFalse;

        //
        // Error case
        //
        // IND_STATIC cannot be *c, *cp, *r in the retainer profiling loop.
    case IND_STATIC:
        // CONSTR_INTLIKE, CONSTR_CHARLIKE, and CONSTR_NOCAF_STATIC
        // cannot be *c, *cp, *r in the retainer profiling loop.
    case CONSTR_INTLIKE:
    case CONSTR_CHARLIKE:
    case CONSTR_NOCAF_STATIC:
        // Stack objects are invalid because they are never treated as
        // legal objects during retainer profiling.
    case UPDATE_FRAME:
    case CATCH_FRAME:
    case STOP_FRAME:
    case SEQ_FRAME:
    case RET_DYN:
    case RET_BCO:
    case RET_SMALL:
    case RET_VEC_SMALL:
    case RET_BIG:
    case RET_VEC_BIG:
        // other cases
    case IND:
    case BLOCKED_FETCH:
    case FETCH_ME:
    case FETCH_ME_BQ:
    case RBH:
    case REMOTE_REF:
    case EVACUATED:
    case INVALID_OBJECT:
    default:
        barf("Invalid object in isRetainer(): %d", get_itbl(c)->type);
        return rtsFalse;
    }
}

/* -----------------------------------------------------------------------------
 * Returns the retainer function value for the closure *c, i.e., R(*c).
 * This function does NOT return the retainer(s) of *c.
 * Invariants:
 *   *c must be a retainer.
 * Note:
 *   Depending on the definition of this function, the maintenance of retainer
 *   sets can be made easier.  If most retainer sets are likely to be created
 *   again across garbage collections, refreshAllRetainerSet() in
 *   RetainerSet.c can simply set the cost field of each retainer set.
 *   If this is not the case, we can free all the retainer sets and
 *   re-initialize the hash table.
 *   See refreshAllRetainerSet() in RetainerSet.c.
 * -------------------------------------------------------------------------- */
static inline retainer
getRetainerFrom( StgClosure *c )
{
    ASSERT(isRetainer(c));

    // the retainer identity is chosen at build time by one of three schemes
#if defined(RETAINER_SCHEME_INFO)
    // Retainer scheme 1: retainer = info table
    return get_itbl(c);
#elif defined(RETAINER_SCHEME_CCS)
    // Retainer scheme 2: retainer = cost centre stack
    return c->header.prof.ccs;
#elif defined(RETAINER_SCHEME_CC)
    // Retainer scheme 3: retainer = cost centre
    return c->header.prof.ccs->cc;
#endif
}

/* -----------------------------------------------------------------------------
 * Associates the retainer set *s with the closure *c, that is, *s becomes
 * the retainer set of *c.
 * Invariants:
 *   c != NULL
 *   s != NULL
 * -------------------------------------------------------------------------- */
// Move c's cost from its old retainer set rsOfc (if any) to the new set s,
// and store s (tagged with the current flip bit) as c's retainer set.
static inline void
associate( StgClosure *c, RetainerSet *rsOfc, RetainerSet *s )
{
    nat cost_c;

    cost_c = costPure(c);     // not cost(c)
    if (rsOfc != NULL) {
        ASSERT(rsOfc->cost >= cost_c);
        rsOfc->cost -= cost_c;
    }
    // StgWord has the same size as pointers, so the following type
    // casting is okay.
    RSET(c) = (RetainerSet *)((StgWord)s | flip);
    s->cost += cost_c;
}

/* -----------------------------------------------------------------------------
 * Process all the objects in the stack chunk from stackStart to stackEnd
 * with *c and *c_child_r being their parent and their most recent retainer,
 * respectively.  Treat stackOptionalFun as another child of *c if it is
 * not NULL.
 * Invariants:
 *   *c is one of the following: TSO, PAP, and AP_UPD.
 *   If *c is AP_UPD or PAP, stackOptionalFun is not NULL.  Otherwise,
 *   it is NULL.
 *   If *c is TSO, c == c_child_r.
 *   stackStart < stackEnd.
 *   RSET(c) and RSET(c_child_r) are valid, i.e., their
 *   interpretation conforms to the current value of flip (even when they
 *   are interpreted to be NULL).
 *   If *c is TSO, its state is not any of ThreadRelocated, ThreadComplete,
 *   or ThreadKilled, which means that its stack is ready to process.
 * Note:
 *   This code was almost plagiarized from GC.c!  For each pointer,
 *   retainClosure() is invoked instead of evacuate().
 * -------------------------------------------------------------------------- */
static void
retainStack( StgClosure *c, StgClosure *c_child_r,
             StgClosure *stackOptionalFun, StgPtr stackStart,
             StgPtr stackEnd )
{
    stackElement *oldStackBoundary;
    StgPtr p, q;
    StgInfoTable *info;
    StgWord32 bitmap;

#ifdef DEBUG_RETAINER
    cStackSize++;
    if (cStackSize > maxCStackSize) maxCStackSize = cStackSize;
#endif

    /*
      Each invocation of retainStack() creates a new virtual
      stack.  Since all such stacks share a single common stack, we
      record the current currentStackBoundary, which will be restored
      at the exit.
    */
    oldStackBoundary = currentStackBoundary;
    currentStackBoundary = stackTop;

#ifdef DEBUG_RETAINER
    // fprintf(stderr, "retainStack() called: oldStackBoundary = 0x%x, currentStackBoundary = 0x%x\n", oldStackBoundary, currentStackBoundary);
#endif

    if (stackOptionalFun != NULL) {
        // for PAP/AP_UPD, the function closure is an extra child of c
        ASSERT(get_itbl(c)->type == AP_UPD || get_itbl(c)->type == PAP);
        retainClosure(stackOptionalFun, c, c_child_r);
    } else {
        ASSERT(get_itbl(c)->type == TSO);
        ASSERT(((StgTSO *)c)->what_next != ThreadRelocated &&
               ((StgTSO *)c)->what_next != ThreadComplete &&
               ((StgTSO *)c)->what_next != ThreadKilled);
    }

    p = stackStart;
    while (p < stackEnd) {
        q = *(StgPtr *)p;

        //
        // Note & Todo:
        //   The correctness of retainer profiling is subject to the
        //   correctness of the two macros IS_ARG_TAG() and
        //   LOOKS_LIKE_GHC_INFO().  Since LOOKS_LIKE_GHC_INFO() is a bit
        //   precarious macro, so I believe that the current
        //   implementation may not be quite safe.  Also, scavenge_stack()
        //   in GC.c also exploits this macro in order to identify shallow
        //   pointers.  I am not sure whether scavenge_stack() takes
        //   further measurements to discern real shallow pointers.
        //
        //   I think this can be a serious problem if a stack chunk
        //   contains some word which looks like a pointer but is
        //   actually, say, a word constituting a floating number.
        //

        // skip tagged words
        if (IS_ARG_TAG((StgWord)q)) {
            p += 1 + ARG_SIZE(q);
            continue;
        }

        // check if *p is a shallow closure pointer
        if (!LOOKS_LIKE_GHC_INFO(q)) {
            retainClosure((StgClosure *)q, c, c_child_r);
            p++;
            continue;
        }

        // regular stack objects
        info = get_itbl((StgClosure *)p);
        switch(info->type) {
        case RET_DYN:
            bitmap = ((StgRetDyn *)p)->liveness;
            p = ((StgRetDyn *)p)->payload;
            goto small_bitmap;

        // FUN and FUN_STATIC keep only their info pointer.
        case FUN:
        case FUN_STATIC:
            p++;
            goto follow_srt;

        case UPDATE_FRAME:
            retainClosure(((StgUpdateFrame *)p)->updatee, c, c_child_r);
            p += sizeofW(StgUpdateFrame);
            continue;

        case STOP_FRAME:
        case CATCH_FRAME:
        case SEQ_FRAME:
        case RET_BCO:
        case RET_SMALL:
        case RET_VEC_SMALL:
            bitmap = info->layout.bitmap;
            p++;
        small_bitmap:
            // a clear bit marks a pointer word (set bits are non-pointers)
            while (bitmap != 0) {
                if ((bitmap & 1) == 0)
                    retainClosure((StgClosure *)*p, c, c_child_r);
                p++;
                bitmap = bitmap >> 1;
            }
        follow_srt:
            {
                StgClosure **srt, **srt_end;

                srt = (StgClosure **)(info->srt);
                srt_end = srt + info->srt_len;
                for (; srt < srt_end; srt++) {
                    // See scavenge_srt() in GC.c for details.
#ifdef ENABLE_WIN32_DLL_SUPPORT
                    // a tagged (low-bit-set) SRT entry is an indirection
                    // through a DLL import table slot
                    if ((unsigned long)(*srt) & 0x1)
                        retainClosure(*(StgClosure **)(((unsigned long)*srt & ~0x1)), c, c_child_r);
                    else
                        retainClosure(*srt, c, c_child_r);
#else
                    retainClosure(*srt, c, c_child_r);
#endif
                }
            }
            continue;

        case RET_BIG:
        case RET_VEC_BIG:
            {
                StgPtr q;
                StgLargeBitmap *large_bitmap;
                nat i;

                large_bitmap = info->layout.large_bitmap;
                p++;

                for (i = 0; i < large_bitmap->size; i++) {
                    bitmap = large_bitmap->bitmap[i];
                    // NOTE(review): "sizeofW(StgWord) * 8" is presumably
                    // meant to be the number of bits per bitmap word —
                    // verify against BITS_IN(StgWord) on 32-bit targets.
                    q = p + sizeofW(StgWord) * 8;
                    while (bitmap != 0) {
                        if ((bitmap & 1) == 0)
                            retainClosure((StgClosure *)*p, c, c_child_r);
                        p++;
                        bitmap = bitmap >> 1;
                    }
                    if (i + 1 < large_bitmap->size) {
                        // remaining words under this bitmap word are all
                        // pointers
                        while (p < q) {
                            retainClosure((StgClosure *)*p, c, c_child_r);
                            p++;
                        }
                    }
                }
            }
            goto follow_srt;
        default:
            barf("Invalid object found in retainStack(): %d",
                 (int)(info->type));
        }
    }

    // restore currentStackBoundary
    currentStackBoundary = oldStackBoundary;
#ifdef DEBUG_RETAINER
    // fprintf(stderr, "retainStack() finished: currentStackBoundary = 0x%x\n", currentStackBoundary);
#endif

#ifdef DEBUG_RETAINER
    cStackSize--;
#endif
}

/* -----------------------------------------------------------------------------
 * Compute the retainer set of *c0 and all its descendants by traversing.
 * *cp0 is the parent of *c0, and *r0 is the most recent retainer of *c0.
 * Invariants:
 *   c0 = cp0 = r0 holds only for root objects.
 *   RSET(cp0) and RSET(r0) are valid, i.e., their
 *   interpretation conforms to the current value of flip (even when they
 *   are interpreted to be NULL).
 *   However, RSET(c0) may be corrupt, i.e., it may not conform to
 *   the current value of flip.  If it does not, during the execution
 *   of this function, RSET(c0) must be initialized as well as all
 *   its descendants.
 * Note:
 *   stackTop must be the same at the beginning and the exit of this function.
 *   *c0 can be TSO (as well as PAP and AP_UPD).
 * -------------------------------------------------------------------------- */
static void
retainClosure( StgClosure *c0, StgClosure *cp0, StgClosure *r0 )
{
    // c = Current closure
    // cp = Current closure's Parent
    // r = current closures' most recent Retainer
    // c_child_r = current closure's children's most recent retainer
    // first_child = first child of c
    StgClosure *c, *cp, *r, *c_child_r, *first_child;
    RetainerSet *s, *retainerSetOfc;
    retainer R_r;
    StgWord typeOfc;

#ifdef DEBUG_RETAINER
    // StgPtr oldStackTop;
#endif

#ifdef DEBUG_RETAINER
    // oldStackTop = stackTop;
    // fprintf(stderr, "retainClosure() called: c0 = 0x%x, cp0 = 0x%x, r0 = 0x%x\n", c0, cp0, r0);
#endif

    // (c, cp, r) = (c0, cp0, r0)
    c = c0;
    cp = cp0;
    r = r0;
    goto inner_loop;

loop:
    //fprintf(stderr, "loop");
    // pop to (c, cp, r);
    pop(&c, &cp, &r);

    // a NULL c signals that the traverse stack is exhausted
    if (c == NULL) {
#ifdef DEBUG_RETAINER
        // fprintf(stderr, "retainClosure() ends: oldStackTop = 0x%x, stackTop = 0x%x\n", oldStackTop, stackTop);
#endif
        return;
    }

    //fprintf(stderr, "inner_loop");

inner_loop:
    // c = current closure under consideration,
    // cp = current closure's parent,
    // r = current closure's most recent retainer
    //
    // Loop invariants (on the meaning of c, cp, r, and their retainer sets):
    //   RSET(cp) and RSET(r) are valid.
    //   RSET(c) is valid only if c has been visited before.
    //
    // Loop invariants (on the relation between c, cp, and r)
    //   if cp is not a retainer, r belongs to RSET(cp).
    //   if cp is a retainer, r == cp.

    typeOfc = get_itbl(c)->type;

#ifdef DEBUG_RETAINER
    switch (typeOfc) {
    case IND_STATIC:
    case CONSTR_INTLIKE:
    case CONSTR_CHARLIKE:
    case CONSTR_NOCAF_STATIC:
    case CONSTR_STATIC:
    case THUNK_STATIC:
    case FUN_STATIC:
        break;
    default:
        if (retainerSetOf(c) == NULL) {   // first visit?
            costArray[typeOfc] += cost(c);
            sumOfNewCost += cost(c);
        }
        break;
    }
#endif

    // special cases
    switch (typeOfc) {
    case TSO:
        if (((StgTSO *)c)->what_next == ThreadComplete ||
            ((StgTSO *)c)->what_next == ThreadKilled) {
#ifdef DEBUG_RETAINER
            fprintf(stderr, "ThreadComplete or ThreadKilled encountered in retainClosure()\n");
#endif
            goto loop;
        }
        if (((StgTSO *)c)->what_next == ThreadRelocated) {
#ifdef DEBUG_RETAINER
            fprintf(stderr, "ThreadRelocated encountered in retainClosure()\n");
#endif
            // follow the relocation link and reconsider
            c = (StgClosure *)((StgTSO *)c)->link;
            goto inner_loop;
        }
        break;

    case IND_STATIC:
        // We just skip IND_STATIC, so its retainer set is never computed.
        c = ((StgIndStatic *)c)->indirectee;
        goto inner_loop;
    case CONSTR_INTLIKE:
    case CONSTR_CHARLIKE:
        // static objects with no pointers out, so goto loop.
    case CONSTR_NOCAF_STATIC:
        // It is not just enough not to compute the retainer set for *c; it is
        // mandatory because CONSTR_NOCAF_STATIC are not reachable from
        // scavenged_static_objects, the list from which is assumed to traverse
        // all static objects after major garbage collections.
        goto loop;
    case THUNK_STATIC:
    case FUN_STATIC:
        if (get_itbl(c)->srt_len == 0) {
            // No need to compute the retainer set; no dynamic objects
            // are reachable from *c.
            //
            // Static objects: if we traverse all the live closures,
            // including static closures, during each heap census then
            // we will observe that some static closures appear and
            // disappear.  eg. a closure may contain a pointer to a
            // static function 'f' which is not otherwise reachable
            // (it doesn't indirectly point to any CAFs, so it doesn't
            // appear in any SRTs), so we would find 'f' during
            // traversal.  However on the next sweep there may be no
            // closures pointing to 'f'.
            //
            // We must therefore ignore static closures whose SRT is
            // empty, because these are exactly the closures that may
            // "appear".  A closure with a non-empty SRT, and which is
            // still required, will always be reachable.
            //
            // But what about CONSTR_STATIC?  Surely these may be able
            // to appear, and they don't have SRTs, so we can't
            // check.  So for now, we're calling
            // resetStaticObjectForRetainerProfiling() from the
            // garbage collector to reset the retainer sets in all the
            // reachable static objects.
            goto loop;
        }
        // fall through when srt_len != 0: process the closure normally
    default:
        break;
    }

    // The above objects are ignored in computing the average number of times
    // an object is visited.
    timesAnyObjectVisited++;

    // If this is the first visit to c, initialize its retainer set.
    maybeInitRetainerSet(c);
    retainerSetOfc = retainerSetOf(c);

    // Now compute s:
    //    isRetainer(cp) == rtsTrue => s == NULL
    //    isRetainer(cp) == rtsFalse => s == cp.retainer
    if (isRetainer(cp))
        s = NULL;
    else
        s = retainerSetOf(cp);

    // (c, cp, r, s) is available.
    R_r = getRetainerFrom(r);

    // (c, cp, r, s, R_r) is available, so compute the retainer set for *c.
    if (retainerSetOfc == NULL) {
        // This is the first visit to *c.
        numObjectVisited++;

        if (s == NULL)
            associate(c, NULL, singleton(R_r));
        else
            // s is actually the retainer set of *c!
            associate(c, NULL, s);

        // compute c_child_r
        c_child_r = isRetainer(c) ? c : r;
    } else {
        // This is not the first visit to *c.
        if (isMember(R_r, retainerSetOfc))
            goto loop;          // no need to process child

        if (s == NULL)
            associate(c, retainerSetOfc, addElement(R_r, retainerSetOfc));
        else {
            // s is not NULL and cp is not a retainer.  This means that
            // each time *cp is visited, so is *c.  Thus, if s has
            // exactly one more element in its retainer set than c, s
            // is also the new retainer set for *c.
            if (s->num == retainerSetOfc->num + 1) {
                associate(c, retainerSetOfc, s);
            }
            // Otherwise, just add R_r to the current retainer set of *c.
            else {
                associate(c, retainerSetOfc, addElement(R_r, retainerSetOfc));
            }
        }

        if (isRetainer(c))
            goto loop;          // no need to process child

        // compute c_child_r
        c_child_r = r;
    }

    // now, RSET() of all of *c, *cp, and *r is valid.
    // (c, c_child_r) are available.

    // process child

    // stack-bearing closures have their payload walked by retainStack()
    if (typeOfc == TSO) {
        retainStack(c, c_child_r,
                    NULL,
                    ((StgTSO *)c)->sp,
                    ((StgTSO *)c)->stack + ((StgTSO *)c)->stack_size);
        // no more children
        goto loop;
    } else if (typeOfc == PAP) {
        retainStack(c, c_child_r,
                    ((StgPAP *)c)->fun,
                    (StgPtr)((StgPAP *)c)->payload,
                    (StgPtr)((StgPAP *)c)->payload + ((StgPAP *)c)->n_args);
        // no more children
        goto loop;
    } else if (typeOfc == AP_UPD) {
        retainStack(c, c_child_r,
                    ((StgAP_UPD *)c)->fun,
                    (StgPtr)((StgAP_UPD *)c)->payload,
                    (StgPtr)((StgAP_UPD *)c)->payload +
                            ((StgAP_UPD *)c)->n_args);
        // no more children
        goto loop;
    }

    push(c, c_child_r, &first_child);

    // If first_child is null, c has no child.
    // If first_child is not null, the top stack element points to the next
    // object.  push() may or may not push a stackElement on the stack.
    if (first_child == NULL)
        goto loop;

    // (c, cp, r) = (first_child, c, c_child_r)
    r = c_child_r;
    cp = c;
    c = first_child;
    goto inner_loop;
}

/* -----------------------------------------------------------------------------
 * Compute the retainer set for every object reachable from *tl.
 * -------------------------------------------------------------------------- */
static void
retainRoot( StgClosure **tl )
{
    // We no longer assume that only TSOs and WEAKs are roots; any closure can
    // be a root.

    ASSERT(isEmptyRetainerStack());
    currentStackBoundary = stackTop;

    // a root is its own parent and its own most recent retainer
    retainClosure(*tl, *tl, *tl);

    // NOT TRUE: ASSERT(isMember(getRetainerFrom(*tl), retainerSetOf(*tl)));
    // *tl might be a TSO which is ThreadComplete, in which
    // case we ignore it for the purposes of retainer profiling.
}

/* -----------------------------------------------------------------------------
 * Compute the retainer set for each of the objects in the heap.
 * -------------------------------------------------------------------------- */
static void
computeRetainerSet( void )
{
    StgWeak *weak;
    RetainerSet *rtl;
    nat g;
    StgMutClosure *ml;
#ifdef DEBUG_RETAINER
    RetainerSet tmpRetainerSet;
#endif

    GetRoots(retainRoot);       // for scheduler roots

    // This function is called after a major GC, when key, value, and finalizer
    // all are guaranteed to be valid, or reachable.
    //
    // The following code assumes that WEAK objects are considered to be roots
    // for retainer profiling.
    for (weak = weak_ptr_list; weak != NULL; weak = weak->link)
        // retainRoot((StgClosure *)weak);
        // NOTE: passes the address of the local loop variable; retainRoot()
        // only dereferences it once, so *tl == weak as required.
        retainRoot((StgClosure **)&weak);

    // The following code resets the rs field of each unvisited mutable
    // object (computing sumOfNewCostExtra and updating costArray[] when
    // debugging retainer profiler).
    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
        ASSERT(g != 0 ||
               (generations[g].mut_list == END_MUT_LIST &&
                generations[g].mut_once_list == END_MUT_LIST));

        // Todo:
        // I think traversing through mut_list is unnecessary.
        // Think about removing this part.
        for (ml = generations[g].mut_list; ml != END_MUT_LIST;
             ml = ml->mut_link) {

            maybeInitRetainerSet((StgClosure *)ml);
            rtl = retainerSetOf((StgClosure *)ml);

#ifdef DEBUG_RETAINER
            if (rtl == NULL) {
                // first visit to *ml
                // This is a violation of the interface rule!
                RSET(ml) = (RetainerSet *)((StgWord)(&tmpRetainerSet) | flip);

                switch (get_itbl((StgClosure *)ml)->type) {
                case IND_STATIC:
                    // no cost involved
                    break;
                case CONSTR_INTLIKE:
                case CONSTR_CHARLIKE:
                case CONSTR_NOCAF_STATIC:
                case CONSTR_STATIC:
                case THUNK_STATIC:
                case FUN_STATIC:
                    barf("Invalid object in computeRetainerSet(): %d", get_itbl((StgClosure*)ml)->type);
                    break;
                default:
                    // dynamic objects
                    costArray[get_itbl((StgClosure *)ml)->type] += cost((StgClosure *)ml);
                    sumOfNewCostExtra += cost((StgClosure *)ml);
                    break;
                }
            }
#endif
        }

        // Traversing through mut_once_list is, in contrast, necessary
        // because we can find MUT_VAR objects which have not been
        // visited during retainer profiling.
        for (ml = generations[g].mut_once_list; ml != END_MUT_LIST;
             ml = ml->mut_link) {

            maybeInitRetainerSet((StgClosure *)ml);
            rtl = retainerSetOf((StgClosure *)ml);
#ifdef DEBUG_RETAINER
            if (rtl == NULL) {
                // first visit to *ml
                // This is a violation of the interface rule!
                RSET(ml) = (RetainerSet *)((StgWord)(&tmpRetainerSet) | flip);

                switch (get_itbl((StgClosure *)ml)->type) {
                case IND_STATIC:
                    // no cost involved
                    break;
                case CONSTR_INTLIKE:
                case CONSTR_CHARLIKE:
                case CONSTR_NOCAF_STATIC:
                case CONSTR_STATIC:
                case THUNK_STATIC:
                case FUN_STATIC:
                    barf("Invalid object in computeRetainerSet(): %d", get_itbl((StgClosure*)ml)->type);
                    break;
                default:
                    // dynamic objects
                    costArray[get_itbl((StgClosure *)ml)->type] += cost((StgClosure *)ml);
                    sumOfNewCostExtra += cost((StgClosure *)ml);
                    break;
                }
            }
#endif
        }
    }
}

/* -----------------------------------------------------------------------------
 * Traverse all static objects for which we compute retainer sets,
 * and reset their rs fields to NULL, which is accomplished by
 * invoking maybeInitRetainerSet().
This function must be called + * before zeroing all objects reachable from scavenged_static_objects + * in the case of major gabage collections. See GarbageCollect() in + * GC.c. + * Note: + * The mut_once_list of the oldest generation must also be traversed? + * Why? Because if the evacuation of an object pointed to by a static + * indirection object fails, it is put back to the mut_once_list of + * the oldest generation. + * However, this is not necessary because any static indirection objects + * are just traversed through to reach dynamic objects. In other words, + * they are not taken into consideration in computing retainer sets. + * -------------------------------------------------------------------------- */ +void +resetStaticObjectForRetainerProfiling( void ) +{ +#ifdef DEBUG_RETAINER + nat count; +#endif + StgClosure *p; + +#ifdef DEBUG_RETAINER + count = 0; +#endif + p = scavenged_static_objects; + while (p != END_OF_STATIC_LIST) { +#ifdef DEBUG_RETAINER + count++; +#endif + switch (get_itbl(p)->type) { + case IND_STATIC: + // Since we do not compute the retainer set of any + // IND_STATIC object, we don't have to reset its retainer + // field. + p = IND_STATIC_LINK(p); + break; + case THUNK_STATIC: + maybeInitRetainerSet(p); + p = THUNK_STATIC_LINK(p); + break; + case FUN_STATIC: + maybeInitRetainerSet(p); + p = FUN_STATIC_LINK(p); + break; + case CONSTR_STATIC: + maybeInitRetainerSet(p); + p = STATIC_LINK(get_itbl(p), p); + break; + default: + barf("resetStaticObjectForRetainerProfiling: %p (%s)", + p, get_itbl(p)->type); + break; + } + } +#ifdef DEBUG_RETAINER + // fprintf(stderr, "count in scavenged_static_objects = %d\n", count); +#endif +} + +/* ----------------------------------------------------------------------------- + * Perform retainer profiling. + * N is the oldest generation being profilied, where the generations are + * numbered starting at 0. 
 * Invariants:
 * Note:
 *   This function should be called only immediately after major garbage
 *   collection.
 * ------------------------------------------------------------------------- */
void
retainerProfile(void)
{
    nat allCost, numSet;
#ifdef DEBUG_RETAINER
    nat i;
    nat totalHeapSize;        // total raw heap size (computed by linear scanning)
#endif

#ifdef DEBUG_RETAINER
    fprintf(stderr, " < retainerProfile() invoked : %d>\n", retainerGeneration);
#endif

    stat_startRP();

    // We haven't flipped the bit yet.
#ifdef DEBUG_RETAINER
    fprintf(stderr, "Before traversing:\n");
    sumOfCostLinear = 0;
    for (i = 0;i < N_CLOSURE_TYPES; i++)
        costArrayLinear[i] = 0;
    totalHeapSize = checkHeapSanityForRetainerProfiling();

    fprintf(stderr, "\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
    /*
    fprintf(stderr, "costArrayLinear[] = ");
    for (i = 0;i < N_CLOSURE_TYPES; i++)
        fprintf(stderr, "[%u:%u] ", i, costArrayLinear[i]);
    fprintf(stderr, "\n");
    */

    ASSERT(sumOfCostLinear == totalHeapSize);

/*
#define pcostArrayLinear(index) \
    if (costArrayLinear[index] > 0) \
        fprintf(stderr, "costArrayLinear[" #index "] = %u\n", costArrayLinear[index])
    pcostArrayLinear(THUNK_STATIC);
    pcostArrayLinear(FUN_STATIC);
    pcostArrayLinear(CONSTR_STATIC);
    pcostArrayLinear(CONSTR_NOCAF_STATIC);
    pcostArrayLinear(CONSTR_INTLIKE);
    pcostArrayLinear(CONSTR_CHARLIKE);
*/
#endif

    // Now we flip the flip bit: every previously valid retainer-set field
    // becomes stale, so the traversal below re-initializes them lazily.
    flip = flip ^ 1;

#ifdef DEBUG_RETAINER
    stackSize = 0;
    maxStackSize = 0;
    cStackSize = 0;
    maxCStackSize = 0;
#endif
    numObjectVisited = 0;
    timesAnyObjectVisited = 0;

#ifdef DEBUG_RETAINER
    fprintf(stderr, "During traversing:\n");
    sumOfNewCost = 0;
    sumOfNewCostExtra = 0;
    for (i = 0;i < N_CLOSURE_TYPES; i++)
        costArray[i] = 0;
#endif

    /*
      We initialize the traverse stack each time the retainer profiling is
      performed (because the traverse stack size varies on each retainer profiling
      and this operation is not costly anyhow).  However, we just refresh the
      retainer sets.
    */
    initializeTraverseStack();
#ifdef DEBUG_RETAINER
    initializeAllRetainerSet();
#else
    refreshAllRetainerSet();
#endif
    computeRetainerSet();

    outputRetainerSet(hp_file, &allCost, &numSet);

#ifdef DEBUG_RETAINER
    fprintf(stderr, "After traversing:\n");
    sumOfCostLinear = 0;
    for (i = 0;i < N_CLOSURE_TYPES; i++)
        costArrayLinear[i] = 0;
    totalHeapSize = checkHeapSanityForRetainerProfiling();

    fprintf(stderr, "\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
    ASSERT(sumOfCostLinear == totalHeapSize);

    // now, compare the two results
    /*
      Note:
        costArray[] must be exactly the same as costArrayLinear[].
        Known exceptions:
          1) Dead weak pointers, whose type is CONSTR.  These objects are not
             reachable from any roots.
    */
    fprintf(stderr, "Comparison:\n");
    fprintf(stderr, "\tcostArrayLinear[] (must be empty) = ");
    for (i = 0;i < N_CLOSURE_TYPES; i++)
        if (costArray[i] != costArrayLinear[i])
            // nothing should be printed except MUT_VAR after major GCs
            fprintf(stderr, "[%u:%u] ", i, costArrayLinear[i]);
    fprintf(stderr, "\n");

    fprintf(stderr, "\tsumOfNewCost = %u\n", sumOfNewCost);
    fprintf(stderr, "\tsumOfNewCostExtra = %u\n", sumOfNewCostExtra);
    fprintf(stderr, "\tcostArray[] (must be empty) = ");
    for (i = 0;i < N_CLOSURE_TYPES; i++)
        if (costArray[i] != costArrayLinear[i])
            // nothing should be printed except MUT_VAR after major GCs
            fprintf(stderr, "[%u:%u] ", i, costArray[i]);
    fprintf(stderr, "\n");

    // only for major garbage collection
    ASSERT(sumOfNewCost + sumOfNewCostExtra == sumOfCostLinear);
#endif

    // post-processing
    closeTraverseStack();
#ifdef DEBUG_RETAINER
    closeAllRetainerSet();
#else
    // Note that there is no post-processing for the retainer sets.
#endif
    retainerGeneration++;

    stat_endRP(
        retainerGeneration - 1,   // retainerGeneration has just been incremented!
#ifdef DEBUG_RETAINER
        maxCStackSize, maxStackSize,
#endif
        (double)timesAnyObjectVisited / numObjectVisited,
        allCost, numSet);
}

/* -----------------------------------------------------------------------------
 * DEBUGGING CODE
 * -------------------------------------------------------------------------- */

#ifdef DEBUG_RETAINER

// Heuristic check that r plausibly points at a closure: either a static
// closure, or heap-allocated in a live block, and not scribbled with the
// 0xaaaaaaaa dead-memory pattern.
#define LOOKS_LIKE_PTR(r) ((LOOKS_LIKE_STATIC_CLOSURE(r) || \
                            ((HEAP_ALLOCED(r) && Bdescr((P_)r)->free != (void *)-1))) && \
                           ((StgWord)(*(StgPtr)r)!=0xaaaaaaaa))

// Sanity-check one heap closure during a linear heap scan and return its
// size in words (same sizing rules as cost()).  Reports closures whose
// retainer-set field was not visited in the current pass.
static nat
sanityCheckHeapClosure( StgClosure *c )
{
    StgInfoTable *info;

    ASSERT(LOOKS_LIKE_GHC_INFO(c->header.info));
    ASSERT(!closure_STATIC(c));
    ASSERT(LOOKS_LIKE_PTR(c));

    // flip-bit mismatch => this closure was never visited by the traversal
    if ((((StgWord)RSET(c) & 1) ^ flip) != 0) {
        if (get_itbl(c)->type == CONSTR &&
            !strcmp(get_itbl(c)->prof.closure_type, "DEAD_WEAK") &&
            !strcmp(get_itbl(c)->prof.closure_desc, "DEAD_WEAK")) {
            // dead weak pointers are a known, benign exception
            fprintf(stderr, "\tUnvisited dead weak pointer object found: c = %p\n", c);
            costArray[get_itbl(c)->type] += cost(c);
            sumOfNewCost += cost(c);
        } else
            fprintf(stderr,
                    "Unvisited object: flip = %d, c = %p(%d, %s, %s), rs = %p\n",
                    flip, c, get_itbl(c)->type,
                    get_itbl(c)->prof.closure_type, get_itbl(c)->prof.closure_desc,
                    RSET(c));
    } else {
        // fprintf(stderr, "sanityCheckHeapClosure) S: flip = %d, c = %p(%d), rs = %p\n", flip, c, get_itbl(c)->type, RSET(c));
    }

    info = get_itbl(c);
    switch (info->type) {
    case TSO:
        return tso_sizeW((StgTSO *)c);

    case THUNK:
    case THUNK_1_0:
    case THUNK_0_1:
    case THUNK_2_0:
    case THUNK_1_1:
    case THUNK_0_2:
        return stg_max(sizeW_fromITBL(info), sizeofW(StgHeader) + MIN_UPD_SIZE);

    case MVAR:
        return sizeofW(StgMVar);

    case MUT_ARR_PTRS:
    case MUT_ARR_PTRS_FROZEN:
        return mut_arr_ptrs_sizeW((StgMutArrPtrs *)c);

    case AP_UPD:
    case PAP:
        return pap_sizeW((StgPAP *)c);

    case ARR_WORDS:
        return arr_words_sizeW((StgArrWords *)c);

    case CONSTR:
    case CONSTR_1_0:
    case CONSTR_0_1:
    case CONSTR_2_0:
    case CONSTR_1_1:
    case CONSTR_0_2:
    case FUN:
    case FUN_1_0:
    case FUN_0_1:
    case FUN_2_0:
    case FUN_1_1:
    case FUN_0_2:
    case WEAK:
    case MUT_VAR:
    case MUT_CONS:
    case CAF_BLACKHOLE:
    case BLACKHOLE:
    case SE_BLACKHOLE:
    case SE_CAF_BLACKHOLE:
    case BLACKHOLE_BQ:
    case IND_PERM:
    case IND_OLDGEN:
    case IND_OLDGEN_PERM:
    case FOREIGN:
    case BCO:
    case STABLE_NAME:
        return sizeW_fromITBL(info);

    case THUNK_SELECTOR:
        return sizeofW(StgHeader) + MIN_UPD_SIZE;

    /*
      Error case
    */
    case IND_STATIC:
    case CONSTR_STATIC:
    case FUN_STATIC:
    case THUNK_STATIC:
    case CONSTR_INTLIKE:
    case CONSTR_CHARLIKE:
    case CONSTR_NOCAF_STATIC:
    case UPDATE_FRAME:
    case CATCH_FRAME:
    case STOP_FRAME:
    case SEQ_FRAME:
    case RET_DYN:
    case RET_BCO:
    case RET_SMALL:
    case RET_VEC_SMALL:
    case RET_BIG:
    case RET_VEC_BIG:
    case IND:
    case BLOCKED_FETCH:
    case FETCH_ME:
    case FETCH_ME_BQ:
    case RBH:
    case REMOTE_REF:
    case EVACUATED:
    case INVALID_OBJECT:
    default:
        barf("Invalid object in sanityCheckHeapClosure(): %d",
             get_itbl(c)->type);
        return 0;
    }
}

// Linearly scan a chain of blocks, sanity-checking every closure and
// accumulating per-type sizes; returns total words scanned.
// NOTE(review): costSum/size are declared static — presumably deliberate
// (debug-only code); confirm there is no reentrancy concern.
static nat
heapCheck( bdescr *bd )
{
    StgPtr p;
    static nat costSum, size;

    costSum = 0;
    while (bd != NULL) {
        p = bd->start;
        while (p < bd->free) {
            size = sanityCheckHeapClosure((StgClosure *)p);
            sumOfCostLinear += size;
            costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
            p += size;
            // no need for slop check; I think slops are not used currently.
        }
        ASSERT(p == bd->free);
        costSum += bd->free - bd->start;
        bd = bd->link;
    }

    return costSum;
}

// Linearly scan the small-object allocation area (the nursery-like
// small_alloc_list); the first block is only filled up to alloc_Hp.
static nat
smallObjectPoolCheck(void)
{
    bdescr *bd;
    StgPtr p;
    static nat costSum, size;

    bd = small_alloc_list;
    costSum = 0;

    // first block
    if (bd == NULL)
        return costSum;

    p = bd->start;
    while (p < alloc_Hp) {
        size = sanityCheckHeapClosure((StgClosure *)p);
        sumOfCostLinear += size;
        costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
        p += size;
    }
    ASSERT(p == alloc_Hp);
    costSum += alloc_Hp - bd->start;

    bd = bd->link;
    while (bd != NULL) {
        p = bd->start;
        while (p < bd->free) {
            size = sanityCheckHeapClosure((StgClosure *)p);
            sumOfCostLinear += size;
            costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
            p += size;
        }
        ASSERT(p == bd->free);
        costSum += bd->free - bd->start;
        bd = bd->link;
    }

    return costSum;
}

static nat
chainCheck(bdescr *bd)
{
    nat costSum, size;

    costSum = 0;
    while (bd != NULL) {
        // bd->free - bd->start is not an accurate measurement of the
        // object size.  Actually it is always zero, so we compute its
        // size explicitly.
+ size = sanityCheckHeapClosure((StgClosure *)bd->start); + sumOfCostLinear += size; + costArrayLinear[get_itbl((StgClosure *)bd->start)->type] += size; + costSum += size; + bd = bd->link; + } + + return costSum; +} + +static nat +checkHeapSanityForRetainerProfiling( void ) +{ + nat costSum, g, s; + + costSum = 0; + fprintf(stderr, "START: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum); + if (RtsFlags.GcFlags.generations == 1) { + costSum += heapCheck(g0s0->to_blocks); + fprintf(stderr, "heapCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum); + costSum += chainCheck(g0s0->large_objects); + fprintf(stderr, "chainCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum); + } else { + for (g = 0; g < RtsFlags.GcFlags.generations; g++) + for (s = 0; s < generations[g].n_steps; s++) { + /* + After all live objects have been scavenged, the garbage + collector may create some objects in + scheduleFinalizers(). These objects are created through + allocate(), so the small object pool or the large object + pool of the g0s0 may not be empty. 
+ */ + if (g == 0 && s == 0) { + costSum += smallObjectPoolCheck(); + fprintf(stderr, "smallObjectPoolCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum); + costSum += chainCheck(generations[g].steps[s].large_objects); + fprintf(stderr, "chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum); + } else { + costSum += heapCheck(generations[g].steps[s].blocks); + fprintf(stderr, "heapCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum); + costSum += chainCheck(generations[g].steps[s].large_objects); + fprintf(stderr, "chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum); + } + } + } + + return costSum; +} + +void +findPointer(StgPtr p) +{ + StgPtr q, r, e; + bdescr *bd; + nat g, s; + + for (g = 0; g < RtsFlags.GcFlags.generations; g++) { + for (s = 0; s < generations[g].n_steps; s++) { + // if (g == 0 && s == 0) continue; + bd = generations[g].steps[s].blocks; + for (; bd; bd = bd->link) { + for (q = bd->start; q < bd->free; q++) { + if (*q == (StgWord)p) { + r = q; + while (!LOOKS_LIKE_GHC_INFO(*r)) r--; + fprintf(stderr, "Found in gen[%d], step[%d]: q = %p, r = %p\n", g, s, q, r); + // return; + } + } + } + bd = generations[g].steps[s].large_objects; + for (; bd; bd = bd->link) { + e = bd->start + cost((StgClosure *)bd->start); + for (q = bd->start; q < e; q++) { + if (*q == (StgWord)p) { + r = q; + while (*r == 0 || !LOOKS_LIKE_GHC_INFO(*r)) r--; + fprintf(stderr, "Found in gen[%d], large_objects: %p\n", g, r); + // return; + } + } + } + } + } +} + +static void +belongToHeap(StgPtr p) +{ + bdescr *bd; + nat g, s; + + for (g = 0; g < RtsFlags.GcFlags.generations; g++) { + for (s = 0; s < generations[g].n_steps; s++) { + // if (g == 0 && s == 0) continue; + bd = generations[g].steps[s].blocks; + for (; bd; bd = bd->link) { + if (bd->start <= p && p < bd->free) { + fprintf(stderr, "Belongs to gen[%d], step[%d]", g, s); + return; + } + } + bd = 
generations[g].steps[s].large_objects; + for (; bd; bd = bd->link) { + if (bd->start <= p && p < bd->start + getHeapClosureSize((StgClosure *)bd->start)) { + fprintf(stderr, "Found in gen[%d], large_objects: %p\n", g, bd->start); + return; + } + } + } + } +} +#endif // DEBUG_RETAINER + +#endif /* PROFILING */ diff --git a/ghc/rts/RetainerProfile.h b/ghc/rts/RetainerProfile.h new file mode 100644 index 0000000000..7a2f0fb16e --- /dev/null +++ b/ghc/rts/RetainerProfile.h @@ -0,0 +1,29 @@ +/* ----------------------------------------------------------------------------- + * $Id: RetainerProfile.h,v 1.1 2001/11/22 14:25:12 simonmar Exp $ + * + * (c) The GHC Team, 2001 + * Author: Sungwoo Park + * + * Retainer profiling interface. + * + * ---------------------------------------------------------------------------*/ + +#ifndef RETAINERPROFILE_H +#define RETAINERPROFILE_H + +#ifdef PROFILING + +extern void initRetainerProfiling ( void ); +extern void endRetainerProfiling ( void ); +extern void printRetainer ( FILE *, retainer ); +extern void retainerProfile ( void ); +extern void resetStaticObjectForRetainerProfiling ( void ); + +// firstStack is exported because memInventory() in Schedule.c uses it. 
+#ifdef DEBUG +extern bdescr *firstStack; +#endif + +#endif /* PROFILING */ + +#endif /* RETAINERPROFILE_H */ diff --git a/ghc/rts/RetainerSet.c b/ghc/rts/RetainerSet.c new file mode 100644 index 0000000000..709555a82e --- /dev/null +++ b/ghc/rts/RetainerSet.c @@ -0,0 +1,587 @@ +/* ----------------------------------------------------------------------------- + * $Id: RetainerSet.c,v 1.1 2001/11/22 14:25:12 simonmar Exp $ + * + * (c) The GHC Team, 2001 + * Author: Sungwoo Park + * + * Retainer set implementation for retainer profiling (see RetainerProfile.c) + * + * ---------------------------------------------------------------------------*/ + +#ifdef PROFILING + +#include "Rts.h" +#include "Stats.h" +#include "RtsUtils.h" +#include "RetainerSet.h" +#include "Arena.h" +#include "Profiling.h" + +#include <string.h> + +#define HASH_TABLE_SIZE 255 +#define hash(hk) (hk % HASH_TABLE_SIZE) +static RetainerSet *hashTable[HASH_TABLE_SIZE]; + +static Arena *arena; // arena in which we store retainer sets + +static int nextId; // id of next retainer set + +/* ----------------------------------------------------------------------------- + * rs_MANY is a distinguished retainer set, such that + * + * isMember(e, rs_MANY) = True + * + * addElement(e, rs) = rs_MANY, if rs->num >= maxRetainerSetSize + * addElement(e, rs_MANY) = rs_MANY + * + * The point of rs_MANY is to keep the total number of retainer sets + * from growing too large. 
+ * -------------------------------------------------------------------------- */ +RetainerSet rs_MANY = { + num : 0, + cost : 0, + hashKey : 0, + link : NULL, + id : 1, + element : {} +}; + +nat maxRetainerSetSize = 16; + +/* ----------------------------------------------------------------------------- + * calculate the size of a RetainerSet structure + * -------------------------------------------------------------------------- */ +static inline size_t +sizeofRetainerSet( int elems ) +{ + return (sizeof(RetainerSet) + elems * sizeof(retainer)); +} + +/* ----------------------------------------------------------------------------- + * Creates the first pool and initializes hashTable[]. + * Frees all pools if any. + * -------------------------------------------------------------------------- */ +void +initializeAllRetainerSet(void) +{ + int i; + + arena = newArena(); + + for (i = 0; i < HASH_TABLE_SIZE; i++) + hashTable[i] = NULL; + nextId = 2; // Initial value must be positive, 2 is MANY. +} + +/* ----------------------------------------------------------------------------- + * Refreshes all pools for reuse and initializes hashTable[]. + * -------------------------------------------------------------------------- */ +void +refreshAllRetainerSet(void) +{ + int i; + + // Choose one of the following two approaches. + +#ifdef FIRST_APPROACH + // first approach: completely refresh + arenaFree(arena); + arena = newArena(); + + for (i = 0; i < HASH_TABLE_SIZE; i++) + hashTable[i] = NULL; + nextId = 2; +#endif // FIRST_APPROACH + +#ifdef SECOND_APPROACH + // second approach: leave all the retainer sets for reuse + RetainerSet *rs; + for (i = 0;i < HASH_TABLE_SIZE; i++) { + rs = hashTable[i]; + while (rs != NULL) { + rs->cost = 0; + rs = rs->link; + } + } + rs_MANY.cost = 0; +#endif // SECOND_APPROACH +} + +/* ----------------------------------------------------------------------------- + * Frees all pools. 
+ * -------------------------------------------------------------------------- */ +void +closeAllRetainerSet(void) +{ + arenaFree(arena); +} + +/* ----------------------------------------------------------------------------- + * Finds or creates if needed a singleton retainer set. + * -------------------------------------------------------------------------- */ +RetainerSet * +singleton(retainer r) +{ + RetainerSet *rs; + StgWord hk; + + hk = hashKeySingleton(r); + for (rs = hashTable[hash(hk)]; rs != NULL; rs = rs->link) + if (rs->num == 1 && rs->element[0] == r) return rs; // found it + + // create it + rs = arenaAlloc( arena, sizeofRetainerSet(1) ); + rs->num = 1; + rs->cost = 0; + rs->hashKey = hk; + rs->link = hashTable[hash(hk)]; + rs->id = nextId++; + rs->element[0] = r; + + // The new retainer set is placed at the head of the linked list. + hashTable[hash(hk)] = rs; + + return rs; +} + +/* ----------------------------------------------------------------------------- + * Finds or creates a retainer set *rs augmented with r. + * Invariants: + * r is not a member of rs, i.e., isMember(r, rs) returns rtsFalse. + * rs is not NULL. + * Note: + * We could check if rs is NULL, in which case this function call + * reverts to singleton(). We do not choose this strategy because + * in most cases addElement() is invoked with non-NULL rs. 
+ * -------------------------------------------------------------------------- */ +RetainerSet * +addElement(retainer r, RetainerSet *rs) +{ + nat i; + nat nl; // Number of retainers in *rs Less than r + RetainerSet *nrs; // New Retainer Set + StgWord hk; // Hash Key + +#ifdef DEBUG_RETAINER + // fprintf(stderr, "addElement(%p, %p) = ", r, rs); +#endif + + ASSERT(rs != NULL); + ASSERT(rs->num <= maxRetainerSetSize); + + if (rs == &rs_MANY || rs->num == maxRetainerSetSize) { + return &rs_MANY; + } + + ASSERT(!isMember(r, rs)); + + for (nl = 0; nl < rs->num; nl++) + if (r < rs->element[nl]) break; + // Now nl is the index for r into the new set. + // Also it denotes the number of retainers less than r in *rs. + // Thus, compare the first nl retainers, then r itself, and finally the + // remaining (rs->num - nl) retainers. + + hk = hashKeyAddElement(r, rs); + for (nrs = hashTable[hash(hk)]; nrs != NULL; nrs = nrs->link) { + // test *rs and *nrs for equality + + // check their size + if (rs->num + 1 != nrs->num) continue; + + // compare the first nl retainers and find the first non-matching one. + for (i = 0; i < nl; i++) + if (rs->element[i] != nrs->element[i]) break; + if (i < nl) continue; + + // compare r itself + if (r != nrs->element[i]) continue; // i == nl + + // compare the remaining retainers + for (; i < rs->num; i++) + if (rs->element[i] != nrs->element[i + 1]) break; + if (i < rs->num) continue; + +#ifdef DEBUG_RETAINER + // fprintf(stderr, "%p\n", nrs); +#endif + // The set we are seeking already exists! 
+ return nrs; + } + + // create a new retainer set + nrs = arenaAlloc( arena, sizeofRetainerSet(rs->num + 1) ); + nrs->num = rs->num + 1; + nrs->cost = 0; + nrs->hashKey = hk; + nrs->link = hashTable[hash(hk)]; + nrs->id = nextId++; + for (i = 0; i < nl; i++) { // copy the first nl retainers + nrs->element[i] = rs->element[i]; + } + nrs->element[i] = r; // copy r + for (; i < rs->num; i++) { // copy the remaining retainers + nrs->element[i + 1] = rs->element[i]; + } + + hashTable[hash(hk)] = nrs; + +#ifdef DEBUG_RETAINER + // fprintf(stderr, "%p\n", nrs); +#endif + return nrs; +} + +/* ----------------------------------------------------------------------------- + * Call f() for each retainer set. + * -------------------------------------------------------------------------- */ +void +traverseAllRetainerSet(void (*f)(RetainerSet *)) +{ + int i; + RetainerSet *rs; + + (*f)(&rs_MANY); + for (i = 0; i < HASH_TABLE_SIZE; i++) + for (rs = hashTable[i]; rs != NULL; rs = rs->link) + (*f)(rs); +} + + +/* ----------------------------------------------------------------------------- + * printRetainer() prints the full information on a given retainer, + * not a retainer set. 
+ * -------------------------------------------------------------------------- */ +#if defined(RETAINER_SCHEME_INFO) +// Retainer scheme 1: retainer = info table +void +printRetainer(FILE *f, retainer itbl) +{ + fprintf(f, "%s[%s]", itbl->prof.closure_desc, itbl->prof.closure_type); +} +#elif defined(RETAINER_SCHEME_CCS) +// Retainer scheme 2: retainer = cost centre stack +void +printRetainer(FILE *f, retainer ccs) +{ + fprintCCS(f, ccs); +} +#elif defined(RETAINER_SCHEME_CC) +// Retainer scheme 3: retainer = cost centre +void +printRetainer(FILE *f, retainer cc) +{ + fprintf(f,"%s.%s", cc->module, cc->label); +} +#endif + +/* ----------------------------------------------------------------------------- + * printRetainerSetShort() should always display the same output for + * a given retainer set regardless of the time of invocation. + * -------------------------------------------------------------------------- */ +#ifdef SECOND_APPROACH +#if defined(RETAINER_SCHEME_INFO) +// Retainer scheme 1: retainer = info table +void +printRetainerSetShort(FILE *f, RetainerSet *rs) +{ +#define MAX_RETAINER_SET_SPACE 24 + char tmp[MAX_RETAINER_SET_SPACE + 1]; + int size; + nat j; + + ASSERT(rs->id < 0); + + tmp[MAX_RETAINER_SET_SPACE] = '\0'; + + // No blank characters are allowed. 
+ sprintf(tmp + 0, "(%d)", -(rs->id)); + size = strlen(tmp); + ASSERT(size < MAX_RETAINER_SET_SPACE); + + for (j = 0; j < rs->num; j++) { + if (j < rs->num - 1) { + strncpy(tmp + size, rs->element[j]->prof.closure_desc, MAX_RETAINER_SET_SPACE - size); + size = strlen(tmp); + if (size == MAX_RETAINER_SET_SPACE) + break; + strncpy(tmp + size, ",", MAX_RETAINER_SET_SPACE - size); + size = strlen(tmp); + if (size == MAX_RETAINER_SET_SPACE) + break; + } + else { + strncpy(tmp + size, rs->element[j]->prof.closure_desc, MAX_RETAINER_SET_SPACE - size); + // size = strlen(tmp); + } + } + fprintf(f, tmp); +} +#elif defined(RETAINER_SCHEME_CC) +// Retainer scheme 3: retainer = cost centre +void +printRetainerSetShort(FILE *f, RetainerSet *rs) +{ +#define MAX_RETAINER_SET_SPACE 24 + char tmp[MAX_RETAINER_SET_SPACE + 1]; + int size; + nat j; + +} +#elif defined(RETAINER_SCHEME_CCS) +// Retainer scheme 2: retainer = cost centre stack +void +printRetainerSetShort(FILE *f, RetainerSet *rs) +{ +#define MAX_RETAINER_SET_SPACE 24 + char tmp[MAX_RETAINER_SET_SPACE + 1]; + int size; + nat j; + + ASSERT(rs->id < 0); + + tmp[MAX_RETAINER_SET_SPACE] = '\0'; + + // No blank characters are allowed. 
+ sprintf(tmp + 0, "(%d)", -(rs->id)); + size = strlen(tmp); + ASSERT(size < MAX_RETAINER_SET_SPACE); + + for (j = 0; j < rs->num; j++) { + if (j < rs->num - 1) { + strncpy(tmp + size, rs->element[j]->cc->label, MAX_RETAINER_SET_SPACE - size); + size = strlen(tmp); + if (size == MAX_RETAINER_SET_SPACE) + break; + strncpy(tmp + size, ",", MAX_RETAINER_SET_SPACE - size); + size = strlen(tmp); + if (size == MAX_RETAINER_SET_SPACE) + break; + } + else { + strncpy(tmp + size, rs->element[j]->cc->label, MAX_RETAINER_SET_SPACE - size); + // size = strlen(tmp); + } + } + fprintf(f, tmp); +} +#elif defined(RETAINER_SCHEME_CC) +// Retainer scheme 3: retainer = cost centre +static void +printRetainerSetShort(FILE *f, retainerSet *rs) +{ +#define MAX_RETAINER_SET_SPACE 24 + char tmp[MAX_RETAINER_SET_SPACE + 1]; + int size; + nat j; + + ASSERT(rs->id < 0); + + tmp[MAX_RETAINER_SET_SPACE] = '\0'; + + // No blank characters are allowed. + sprintf(tmp + 0, "(%d)", -(rs->id)); + size = strlen(tmp); + ASSERT(size < MAX_RETAINER_SET_SPACE); + + for (j = 0; j < rs->num; j++) { + if (j < rs->num - 1) { + strncpy(tmp + size, rs->element[j]->label, + MAX_RETAINER_SET_SPACE - size); + size = strlen(tmp); + if (size == MAX_RETAINER_SET_SPACE) + break; + strncpy(tmp + size, ",", MAX_RETAINER_SET_SPACE - size); + size = strlen(tmp); + if (size == MAX_RETAINER_SET_SPACE) + break; + } + else { + strncpy(tmp + size, rs->element[j]->label, + MAX_RETAINER_SET_SPACE - size); + // size = strlen(tmp); + } + } + fprintf(f, tmp); +/* + #define MAX_RETAINER_SET_SPACE 24 + #define DOT_NUMBER 3 + // 1. 32 > MAX_RETAINER_SET_SPACE + 1 (1 for '\0') + // 2. (MAX_RETAINER_SET_SPACE - DOT_NUMBER ) characters should be enough for + // printing one natural number (plus '(' and ')'). + char tmp[32]; + int size, ts; + nat j; + + ASSERT(rs->id < 0); + + // No blank characters are allowed. 
+ sprintf(tmp + 0, "(%d)", -(rs->id)); + size = strlen(tmp); + ASSERT(size < MAX_RETAINER_SET_SPACE - DOT_NUMBER); + + for (j = 0; j < rs->num; j++) { + ts = strlen(rs->element[j]->label); + if (j < rs->num - 1) { + if (size + ts + 1 > MAX_RETAINER_SET_SPACE - DOT_NUMBER) { + sprintf(tmp + size, "..."); + break; + } + sprintf(tmp + size, "%s,", rs->element[j]->label); + size += ts + 1; + } + else { + if (size + ts > MAX_RETAINER_SET_SPACE - DOT_NUMBER) { + sprintf(tmp + size, "..."); + break; + } + sprintf(tmp + size, "%s", rs->element[j]->label); + size += ts; + } + } + fprintf(f, tmp); +*/ +} +#endif /* RETAINER_SCHEME_CC */ +#endif /* SECOND_APPROACH */ + +/* ----------------------------------------------------------------------------- + * Print the statistics. This function is called after each + * retainer profiling. *allCost is set the sum of all costs retained + * by any retainer sets. *numSet is set to the number of all + * retainer sets (including those with 0 cost). + * -------------------------------------------------------------------------- */ +void +outputRetainerSet( FILE *hp_file, nat *allCost, nat *numSet ) +{ + nat i; +#ifdef FIRST_APPROACH + nat j; +#endif + RetainerSet *rs; + double duration; + + *allCost = 0; + *numSet = 0; + duration = mut_user_time_during_RP(); + + fprintf(hp_file, "MARK %f\n", duration); + fprintf(hp_file, "BEGIN_SAMPLE %f\n", duration); + + if (rs_MANY.cost > 0) { + fprintf(hp_file, "MANY\t%u\n", rs_MANY.cost * sizeof(StgWord)); + } + + for (i = 0; i < HASH_TABLE_SIZE; i++) { + for (rs = hashTable[i]; rs != NULL; rs = rs->link) { + (*numSet)++; + /* + Note: If rs->cost is 0, it means that there exists at + least one object which is retained by this retainer set + *rs temporarily. 
Since its new retainer set of this + object (replacing *rs) is at least larger than *rs, if + the cost of every object was a positive quantity, the + following invariants would hold: If rs->cost == 0, there + exists a retainer set rs' such that rs'->cost > 0 and + rs'->num > rs->num. However, static objects cost zero, + this does not hold. If we set the cost of each static + object to a positive quantity, it should hold, which is + actually the case. + */ + if (rs->cost == 0) + continue; + + *allCost += rs->cost; + +#ifdef SECOND_APPROACH + if (rs->id > 0) // if having a positive cost for the first time? + rs->id = -(rs->id); // mark as having a positive cost + // Now, this retainer set has a permanent negative id. + + // report in the unit of bytes: * sizeof(StgWord) + printRetainerSetShort(hp_file, rs); + fprintf(hp_file, "\t%u\n", rs->cost * sizeof(StgWord)); +#endif + +#ifdef FIRST_APPROACH + fprintf(hp_file, "{"); + for (j = 0; j < rs->num - 1; j++) { + printRetainer(hp_file, rs->element[j]); + fprintf(hp_file, ","); + } + printRetainer(hp_file, rs->element[j]); + fprintf(hp_file, "}\t%u\n", rs->cost * sizeof(StgWord)); +#endif + } + } + fprintf(hp_file, "END_SAMPLE %f\n", duration); +} + +/* + This function is called at the exit of the program. + */ +#ifdef SECOND_APPROACH +void +outputAllRetainerSet(FILE *prof_file) +{ + nat i, j; + nat numSet; + RetainerSet *rs, **rsArray, *tmp; + + // find out the number of retainer sets which have had a non-zero cost at + // least once during retainer profiling + numSet = 0; + for (i = 0; i < HASH_TABLE_SIZE; i++) + for (rs = hashTable[i]; rs != NULL; rs = rs->link) { + if (rs->id < 0) + numSet++; + } + + if (numSet == 0) // retainer profiling was not done at all. 
+ return; + + // allocate memory + rsArray = stgMallocBytes(numSet * sizeof(RetainerSet *), + "outputAllRetainerSet()"); + + // prepare for sorting + j = 0; + for (i = 0; i < HASH_TABLE_SIZE; i++) + for (rs = hashTable[i]; rs != NULL; rs = rs->link) { + if (rs->id < 0) { + rsArray[j] = rs; + j++; + } + } + + ASSERT(j == numSet); + + // sort rsArray[] according to the id of each retainer set + for (i = numSet - 1; i > 0; i--) { + for (j = 0; j <= i - 1; j++) { + // if (-(rsArray[j]->id) < -(rsArray[j + 1]->id)) + if (rsArray[j]->id < rsArray[j + 1]->id) { + tmp = rsArray[j]; + rsArray[j] = rsArray[j + 1]; + rsArray[j + 1] = tmp; + } + } + } + + fprintf(prof_file, "\nRetainer sets created during profiling:\n"); + for (i = 0;i < numSet; i++) { + fprintf(prof_file, "SET %u = {", -(rsArray[i]->id)); + for (j = 0; j < rsArray[i]->num - 1; j++) { + printRetainer(prof_file, rsArray[i]->element[j]); + fprintf(prof_file, ", "); + } + printRetainer(prof_file, rsArray[i]->element[j]); + fprintf(prof_file, "}\n"); + } + + free(rsArray); +} +#endif // SECOND_APPROACH + +#endif /* PROFILING */ diff --git a/ghc/rts/RetainerSet.h b/ghc/rts/RetainerSet.h new file mode 100644 index 0000000000..feed43e74a --- /dev/null +++ b/ghc/rts/RetainerSet.h @@ -0,0 +1,139 @@ +/* ----------------------------------------------------------------------------- + * $Id: RetainerSet.h,v 1.1 2001/11/22 14:25:12 simonmar Exp $ + * + * (c) The GHC Team, 2001 + * Author: Sungwoo Park + * + * Retainer set interface for retainer profiling. + * + * ---------------------------------------------------------------------------*/ + +#ifdef PROFILING + +/* + Note: + There are two ways of maintaining all retainer sets. The first is simply by + freeing all the retainer sets and re-initialize the hash table at each + retainer profiling. The second is by setting the cost field of each + retainer set. 
The second is preferred to the first if most retainer sets + are likely to be observed again during the next retainer profiling. Note + that in the first approach, we do not free the memory allocated for + retainer sets; we just invalidate all retainer sets. + */ +#ifdef DEBUG_RETAINER +// In this case, FIRST_APPROACH must be turned on because the memory pool +// for retainer sets is freed each time. +#define FIRST_APPROACH +#else +// #define FIRST_APPROACH +#define SECOND_APPROACH +#endif + +// Creates the first pool and initializes a hash table. Frees all pools if any. +void initializeAllRetainerSet(void); + +// Refreshes all pools for reuse and initializes a hash table. +void refreshAllRetainerSet(void); + +// Frees all pools. +void closeAllRetainerSet(void); + +// Finds or creates if needed a singleton retainer set. +RetainerSet *singleton(retainer r); + +extern RetainerSet rs_MANY; + +// Checks if a given retainer is a member of the retainer set. +// +// Note & (maybe) Todo: +// This function needs to be declared as an inline function, so it is declared +// as an inline static function here. +// This makes the interface really bad, but isMember() returns a value, so +// it is not easy either to write it as a macro (due to my lack of C +// programming experience). Sungwoo +// +// rtsBool isMember(retainer, retainerSet *); +/* + Returns rtsTrue if r is a member of *rs. + Invariants: + rs is not NULL. + Note: + The efficiency of this function is subject to the typical size of + retainer sets. If it is small, linear scan is better. If it + is large in most cases, binary scan is better. + The current implementation mixes the two search strategies. 
+ */ + +#define BINARY_SEARCH_THRESHOLD 8 +static inline rtsBool +isMember(retainer r, RetainerSet *rs) +{ + int i, left, right; // must be int, not nat (because -1 can appear) + retainer ri; + + if (rs == &rs_MANY) { return rtsTrue; } + + if (rs->num < BINARY_SEARCH_THRESHOLD) { + for (i = 0; i < (int)rs->num; i++) { + ri = rs->element[i]; + if (r == ri) return rtsTrue; + else if (r < ri) return rtsFalse; + } + } else { + left = 0; + right = rs->num - 1; + while (left <= right) { + i = (left + right) / 2; + ri = rs->element[i]; + if (r == ri) return rtsTrue; + else if (r < ri) right = i - 1; + else left = i + 1; + } + } + return rtsFalse; +} + +// Finds or creates a retainer set augmented with a new retainer. +RetainerSet *addElement(retainer, RetainerSet *); + +// Call f() for each retainer set. +void traverseAllRetainerSet(void (*f)(RetainerSet *)); + +#ifdef SECOND_APPROACH +// Prints a single retainer set. +void printRetainerSetShort(FILE *, RetainerSet *); +#endif + +// Print the statistics on all the retainer sets. +// store the sum of all costs and the number of all retainer sets. +void outputRetainerSet(FILE *, nat *, nat *); + +#ifdef SECOND_APPROACH +// Print all retainer sets at the exit of the program. +void outputAllRetainerSet(FILE *); +#endif + +// Hashing functions +/* + Invariants: + Once either initializeAllRetainerSet() or refreshAllRetainerSet() + is called, there exists only one copy of any retainer set created + through singleton() and addElement(). The pool (the storage for + retainer sets) is consumed linearly. All the retainer sets of the + same hash function value are linked together from an element in + hashTable[]. See the invariants of allocateInPool() for the + maximum size of retainer sets. The hashing function is defined by + hashKeySingleton() and hashKeyAddElement(). The hash key for a set + must be unique regardless of the order its elements are inserted, + i.e., the hashing function must be additive(?). 
+*/ +#define hashKeySingleton(r) ((StgWord)(r)) +#define hashKeyAddElement(r, s) (hashKeySingleton((r)) + (s)->hashKey) + +// Prints the full information on a given retainer. +// Note: This function is not part of retainerSet interface, but this is +// the best place to define it. +void printRetainer(FILE *, retainer); + +#endif /* PROFILING */ + diff --git a/ghc/rts/RtsFlags.c b/ghc/rts/RtsFlags.c index d070c130fe..e23346b7b3 100644 --- a/ghc/rts/RtsFlags.c +++ b/ghc/rts/RtsFlags.c @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: RtsFlags.c,v 1.51 2001/10/01 11:36:28 simonmar Exp $ + * $Id: RtsFlags.c,v 1.52 2001/11/22 14:25:12 simonmar Exp $ * * (c) The AQUA Project, Glasgow University, 1994-1997 * (c) The GHC Team, 1998-1999 @@ -250,12 +250,13 @@ void initRtsFlagsDefaults(void) #ifdef PROFILING RtsFlags.ProfFlags.doHeapProfile = rtsFalse; - RtsFlags.ProfFlags.profileFrequency = 20; + RtsFlags.ProfFlags.profileInterval = 20; RtsFlags.ProfFlags.showCCSOnException = rtsFalse; RtsFlags.ProfFlags.modSelector = NULL; RtsFlags.ProfFlags.descrSelector = NULL; RtsFlags.ProfFlags.typeSelector = NULL; RtsFlags.ProfFlags.ccSelector = NULL; + #elif defined(DEBUG) RtsFlags.ProfFlags.doHeapProfile = rtsFalse; #endif @@ -417,6 +418,8 @@ usage_text[] = { " -h<break-down> Heap residency profile (text) (output file <program>.prof)", " break-down: C = cost centre stack (default), M = module", " D = closure description, Y = type description", +" -hR Retainer profile (output files <program>.hp)", +" -hL Lag/Drag/Void/Use profile (output files <program>.hp)", " A subset of closures may be selected thusly:", " -hc{cc, cc ...} specific cost centre(s) (NOT STACKS!)", " -hm{mod,mod...} all cost centres from the specified modules(s)", @@ -838,18 +841,53 @@ error = rtsTrue; PROFILING_BUILD_ONLY( switch (rts_argv[arg][2]) { case '\0': - case CCchar: - RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_CCS; - break; - case MODchar: - 
RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_MOD; - break; - case DESCRchar: - RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_DESCR; - break; - case TYPEchar: - RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_TYPE; - break; + case 'C': + if (RtsFlags.ProfFlags.doHeapProfile == 0) { + RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_CCS; + break; + } else { + goto many_hps; + } + case 'M': + if (RtsFlags.ProfFlags.doHeapProfile == 0) { + RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_MOD; + break; + } else { + goto many_hps; + } + case 'D': + if (RtsFlags.ProfFlags.doHeapProfile == 0) { + RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_DESCR; + break; + } else { + goto many_hps; + } + case 'Y': + if (RtsFlags.ProfFlags.doHeapProfile == 0) { + RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_TYPE; + break; + } else { + goto many_hps; + } + case 'R': + if (RtsFlags.ProfFlags.doHeapProfile == 0) { + RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_RETAINER; + break; + } else { + goto many_hps; + } + case 'L': + if (RtsFlags.ProfFlags.doHeapProfile == 0) { + RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_LDV; + break; + } else { + goto many_hps; + } + many_hps: + prog_belch("multiple heap profile options"); + error = rtsTrue; + break; + case 'c': /* cost centre label select */ case 'm': /* cost centre module select */ case 'd': /* closure descr select */ @@ -904,11 +942,9 @@ error = rtsTrue; if (cst != 0 && cst < CS_MIN_MILLISECS) cst = CS_MIN_MILLISECS; - RtsFlags.ProfFlags.profileFrequency = cst; + RtsFlags.ProfFlags.profileInterval = cst; } - break; - #endif /* =========== CONCURRENT ========================= */ diff --git a/ghc/rts/RtsStartup.c b/ghc/rts/RtsStartup.c index 87c804fd17..69de672294 100644 --- a/ghc/rts/RtsStartup.c +++ b/ghc/rts/RtsStartup.c @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: RtsStartup.c,v 1.55 2001/11/08 12:46:31 simonmar Exp $ + * $Id: RtsStartup.c,v 1.56 2001/11/22 14:25:12 simonmar Exp $ * * (c) The GHC Team, 1998-2000 
* @@ -33,6 +33,7 @@ #if defined(PROFILING) || defined(DEBUG) # include "Profiling.h" # include "ProfHeap.h" +# include "RetainerProfile.h" #endif #if defined(GRAN) @@ -84,7 +85,7 @@ getProgArgv(int *argc, char **argv[]) void startupHaskell(int argc, char *argv[], void (*init_root)(void)) { - /* To avoid repeated initialisations of the RTS */ + /* To avoid repeated initialisations of the RTS */ if (rts_has_started_up) return; else @@ -218,13 +219,14 @@ startupHaskell(int argc, char *argv[], void (*init_root)(void)) /* The init functions use an explicit stack... */ -#define INIT_STACK_SIZE (BLOCK_SIZE * 4) +#define INIT_STACK_BLOCKS 4 F_ *init_stack = NULL; nat init_sp = 0; static void initModules ( void (*init_root)(void) ) { + bdescr *bd; #ifdef SMP Capability cap; #else @@ -232,7 +234,8 @@ initModules ( void (*init_root)(void) ) #endif init_sp = 0; - init_stack = (F_ *)allocate(INIT_STACK_SIZE / sizeof(W_)); + bd = allocGroup(4); + init_stack = (F_ *)bd->start; init_stack[init_sp++] = (F_)stg_init_ret; init_stack[init_sp++] = (F_)__stginit_Prelude; if (init_root != NULL) { @@ -241,6 +244,8 @@ initModules ( void (*init_root)(void) ) cap.r.rSp = (P_)(init_stack + init_sp); StgRun((StgFunPtr)stg_init, &cap.r); + + freeGroup(bd); } /* ----------------------------------------------------------------------------- @@ -272,6 +277,26 @@ shutdownHaskell(void) /* start timing the shutdown */ stat_startExit(); +#ifdef PROFILING + // @LDV profiling + // + // Note: + // We do not need to perform a major garbage collection because all the + // closures created since the last census will not affect the profiling + // statistics anyhow. + // + // Note: + // We ignore any object created afterwards. + // finalizeWeakPointersNow() may corrupt the heap (because it executes + // rts_evalIO(), which adds an initial evaluation stack again). + // Thus, we call LdvCensusKillAll() here, and prohibit LDV profiling + // afterwards. 
+ // Acutally, it is pointless to call LdvCensusKillAll() any later because + // no object created later will be taken into account for profiling. + if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) + LdvCensusKillAll(); +#endif + #if !defined(GRAN) /* Finalize any remaining weak pointers */ finalizeWeakPointersNow(); @@ -316,12 +341,19 @@ shutdownHaskell(void) } #endif +#if defined(PROFILING) + report_ccs_profiling(); +#endif + #if defined(PROFILING) || defined(DEBUG) endProfiling(); #endif -#if defined(PROFILING) - report_ccs_profiling(); +#ifdef PROFILING + // Originally, this was in report_ccs_profiling(). Now, retainer + // profiling might tack some extra stuff on to the end of this file + // during endProfiling(). + fclose(prof_file); #endif #if defined(TICKY_TICKY) diff --git a/ghc/rts/Schedule.c b/ghc/rts/Schedule.c index ec0ac22b9f..256aab93a2 100644 --- a/ghc/rts/Schedule.c +++ b/ghc/rts/Schedule.c @@ -1,5 +1,5 @@ /* --------------------------------------------------------------------------- - * $Id: Schedule.c,v 1.106 2001/11/08 16:17:35 simonmar Exp $ + * $Id: Schedule.c,v 1.107 2001/11/22 14:25:12 simonmar Exp $ * * (c) The GHC Team, 1998-2000 * @@ -95,6 +95,12 @@ #include "Stats.h" #include "Itimer.h" #include "Prelude.h" +#ifdef PROFILING +#include "Proftimer.h" +#include "ProfHeap.h" +#include "RetainerProfile.h" +#include "LdvProfile.h" +#endif #if defined(GRAN) || defined(PAR) # include "GranSimRts.h" # include "GranSim.h" @@ -181,7 +187,6 @@ StgTSO *all_threads; */ static StgTSO *suspended_ccalling_threads; -static void GetRoots(evac_fn); static StgTSO *threadStackOverflow(StgTSO *tso); /* KH: The following two flags are shared memory locations. 
There is no need @@ -923,10 +928,14 @@ schedule( void ) * the user specified "context switch as often as possible", with * +RTS -C0 */ - if (RtsFlags.ConcFlags.ctxtSwitchTicks == 0 - && (run_queue_hd != END_TSO_QUEUE - || blocked_queue_hd != END_TSO_QUEUE - || sleeping_queue != END_TSO_QUEUE)) + if ( +#ifdef PROFILING + RtsFlags.ProfFlags.profileInterval == 0 || +#endif + (RtsFlags.ConcFlags.ctxtSwitchTicks == 0 + && (run_queue_hd != END_TSO_QUEUE + || blocked_queue_hd != END_TSO_QUEUE + || sleeping_queue != END_TSO_QUEUE))) context_switch = 1; else context_switch = 0; @@ -936,6 +945,10 @@ schedule( void ) IF_DEBUG(scheduler, sched_belch("-->> Running TSO %ld (%p) %s ...", t->id, t, whatNext_strs[t->what_next])); +#ifdef PROFILING + startHeapProfTimer(); +#endif + /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */ /* Run the current thread */ @@ -961,6 +974,7 @@ schedule( void ) /* Costs for the scheduler are assigned to CCS_SYSTEM */ #ifdef PROFILING + stopHeapProfTimer(); CCCS = CCS_SYSTEM; #endif @@ -1262,6 +1276,39 @@ schedule( void ) n_free_capabilities++; #endif +#ifdef PROFILING + if (RtsFlags.ProfFlags.profileInterval==0 || performHeapProfile) { + if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) { + // + // Note: currently retainer profiling is performed after + // a major garbage collection. + // + GarbageCollect(GetRoots, rtsTrue); + retainerProfile(); + } else if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) { + // + // We have LdvCensus() preceded by a major garbage + // collection because we don't want *genuinely* dead + // closures to be involved in LDV profiling. Another good + // reason is to produce consistent profiling results + // regardless of the interval at which GCs are performed. + // In other words, we want LDV profiling results to be + // completely independent of the GC interval. 
+ // + GarbageCollect(GetRoots, rtsTrue); + LdvCensus(); + } else { + // + // Normal creator-based heap profile + // + GarbageCollect(GetRoots, rtsTrue); + heapCensus(); + } + performHeapProfile = rtsFalse; + ready_to_gc = rtsFalse; // we already GC'd + } +#endif + #ifdef SMP if (ready_to_gc && n_free_capabilities == RtsFlags.ParFlags.nNodes) #else @@ -2170,7 +2217,7 @@ take_off_run_queue(StgTSO *tso) { KH @ 25/10/99 */ -static void +void GetRoots(evac_fn evac) { StgMainThread *m; diff --git a/ghc/rts/Schedule.h b/ghc/rts/Schedule.h index f8976ebb00..b81087d9a2 100644 --- a/ghc/rts/Schedule.h +++ b/ghc/rts/Schedule.h @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: Schedule.h,v 1.24 2001/11/13 13:38:02 simonmar Exp $ + * $Id: Schedule.h,v 1.25 2001/11/22 14:25:12 simonmar Exp $ * * (c) The GHC Team 1998-1999 * @@ -93,6 +93,15 @@ void awaitEvent(rtsBool wait); /* In Select.c */ */ rtsBool wakeUpSleepingThreads(nat); /* In Select.c */ +/* GetRoots(evac_fn f) + * + * Call f() for each root known to the scheduler. + * + * Called from STG : NO + * Locks assumed : ???? 
+ */ +void GetRoots(evac_fn); + // ToDo: check whether all fcts below are used in the SMP version, too //@cindex awaken_blocked_queue #if defined(GRAN) diff --git a/ghc/rts/Stats.c b/ghc/rts/Stats.c index 1ebd5a7bf7..a82f395094 100644 --- a/ghc/rts/Stats.c +++ b/ghc/rts/Stats.c @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: Stats.c,v 1.35 2001/11/20 21:39:12 sof Exp $ + * $Id: Stats.c,v 1.36 2001/11/22 14:25:12 simonmar Exp $ * * (c) The GHC Team, 1998-1999 * @@ -19,6 +19,7 @@ #include "Schedule.h" #include "Stats.h" #include "ParTicky.h" // ToDo: move into Rts.h +#include "Profiling.h" #ifdef HAVE_UNISTD_H #include <unistd.h> @@ -93,8 +94,14 @@ static TICK_TYPE ExitElapsedTime = 0; static ullong GC_tot_alloc = 0; static ullong GC_tot_copied = 0; -static TICK_TYPE GC_start_time, GC_tot_time = 0; /* User GC Time */ -static TICK_TYPE GCe_start_time, GCe_tot_time = 0; /* Elapsed GC time */ +static TICK_TYPE GC_start_time, GC_tot_time = 0; // User GC Time +static TICK_TYPE GCe_start_time, GCe_tot_time = 0; // Elapsed GC time + +static TICK_TYPE RP_start_time, RP_tot_time = 0; // retainer prof user time +static TICK_TYPE RPe_start_time, RPe_tot_time = 0; // retainer prof elap time + +static TICK_TYPE LDV_start_time, LDV_tot_time = 0; // LDV prof user time +static TICK_TYPE LDVe_start_time, LDVe_tot_time = 0; // LDV prof elap time lnat MaxResidency = 0; /* in words; for stats only */ lnat AvgResidency = 0; @@ -210,16 +217,33 @@ getTimes(void) * stat_startGC() for details) */ double -mut_user_time_during_GC(void) +mut_user_time_during_GC( void ) { - return TICK_TO_DBL(GC_start_time - GC_tot_time); + return TICK_TO_DBL(GC_start_time - GC_tot_time - RP_tot_time - LDV_tot_time); } double -mut_user_time(void) +mut_user_time( void ) { getTimes(); - return TICK_TO_DBL(CurrentUserTime - GC_tot_time); + return TICK_TO_DBL(CurrentUserTime - GC_tot_time - RP_tot_time - LDV_tot_time); +} + +/* + mut_user_time_during_RP() is 
similar to mut_user_time_during_GC(); + it returns the MUT time during retainer profiling. + The same is for mut_user_time_during_LDV(); + */ +double +mut_user_time_during_RP( void ) +{ + return TICK_TO_DBL(RP_start_time - GC_tot_time - RP_tot_time - LDV_tot_time); +} + +double +mut_user_time_during_LDV( void ) +{ + return TICK_TO_DBL(LDV_start_time - GC_tot_time - RP_tot_time - LDV_tot_time); } static nat @@ -332,7 +356,7 @@ stat_startExit(void) #ifdef SMP MutUserTime = CurrentUserTime; #else - MutUserTime = CurrentUserTime - GC_tot_time - InitUserTime; + MutUserTime = CurrentUserTime - GC_tot_time - RP_tot_time - LDV_tot_time - InitUserTime; if (MutUserTime < 0) { MutUserTime = 0; } #endif } @@ -344,7 +368,7 @@ stat_endExit(void) #ifdef SMP ExitUserTime = CurrentUserTime - MutUserTime; #else - ExitUserTime = CurrentUserTime - MutUserTime - GC_tot_time - InitUserTime; + ExitUserTime = CurrentUserTime - MutUserTime - GC_tot_time - RP_tot_time - LDV_tot_time - InitUserTime; #endif ExitElapsedTime = CurrentElapsedTime - MutElapsedStamp; if (ExitUserTime < 0) { @@ -471,6 +495,64 @@ stat_endGC(lnat alloc, lnat collect, lnat live, lnat copied, lnat gen) } /* ----------------------------------------------------------------------------- + Called at the beginning of each Retainer Profiliing + -------------------------------------------------------------------------- */ +void stat_startRP(void) +{ + getTimes(); + RP_start_time = CurrentUserTime; + RPe_start_time = CurrentElapsedTime; +} + +/* ----------------------------------------------------------------------------- + Called at the end of each Retainer Profiliing + -------------------------------------------------------------------------- */ +void stat_endRP( + nat retainerGeneration, +#ifdef DEBUG_RETAINER + nat maxCStackSize, + int maxStackSize, +#endif + double averageNumVisit, + nat allCost, + nat numSet) +{ + getTimes(); + RP_tot_time += CurrentUserTime - RP_start_time; + RPe_tot_time += CurrentElapsedTime - 
RPe_start_time; + + fprintf(prof_file, "Retainer Profiling: %d, at %f seconds\n", + retainerGeneration, mut_user_time_during_RP()); +#ifdef DEBUG_RETAINER + fprintf(prof_file, "\tMax C stack size = %u\n", maxCStackSize); + fprintf(prof_file, "\tMax auxiliary stack size = %u\n", maxStackSize); +#endif + fprintf(prof_file, "\tAverage number of visits per object = %f\n", averageNumVisit); + fprintf(prof_file, "\tCurrent total costs in bytes = %u\n", allCost * sizeof(StgWord)); + fprintf(prof_file, "\tNumber of retainer sets = %u\n\n", numSet); +} + +/* ----------------------------------------------------------------------------- + Called at the beginning of each LDV Profiliing + -------------------------------------------------------------------------- */ +void stat_startLDV(void) +{ + getTimes(); + LDV_start_time = CurrentUserTime; + LDVe_start_time = CurrentElapsedTime; +} + +/* ----------------------------------------------------------------------------- + Called at the end of each LDV Profiliing + -------------------------------------------------------------------------- */ +void stat_endLDV(void) +{ + getTimes(); + LDV_tot_time += CurrentUserTime - LDV_start_time; + LDVe_tot_time += CurrentElapsedTime - LDVe_start_time; +} + +/* ----------------------------------------------------------------------------- stat_workerStop Called under SMP when a worker thread finishes. 
We drop the timing @@ -598,6 +680,14 @@ stat_exit(int alloc) TICK_TO_DBL(MutUserTime), TICK_TO_DBL(MutElapsedTime)); fprintf(sf, " GC time %6.2fs (%6.2fs elapsed)\n", TICK_TO_DBL(GC_tot_time), TICK_TO_DBL(GCe_tot_time)); +#ifdef PROFILING + if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) + fprintf(sf, " RP time %6.2fs (%6.2fs elapsed)\n", + TICK_TO_DBL(RP_tot_time), TICK_TO_DBL(RPe_tot_time)); + if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) + fprintf(sf, " LDV time %6.2fs (%6.2fs elapsed)\n", + TICK_TO_DBL(LDV_tot_time), TICK_TO_DBL(LDVe_tot_time)); +#endif fprintf(sf, " EXIT time %6.2fs (%6.2fs elapsed)\n", TICK_TO_DBL(ExitUserTime), TICK_TO_DBL(ExitElapsedTime)); fprintf(sf, " Total time %6.2fs (%6.2fs elapsed)\n\n", @@ -606,20 +696,20 @@ stat_exit(int alloc) TICK_TO_DBL(GC_tot_time)*100/TICK_TO_DBL(time), TICK_TO_DBL(GCe_tot_time)*100/TICK_TO_DBL(etime)); - if (time - GC_tot_time == 0) + if (time - GC_tot_time - RP_tot_time - LDV_tot_time == 0) ullong_format_string(0, temp, rtsTrue/*commas*/); else ullong_format_string( (ullong)((GC_tot_alloc*sizeof(W_))/ - TICK_TO_DBL(time - GC_tot_time)), + TICK_TO_DBL(time - GC_tot_time - RP_tot_time - LDV_tot_time)), temp, rtsTrue/*commas*/); fprintf(sf, " Alloc rate %s bytes per MUT second\n\n", temp); fprintf(sf, " Productivity %5.1f%% of total user, %.1f%% of total elapsed\n\n", - TICK_TO_DBL(time - GC_tot_time - InitUserTime) * 100 + TICK_TO_DBL(time - GC_tot_time - RP_tot_time - LDV_tot_time - InitUserTime) * 100 / TICK_TO_DBL(time), - TICK_TO_DBL(time - GC_tot_time - InitUserTime) * 100 + TICK_TO_DBL(time - GC_tot_time - RP_tot_time - LDV_tot_time - InitUserTime) * 100 / TICK_TO_DBL(etime)); } diff --git a/ghc/rts/Stats.h b/ghc/rts/Stats.h index b5c9826a8b..535bef3a08 100644 --- a/ghc/rts/Stats.h +++ b/ghc/rts/Stats.h @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: Stats.h,v 1.11 2001/07/23 17:23:20 simonmar Exp $ + * $Id: Stats.h,v 1.12 
2001/11/22 14:25:12 simonmar Exp $ * * (c) The GHC Team, 1998-1999 * @@ -14,6 +14,16 @@ extern void stat_startGC(void); extern void stat_endGC(lnat alloc, lnat collect, lnat live, lnat copied, lnat gen); +extern void stat_startRP(void); +extern void stat_endRP(nat, +#ifdef DEBUG_RETAINER + nat, int, +#endif + double, nat, nat); + +extern void stat_startLDV(void); +extern void stat_endLDV(void); + extern void stat_startExit(void); extern void stat_endExit(void); @@ -23,6 +33,11 @@ extern void stat_workerStop(void); extern void initStats(void); extern double mut_user_time_during_GC(void); +#ifdef PROFILING +// @retainer profiling +extern double mut_user_time_during_RP(void); +extern double mut_user_time_during_LDV(void); +#endif extern double mut_user_time(void); extern void statDescribeGens( void ); diff --git a/ghc/rts/StgMiscClosures.hc b/ghc/rts/StgMiscClosures.hc index de36bea1ff..63da5b16d5 100644 --- a/ghc/rts/StgMiscClosures.hc +++ b/ghc/rts/StgMiscClosures.hc @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: StgMiscClosures.hc,v 1.69 2001/11/08 12:46:31 simonmar Exp $ + * $Id: StgMiscClosures.hc,v 1.70 2001/11/22 14:25:12 simonmar Exp $ * * (c) The GHC Team, 1998-2000 * @@ -288,19 +288,18 @@ STGFUN(stg_BCO_entry) { Entry code for an indirection. 
-------------------------------------------------------------------------- */ -INFO_TABLE(stg_IND_info,stg_IND_entry,1,0,IND,,EF_,0,0); +INFO_TABLE(stg_IND_info,stg_IND_entry,1,0,IND,,EF_,"IND","IND"); STGFUN(stg_IND_entry) { FB_ TICK_ENT_IND(Node); /* tick */ - R1.p = (P_) ((StgInd*)R1.p)->indirectee; TICK_ENT_VIA_NODE(); JMP_(ENTRY_CODE(*R1.p)); FE_ } -INFO_TABLE(stg_IND_STATIC_info,stg_IND_STATIC_entry,1,0,IND_STATIC,,EF_,0,0); +INFO_TABLE(stg_IND_STATIC_info,stg_IND_STATIC_entry,1,0,IND_STATIC,,EF_,"IND_STATIC","IND_STATIC"); STGFUN(stg_IND_STATIC_entry) { FB_ @@ -323,6 +322,8 @@ STGFUN(stg_IND_PERM_entry) TICK_ENT_PERM_IND(R1.p); /* tick */ #endif + LDV_ENTER((StgInd *)R1.p); + /* Enter PAP cost centre -- lexical scoping only */ ENTER_CCS_PAP_CL(R1.cl); @@ -353,19 +354,18 @@ STGFUN(stg_IND_PERM_entry) FE_ } -INFO_TABLE(stg_IND_OLDGEN_info,stg_IND_OLDGEN_entry,1,1,IND_OLDGEN,,EF_,0,0); +INFO_TABLE(stg_IND_OLDGEN_info,stg_IND_OLDGEN_entry,1,1,IND_OLDGEN,,EF_,"IND_OLDGEN","IND_OLDGEN"); STGFUN(stg_IND_OLDGEN_entry) { FB_ TICK_ENT_IND(Node); /* tick */ - R1.p = (P_) ((StgInd*)R1.p)->indirectee; TICK_ENT_VIA_NODE(); JMP_(ENTRY_CODE(*R1.p)); FE_ } -INFO_TABLE(stg_IND_OLDGEN_PERM_info,stg_IND_OLDGEN_PERM_entry,1,1,IND_OLDGEN_PERM,,EF_,0,0); +INFO_TABLE(stg_IND_OLDGEN_PERM_info,stg_IND_OLDGEN_PERM_entry,1,1,IND_OLDGEN_PERM,,EF_,"IND_OLDGEN_PERM","IND_OLDGEN_PERM"); STGFUN(stg_IND_OLDGEN_PERM_entry) { FB_ @@ -375,7 +375,9 @@ STGFUN(stg_IND_OLDGEN_PERM_entry) /* TICKY_TICKY && !PROFILING means PERM_IND *replaces* an IND, rather than being extra */ TICK_ENT_PERM_IND(R1.p); /* tick */ #endif - + + LDV_ENTER((StgInd *)R1.p); + /* Enter PAP cost centre -- lexical scoping only */ ENTER_CCS_PAP_CL(R1.cl); @@ -433,7 +435,10 @@ STGFUN(stg_BLACKHOLE_entry) #endif TICK_ENT_BH(); - // Put ourselves on the blocking queue for this black hole + // Actually this is not necessary because R1.p is about to be destroyed. 
+ LDV_ENTER((StgClosure *)R1.p); + + /* Put ourselves on the blocking queue for this black hole */ #if defined(GRAN) || defined(PAR) // in fact, only difference is the type of the end-of-queue marker! CurrentTSO->link = END_BQ_QUEUE; @@ -446,8 +451,19 @@ STGFUN(stg_BLACKHOLE_entry) CurrentTSO->why_blocked = BlockedOnBlackHole; CurrentTSO->block_info.closure = R1.cl; - // Change the CAF_BLACKHOLE into a BLACKHOLE_BQ_STATIC + /* Change the BLACKHOLE into a BLACKHOLE_BQ */ +#ifdef PROFILING + + // The size remains the same, so we call LDV_recordDead() - no need to fill slop. + LDV_recordDead((StgClosure *)R1.p, BLACKHOLE_sizeW()); +#endif + // + // Todo: maybe use SET_HDR() and remove LDV_recordCreate()? + // ((StgBlockingQueue *)R1.p)->header.info = &stg_BLACKHOLE_BQ_info; +#ifdef PROFILING + LDV_recordCreate((StgClosure *)R1.p); +#endif // closure is mutable since something has just been added to its BQ recordMutable((StgMutClosure *)R1.cl); @@ -483,6 +499,7 @@ STGFUN(stg_BLACKHOLE_BQ_entry) #endif TICK_ENT_BH(); + LDV_ENTER((StgClosure *)R1.p); /* Put ourselves on the blocking queue for this black hole */ CurrentTSO->link = ((StgBlockingQueue *)R1.p)->blocking_queue; @@ -515,7 +532,7 @@ STGFUN(stg_BLACKHOLE_BQ_entry) #if defined(PAR) || defined(GRAN) -INFO_TABLE(stg_RBH_info, stg_RBH_entry,1,1,RBH,,EF_,0,0); +INFO_TABLE(stg_RBH_info, stg_RBH_entry,1,1,RBH,,EF_,"RBH","RBH"); STGFUN(stg_RBH_entry) { FB_ @@ -539,13 +556,13 @@ STGFUN(stg_RBH_entry) FE_ } -INFO_TABLE(stg_RBH_Save_0_info, stg_RBH_Save_0_entry,0,2,CONSTR,,EF_,0,0); +INFO_TABLE(stg_RBH_Save_0_info, stg_RBH_Save_0_entry,0,2,CONSTR,,EF_,"RBH_Save_0","RBH_Save_0"); NON_ENTERABLE_ENTRY_CODE(RBH_Save_0); -INFO_TABLE(stg_RBH_Save_1_info, stg_RBH_Save_1_entry,1,1,CONSTR,,EF_,0,0); +INFO_TABLE(stg_RBH_Save_1_info, stg_RBH_Save_1_entry,1,1,CONSTR,,EF_,"RBH_Save_1","RBH_Save_1"); NON_ENTERABLE_ENTRY_CODE(RBH_Save_1); -INFO_TABLE(stg_RBH_Save_2_info, stg_RBH_Save_2_entry,2,0,CONSTR,,EF_,0,0); 
+INFO_TABLE(stg_RBH_Save_2_info, stg_RBH_Save_2_entry,2,0,CONSTR,,EF_,"RBH_Save_2","RBH_Save_2"); NON_ENTERABLE_ENTRY_CODE(RBH_Save_2); #endif /* defined(PAR) || defined(GRAN) */ @@ -574,6 +591,7 @@ STGFUN(stg_CAF_BLACKHOLE_entry) #endif TICK_ENT_BH(); + LDV_ENTER((StgClosure *)R1.p); // Put ourselves on the blocking queue for this black hole #if defined(GRAN) || defined(PAR) @@ -602,7 +620,7 @@ STGFUN(stg_CAF_BLACKHOLE_entry) } #ifdef TICKY_TICKY -INFO_TABLE(stg_SE_BLACKHOLE_info, stg_SE_BLACKHOLE_entry,0,2,SE_BLACKHOLE,,EF_,0,0); +INFO_TABLE(stg_SE_BLACKHOLE_info, stg_SE_BLACKHOLE_entry,0,2,SE_BLACKHOLE,,EF_,"SE_BLACKHOLE","SE_BLACKHOLE"); STGFUN(stg_SE_BLACKHOLE_entry) { FB_ @@ -611,7 +629,7 @@ STGFUN(stg_SE_BLACKHOLE_entry) FE_ } -INFO_TABLE(SE_CAF_BLACKHOLE_info, SE_CAF_BLACKHOLE_entry,0,2,SE_CAF_BLACKHOLE,,EF_,0,0); +INFO_TABLE(SE_CAF_BLACKHOLE_info, SE_CAF_BLACKHOLE_entry,0,2,SE_CAF_BLACKHOLE,,EF_,"CAF_BLACKHOLE","CAF_BLACKHOLE"); STGFUN(stg_SE_CAF_BLACKHOLE_entry) { FB_ @@ -622,7 +640,7 @@ STGFUN(stg_SE_CAF_BLACKHOLE_entry) #endif #ifdef SMP -INFO_TABLE(stg_WHITEHOLE_info, stg_WHITEHOLE_entry,0,2,CONSTR_NOCAF_STATIC,,EF_,0,0); +INFO_TABLE(stg_WHITEHOLE_info, stg_WHITEHOLE_entry,0,2,CONSTR_NOCAF_STATIC,,EF_,"WHITEHOLE","WHITEHOLE"); STGFUN(stg_WHITEHOLE_entry) { FB_ @@ -645,7 +663,7 @@ NON_ENTERABLE_ENTRY_CODE(TSO); one is a real bug. 
-------------------------------------------------------------------------- */ -INFO_TABLE(stg_EVACUATED_info,stg_EVACUATED_entry,1,0,EVACUATED,,EF_,0,0); +INFO_TABLE(stg_EVACUATED_info,stg_EVACUATED_entry,1,0,EVACUATED,,EF_,"EVACUATED","EVACUATED"); NON_ENTERABLE_ENTRY_CODE(EVACUATED); /* ----------------------------------------------------------------------------- @@ -659,7 +677,19 @@ NON_ENTERABLE_ENTRY_CODE(EVACUATED); INFO_TABLE(stg_WEAK_info,stg_WEAK_entry,0,4,WEAK,,EF_,"WEAK","WEAK"); NON_ENTERABLE_ENTRY_CODE(WEAK); -INFO_TABLE_CONSTR(stg_DEAD_WEAK_info,stg_DEAD_WEAK_entry,0,1,0,CONSTR,,EF_,"DEAD_WEAK","DEAD_WEAK"); +// XXX! The garbage collector replaces a WEAK with a DEAD_WEAK +// in-place, which causes problems if the heap is scanned linearly +// after GC (certain kinds of profiling do this). So when profiling, +// we set the size of a DEAD_WEAK to 4 non-pointers, rather than its +// usual 1. + +#ifdef PROFILING +#define DEAD_WEAK_PAYLOAD_WORDS 4 +#else +#define DEAD_WEAK_PAYLOAD_WORDS 1 +#endif + +INFO_TABLE_CONSTR(stg_DEAD_WEAK_info,stg_DEAD_WEAK_entry,0,DEAD_WEAK_PAYLOAD_WORDS,0,CONSTR,,EF_,"DEAD_WEAK","DEAD_WEAK"); NON_ENTERABLE_ENTRY_CODE(DEAD_WEAK); /* ----------------------------------------------------------------------------- @@ -669,7 +699,7 @@ NON_ENTERABLE_ENTRY_CODE(DEAD_WEAK); finalizer in a weak pointer object. -------------------------------------------------------------------------- */ -INFO_TABLE_CONSTR(stg_NO_FINALIZER_info,stg_NO_FINALIZER_entry,0,0,0,CONSTR_NOCAF_STATIC,,EF_,0,0); +INFO_TABLE_CONSTR(stg_NO_FINALIZER_info,stg_NO_FINALIZER_entry,0,0,0,CONSTR_NOCAF_STATIC,,EF_,"NO_FINALIZER","NO_FINALIZER"); NON_ENTERABLE_ENTRY_CODE(NO_FINALIZER); SET_STATIC_HDR(stg_NO_FINALIZER_closure,stg_NO_FINALIZER_info,0/*CC*/,,EI_) @@ -709,7 +739,7 @@ NON_ENTERABLE_ENTRY_CODE(EMPTY_MVAR); end of a linked TSO queue. 
-------------------------------------------------------------------------- */ -INFO_TABLE_CONSTR(stg_END_TSO_QUEUE_info,stg_END_TSO_QUEUE_entry,0,0,0,CONSTR_NOCAF_STATIC,,EF_,0,0); +INFO_TABLE_CONSTR(stg_END_TSO_QUEUE_info,stg_END_TSO_QUEUE_entry,0,0,0,CONSTR_NOCAF_STATIC,,EF_,"END_TSO_QUEUE","END_TSO_QUEUE"); NON_ENTERABLE_ENTRY_CODE(END_TSO_QUEUE); SET_STATIC_HDR(stg_END_TSO_QUEUE_closure,stg_END_TSO_QUEUE_info,0/*CC*/,,EI_) @@ -723,26 +753,26 @@ SET_STATIC_HDR(stg_END_TSO_QUEUE_closure,stg_END_TSO_QUEUE_info,0/*CC*/,,EI_) an END_MUT_LIST closure. -------------------------------------------------------------------------- */ -INFO_TABLE_CONSTR(stg_END_MUT_LIST_info,stg_END_MUT_LIST_entry,0,0,0,CONSTR_NOCAF_STATIC,,EF_,0,0); +INFO_TABLE_CONSTR(stg_END_MUT_LIST_info,stg_END_MUT_LIST_entry,0,0,0,CONSTR_NOCAF_STATIC,,EF_,"END_MUT_LIST","END_MUT_LIST"); NON_ENTERABLE_ENTRY_CODE(END_MUT_LIST); SET_STATIC_HDR(stg_END_MUT_LIST_closure,stg_END_MUT_LIST_info,0/*CC*/,,EI_) , /*payload*/{} }; -INFO_TABLE(stg_MUT_CONS_info, stg_MUT_CONS_entry, 1, 1, MUT_CONS, , EF_, 0, 0); +INFO_TABLE(stg_MUT_CONS_info, stg_MUT_CONS_entry, 1, 1, MUT_CONS, , EF_, "MUT_CONS", "MUT_CONS"); NON_ENTERABLE_ENTRY_CODE(MUT_CONS); /* ----------------------------------------------------------------------------- Exception lists -------------------------------------------------------------------------- */ -INFO_TABLE_CONSTR(stg_END_EXCEPTION_LIST_info,stg_END_EXCEPTION_LIST_entry,0,0,0,CONSTR_NOCAF_STATIC,,EF_,0,0); +INFO_TABLE_CONSTR(stg_END_EXCEPTION_LIST_info,stg_END_EXCEPTION_LIST_entry,0,0,0,CONSTR_NOCAF_STATIC,,EF_,"END_EXCEPTION_LIST","END_EXCEPTION_LIST"); NON_ENTERABLE_ENTRY_CODE(END_EXCEPTION_LIST); SET_STATIC_HDR(stg_END_EXCEPTION_LIST_closure,stg_END_EXCEPTION_LIST_info,0/*CC*/,,EI_) , /*payload*/{} }; -INFO_TABLE(stg_EXCEPTION_CONS_info, stg_EXCEPTION_CONS_entry, 1, 1, CONSTR, , EF_, 0, 0); +INFO_TABLE(stg_EXCEPTION_CONS_info, stg_EXCEPTION_CONS_entry, 1, 1, CONSTR, , EF_, "EXCEPTION_CONS", 
"EXCEPTION_CONS"); NON_ENTERABLE_ENTRY_CODE(EXCEPTION_CONS); /* ----------------------------------------------------------------------------- @@ -804,7 +834,7 @@ STGFUN(stg_error_entry) \ just enter the top stack word to start the thread. (see deleteThread) * -------------------------------------------------------------------------- */ -INFO_TABLE(stg_dummy_ret_info, stg_dummy_ret_entry, 0, 0, CONSTR_NOCAF_STATIC, , EF_, 0, 0); +INFO_TABLE(stg_dummy_ret_info, stg_dummy_ret_entry, 0, 0, CONSTR_NOCAF_STATIC, , EF_, "DUMMY_RET", "DUMMY_RET"); STGFUN(stg_dummy_ret_entry) { W_ ret_addr; @@ -855,7 +885,7 @@ STGFUN(stg_forceIO_ret_entry) } #endif -INFO_TABLE(stg_forceIO_info,stg_forceIO_entry,1,0,FUN_STATIC,,EF_,0,0); +INFO_TABLE(stg_forceIO_info,stg_forceIO_entry,1,0,FUN_STATIC,,EF_,"FORCE_IO","FORCE_IO"); FN_(stg_forceIO_entry) { FB_ diff --git a/ghc/rts/StgStartup.hc b/ghc/rts/StgStartup.hc index 92dc7018db..5920e5ec08 100644 --- a/ghc/rts/StgStartup.hc +++ b/ghc/rts/StgStartup.hc @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: StgStartup.hc,v 1.16 2001/09/04 18:29:21 ken Exp $ + * $Id: StgStartup.hc,v 1.17 2001/11/22 14:25:12 simonmar Exp $ * * (c) The GHC Team, 1998-1999 * @@ -45,8 +45,8 @@ EXTFUN(stg_stop_thread_entry); -#ifdef PROFILING -#define STOP_THREAD_BITMAP 1 +#if defined(PROFILING) +#define STOP_THREAD_BITMAP 3 #else #define STOP_THREAD_BITMAP 0 #endif diff --git a/ghc/rts/StgStdThunks.hc b/ghc/rts/StgStdThunks.hc index 9373dab228..c3d77ac59a 100644 --- a/ghc/rts/StgStdThunks.hc +++ b/ghc/rts/StgStdThunks.hc @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: StgStdThunks.hc,v 1.17 2001/11/08 12:46:31 simonmar Exp $ + * $Id: StgStdThunks.hc,v 1.18 2001/11/22 14:25:12 simonmar Exp $ * * (c) The GHC Team, 1998-2000 * @@ -32,7 +32,7 @@ #define SAVE_CCCS(fs) CCS_HDR(Sp-fs)=CCCS #define GET_SAVED_CCCS RESTORE_CCCS(CCS_HDR(Sp)) #define ENTER_CCS(p) 
ENTER_CCS_TCL(p) -#define RET_BITMAP 1 +#define RET_BITMAP 3 #else #define SAVE_CCCS(fs) /* empty */ #define GET_SAVED_CCCS /* empty */ @@ -58,6 +58,7 @@ FB_ \ STK_CHK_NP(UPD_FRAME_SIZE,1,); \ UPD_BH_UPDATABLE(&stg_sel_##offset##_upd_info); \ + LDV_ENTER(R1.cl); \ PUSH_UPD_FRAME(R1.p,0); \ ENTER_CCS(R1.p); \ SAVE_CCCS(UPD_FRAME_SIZE); \ @@ -103,6 +104,7 @@ SELECTOR_CODE_UPD(15); FB_ \ STK_CHK_NP(NOUPD_FRAME_SIZE,1,) \ UPD_BH_SINGLE_ENTRY(&stg_sel_##offset##_noupd_info); \ + LDV_ENTER(R1.cl); \ ENTER_CCS(R1.p); \ SAVE_CCCS(NOUPD_FRAME_SIZE); \ Sp[-NOUPD_FRAME_SIZE]=(W_)&stg_sel_ret_##offset##_noupd_info; \ @@ -163,6 +165,7 @@ FN_(stg_ap_1_upd_entry) { FB_ STK_CHK_NP(sizeofW(StgUpdateFrame),1,); UPD_BH_UPDATABLE(&stg_ap_1_upd_info); + LDV_ENTER(R1.cl); ENTER_CCS(R1.p); PUSH_UPD_FRAME(R1.p,0); R1.p=(P_)(R1.cl->payload[0]); @@ -176,6 +179,7 @@ FN_(stg_ap_2_upd_entry) { FB_ STK_CHK_NP(sizeofW(StgUpdateFrame)+1,1,); UPD_BH_UPDATABLE(&stg_ap_2_upd_info); + LDV_ENTER(R1.cl); ENTER_CCS(R1.p); PUSH_UPD_FRAME(R1.p,0); Sp[-UF_SIZE-1]=(W_)(R1.cl->payload[1]); @@ -190,6 +194,7 @@ FN_(stg_ap_3_upd_entry) { FB_ STK_CHK_NP(sizeofW(StgUpdateFrame)+2,1,); UPD_BH_UPDATABLE(&stg_ap_3_upd_info); + LDV_ENTER(R1.cl); ENTER_CCS(R1.p); PUSH_UPD_FRAME(R1.p,0); Sp[-UF_SIZE-1]=(W_)(R1.cl->payload[2]); @@ -205,6 +210,7 @@ FN_(stg_ap_4_upd_entry) { FB_ STK_CHK_NP(sizeofW(StgUpdateFrame)+3,1,); UPD_BH_UPDATABLE(&stg_ap_4_upd_info); + LDV_ENTER(R1.cl); ENTER_CCS(R1.p); PUSH_UPD_FRAME(R1.p,0); Sp[-UF_SIZE-1]=(W_)(R1.cl->payload[3]); @@ -221,6 +227,7 @@ FN_(stg_ap_5_upd_entry) { FB_ STK_CHK_NP(sizeofW(StgUpdateFrame)+4,1,); UPD_BH_UPDATABLE(&stg_ap_5_upd_info); + LDV_ENTER(R1.cl); ENTER_CCS(R1.p); PUSH_UPD_FRAME(R1.p,0); Sp[-UF_SIZE-1]=(W_)(R1.cl->payload[4]); @@ -238,6 +245,7 @@ FN_(stg_ap_6_upd_entry) { FB_ STK_CHK_NP(sizeofW(StgUpdateFrame)+5,1,); UPD_BH_UPDATABLE(&stg_ap_6_upd_info); + LDV_ENTER(R1.cl); ENTER_CCS(R1.p); PUSH_UPD_FRAME(R1.p,0); Sp[-UF_SIZE-1]=(W_)(R1.cl->payload[5]); @@ -256,6 
+264,7 @@ FN_(stg_ap_7_upd_entry) { FB_ STK_CHK_NP(sizeofW(StgUpdateFrame)+6,1,); UPD_BH_UPDATABLE(&stg_ap_7_upd_info); + LDV_ENTER(R1.cl); ENTER_CCS(R1.p); PUSH_UPD_FRAME(R1.p,0); Sp[-UF_SIZE-1]=(W_)(R1.cl->payload[6]); @@ -275,6 +284,7 @@ FN_(stg_ap_8_upd_entry) { FB_ STK_CHK_NP(sizeofW(StgUpdateFrame)+7,1,); UPD_BH_UPDATABLE(&stg_ap_8_upd_info); + LDV_ENTER(R1.cl); ENTER_CCS(R1.p); PUSH_UPD_FRAME(R1.p,0); Sp[-UF_SIZE-1]=(W_)(R1.cl->payload[7]); diff --git a/ghc/rts/Storage.c b/ghc/rts/Storage.c index 9080bf624d..ee8cfd85c6 100644 --- a/ghc/rts/Storage.c +++ b/ghc/rts/Storage.c @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: Storage.c,v 1.53 2001/11/08 12:46:31 simonmar Exp $ + * $Id: Storage.c,v 1.54 2001/11/22 14:25:12 simonmar Exp $ * * (c) The GHC Team, 1998-1999 * @@ -23,6 +23,8 @@ #include "Schedule.h" #include "StoragePriv.h" +#include "RetainerProfile.h" // for counting memory blocks (memInventory) + StgClosure *caf_list = NULL; bdescr *small_alloc_list; /* allocate()d small objects */ @@ -63,23 +65,6 @@ initStorage( void ) step *stp; generation *gen; - /* If we're doing heap profiling, we want a two-space heap with a - * fixed-size allocation area so that we get roughly even-spaced - * samples. - */ - - /* As an experiment, try a 2 generation collector - */ - -#if defined(PROFILING) || defined(DEBUG) - if (RtsFlags.ProfFlags.doHeapProfile) { - RtsFlags.GcFlags.generations = 1; - RtsFlags.GcFlags.steps = 1; - RtsFlags.GcFlags.oldGenFactor = 0; - RtsFlags.GcFlags.heapSizeSuggestion = 0; - } -#endif - if (RtsFlags.GcFlags.maxHeapSize != 0 && RtsFlags.GcFlags.heapSizeSuggestion > RtsFlags.GcFlags.maxHeapSize) { @@ -350,6 +335,20 @@ resetNurseries( void ) } #else for (bd = g0s0->blocks; bd; bd = bd->link) { +#ifdef PROFILING + // @LDV profiling + // Reset every word in the nursery to zero when doing LDV profiling. 
+ // This relieves the mutator of the burden of zeroing every new closure, + // which is stored in the nursery. + // + // Todo: make it more efficient, e.g. memcpy() + // + StgPtr p; + if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) { + for (p = bd->start; p < bd->start + BLOCK_SIZE_W; p++) + *p = 0; + } +#endif bd->free = bd->start; ASSERT(bd->gen_no == 0); ASSERT(bd->step == g0s0); @@ -370,6 +369,12 @@ allocNursery (bdescr *tail, nat blocks) // cons them on to the front of the list, not forgetting to update // the back pointer on the tail of the list to point to the new block. for (i=0; i < blocks; i++) { + // @LDV profiling + /* + processNursery() in LdvProfile.c assumes that every block group in + the nursery contains only a single block. So, if a block group is + given multiple blocks, change processNursery() accordingly. + */ bd = allocBlock(); bd->link = tail; // double-link the nursery: we might need to insert blocks @@ -786,7 +791,14 @@ memInventory(void) for (bd = large_alloc_list; bd; bd = bd->link) { total_blocks += bd->blocks; } - + +#ifdef PROFILING + if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) { + for (bd = firstStack; bd != NULL; bd = bd->link) + total_blocks += bd->blocks; + } +#endif + // count the blocks allocated by the arena allocator total_blocks += arenaBlocks(); diff --git a/ghc/rts/Storage.h b/ghc/rts/Storage.h index 1156dd4da4..f746d1f0ca 100644 --- a/ghc/rts/Storage.h +++ b/ghc/rts/Storage.h @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: Storage.h,v 1.36 2001/08/08 10:50:37 simonmar Exp $ + * $Id: Storage.h,v 1.37 2001/11/22 14:25:12 simonmar Exp $ * * (c) The GHC Team, 1998-1999 * @@ -13,6 +13,9 @@ #include "Block.h" #include "BlockAlloc.h" #include "StoragePriv.h" +#ifdef PROFILING +#include "LdvProfile.h" +#endif /* ----------------------------------------------------------------------------- Initialisation / De-initialisation @@ -152,7 +155,10 @@ 
recordOldToNewPtrs(StgMutClosure *p) } } -#ifndef DEBUG +// @LDV profiling +// We zero out the slop when PROFILING is on. +// #ifndef DEBUG +#if !defined(DEBUG) && !defined(PROFILING) #define updateWithIndirection(info, p1, p2) \ { \ bdescr *bd; \ @@ -174,6 +180,41 @@ recordOldToNewPtrs(StgMutClosure *p) TICK_UPD_OLD_IND(); \ } \ } +#elif defined(PROFILING) +// @LDV profiling +// We call LDV_recordDead_FILL_SLOP_DYNAMIC(p1) regardless of the generation in +// which p1 resides. +// +// Note: +// After all, we do *NOT* need to call LDV_recordCreate() for both IND and +// IND_OLDGEN closures because they are inherently used. But, it corrupts +// the invariants that every closure keeps its creation time in the profiling +// field. So, we call LDV_recordCreate(). + +#define updateWithIndirection(info, p1, p2) \ + { \ + bdescr *bd; \ + \ + LDV_recordDead_FILL_SLOP_DYNAMIC((p1)); \ + bd = Bdescr((P_)p1); \ + if (bd->gen_no == 0) { \ + ((StgInd *)p1)->indirectee = p2; \ + SET_INFO(p1,&stg_IND_info); \ + LDV_recordCreate((p1)); \ + TICK_UPD_NEW_IND(); \ + } else { \ + ((StgIndOldGen *)p1)->indirectee = p2; \ + if (info != &stg_BLACKHOLE_BQ_info) { \ + ACQUIRE_LOCK(&sm_mutex); \ + ((StgIndOldGen *)p1)->mut_link = generations[bd->gen_no].mut_once_list; \ + generations[bd->gen_no].mut_once_list = (StgMutClosure *)p1; \ + RELEASE_LOCK(&sm_mutex); \ + } \ + SET_INFO(p1,&stg_IND_OLDGEN_info); \ + LDV_recordCreate((p1)); \ + } \ + } + #else /* In the DEBUG case, we also zero out the slop of the old closure, @@ -242,10 +283,17 @@ updateWithPermIndirection(const StgInfoTable *info, StgClosure *p1, StgClosure * bdescr *bd; ASSERT( p1 != p2 && !closure_IND(p1) ); + + // @LDV profiling + // Destroy the old closure. + LDV_recordDead_FILL_SLOP_DYNAMIC(p1); bd = Bdescr((P_)p1); if (bd->gen_no == 0) { ((StgInd *)p1)->indirectee = p2; SET_INFO(p1,&stg_IND_PERM_info); + // @LDV profiling + // We have just created a new closure. 
+ LDV_recordCreate(p1); TICK_UPD_NEW_PERM_IND(p1); } else { ((StgIndOldGen *)p1)->indirectee = p2; @@ -256,6 +304,9 @@ updateWithPermIndirection(const StgInfoTable *info, StgClosure *p1, StgClosure * RELEASE_LOCK(&sm_mutex); } SET_INFO(p1,&stg_IND_OLDGEN_PERM_info); + // @LDV profiling + // We have just created a new closure. + LDV_recordCreate(p1); TICK_UPD_OLD_PERM_IND(); } } diff --git a/ghc/rts/Updates.hc b/ghc/rts/Updates.hc index 989ce2f90e..81f49de86f 100644 --- a/ghc/rts/Updates.hc +++ b/ghc/rts/Updates.hc @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: Updates.hc,v 1.35 2001/11/08 12:46:31 simonmar Exp $ + * $Id: Updates.hc,v 1.36 2001/11/22 14:25:12 simonmar Exp $ * * (c) The GHC Team, 1998-1999 * @@ -117,8 +117,8 @@ UPD_FRAME_ENTRY_TEMPLATE(stg_upd_frame_7_entry,RET_VEC(Sp[0],7)); return size! */ -#ifdef PROFILING -#define UPD_FRAME_BITMAP 3 +#if defined(PROFILING) +#define UPD_FRAME_BITMAP 7 #else #define UPD_FRAME_BITMAP 1 #endif @@ -207,6 +207,7 @@ STGFUN(stg_PAP_entry) Sp -= Words; TICK_ENT_PAP(pap); + LDV_ENTER(pap); /* Enter PAP cost centre -- lexical scoping only */ ENTER_CCS_PAP_CL(pap); @@ -286,6 +287,14 @@ EXTFUN(__stg_update_PAP) * such as removing the update frame. */ if ((Hp += PapSize) > HpLim) { +#ifdef PROFILING + // @LDV profiling + // Not filling the slop for the object (because there is none), but + // filling in the trailing words in the current block. + // This is unnecessary because we fills the entire nursery with + // zeroes after each garbage collection. 
+ // FILL_SLOP(HpLim, PapSize - (Hp - HpLim)); +#endif Sp -= 1; Sp[0] = (W_)Fun; JMP_(stg_gc_entertop); @@ -351,7 +360,7 @@ EXTFUN(__stg_update_PAP) Updatee = Su->updatee; -#if defined(PROFILING) +#if defined(PROFILING) if (Words != 0) { UPD_IND(Updatee,PapClosure); TICK_UPD_PAP_IN_NEW(Words+1); @@ -436,6 +445,7 @@ STGFUN(stg_AP_UPD_entry) Sp -= sizeofW(StgUpdateFrame) + Words; TICK_ENT_AP_UPD(ap); + LDV_ENTER(ap); /* Enter PAP cost centre -- lexical scoping only */ ENTER_CCS_PAP_CL(ap); /* ToDo: ENTER_CC_AP_UPD_CL */ diff --git a/ghc/rts/Weak.c b/ghc/rts/Weak.c index 1c03e69cdc..c80cf8c3c1 100644 --- a/ghc/rts/Weak.c +++ b/ghc/rts/Weak.c @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: Weak.c,v 1.18 2001/08/14 13:40:09 sewardj Exp $ + * $Id: Weak.c,v 1.19 2001/11/22 14:25:13 simonmar Exp $ * * (c) The GHC Team, 1998-1999 * @@ -36,6 +36,15 @@ finalizeWeakPointersNow(void) while ((w = weak_ptr_list)) { weak_ptr_list = w->link; if (w->header.info != &stg_DEAD_WEAK_info) { + // @LDV profiling + // Even thought the info type of w changes, we DO NOT perform any + // LDV profiling because at this moment, LDV profiling must already + // have been terminated. See the comments in shutdownHaskell(). + // At any rate, there is no need to call LDV_recordDead() because + // weak pointers are inherently used. +#ifdef PROFILING + ASSERT(ldvTime == 0); // LDV profiling is turned off. +#endif w->header.info = &stg_DEAD_WEAK_info; IF_DEBUG(weak,fprintf(stderr,"Finalising weak pointer at %p -> %p\n", w, w->key)); if (w->finalizer != &stg_NO_FINALIZER_closure) { @@ -85,7 +94,17 @@ scheduleFinalizers(StgWeak *list) arr->payload[n] = w->finalizer; n++; } - w->header.info = &stg_DEAD_WEAK_info; + +#ifdef PROFILING + // A weak pointer is inherently used, so we do not need to call + // LDV_recordDead(). 
+ // + // Furthermore, when PROFILING is turned on, dead weak + // pointers are exactly as large as weak pointers, so there is + // no need to fill the slop, either. See stg_DEAD_WEAK_info + // in StgMiscClosures.hc. +#endif + SET_HDR(w, &stg_DEAD_WEAK_info, w->header.prof.ccs); } t = createIOThread(RtsFlags.GcFlags.initialStkSize, |
