Diffstat (limited to 'rts/sm')
-rw-r--r--  rts/sm/BlockAlloc.c |  15
-rw-r--r--  rts/sm/CNF.c        |  16
-rw-r--r--  rts/sm/Compact.c    |  26
-rw-r--r--  rts/sm/Compact.h    |  12
-rw-r--r--  rts/sm/Evac.c       | 130
-rw-r--r--  rts/sm/GC.c         | 232
-rw-r--r--  rts/sm/GC.h         |  10
-rw-r--r--  rts/sm/GCThread.h   |   6
-rw-r--r--  rts/sm/GCUtils.c    |  31
-rw-r--r--  rts/sm/GCUtils.h    |   6
-rw-r--r--  rts/sm/MarkWeak.c   |  17
-rw-r--r--  rts/sm/OSMem.h      |   1
-rw-r--r--  rts/sm/Sanity.c     |  23
-rw-r--r--  rts/sm/Scav.c       | 217
-rw-r--r--  rts/sm/Storage.c    | 126
15 files changed, 482 insertions(+), 386 deletions(-)
diff --git a/rts/sm/BlockAlloc.c b/rts/sm/BlockAlloc.c
index 2a02ecc9c5..bbb4f8a6c1 100644
--- a/rts/sm/BlockAlloc.c
+++ b/rts/sm/BlockAlloc.c
@@ -210,6 +210,12 @@ void recordFreedBlocks(uint32_t node, uint32_t n)
Allocation
-------------------------------------------------------------------------- */
+STATIC_INLINE bdescr *
+tail_of (bdescr *bd)
+{
+ return bd + bd->blocks - 1;
+}
+
STATIC_INLINE void
initGroup(bdescr *head)
{
@@ -223,7 +229,7 @@ initGroup(bdescr *head)
// mblocks don't have bdescrs; freeing these is handled in a
// different way by free_mblock_group().
if (head->blocks > 1 && head->blocks <= BLOCKS_PER_MBLOCK) {
- bdescr *last = head + head->blocks-1;
+ bdescr *last = tail_of(head);
last->blocks = 0;
last->link = head;
}
@@ -285,13 +291,6 @@ free_list_insert (uint32_t node, bdescr *bd)
dbl_link_onto(bd, &free_list[node][ln]);
}
-
-STATIC_INLINE bdescr *
-tail_of (bdescr *bd)
-{
- return bd + bd->blocks - 1;
-}
-
// After splitting a group, the last block of each group must have a
// tail that points to the head block, to keep our invariants for
// coalescing.
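
For reference, a minimal sketch (not part of this patch) of the invariant that tail_of() serves. In a block group, only the head bdescr carries the group size; the descriptor of the last block gets blocks == 0 and a link back to the head, which is what free-list coalescing relies on:

STATIC_INLINE void
setup_group_tail (bdescr *head)    // sketch; mirrors initGroup() above
{
    if (head->blocks > 1 && head->blocks <= BLOCKS_PER_MBLOCK) {
        bdescr *last = tail_of(head);  // == head + head->blocks - 1
        last->blocks = 0;              // 0 marks a tail, not a group head
        last->link   = head;           // back-pointer used when coalescing
    }
}
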
diff --git a/rts/sm/CNF.c b/rts/sm/CNF.c
index c12f53a120..6bc58cde75 100644
--- a/rts/sm/CNF.c
+++ b/rts/sm/CNF.c
@@ -722,14 +722,14 @@ verify_consistency_block (StgCompactNFData *str, StgCompactNFDataBlock *block)
p += arr_words_sizeW((StgArrBytes*)p);
break;
- case MUT_ARR_PTRS_FROZEN:
- case MUT_ARR_PTRS_FROZEN0:
+ case MUT_ARR_PTRS_FROZEN_CLEAN:
+ case MUT_ARR_PTRS_FROZEN_DIRTY:
verify_mut_arr_ptrs(str, (StgMutArrPtrs*)p);
p += mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
break;
- case SMALL_MUT_ARR_PTRS_FROZEN:
- case SMALL_MUT_ARR_PTRS_FROZEN0:
+ case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
+ case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
{
uint32_t i;
StgSmallMutArrPtrs *arr = (StgSmallMutArrPtrs*)p;
@@ -969,14 +969,14 @@ fixup_block(StgCompactNFDataBlock *block, StgWord *fixup_table, uint32_t count)
p += arr_words_sizeW((StgArrBytes*)p);
break;
- case MUT_ARR_PTRS_FROZEN:
- case MUT_ARR_PTRS_FROZEN0:
+ case MUT_ARR_PTRS_FROZEN_CLEAN:
+ case MUT_ARR_PTRS_FROZEN_DIRTY:
fixup_mut_arr_ptrs(fixup_table, count, (StgMutArrPtrs*)p);
p += mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
break;
- case SMALL_MUT_ARR_PTRS_FROZEN:
- case SMALL_MUT_ARR_PTRS_FROZEN0:
+ case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
+ case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
{
uint32_t i;
StgSmallMutArrPtrs *arr = (StgSmallMutArrPtrs*)p;
diff --git a/rts/sm/Compact.c b/rts/sm/Compact.c
index 0e2fea8990..004e042069 100644
--- a/rts/sm/Compact.c
+++ b/rts/sm/Compact.c
@@ -25,7 +25,8 @@
#include "Trace.h"
#include "Weak.h"
#include "MarkWeak.h"
-#include "Stable.h"
+#include "StablePtr.h"
+#include "StableName.h"
// Turn off inlining when debugging - it obfuscates things
#if defined(DEBUG)
@@ -212,7 +213,7 @@ thread_static( StgClosure* p )
p = *THUNK_STATIC_LINK(p);
continue;
case FUN_STATIC:
- p = *FUN_STATIC_LINK(p);
+ p = *STATIC_LINK(info,p);
continue;
case CONSTR:
case CONSTR_NOCAF:
@@ -482,8 +483,8 @@ update_fwd_large( bdescr *bd )
case MUT_ARR_PTRS_CLEAN:
case MUT_ARR_PTRS_DIRTY:
- case MUT_ARR_PTRS_FROZEN:
- case MUT_ARR_PTRS_FROZEN0:
+ case MUT_ARR_PTRS_FROZEN_CLEAN:
+ case MUT_ARR_PTRS_FROZEN_DIRTY:
// follow everything
{
StgMutArrPtrs *a;
@@ -497,8 +498,8 @@ update_fwd_large( bdescr *bd )
case SMALL_MUT_ARR_PTRS_CLEAN:
case SMALL_MUT_ARR_PTRS_DIRTY:
- case SMALL_MUT_ARR_PTRS_FROZEN:
- case SMALL_MUT_ARR_PTRS_FROZEN0:
+ case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
+ case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
// follow everything
{
StgSmallMutArrPtrs *a;
@@ -682,8 +683,8 @@ thread_obj (const StgInfoTable *info, StgPtr p)
case MUT_ARR_PTRS_CLEAN:
case MUT_ARR_PTRS_DIRTY:
- case MUT_ARR_PTRS_FROZEN:
- case MUT_ARR_PTRS_FROZEN0:
+ case MUT_ARR_PTRS_FROZEN_CLEAN:
+ case MUT_ARR_PTRS_FROZEN_DIRTY:
// follow everything
{
StgMutArrPtrs *a;
@@ -698,8 +699,8 @@ thread_obj (const StgInfoTable *info, StgPtr p)
case SMALL_MUT_ARR_PTRS_CLEAN:
case SMALL_MUT_ARR_PTRS_DIRTY:
- case SMALL_MUT_ARR_PTRS_FROZEN:
- case SMALL_MUT_ARR_PTRS_FROZEN0:
+ case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
+ case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
// follow everything
{
StgSmallMutArrPtrs *a;
@@ -1000,7 +1001,10 @@ compact(StgClosure *static_objects)
thread_static(static_objects /* ToDo: ok? */);
// the stable pointer table
- threadStableTables((evac_fn)thread_root, NULL);
+ threadStablePtrTable((evac_fn)thread_root, NULL);
+
+ // the stable name table
+ threadStableNameTable((evac_fn)thread_root, NULL);
// the CAF list (used by GHCi)
markCAFs((evac_fn)thread_root, NULL);
diff --git a/rts/sm/Compact.h b/rts/sm/Compact.h
index 6dcb50b1aa..63abfc7180 100644
--- a/rts/sm/Compact.h
+++ b/rts/sm/Compact.h
@@ -20,8 +20,8 @@ mark(StgPtr p, bdescr *bd)
{
uint32_t offset_within_block = p - bd->start; // in words
StgPtr bitmap_word = (StgPtr)bd->u.bitmap +
- (offset_within_block / (sizeof(W_)*BITS_PER_BYTE));
- StgWord bit_mask = (StgWord)1 << (offset_within_block & (sizeof(W_)*BITS_PER_BYTE - 1));
+ (offset_within_block / BITS_IN(W_));
+ StgWord bit_mask = (StgWord)1 << (offset_within_block & (BITS_IN(W_) - 1));
*bitmap_word |= bit_mask;
}
@@ -30,8 +30,8 @@ unmark(StgPtr p, bdescr *bd)
{
uint32_t offset_within_block = p - bd->start; // in words
StgPtr bitmap_word = (StgPtr)bd->u.bitmap +
- (offset_within_block / (sizeof(W_)*BITS_PER_BYTE));
- StgWord bit_mask = (StgWord)1 << (offset_within_block & (sizeof(W_)*BITS_PER_BYTE - 1));
+ (offset_within_block / BITS_IN(W_));
+ StgWord bit_mask = (StgWord)1 << (offset_within_block & (BITS_IN(W_) - 1));
*bitmap_word &= ~bit_mask;
}
@@ -40,8 +40,8 @@ is_marked(StgPtr p, bdescr *bd)
{
uint32_t offset_within_block = p - bd->start; // in words
StgPtr bitmap_word = (StgPtr)bd->u.bitmap +
- (offset_within_block / (sizeof(W_)*BITS_PER_BYTE));
- StgWord bit_mask = (StgWord)1 << (offset_within_block & (sizeof(W_)*BITS_PER_BYTE - 1));
+ (offset_within_block / BITS_IN(W_));
+ StgWord bit_mask = (StgWord)1 << (offset_within_block & (BITS_IN(W_) - 1));
return (*bitmap_word & bit_mask);
}
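
BITS_IN(W_) expands to sizeof(W_) * BITS_PER_BYTE, so the change above is purely cosmetic. A hedged worked example of the bitmap arithmetic, assuming a 64-bit word (BITS_IN(W_) == 64) and bd pointing at the block's descriptor:

// Marking the word at offset 70 within the block:
//   70 / 64 == 1  -> the bit lives in bitmap word 1
//   70 & 63 == 6  -> it is bit 6 of that word
uint32_t offset_within_block = 70;
StgPtr  bitmap_word = (StgPtr)bd->u.bitmap + (offset_within_block / BITS_IN(W_));
StgWord bit_mask    = (StgWord)1 << (offset_within_block & (BITS_IN(W_) - 1));
*bitmap_word |= bit_mask;       // mark; use &= ~bit_mask to unmark
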
diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c
index fb1af0f692..289031945d 100644
--- a/rts/sm/Evac.c
+++ b/rts/sm/Evac.c
@@ -28,10 +28,6 @@
#include "CNF.h"
#include "Scav.h"
-#if defined(PROF_SPIN) && defined(THREADED_RTS) && defined(PARALLEL_GC)
-StgWord64 whitehole_spin = 0;
-#endif
-
#if defined(THREADED_RTS) && !defined(PARALLEL_GC)
#define evacuate(p) evacuate1(p)
#define evacuate_BLACKHOLE(p) evacuate_BLACKHOLE1(p)
@@ -47,7 +43,7 @@ StgWord64 whitehole_spin = 0;
*/
#define MAX_THUNK_SELECTOR_DEPTH 16
-static void eval_thunk_selector (StgClosure **q, StgSelector * p, bool);
+static void eval_thunk_selector (StgClosure **q, StgSelector *p, bool);
STATIC_INLINE void evacuate_large(StgPtr p);
/* -----------------------------------------------------------------------------
@@ -197,8 +193,9 @@ spin:
info = xchg((StgPtr)&src->header.info, (W_)&stg_WHITEHOLE_info);
if (info == (W_)&stg_WHITEHOLE_info) {
#if defined(PROF_SPIN)
- whitehole_spin++;
+ whitehole_gc_spin++;
#endif
+ busy_wait_nop();
goto spin;
}
if (IS_FORWARDING_PTR(info)) {
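
The locking idiom above, sketched in isolation (assumptions: xchg() and busy_wait_nop() as provided by the RTS; src is the closure being claimed). Atomically swapping in stg_WHITEHOLE_info either acquires the closure or reveals that another GC thread already holds it, in which case we count the collision under PROF_SPIN and retry after a pause:

W_ info;
for (;;) {
    info = xchg((StgPtr)&src->header.info, (W_)&stg_WHITEHOLE_info);
    if (info != (W_)&stg_WHITEHOLE_info) break;  // lock acquired
#if defined(PROF_SPIN)
    whitehole_gc_spin++;                         // global contention counter
#endif
    busy_wait_nop();                             // back off briefly
}
// ... use `info` (the closure's real info pointer), then release by
// writing a forwarding pointer or restoring the original info.
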
@@ -281,14 +278,7 @@ evacuate_large(StgPtr p)
}
// remove from large_object list
- if (bd->u.back) {
- bd->u.back->link = bd->link;
- } else { // first object in the list
- gen->large_objects = bd->link;
- }
- if (bd->link) {
- bd->link->u.back = bd->u.back;
- }
+ dbl_link_remove(bd, &gen->large_objects);
/* link it on to the evacuated large object list of the destination gen
*/
@@ -417,14 +407,7 @@ evacuate_compact (StgPtr p)
}
// remove from compact_objects list
- if (bd->u.back) {
- bd->u.back->link = bd->link;
- } else { // first object in the list
- gen->compact_objects = bd->link;
- }
- if (bd->link) {
- bd->link->u.back = bd->u.back;
- }
+ dbl_link_remove(bd, &gen->compact_objects);
/* link it on to the evacuated compact object list of the destination gen
*/
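
dbl_link_remove() replaces the open-coded unlinking deleted in both hunks above. A sketch of what it presumably does (mirroring the removed code; the real definition lives in the block descriptor headers):

STATIC_INLINE void
dbl_link_remove_sketch (bdescr *bd, bdescr **list)
{
    if (bd->u.back) {
        bd->u.back->link = bd->link;   // splice around bd
    } else {
        *list = bd->link;              // bd was the first element
    }
    if (bd->link) {
        bd->link->u.back = bd->u.back;
    }
}
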
@@ -539,14 +522,14 @@ loop:
switch (info->type) {
case THUNK_STATIC:
- if (info->srt_bitmap != 0) {
+ if (info->srt != 0) {
evacuate_static_object(THUNK_STATIC_LINK((StgClosure *)q), q);
}
return;
case FUN_STATIC:
- if (info->srt_bitmap != 0) {
- evacuate_static_object(FUN_STATIC_LINK((StgClosure *)q), q);
+ if (info->srt != 0 || info->layout.payload.ptrs != 0) {
+ evacuate_static_object(STATIC_LINK(info,(StgClosure *)q), q);
}
return;
@@ -707,9 +690,6 @@ loop:
case THUNK_1_1:
case THUNK_2_0:
case THUNK_0_2:
-#if defined(NO_PROMOTE_THUNKS)
-#error bitrotted
-#endif
copy(p,info,q,sizeofW(StgThunk)+2,gen_no);
return;
@@ -753,6 +733,19 @@ loop:
copy(p,info,q,sizeofW(StgInd),gen_no);
return;
}
+ // Note [BLACKHOLE pointing to IND]
+ //
+ // BLOCKING_QUEUE can be overwritten by IND (see
+ // wakeBlockingQueue()). However, when this happens we must
+ // be updating the BLACKHOLE, so the BLACKHOLE's indirectee
+ // should now point to the value.
+ //
+ // The mutator might observe an inconsistent state, because
+ // the writes are happening in another thread, so it's
+ // possible for the mutator to follow an indirectee and find
+ // an IND. But this should never happen in the GC, because
+ // the mutators are all stopped and the writes have
+ // completed.
ASSERT(i != &stg_IND_info);
}
q = r;
@@ -818,16 +811,16 @@ loop:
case MUT_ARR_PTRS_CLEAN:
case MUT_ARR_PTRS_DIRTY:
- case MUT_ARR_PTRS_FROZEN:
- case MUT_ARR_PTRS_FROZEN0:
+ case MUT_ARR_PTRS_FROZEN_CLEAN:
+ case MUT_ARR_PTRS_FROZEN_DIRTY:
// just copy the block
copy(p,info,q,mut_arr_ptrs_sizeW((StgMutArrPtrs *)q),gen_no);
return;
case SMALL_MUT_ARR_PTRS_CLEAN:
case SMALL_MUT_ARR_PTRS_DIRTY:
- case SMALL_MUT_ARR_PTRS_FROZEN:
- case SMALL_MUT_ARR_PTRS_FROZEN0:
+ case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
+ case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
// just copy the block
copy(p,info,q,small_mut_arr_ptrs_sizeW((StgSmallMutArrPtrs *)q),gen_no);
return;
@@ -898,9 +891,16 @@ evacuate_BLACKHOLE(StgClosure **p)
bd = Bdescr((P_)q);
- // blackholes can't be in a compact, or large
- ASSERT((bd->flags & (BF_COMPACT | BF_LARGE)) == 0);
+ // blackholes can't be in a compact
+ ASSERT((bd->flags & BF_COMPACT) == 0);
+ // blackholes *can* be in a large object: when raiseAsync() creates an
+ // AP_STACK the payload might be large enough to create a large object.
+ // See #14497.
+ if (bd->flags & BF_LARGE) {
+ evacuate_large((P_)q);
+ return;
+ }
if (bd->flags & BF_EVACUATED) {
if (bd->gen_no < gct->evac_gen_no) {
gct->failed_to_evac = true;
@@ -934,23 +934,34 @@ evacuate_BLACKHOLE(StgClosure **p)
copy(p,info,q,sizeofW(StgInd),gen_no);
}
-/* -----------------------------------------------------------------------------
- Evaluate a THUNK_SELECTOR if possible.
+/* ----------------------------------------------------------------------------
+ Update a chain of thunk selectors with the given value. All selectors in the
+ chain become IND pointing to the value, except when there is a loop (i.e.
the value of a THUNK_SELECTOR is the THUNK_SELECTOR itself), in which case we
+ leave the selector as-is.
+
+ p is the current selector to update. In eval_thunk_selector we make a list
+ of selectors using ((StgThunk*)p)->payload[0] for the link field and use
+ that field to traverse the chain here.
+
+ val is the final value of the selector chain.
+
+ A chain is formed when we've got something like:
- p points to a THUNK_SELECTOR that we want to evaluate. The
- result of "evaluating" it will be evacuated and a pointer to the
- to-space closure will be returned.
+ let x = C1 { f1 = e1 }
+ y = C2 { f2 = f1 x }
+ z = f2 y
- If the THUNK_SELECTOR could not be evaluated (its selectee is still
- a THUNK, for example), then the THUNK_SELECTOR itself will be
- evacuated.
+ Here the chain (p) we get when evacuating z is:
+
+ [ f2 y, f1 x ]
+
+ and val is e1.
-------------------------------------------------------------------------- */
+
static void
unchain_thunk_selectors(StgSelector *p, StgClosure *val)
{
- StgSelector *prev;
-
- prev = NULL;
while (p)
{
ASSERT(p->header.info == &stg_WHITEHOLE_info);
@@ -960,7 +971,7 @@ unchain_thunk_selectors(StgSelector *p, StgClosure *val)
// not evacuate it), so in this case val is in from-space.
// ASSERT(!HEAP_ALLOCED_GC(val) || Bdescr((P_)val)->gen_no > N || (Bdescr((P_)val)->flags & BF_EVACUATED));
- prev = (StgSelector*)((StgClosure *)p)->payload[0];
+ StgSelector *prev = (StgSelector*)((StgClosure *)p)->payload[0];
// Update the THUNK_SELECTOR with an indirection to the
// value. The value is still in from-space at this stage.
@@ -997,8 +1008,18 @@ unchain_thunk_selectors(StgSelector *p, StgClosure *val)
}
}
+/* -----------------------------------------------------------------------------
+ Evaluate a THUNK_SELECTOR if possible.
+
+ p points to a THUNK_SELECTOR that we want to evaluate.
+
+ If the THUNK_SELECTOR could not be evaluated (its selectee is still a THUNK,
+ for example), then the THUNK_SELECTOR itself will be evacuated depending on
+ the evac parameter.
+ -------------------------------------------------------------------------- */
+
static void
-eval_thunk_selector (StgClosure **q, StgSelector * p, bool evac)
+eval_thunk_selector (StgClosure **q, StgSelector *p, bool evac)
// NB. for legacy reasons, p & q are swapped around :(
{
uint32_t field;
@@ -1007,7 +1028,6 @@ eval_thunk_selector (StgClosure **q, StgSelector * p, bool evac)
StgClosure *selectee;
StgSelector *prev_thunk_selector;
bdescr *bd;
- StgClosure *val;
prev_thunk_selector = NULL;
// this is a chain of THUNK_SELECTORs that we are going to update
@@ -1057,9 +1077,14 @@ selector_chain:
// In threaded mode, we'll use WHITEHOLE to lock the selector
// thunk while we evaluate it.
{
- do {
+ while(true) {
info_ptr = xchg((StgPtr)&p->header.info, (W_)&stg_WHITEHOLE_info);
- } while (info_ptr == (W_)&stg_WHITEHOLE_info);
+ if (info_ptr != (W_)&stg_WHITEHOLE_info) { break; }
+#if defined(PROF_SPIN)
+ ++whitehole_gc_spin;
+#endif
+ busy_wait_nop();
+ }
// make sure someone else didn't get here first...
if (IS_FORWARDING_PTR(info_ptr) ||
@@ -1127,7 +1152,7 @@ selector_loop:
info->layout.payload.nptrs));
// Select the right field from the constructor
- val = selectee->payload[field];
+ StgClosure *val = selectee->payload[field];
#if defined(PROFILING)
// For the purposes of LDV profiling, we have destroyed
@@ -1159,6 +1184,8 @@ selector_loop:
val = ((StgInd *)val)->indirectee;
goto val_loop;
case THUNK_SELECTOR:
+ // Use payload to make a list of thunk selectors, to be
+ // used in unchain_thunk_selectors
((StgClosure*)p)->payload[0] = (StgClosure *)prev_thunk_selector;
prev_thunk_selector = p;
p = (StgSelector*)val;
@@ -1273,5 +1300,4 @@ bale_out:
copy(q,(const StgInfoTable *)info_ptr,(StgClosure *)p,THUNK_SELECTOR_sizeW(),bd->dest_no);
}
unchain_thunk_selectors(prev_thunk_selector, *q);
- return;
}
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index aa804a8b76..70d6d8efe5 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -28,6 +28,7 @@
#include "Sparks.h"
#include "Sweep.h"
+#include "Arena.h"
#include "Storage.h"
#include "RtsUtils.h"
#include "Apply.h"
@@ -45,9 +46,15 @@
#include "RetainerProfile.h"
#include "LdvProfile.h"
#include "RaiseAsync.h"
-#include "Stable.h"
+#include "StableName.h"
+#include "StablePtr.h"
#include "CheckUnload.h"
#include "CNF.h"
+#include "RtsFlags.h"
+
+#if defined(PROFILING)
+#include "RetainerProfile.h"
+#endif
#include <string.h> // for memset()
#include <unistd.h>
@@ -89,6 +96,8 @@
*
* We build up a static object list while collecting generations 0..N,
* which is then appended to the static object list of generation N+1.
+ *
+ * See also: Note [STATIC_LINK fields] in Storage.h.
*/
/* N is the oldest generation being collected, where the generations
@@ -112,8 +121,6 @@ uint32_t mutlist_MUTVARS,
mutlist_TVAR_WATCH_QUEUE,
mutlist_TREC_CHUNK,
mutlist_TREC_HEADER,
- mutlist_ATOMIC_INVARIANT,
- mutlist_INVARIANT_CHECK_QUEUE,
mutlist_OTHERS;
#endif
@@ -122,7 +129,9 @@ uint32_t mutlist_MUTVARS,
gc_thread **gc_threads = NULL;
#if !defined(THREADED_RTS)
-StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)];
+// Must be aligned to 64 bytes to match the stated 64-byte alignment of gen_workspace
+StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)]
+ ATTRIBUTE_ALIGNED(64);
#endif
// Number of threads running in *this* GC. Affects how many
@@ -132,6 +141,13 @@ uint32_t n_gc_threads;
// For stats:
static long copied; // *words* copied & scavenged during this GC
+#if defined(PROF_SPIN) && defined(THREADED_RTS)
+// spin and yield counts for the quasi-SpinLock in waitForGcThreads
+volatile StgWord64 waitForGcThreads_spin = 0;
+volatile StgWord64 waitForGcThreads_yield = 0;
+volatile StgWord64 whitehole_gc_spin = 0;
+#endif
+
bool work_stealing;
uint32_t static_flag = STATIC_FLAG_B;
@@ -188,7 +204,9 @@ GarbageCollect (uint32_t collect_gen,
{
bdescr *bd;
generation *gen;
- StgWord live_blocks, live_words, par_max_copied, par_balanced_copied;
+ StgWord live_blocks, live_words, par_max_copied, par_balanced_copied,
+ gc_spin_spin, gc_spin_yield, mut_spin_spin, mut_spin_yield,
+ any_work, no_work, scav_find_work;
#if defined(THREADED_RTS)
gc_thread *saved_gct;
#endif
@@ -221,8 +239,9 @@ GarbageCollect (uint32_t collect_gen,
// tell the stats department that we've started a GC
stat_startGC(cap, gct);
- // lock the StablePtr table
- stableLock();
+ // Lock the StablePtr table. This prevents FFI calls manipulating
+ // the table from occurring during GC.
+ stablePtrLock();
#if defined(DEBUG)
mutlist_MUTVARS = 0;
@@ -232,8 +251,6 @@ GarbageCollect (uint32_t collect_gen,
mutlist_TVAR_WATCH_QUEUE = 0;
mutlist_TREC_CHUNK = 0;
mutlist_TREC_HEADER = 0;
- mutlist_ATOMIC_INVARIANT = 0;
- mutlist_INVARIANT_CHECK_QUEUE = 0;
mutlist_OTHERS = 0;
#endif
@@ -385,17 +402,15 @@ GarbageCollect (uint32_t collect_gen,
markScheduler(mark_root, gct);
-#if defined(RTS_USER_SIGNALS)
- // mark the signal handlers (signals should be already blocked)
- markSignalHandlers(mark_root, gct);
-#endif
-
// Mark the weak pointer list, and prepare to detect dead weak pointers.
markWeakPtrList();
initWeakForGC();
// Mark the stable pointer table.
- markStableTables(mark_root, gct);
+ markStablePtrTable(mark_root, gct);
+
+ // Remember old stable name addresses.
+ rememberOldStableNameAddresses ();
/* -------------------------------------------------------------------------
* Repeatedly scavenge all the areas we know about until there's no
@@ -421,7 +436,7 @@ GarbageCollect (uint32_t collect_gen,
shutdown_gc_threads(gct->thread_index, idle_cap);
// Now see which stable names are still alive.
- gcStableTables();
+ gcStableNameTable();
#if defined(THREADED_RTS)
if (n_gc_threads == 1) {
@@ -461,32 +476,53 @@ GarbageCollect (uint32_t collect_gen,
copied = 0;
par_max_copied = 0;
par_balanced_copied = 0;
+ gc_spin_spin = 0;
+ gc_spin_yield = 0;
+ mut_spin_spin = 0;
+ mut_spin_yield = 0;
+ any_work = 0;
+ no_work = 0;
+ scav_find_work = 0;
{
uint32_t i;
uint64_t par_balanced_copied_acc = 0;
+ const gc_thread* thread;
for (i=0; i < n_gc_threads; i++) {
copied += gc_threads[i]->copied;
}
for (i=0; i < n_gc_threads; i++) {
+ thread = gc_threads[i];
if (n_gc_threads > 1) {
debugTrace(DEBUG_gc,"thread %d:", i);
- debugTrace(DEBUG_gc," copied %ld", gc_threads[i]->copied * sizeof(W_));
- debugTrace(DEBUG_gc," scanned %ld", gc_threads[i]->scanned * sizeof(W_));
- debugTrace(DEBUG_gc," any_work %ld", gc_threads[i]->any_work);
- debugTrace(DEBUG_gc," no_work %ld", gc_threads[i]->no_work);
- debugTrace(DEBUG_gc," scav_find_work %ld", gc_threads[i]->scav_find_work);
+ debugTrace(DEBUG_gc," copied %ld",
+ thread->copied * sizeof(W_));
+ debugTrace(DEBUG_gc," scanned %ld",
+ thread->scanned * sizeof(W_));
+ debugTrace(DEBUG_gc," any_work %ld",
+ thread->any_work);
+ debugTrace(DEBUG_gc," no_work %ld",
+ thread->no_work);
+ debugTrace(DEBUG_gc," scav_find_work %ld",
+ thread->scav_find_work);
+
+#if defined(THREADED_RTS) && defined(PROF_SPIN)
+ gc_spin_spin += thread->gc_spin.spin;
+ gc_spin_yield += thread->gc_spin.yield;
+ mut_spin_spin += thread->mut_spin.spin;
+ mut_spin_yield += thread->mut_spin.yield;
+#endif
+
+ any_work += thread->any_work;
+ no_work += thread->no_work;
+ scav_find_work += thread->scav_find_work;
+
+ par_max_copied = stg_max(gc_threads[i]->copied, par_max_copied);
+ par_balanced_copied_acc +=
+ stg_min(n_gc_threads * gc_threads[i]->copied, copied);
}
- par_max_copied = stg_max(gc_threads[i]->copied, par_max_copied);
- par_balanced_copied_acc +=
- stg_min(n_gc_threads * gc_threads[i]->copied, copied);
}
- if (n_gc_threads == 1) {
- par_max_copied = 0;
- par_balanced_copied = 0;
- }
- else
- {
+ if (n_gc_threads > 1) {
// See Note [Work Balance] for an explanation of this computation
par_balanced_copied =
(par_balanced_copied_acc - copied + (n_gc_threads - 1) / 2) /
@@ -521,13 +557,11 @@ GarbageCollect (uint32_t collect_gen,
copied += mut_list_size;
debugTrace(DEBUG_gc,
- "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d TVARs, %d TVAR_WATCH_QUEUEs, %d TREC_CHUNKs, %d TREC_HEADERs, %d ATOMIC_INVARIANTs, %d INVARIANT_CHECK_QUEUEs, %d others)",
+ "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d TVARs, %d TVAR_WATCH_QUEUEs, %d TREC_CHUNKs, %d TREC_HEADERs, %d others)",
(unsigned long)(mut_list_size * sizeof(W_)),
mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS,
mutlist_TVAR, mutlist_TVAR_WATCH_QUEUE,
mutlist_TREC_CHUNK, mutlist_TREC_HEADER,
- mutlist_ATOMIC_INVARIANT,
- mutlist_INVARIANT_CHECK_QUEUE,
mutlist_OTHERS);
}
@@ -701,15 +735,15 @@ GarbageCollect (uint32_t collect_gen,
if (major_gc) { gcCAFs(); }
#endif
- // Update the stable pointer hash table.
- updateStableTables(major_gc);
+ // Update the stable name hash table
+ updateStableNameTable(major_gc);
// unlock the StablePtr table. Must be before scheduleFinalizers(),
// because a finalizer may call hs_free_fun_ptr() or
// hs_free_stable_ptr(), both of which access the StablePtr table.
- stableUnlock();
+ stablePtrUnlock();
- // Must be after stableUnlock(), because it might free stable ptrs.
+ // Must be after stablePtrUnlock(), because it might free stable ptrs.
if (major_gc) {
checkUnload (gct->scavenged_static_objects);
}
@@ -751,24 +785,51 @@ GarbageCollect (uint32_t collect_gen,
ACQUIRE_SM_LOCK;
if (major_gc) {
- W_ need, got;
- need = BLOCKS_TO_MBLOCKS(n_alloc_blocks);
- got = mblocks_allocated;
+ W_ need_prealloc, need_live, need, got;
+ uint32_t i;
+
+ need_live = 0;
+ for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
+ need_live += genLiveBlocks(&generations[i]);
+ }
+ need_live = stg_max(RtsFlags.GcFlags.minOldGenSize, need_live);
+
+ need_prealloc = 0;
+ for (i = 0; i < n_nurseries; i++) {
+ need_prealloc += nurseries[i].n_blocks;
+ }
+ need_prealloc += RtsFlags.GcFlags.largeAllocLim;
+ need_prealloc += countAllocdBlocks(exec_block);
+ need_prealloc += arenaBlocks();
+#if defined(PROFILING)
+ if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
+ need_prealloc = retainerStackBlocks();
+ }
+#endif
+
/* If the amount of data remains constant, next major GC we'll
- require (F+1)*need. We leave (F+2)*need in order to reduce
- repeated deallocation and reallocation. */
- need = (RtsFlags.GcFlags.oldGenFactor + 2) * need;
+ * require (F+1)*live + prealloc. We leave (F+2)*live + prealloc
+ * in order to reduce repeated deallocation and reallocation. #14702
+ */
+ need = need_prealloc + (RtsFlags.GcFlags.oldGenFactor + 2) * need_live;
+
+ /* Also, if the user set a heap size, do not drop below it.
+ */
+ need = stg_max(RtsFlags.GcFlags.heapSizeSuggestion, need);
+
/* But with a large nursery, the above estimate might exceed
* maxHeapSize. A large resident set size might make the OS
* kill this process, or swap unnecessarily. Therefore we
* ensure that our estimate does not exceed maxHeapSize.
*/
if (RtsFlags.GcFlags.maxHeapSize != 0) {
- W_ max = BLOCKS_TO_MBLOCKS(RtsFlags.GcFlags.maxHeapSize);
- if (need > max) {
- need = max;
- }
+ need = stg_min(RtsFlags.GcFlags.maxHeapSize, need);
}
+
+ need = BLOCKS_TO_MBLOCKS(need);
+
+ got = mblocks_allocated;
+
if (got > need) {
returnMemoryToOS(got - need);
}
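
A worked example of the new retention target, with invented numbers (F = oldGenFactor = 2, 1000 live blocks, 200 preallocated blocks):

static W_ retention_example (void)   // sketch, invented values
{
    const W_ F             = 2;      // RtsFlags.GcFlags.oldGenFactor
    const W_ need_live     = 1000;   // live blocks (>= minOldGenSize)
    const W_ need_prealloc = 200;    // nurseries + largeAllocLim + arenas
    W_ need = need_prealloc + (F + 2) * need_live;  // 200 + 4*1000 = 4200
    // then clamp: need = stg_max(heapSizeSuggestion, need);
    //             need = stg_min(maxHeapSize, need);   (if a max is set)
    // and convert: need = BLOCKS_TO_MBLOCKS(need);
    return need;
}
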
@@ -797,7 +858,9 @@ GarbageCollect (uint32_t collect_gen,
// ok, GC over: tell the stats department what happened.
stat_endGC(cap, gct, live_words, copied,
live_blocks * BLOCK_SIZE_W - live_words /* slop */,
- N, n_gc_threads, par_max_copied, par_balanced_copied);
+ N, n_gc_threads, par_max_copied, par_balanced_copied,
+ gc_spin_spin, gc_spin_yield, mut_spin_spin, mut_spin_yield,
+ any_work, no_work, scav_find_work);
#if defined(RTS_USER_SIGNALS)
if (RtsFlags.MiscFlags.install_signal_handlers) {
@@ -825,11 +888,6 @@ static void heapOverflow(void)
Initialise the gc_thread structures.
-------------------------------------------------------------------------- */
-#define GC_THREAD_INACTIVE 0
-#define GC_THREAD_STANDING_BY 1
-#define GC_THREAD_RUNNING 2
-#define GC_THREAD_WAITING_TO_CONTINUE 3
-
static void
new_gc_thread (uint32_t n, gc_thread *t)
{
@@ -1132,6 +1190,9 @@ waitForGcThreads (Capability *cap USED_IF_THREADS, bool idle_cap[])
const uint32_t me = cap->no;
uint32_t i, j;
bool retry = true;
+ Time t0, t1, t2;
+
+ t0 = t1 = t2 = getProcessElapsedTime();
while(retry) {
for (i=0; i < n_threads; i++) {
@@ -1151,8 +1212,32 @@ waitForGcThreads (Capability *cap USED_IF_THREADS, bool idle_cap[])
}
}
if (!retry) break;
+#if defined(PROF_SPIN)
+ waitForGcThreads_yield++;
+#endif
yieldThread();
}
+
+ t2 = getProcessElapsedTime();
+ if (RtsFlags.GcFlags.longGCSync != 0 &&
+ t2 - t1 > RtsFlags.GcFlags.longGCSync) {
+ /* call this every longGCSync of delay */
+ rtsConfig.longGCSync(cap->no, t2 - t0);
+ t1 = t2;
+ }
+ if (retry) {
+#if defined(PROF_SPIN)
+ // This is a bit strange: we'll get more yields than spins.
+ // I guess that means it's not a spin-lock at all, but these
+ // numbers are still useful (I think).
+ waitForGcThreads_spin++;
+#endif
+ }
+ }
+
+ if (RtsFlags.GcFlags.longGCSync != 0 &&
+ t2 - t0 > RtsFlags.GcFlags.longGCSync) {
+ rtsConfig.longGCSyncEnd(t2 - t0);
}
}
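
rtsConfig.longGCSync and rtsConfig.longGCSyncEnd are hooks supplied through RtsConfig; a hedged sketch of installing one from a custom C main (field names as used above; includes and initialisation details abbreviated):

static void my_long_gc_sync (uint32_t cap_no, Time elapsed)
{
    // called once per longGCSync interval while the sync is still pending
    fprintf(stderr, "cap %u: slow GC sync (%lld RTS time units)\n",
            cap_no, (long long)elapsed);
}

// in main():
//     RtsConfig conf = defaultRtsConfig;
//     conf.longGCSync = my_long_gc_sync;
//     hs_init_ghc(&argc, &argv, conf);
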
@@ -1324,7 +1409,7 @@ prepare_collected_gen (generation *gen)
bdescr *bitmap_bdescr;
StgWord *bitmap;
- bitmap_size = gen->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
+ bitmap_size = gen->n_old_blocks * BLOCK_SIZE / BITS_IN(W_);
if (bitmap_size > 0) {
bitmap_bdescr = allocGroup((StgWord)BLOCK_ROUND_UP(bitmap_size)
@@ -1342,7 +1427,7 @@ prepare_collected_gen (generation *gen)
// block descriptor.
for (bd=gen->old_blocks; bd != NULL; bd = bd->link) {
bd->u.bitmap = bitmap;
- bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
+ bitmap += BLOCK_SIZE_W / BITS_IN(W_);
// Also at this point we set the BF_MARKED flag
// for this block. The invariant is that
@@ -1446,9 +1531,6 @@ collect_gct_blocks (void)
take a global lock. Here we collect those blocks from the
cap->pinned_object_blocks lists and put them on the
main g0->large_object list.
-
- Returns: the number of words allocated this way, for stats
- purposes.
-------------------------------------------------------------------------- */
static void
@@ -1744,8 +1826,8 @@ resize_nursery (void)
Sanity code for CAF garbage collection.
With DEBUG turned on, we manage a CAF list in addition to the SRT
- mechanism. After GC, we run down the CAF list and blackhole any
- CAFs which have been garbage collected. This means we get an error
+ mechanism. After GC, we run down the CAF list and turn any
+ CAFs which have been garbage collected into GCD_CAF. This means we get an error
whenever the program tries to enter a garbage collected CAF.
Any garbage collected CAFs are taken off the CAF list at the same
@@ -1771,7 +1853,10 @@ static void gcCAFs(void)
info = get_itbl((StgClosure*)p);
ASSERT(info->type == IND_STATIC);
- if (p->static_link == NULL) {
+ // See Note [STATIC_LINK fields] in Storage.h
+ // This condition identifies CAFs that have just been GC'd, skipping
+ // those with static_link==3, which should be ignored.
+ if ((((StgWord)(p->static_link)&STATIC_BITS) | prev_static_flag) != 3) {
debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%p", p);
SET_INFO((StgClosure*)p,&stg_GCD_CAF_info); // stub it
if (prev == NULL) {
@@ -1788,3 +1873,28 @@ static void gcCAFs(void)
debugTrace(DEBUG_gccafs, "%d CAFs live", i);
}
#endif
+
+
+/* -----------------------------------------------------------------------------
+ The GC can leave some work for the mutator to do before the next
+ GC, provided the work can be safely overlapped with mutation. This
+ can help reduce the GC pause time.
+
+ The mutator can call doIdleGCWork() any time it likes, but
+ preferably when it is idle. It's safe for multiple capabilities to
+ call doIdleGCWork().
+
+ When 'all' is
+ * false: doIdleGCWork() should only take a short, bounded amount
+ of time.
+ * true: doIdleGCWork() will complete all the outstanding GC work.
+
+ The return value is
+ * true if there's more to do (only if 'all' is false).
+ * false otherwise.
+ -------------------------------------------------------------------------- */
+
+bool doIdleGCWork(Capability *cap STG_UNUSED, bool all)
+{
+ return runSomeFinalizers(all);
+}
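
A hedged sketch of a call site (the surrounding scheduler logic here is illustrative, not part of this patch): the scheduler can drain idle GC work in bounded slices while a capability has nothing else to do, and finish it all before sleeping:

static void drain_idle_gc_work (Capability *cap, bool about_to_sleep)
{
    if (about_to_sleep) {
        doIdleGCWork(cap, true);             // complete all outstanding work
    } else {
        // bounded slices; stops as soon as none is left
        while (doIdleGCWork(cap, false)) {
            /* could also break out early if real work arrives */
        }
    }
}
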
diff --git a/rts/sm/GC.h b/rts/sm/GC.h
index c6b0c13a46..437a25f8d9 100644
--- a/rts/sm/GC.h
+++ b/rts/sm/GC.h
@@ -26,6 +26,8 @@ typedef void (*evac_fn)(void *user, StgClosure **root);
StgClosure * isAlive ( StgClosure *p );
void markCAFs ( evac_fn evac, void *user );
+bool doIdleGCWork(Capability *cap, bool all);
+
extern uint32_t N;
extern bool major_gc;
@@ -40,13 +42,13 @@ extern uint32_t mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS,
mutlist_TVAR,
mutlist_TVAR_WATCH_QUEUE,
mutlist_TREC_CHUNK,
- mutlist_TREC_HEADER,
- mutlist_ATOMIC_INVARIANT,
- mutlist_INVARIANT_CHECK_QUEUE;
+ mutlist_TREC_HEADER;
#endif
#if defined(PROF_SPIN) && defined(THREADED_RTS)
-extern StgWord64 whitehole_spin;
+extern volatile StgWord64 whitehole_gc_spin;
+extern volatile StgWord64 waitForGcThreads_spin;
+extern volatile StgWord64 waitForGcThreads_yield;
#endif
void gcWorkerThread (Capability *cap);
diff --git a/rts/sm/GCThread.h b/rts/sm/GCThread.h
index bb206db64c..e865dabe5d 100644
--- a/rts/sm/GCThread.h
+++ b/rts/sm/GCThread.h
@@ -116,6 +116,12 @@ typedef struct gen_workspace_ {
of the GC threads
------------------------------------------------------------------------- */
+/* values for the wakeup field */
+#define GC_THREAD_INACTIVE 0
+#define GC_THREAD_STANDING_BY 1
+#define GC_THREAD_RUNNING 2
+#define GC_THREAD_WAITING_TO_CONTINUE 3
+
typedef struct gc_thread_ {
Capability *cap;
diff --git a/rts/sm/GCUtils.c b/rts/sm/GCUtils.c
index 0373c2b925..31b2913a37 100644
--- a/rts/sm/GCUtils.c
+++ b/rts/sm/GCUtils.c
@@ -81,6 +81,14 @@ freeChain_sync(bdescr *bd)
RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
}
+void
+freeGroup_sync(bdescr *bd)
+{
+ ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
+ freeGroup(bd);
+ RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
+}
+
/* -----------------------------------------------------------------------------
Workspace utilities
-------------------------------------------------------------------------- */
@@ -261,7 +269,7 @@ todo_block_full (uint32_t size, gen_workspace *ws)
// object. However, if the object we're copying is
// larger than a block, then we might have an empty
// block here.
- freeGroup(bd);
+ freeGroup_sync(bd);
} else {
push_scanned_block(bd, ws);
}
@@ -341,24 +349,3 @@ alloc_todo_block (gen_workspace *ws, uint32_t size)
return ws->todo_free;
}
-
-/* -----------------------------------------------------------------------------
- * Debugging
- * -------------------------------------------------------------------------- */
-
-#if defined(DEBUG)
-void
-printMutableList(bdescr *bd)
-{
- StgPtr p;
-
- debugBelch("mutable list %p: ", bd);
-
- for (; bd != NULL; bd = bd->link) {
- for (p = bd->start; p < bd->free; p++) {
- debugBelch("%p (%s), ", (void *)*p, info_type((StgClosure *)*p));
- }
- }
- debugBelch("\n");
-}
-#endif /* DEBUG */
diff --git a/rts/sm/GCUtils.h b/rts/sm/GCUtils.h
index 2e2d4b199d..8b6040769e 100644
--- a/rts/sm/GCUtils.h
+++ b/rts/sm/GCUtils.h
@@ -31,6 +31,7 @@ INLINE_HEADER bdescr *allocBlockOnNode_sync(uint32_t node)
}
void freeChain_sync(bdescr *bd);
+void freeGroup_sync(bdescr *bd);
void push_scanned_block (bdescr *bd, gen_workspace *ws);
StgPtr todo_block_full (uint32_t size, gen_workspace *ws);
@@ -50,11 +51,6 @@ isPartiallyFull(bdescr *bd)
return (bd->free + WORK_UNIT_WORDS < bd->start + BLOCK_SIZE_W);
}
-
-#if defined(DEBUG)
-void printMutableList (bdescr *bd);
-#endif
-
// Version of recordMutableGen for use during GC. This uses the
// mutable lists attached to the current gc_thread structure, which
// are the same as the mutable lists on the Capability.
diff --git a/rts/sm/MarkWeak.c b/rts/sm/MarkWeak.c
index 9a077b3d14..88037f6a34 100644
--- a/rts/sm/MarkWeak.c
+++ b/rts/sm/MarkWeak.c
@@ -32,11 +32,11 @@
/* -----------------------------------------------------------------------------
Weak Pointers
- traverse_weak_ptr_list is called possibly many times during garbage
+ traverseWeakPtrList is called possibly many times during garbage
collection. It returns a flag indicating whether it did any work
(i.e. called evacuate on any live pointers).
- Invariant: traverse_weak_ptr_list is called when the heap is in an
+ Invariant: traverseWeakPtrList is called when the heap is in an
idempotent state. That means that there are no pending
evacuate/scavenge operations. This invariant helps the weak
pointer code decide which weak pointers are dead - if there are no
@@ -60,7 +60,7 @@
Now, we discover which *threads* are still alive. Pointers to
threads from the all_threads and main thread lists are the
- weakest of all: a pointers from the finalizer of a dead weak
+ weakest of all: a pointer from the finalizer of a dead weak
pointer can keep a thread alive. Any threads found to be unreachable
are evacuated and placed on the resurrected_threads list so we
can send them a signal later.
@@ -72,7 +72,7 @@
-------------------------------------------------------------------------- */
/* Which stage of processing various kinds of weak pointer are we at?
- * (see traverse_weak_ptr_list() below for discussion).
+ * (see traverseWeakPtrList() below for discussion).
*/
typedef enum { WeakPtrs, WeakThreads, WeakDone } WeakStage;
static WeakStage weak_stage;
@@ -185,7 +185,7 @@ traverseWeakPtrList(void)
}
default:
- barf("traverse_weak_ptr_list");
+ barf("traverseWeakPtrList");
return true;
}
}
@@ -344,7 +344,7 @@ static void tidyThreadList (generation *gen)
if (tmp == NULL) {
// not alive (yet): leave this thread on the
- // old_all_threads list.
+ // old_threads list.
prev = &(t->global_link);
}
else {
@@ -378,14 +378,13 @@ static void checkWeakPtrSanity(StgWeak *hd, StgWeak *tl)
void collectFreshWeakPtrs()
{
uint32_t i;
- generation *gen = &generations[0];
// move recently allocated weak_ptr_list to the old list as well
for (i = 0; i < n_capabilities; i++) {
Capability *cap = capabilities[i];
if (cap->weak_ptr_list_tl != NULL) {
IF_DEBUG(sanity, checkWeakPtrSanity(cap->weak_ptr_list_hd, cap->weak_ptr_list_tl));
- cap->weak_ptr_list_tl->link = gen->weak_ptr_list;
- gen->weak_ptr_list = cap->weak_ptr_list_hd;
+ cap->weak_ptr_list_tl->link = g0->weak_ptr_list;
+ g0->weak_ptr_list = cap->weak_ptr_list_hd;
cap->weak_ptr_list_tl = NULL;
cap->weak_ptr_list_hd = NULL;
} else {
diff --git a/rts/sm/OSMem.h b/rts/sm/OSMem.h
index 3b0cee9630..7dd0efdc23 100644
--- a/rts/sm/OSMem.h
+++ b/rts/sm/OSMem.h
@@ -18,6 +18,7 @@ void osFreeAllMBlocks(void);
size_t getPageSize (void);
StgWord64 getPhysicalMemorySize (void);
void setExecutable (void *p, W_ len, bool exec);
+bool osBuiltWithNumaSupport(void); // See #14956
bool osNumaAvailable(void);
uint32_t osNumaNodes(void);
uint64_t osNumaMask(void);
diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c
index 53b101024a..8d4171b1cd 100644
--- a/rts/sm/Sanity.c
+++ b/rts/sm/Sanity.c
@@ -380,8 +380,8 @@ checkClosure( const StgClosure* p )
case MUT_ARR_PTRS_CLEAN:
case MUT_ARR_PTRS_DIRTY:
- case MUT_ARR_PTRS_FROZEN:
- case MUT_ARR_PTRS_FROZEN0:
+ case MUT_ARR_PTRS_FROZEN_CLEAN:
+ case MUT_ARR_PTRS_FROZEN_DIRTY:
{
StgMutArrPtrs* a = (StgMutArrPtrs *)p;
uint32_t i;
@@ -391,6 +391,18 @@ checkClosure( const StgClosure* p )
return mut_arr_ptrs_sizeW(a);
}
+ case SMALL_MUT_ARR_PTRS_CLEAN:
+ case SMALL_MUT_ARR_PTRS_DIRTY:
+ case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
+ case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
+ {
+ StgSmallMutArrPtrs *a = (StgSmallMutArrPtrs *)p;
+ for (uint32_t i = 0; i < a->ptrs; i++) {
+ ASSERT(LOOKS_LIKE_CLOSURE_PTR(a->payload[i]));
+ }
+ return small_mut_arr_ptrs_sizeW(a);
+ }
+
case TSO:
checkTSO((StgTSO *)p);
return sizeofW(StgTSO);
@@ -535,7 +547,8 @@ checkTSO(StgTSO *tso)
ASSERT(next == END_TSO_QUEUE ||
info == &stg_MVAR_TSO_QUEUE_info ||
info == &stg_TSO_info ||
- info == &stg_WHITEHOLE_info); // happens due to STM doing lockTSO()
+ info == &stg_WHITEHOLE_info); // used to happen due to STM doing
+ // lockTSO(), might not happen now
if ( tso->why_blocked == BlockedOnMVar
|| tso->why_blocked == BlockedOnMVarRead
@@ -677,7 +690,7 @@ checkStaticObjects ( StgClosure* static_objects )
break;
case FUN_STATIC:
- p = *FUN_STATIC_LINK((StgClosure *)p);
+ p = *STATIC_LINK(info,(StgClosure *)p);
break;
case CONSTR:
@@ -859,7 +872,7 @@ void findSlop(bdescr *bd)
slop = (bd->blocks * BLOCK_SIZE_W) - (bd->free - bd->start);
if (slop > (1024/sizeof(W_))) {
debugBelch("block at %p (bdescr %p) has %" FMT_Word "KB slop\n",
- bd->start, bd, slop / (1024/sizeof(W_)));
+ bd->start, bd, slop / (1024/(W_)sizeof(W_)));
}
}
}
diff --git a/rts/sm/Scav.c b/rts/sm/Scav.c
index 1ae8a4c19b..2f61914e55 100644
--- a/rts/sm/Scav.c
+++ b/rts/sm/Scav.c
@@ -11,6 +11,37 @@
*
* ---------------------------------------------------------------------------*/
+/* ----------------------------------------------------------------------------
+ We have two main scavenge functions:
+
+ - scavenge_block(bdescr *bd)
+ - scavenge_one(StgPtr p)
+
+ As the names and parameters suggest, the first one scavenges a whole block while
+ the second one only scavenges one object. This however is not the only
+ difference. scavenge_block scavenges all SRTs while scavenge_one only
scavenges SRTs of stacks. This is because scavenge_one is called in two
+ cases:
+
+ - When scavenging a mut_list
+ - When scavenging a large object
+
+ We don't have to scavenge SRTs when scavenging a mut_list, because we only
+ scavenge mut_lists in minor GCs, and static objects are only collected in
+ major GCs.
+
+ However, because scavenge_one is also used to scavenge large objects (which
+ are scavenged even in major GCs), we need to deal with SRTs of large
+ objects. We never allocate large FUNs and THUNKs, but we allocate large
+ STACKs (e.g. in threadStackOverflow), and stack frames can have SRTs. So
+ scavenge_one skips FUN and THUNK SRTs but scavenges stack frame SRTs.
+
+ In summary, in a major GC:
+
+ - scavenge_block() scavenges all SRTs
+ - scavenge_one() scavenges only stack frame SRTs
+ ------------------------------------------------------------------------- */
+
#include "PosixSource.h"
#include "Rts.h"
@@ -329,105 +360,17 @@ scavenge_AP (StgAP *ap)
Scavenge SRTs
-------------------------------------------------------------------------- */
-/* Similar to scavenge_large_bitmap(), but we don't write back the
- * pointers we get back from evacuate().
- */
-static void
-scavenge_large_srt_bitmap( StgLargeSRT *large_srt )
-{
- uint32_t i, j, size;
- StgWord bitmap;
- StgClosure **p;
-
- size = (uint32_t)large_srt->l.size;
- p = (StgClosure **)large_srt->srt;
-
- for (i = 0; i < size / BITS_IN(W_); i++) {
- bitmap = large_srt->l.bitmap[i];
- // skip zero words: bitmaps can be very sparse, and this helps
- // performance a lot in some cases.
- if (bitmap != 0) {
- for (j = 0; j < BITS_IN(W_); j++) {
- if ((bitmap & 1) != 0) {
- evacuate(p);
- }
- p++;
- bitmap = bitmap >> 1;
- }
- } else {
- p += BITS_IN(W_);
- }
- }
- if (size % BITS_IN(W_) != 0) {
- bitmap = large_srt->l.bitmap[i];
- for (j = 0; j < size % BITS_IN(W_); j++) {
- if ((bitmap & 1) != 0) {
- evacuate(p);
- }
- p++;
- bitmap = bitmap >> 1;
- }
- }
-}
-
-/* evacuate the SRT. If srt_bitmap is zero, then there isn't an
- * srt field in the info table. That's ok, because we'll
- * never dereference it.
- */
-STATIC_INLINE GNUC_ATTR_HOT void
-scavenge_srt (StgClosure **srt, uint32_t srt_bitmap)
-{
- uint32_t bitmap;
- StgClosure **p;
-
- bitmap = srt_bitmap;
- p = srt;
-
- if (bitmap == (StgHalfWord)(-1)) {
- scavenge_large_srt_bitmap( (StgLargeSRT *)srt );
- return;
- }
-
- while (bitmap != 0) {
- if ((bitmap & 1) != 0) {
-#if defined(COMPILING_WINDOWS_DLL)
- // Special-case to handle references to closures hiding out in DLLs, since
- // double indirections required to get at those. The code generator knows
- // which is which when generating the SRT, so it stores the (indirect)
- // reference to the DLL closure in the table by first adding one to it.
- // We check for this here, and undo the addition before evacuating it.
- //
- // If the SRT entry hasn't got bit 0 set, the SRT entry points to a
- // closure that's fixed at link-time, and no extra magic is required.
- if ( (W_)(*srt) & 0x1 ) {
- evacuate( (StgClosure**) ((W_) (*srt) & ~0x1));
- } else {
- evacuate(p);
- }
-#else
- evacuate(p);
-#endif
- }
- p++;
- bitmap = bitmap >> 1;
- }
-}
-
-
STATIC_INLINE GNUC_ATTR_HOT void
scavenge_thunk_srt(const StgInfoTable *info)
{
StgThunkInfoTable *thunk_info;
- uint32_t bitmap;
if (!major_gc) return;
thunk_info = itbl_to_thunk_itbl(info);
- bitmap = thunk_info->i.srt_bitmap;
- if (bitmap) {
- // don't read srt_offset if bitmap==0, because it doesn't exist
- // and so the memory might not be readable.
- scavenge_srt((StgClosure **)GET_SRT(thunk_info), bitmap);
+ if (thunk_info->i.srt) {
+ StgClosure *srt = (StgClosure*)GET_SRT(thunk_info);
+ evacuate(&srt);
}
}
@@ -435,16 +378,13 @@ STATIC_INLINE GNUC_ATTR_HOT void
scavenge_fun_srt(const StgInfoTable *info)
{
StgFunInfoTable *fun_info;
- uint32_t bitmap;
if (!major_gc) return;
fun_info = itbl_to_fun_itbl(info);
- bitmap = fun_info->i.srt_bitmap;
- if (bitmap) {
- // don't read srt_offset if bitmap==0, because it doesn't exist
- // and so the memory might not be readable.
- scavenge_srt((StgClosure **)GET_FUN_SRT(fun_info), bitmap);
+ if (fun_info->i.srt) {
+ StgClosure *srt = (StgClosure*)GET_FUN_SRT(fun_info);
+ evacuate(&srt);
}
}
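
Under the new SRT scheme (see Note [SRTs] in CmmBuildInfoTables) an info table's srt field, when non-zero, denotes a single static closure rather than a bitmap over a table, so scavenging it reduces to one evacuate() call. The pattern now shared by thunks, functions and (below) stack frames, sketched:

// sketch of the common shape; `info` is the closure's info table
if (major_gc && info->i.srt) {
    StgClosure *srt = (StgClosure*)GET_SRT(info);
    evacuate(&srt);
    // the updated pointer is discarded: static objects are not moved,
    // evacuation here only serves to mark the SRT reachable
}
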
@@ -737,18 +677,16 @@ scavenge_block (bdescr *bd)
break;
}
- case MUT_ARR_PTRS_FROZEN:
- case MUT_ARR_PTRS_FROZEN0:
+ case MUT_ARR_PTRS_FROZEN_CLEAN:
+ case MUT_ARR_PTRS_FROZEN_DIRTY:
// follow everything
{
p = scavenge_mut_arr_ptrs((StgMutArrPtrs*)p);
- // If we're going to put this object on the mutable list, then
- // set its info ptr to MUT_ARR_PTRS_FROZEN0 to indicate that.
if (gct->failed_to_evac) {
- ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN0_info;
+ ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_DIRTY_info;
} else {
- ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_info;
+ ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_CLEAN_info;
}
break;
}
@@ -780,8 +718,8 @@ scavenge_block (bdescr *bd)
break;
}
- case SMALL_MUT_ARR_PTRS_FROZEN:
- case SMALL_MUT_ARR_PTRS_FROZEN0:
+ case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
+ case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
// follow everything
{
StgPtr next;
@@ -791,12 +729,10 @@ scavenge_block (bdescr *bd)
evacuate((StgClosure **)p);
}
- // If we're going to put this object on the mutable list, then
- // set its info ptr to SMALL_MUT_ARR_PTRS_FROZEN0 to indicate that.
if (gct->failed_to_evac) {
- ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN0_info;
+ ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY_info;
} else {
- ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_info;
+ ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN_info;
}
break;
}
@@ -1133,20 +1069,18 @@ scavenge_mark_stack(void)
break;
}
- case MUT_ARR_PTRS_FROZEN:
- case MUT_ARR_PTRS_FROZEN0:
+ case MUT_ARR_PTRS_FROZEN_CLEAN:
+ case MUT_ARR_PTRS_FROZEN_DIRTY:
// follow everything
{
StgPtr q = p;
scavenge_mut_arr_ptrs((StgMutArrPtrs *)p);
- // If we're going to put this object on the mutable list, then
- // set its info ptr to MUT_ARR_PTRS_FROZEN0 to indicate that.
if (gct->failed_to_evac) {
- ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN0_info;
+ ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_DIRTY_info;
} else {
- ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_info;
+ ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_CLEAN_info;
}
break;
}
@@ -1180,8 +1114,8 @@ scavenge_mark_stack(void)
break;
}
- case SMALL_MUT_ARR_PTRS_FROZEN:
- case SMALL_MUT_ARR_PTRS_FROZEN0:
+ case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
+ case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
// follow everything
{
StgPtr next, q = p;
@@ -1191,12 +1125,10 @@ scavenge_mark_stack(void)
evacuate((StgClosure **)p);
}
- // If we're going to put this object on the mutable list, then
- // set its info ptr to SMALL_MUT_ARR_PTRS_FROZEN0 to indicate that.
if (gct->failed_to_evac) {
- ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN0_info;
+ ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY_info;
} else {
- ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_info;
+ ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN_info;
}
break;
}
@@ -1365,7 +1297,7 @@ scavenge_one(StgPtr p)
case WEAK:
// This WEAK object will not be considered by tidyWeakList during this
- // collection because it is in a generation >= N, but it is on the
+ // collection because it is in a generation > N, but it is on the
// mutable list so we must evacuate all of its pointers because some
// of them may point into a younger generation.
scavengeLiveWeak((StgWeak *)p);
@@ -1457,18 +1389,16 @@ scavenge_one(StgPtr p)
break;
}
- case MUT_ARR_PTRS_FROZEN:
- case MUT_ARR_PTRS_FROZEN0:
+ case MUT_ARR_PTRS_FROZEN_CLEAN:
+ case MUT_ARR_PTRS_FROZEN_DIRTY:
{
// follow everything
scavenge_mut_arr_ptrs((StgMutArrPtrs *)p);
- // If we're going to put this object on the mutable list, then
- // set its info ptr to MUT_ARR_PTRS_FROZEN0 to indicate that.
if (gct->failed_to_evac) {
- ((StgClosure *)p)->header.info = &stg_MUT_ARR_PTRS_FROZEN0_info;
+ ((StgClosure *)p)->header.info = &stg_MUT_ARR_PTRS_FROZEN_DIRTY_info;
} else {
- ((StgClosure *)p)->header.info = &stg_MUT_ARR_PTRS_FROZEN_info;
+ ((StgClosure *)p)->header.info = &stg_MUT_ARR_PTRS_FROZEN_CLEAN_info;
}
break;
}
@@ -1502,8 +1432,8 @@ scavenge_one(StgPtr p)
break;
}
- case SMALL_MUT_ARR_PTRS_FROZEN:
- case SMALL_MUT_ARR_PTRS_FROZEN0:
+ case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
+ case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
{
// follow everything
StgPtr next, q=p;
@@ -1513,12 +1443,10 @@ scavenge_one(StgPtr p)
evacuate((StgClosure **)p);
}
- // If we're going to put this object on the mutable list, then
- // set its info ptr to SMALL_MUT_ARR_PTRS_FROZEN0 to indicate that.
if (gct->failed_to_evac) {
- ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN0_info;
+ ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY_info;
} else {
- ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_info;
+ ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN_info;
}
break;
}
@@ -1653,8 +1581,8 @@ scavenge_mutable_list(bdescr *bd, generation *gen)
mutlist_MUTVARS++; break;
case MUT_ARR_PTRS_CLEAN:
case MUT_ARR_PTRS_DIRTY:
- case MUT_ARR_PTRS_FROZEN:
- case MUT_ARR_PTRS_FROZEN0:
+ case MUT_ARR_PTRS_FROZEN_CLEAN:
+ case MUT_ARR_PTRS_FROZEN_DIRTY:
mutlist_MUTARRS++; break;
case MVAR_CLEAN:
barf("MVAR_CLEAN on mutable list");
@@ -1669,10 +1597,6 @@ scavenge_mutable_list(bdescr *bd, generation *gen)
mutlist_TVAR_WATCH_QUEUE++;
else if (((StgClosure*)p)->header.info == &stg_TREC_HEADER_info)
mutlist_TREC_HEADER++;
- else if (((StgClosure*)p)->header.info == &stg_ATOMIC_INVARIANT_info)
- mutlist_ATOMIC_INVARIANT++;
- else if (((StgClosure*)p)->header.info == &stg_INVARIANT_CHECK_QUEUE_info)
- mutlist_INVARIANT_CHECK_QUEUE++;
else
mutlist_OTHERS++;
break;
@@ -1690,6 +1614,7 @@ scavenge_mutable_list(bdescr *bd, generation *gen)
//
switch (get_itbl((StgClosure *)p)->type) {
case MUT_ARR_PTRS_CLEAN:
+ case SMALL_MUT_ARR_PTRS_CLEAN:
recordMutableGen_GC((StgClosure *)p,gen_no);
continue;
case MUT_ARR_PTRS_DIRTY:
@@ -1813,7 +1738,11 @@ scavenge_static(void)
case FUN_STATIC:
scavenge_fun_srt(info);
- break;
+ /* fallthrough */
+
+ // a FUN_STATIC can also be an SRT, so it may have pointer
+ // fields. See Note [SRTs] in CmmBuildInfoTables, specifically
+ // the [FUN] optimisation.
case CONSTR:
case CONSTR_NOCAF:
@@ -1979,8 +1908,10 @@ scavenge_stack(StgPtr p, StgPtr stack_end)
p = scavenge_small_bitmap(p, size, bitmap);
follow_srt:
- if (major_gc)
- scavenge_srt((StgClosure **)GET_SRT(info), info->i.srt_bitmap);
+ if (major_gc && info->i.srt) {
+ StgClosure *srt = (StgClosure*)GET_SRT(info);
+ evacuate(&srt);
+ }
continue;
case RET_BCO: {
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index ffaed5f17c..dcc5b3a3c7 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -197,11 +197,7 @@ initStorage (void)
#if defined(THREADED_RTS)
initSpinLock(&gc_alloc_block_sync);
-#if defined(PROF_SPIN)
- whitehole_spin = 0;
#endif
-#endif
-
N = 0;
for (n = 0; n < n_numa_nodes; n++) {
@@ -224,6 +220,7 @@ initStorage (void)
void storageAddCapabilities (uint32_t from, uint32_t to)
{
uint32_t n, g, i, new_n_nurseries;
+ nursery *old_nurseries;
if (RtsFlags.GcFlags.nurseryChunkSize == 0) {
new_n_nurseries = to;
@@ -233,6 +230,7 @@ void storageAddCapabilities (uint32_t from, uint32_t to)
stg_max(to, total_alloc / RtsFlags.GcFlags.nurseryChunkSize);
}
+ old_nurseries = nurseries;
if (from > 0) {
nurseries = stgReallocBytes(nurseries,
new_n_nurseries * sizeof(struct nursery_),
@@ -244,8 +242,9 @@ void storageAddCapabilities (uint32_t from, uint32_t to)
// we've moved the nurseries, so we have to update the rNursery
// pointers from the Capabilities.
- for (i = 0; i < to; i++) {
- capabilities[i]->r.rNursery = &nurseries[i];
+ for (i = 0; i < from; i++) {
+ uint32_t index = capabilities[i]->r.rNursery - old_nurseries;
+ capabilities[i]->r.rNursery = &nurseries[index];
}
/* The allocation area. Policy: keep the allocation area
@@ -307,21 +306,21 @@ freeStorage (bool free_heap)
The entry code for every CAF does the following:
- - calls newCaf, which builds a CAF_BLACKHOLE on the heap and atomically
+ - calls newCAF, which builds a CAF_BLACKHOLE on the heap and atomically
updates the CAF with IND_STATIC pointing to the CAF_BLACKHOLE
- - if newCaf returns zero, it re-enters the CAF (see Note [atomic
+ - if newCAF returns zero, it re-enters the CAF (see Note [atomic
CAF entry])
- pushes an update frame pointing to the CAF_BLACKHOLE
- Why do we build an BLACKHOLE in the heap rather than just updating
+ Why do we build a BLACKHOLE in the heap rather than just updating
the thunk directly? It's so that we only need one kind of update
frame - otherwise we'd need a static version of the update frame
too, and various other parts of the RTS that deal with update
frames would also need special cases for static update frames.
- newCaf() does the following:
+ newCAF() does the following:
- atomically locks the CAF (see [atomic CAF entry])
@@ -339,7 +338,7 @@ freeStorage (bool free_heap)
------------------
Note [atomic CAF entry]
- With THREADED_RTS, newCaf() is required to be atomic (see
+ With THREADED_RTS, newCAF() is required to be atomic (see
#5558). This is because if two threads happened to enter the same
CAF simultaneously, they would create two distinct CAF_BLACKHOLEs,
and so the normal threadPaused() machinery for detecting duplicate
@@ -359,7 +358,7 @@ freeStorage (bool free_heap)
- we must be able to *revert* CAFs that have been evaluated, to
their pre-evaluated form.
- To do this, we use an additional CAF list. When newCaf() is
+ To do this, we use an additional CAF list. When newCAF() is
called on a dynamically-loaded CAF, we add it to the CAF list
instead of the old-generation mutable list, and save away its
old info pointer (in caf->saved_info) for later reversion.
@@ -796,6 +795,20 @@ move_STACK (StgStack *src, StgStack *dest)
dest->sp = (StgPtr)dest->sp + diff;
}
+STATIC_INLINE void
+accountAllocation(Capability *cap, W_ n)
+{
+ TICK_ALLOC_HEAP_NOCTR(WDS(n));
+ CCS_ALLOC(cap->r.rCCCS,n);
+ if (cap->r.rCurrentTSO != NULL) {
+ // cap->r.rCurrentTSO->alloc_limit -= n*sizeof(W_)
+ ASSIGN_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit),
+ (PK_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit))
+ - n*sizeof(W_)));
+ }
+
+}
+
/* -----------------------------------------------------------------------------
StgPtr allocate (Capability *cap, W_ n)
@@ -812,21 +825,37 @@ move_STACK (StgStack *src, StgStack *dest)
that operation fails, then the whole process will be killed.
-------------------------------------------------------------------------- */
+/*
+ * Allocate n words of heap memory, terminating
+ * on heap overflow
+ */
StgPtr
allocate (Capability *cap, W_ n)
{
+ StgPtr p = allocateMightFail(cap, n);
+ if (p == NULL) {
+ reportHeapOverflow();
+ // heapOverflow() doesn't exit (see #2592), but we aren't
+ // in a position to do a clean shutdown here: we
+ // either have to allocate the memory or exit now.
+ // Allocating the memory would be bad, because the user
+ // has requested that we not exceed maxHeapSize, so we
+ // just exit.
+ stg_exit(EXIT_HEAPOVERFLOW);
+ }
+ return p;
+}
+
+/*
+ * Allocate n words of heap memory, returning NULL
+ * on heap overflow
+ */
+StgPtr
+allocateMightFail (Capability *cap, W_ n)
+{
bdescr *bd;
StgPtr p;
- TICK_ALLOC_HEAP_NOCTR(WDS(n));
- CCS_ALLOC(cap->r.rCCCS,n);
- if (cap->r.rCurrentTSO != NULL) {
- // cap->r.rCurrentTSO->alloc_limit -= n*sizeof(W_)
- ASSIGN_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit),
- (PK_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit))
- - n*sizeof(W_)));
- }
-
if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
// The largest number of words such that
// the computation of req_blocks will not overflow.
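
A caller-side sketch of the new pair (hypothetical call site, not from this patch; payload_words is illustrative): allocate() keeps the old terminate-on-overflow behaviour, while allocateMightFail() lets the caller turn exhaustion into, e.g., a HeapOverflow exception:

StgPtr p = allocateMightFail(cap, sizeofW(StgArrBytes) + payload_words);
if (p == NULL) {
    // hypothetical recovery path: report overflow to the mutator
    // instead of stg_exit(EXIT_HEAPOVERFLOW)
    return NULL;   // propagate failure to the caller
}
// ... initialise the object at p ...
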
@@ -845,16 +874,12 @@ allocate (Capability *cap, W_ n)
req_blocks >= HS_INT32_MAX) // avoid overflow when
// calling allocGroup() below
{
- reportHeapOverflow();
- // heapOverflow() doesn't exit (see #2592), but we aren't
- // in a position to do a clean shutdown here: we
- // either have to allocate the memory or exit now.
- // Allocating the memory would be bad, because the user
- // has requested that we not exceed maxHeapSize, so we
- // just exit.
- stg_exit(EXIT_HEAPOVERFLOW);
+ return NULL;
}
+ // Only credit allocation after we've passed the size check above
+ accountAllocation(cap, n);
+
ACQUIRE_SM_LOCK
bd = allocGroupOnNode(cap->node,req_blocks);
dbl_link_onto(bd, &g0->large_objects);
@@ -870,6 +895,7 @@ allocate (Capability *cap, W_ n)
/* small allocation (<LARGE_OBJECT_THRESHOLD) */
+ accountAllocation(cap, n);
bd = cap->r.rCurrentAlloc;
if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
@@ -955,7 +981,8 @@ allocate (Capability *cap, W_ n)
to pinned ByteArrays, not scavenging is ok.
This function is called by newPinnedByteArray# which immediately
- fills the allocated memory with a MutableByteArray#.
+ fills the allocated memory with a MutableByteArray#. Note that
+ this returns NULL on heap overflow.
------------------------------------------------------------------------- */
StgPtr
@@ -967,20 +994,16 @@ allocatePinned (Capability *cap, W_ n)
// If the request is for a large object, then allocate()
// will give us a pinned object anyway.
if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
- p = allocate(cap, n);
- Bdescr(p)->flags |= BF_PINNED;
- return p;
- }
-
- TICK_ALLOC_HEAP_NOCTR(WDS(n));
- CCS_ALLOC(cap->r.rCCCS,n);
- if (cap->r.rCurrentTSO != NULL) {
- // cap->r.rCurrentTSO->alloc_limit -= n*sizeof(W_);
- ASSIGN_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit),
- (PK_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit))
- - n*sizeof(W_)));
+ p = allocateMightFail(cap, n);
+ if (p == NULL) {
+ return NULL;
+ } else {
+ Bdescr(p)->flags |= BF_PINNED;
+ return p;
+ }
}
+ accountAllocation(cap, n);
bd = cap->pinned_object_block;
// If we don't have a block of pinned objects yet, or the current
@@ -1135,7 +1158,7 @@ dirty_MVAR(StgRegTable *reg, StgClosure *p)
* -------------------------------------------------------------------------- */
/* -----------------------------------------------------------------------------
- * [Note allocation accounting]
+ * Note [allocation accounting]
*
* - When cap->r.rCurrentNusery moves to a new block in the nursery,
* we add the size of the used portion of the previous block to
@@ -1241,16 +1264,15 @@ W_ gcThreadLiveBlocks (uint32_t i, uint32_t g)
* to store bitmaps and the mark stack. Note: blocks_needed does not
* include the blocks in the nursery.
*
- * Assume: all data currently live will remain live. Generationss
+ * Assume: all data currently live will remain live. Generations
* that will be collected next time will therefore need twice as many
* blocks since all the data will be copied.
*/
extern W_
calcNeeded (bool force_major, memcount *blocks_needed)
{
- W_ needed = 0, blocks;
- uint32_t g, N;
- generation *gen;
+ W_ needed = 0;
+ uint32_t N;
if (force_major) {
N = RtsFlags.GcFlags.generations - 1;
@@ -1258,12 +1280,12 @@ calcNeeded (bool force_major, memcount *blocks_needed)
N = 0;
}
- for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
- gen = &generations[g];
+ for (uint32_t g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ generation *gen = &generations[g];
- blocks = gen->n_blocks // or: gen->n_words / BLOCK_SIZE_W (?)
- + gen->n_large_blocks
- + gen->n_compact_blocks;
+ W_ blocks = gen->n_blocks // or: gen->n_words / BLOCK_SIZE_W (?)
+ + gen->n_large_blocks
+ + gen->n_compact_blocks;
// we need at least this much space
needed += blocks;