| author | Ben Gamari <ben@smart-cactus.org> | 2020-11-01 13:10:01 -0500 | 
|---|---|---|
| committer | Ben Gamari <ben@smart-cactus.org> | 2020-11-01 13:10:01 -0500 | 
| commit | b8e66e0eecdc58ec5fea0b2c9a9454d38858886c (patch) | |
| tree | 7d25f3ee8f2b714175d1b5647d9aec1fdb550cc1 | |
| parent | b4686bff56377a583f0605b81fae290d3fee4c4a (diff) | |
| parent | 3a18155331e07e53b9f3b1d987ed430066b17aa4 (diff) | |
| download | haskell-b8e66e0eecdc58ec5fea0b2c9a9454d38858886c.tar.gz | |
Merge branch 'wip/tsan/storage' into wip/tsan/all
| -rw-r--r-- | includes/rts/OSThreads.h | 3 |
| -rw-r--r-- | includes/rts/SpinLock.h | 15 |
| -rw-r--r-- | includes/rts/StablePtr.h | 6 |
| -rw-r--r-- | includes/rts/storage/GC.h | 6 |
| -rw-r--r-- | includes/stg/SMP.h | 1 |
| -rw-r--r-- | rts/Capability.h | 6 |
| -rw-r--r-- | rts/SMPClosureOps.h | 5 |
| -rw-r--r-- | rts/Schedule.c | 4 |
| -rw-r--r-- | rts/SpinLock.c | 41 |
| -rw-r--r-- | rts/StablePtr.c | 9 |
| -rw-r--r-- | rts/Updates.h | 6 |
| -rw-r--r-- | rts/Weak.c | 29 |
| -rw-r--r-- | rts/posix/OSThreads.c | 8 |
| -rw-r--r-- | rts/rts.cabal.in | 1 |
| -rw-r--r-- | rts/sm/BlockAlloc.c | 38 |
| -rw-r--r-- | rts/sm/Evac.c | 139 |
| -rw-r--r-- | rts/sm/GC.c | 137 |
| -rw-r--r-- | rts/sm/GC.h | 28 |
| -rw-r--r-- | rts/sm/GCAux.c | 2 |
| -rw-r--r-- | rts/sm/GCUtils.c | 13 |
| -rw-r--r-- | rts/sm/GCUtils.h | 4 |
| -rw-r--r-- | rts/sm/MarkWeak.c | 5 |
| -rw-r--r-- | rts/sm/NonMoving.c | 1 |
| -rw-r--r-- | rts/sm/Scav.c | 142 |
| -rw-r--r-- | rts/sm/Storage.c | 48 |
| -rw-r--r-- | rts/sm/Storage.h | 7 |
| -rw-r--r-- | rts/win32/OSThreads.c | 9 |
27 files changed, 431 insertions, 282 deletions
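This merge replaces ad-hoc `write_barrier()`/`load_load_barrier()` calls and plain shared accesses with the `RELAXED_*`, `ACQUIRE_LOAD`, `RELEASE_STORE` and `SEQ_CST_*` macros throughout the storage manager. As a rough orientation aid, the sketch below shows how such macros are conventionally expressed with the GCC/Clang `__atomic` builtins and how a release store pairs with an acquire load, the pattern used for `stable_ptr_table` later in this diff. The macro bodies here are illustrative assumptions; the authoritative `THREADED_RTS` definitions live in `includes/stg/SMP.h`, and only the non-threaded fallbacks (plain loads and stores) are visible in the `SMP.h` hunk below.

```c
/* Hedged sketch: plausible THREADED_RTS-style definitions of the ordering
 * macros used in this merge, plus a release/acquire publication example.
 * The real definitions are in includes/stg/SMP.h and may differ. */
#include <stdio.h>

#define RELAXED_LOAD(ptr)      __atomic_load_n(ptr, __ATOMIC_RELAXED)
#define RELAXED_STORE(ptr,val) __atomic_store_n(ptr, val, __ATOMIC_RELAXED)
#define RELAXED_ADD(ptr,val)   __atomic_fetch_add(ptr, val, __ATOMIC_RELAXED)
#define ACQUIRE_LOAD(ptr)      __atomic_load_n(ptr, __ATOMIC_ACQUIRE)
#define RELEASE_STORE(ptr,val) __atomic_store_n(ptr, val, __ATOMIC_RELEASE)
#define SEQ_CST_LOAD(ptr)      __atomic_load_n(ptr, __ATOMIC_SEQ_CST)
#define SEQ_CST_STORE(ptr,val) __atomic_store_n(ptr, val, __ATOMIC_SEQ_CST)
#define SEQ_CST_ADD(ptr,val)   __atomic_fetch_add(ptr, val, __ATOMIC_SEQ_CST)

/* Release/acquire pairing, as used for stable_ptr_table in this merge: the
 * writer fully initialises the new table, then publishes it with a release
 * store; a reader that observes the pointer via an acquire load is
 * guaranteed to also see the initialised contents. */
static int  storage[8];
static int *table;                        /* NULL until published */

static void publish(void)
{
    storage[0] = 42;                      /* initialise before publication */
    RELEASE_STORE(&table, storage);
}

static void consume(void)
{
    int *t = ACQUIRE_LOAD(&table);
    if (t != NULL)
        printf("slot 0 = %d\n", t[0]);    /* sees 42 if the pointer is seen */
}

int main(void)
{
    publish();
    consume();
    return 0;
}
```

In a non-threaded build the same macros degrade to plain memory accesses, which is why the `SMP.h` hunk in this diff only needs to add `#define RELAXED_ADD(ptr,val) *ptr += val` to the fallback section.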
diff --git a/includes/rts/OSThreads.h b/includes/rts/OSThreads.h index a68f1ea140..21b92950b2 100644 --- a/includes/rts/OSThreads.h +++ b/includes/rts/OSThreads.h @@ -164,7 +164,8 @@ typedef void* OSThreadProcAttr OSThreadProc(void *);  extern int  createOSThread        ( OSThreadId* tid, char *name,                                      OSThreadProc *startProc, void *param);  extern bool osThreadIsAlive       ( OSThreadId id ); -extern void interruptOSThread     (OSThreadId id); +extern void interruptOSThread     ( OSThreadId id ); +extern void joinOSThread          ( OSThreadId id );  //  // Condition Variables diff --git a/includes/rts/SpinLock.h b/includes/rts/SpinLock.h index 0ac51455dd..c1fe6c866c 100644 --- a/includes/rts/SpinLock.h +++ b/includes/rts/SpinLock.h @@ -39,19 +39,14 @@ typedef struct SpinLock_  #define IF_PROF_SPIN(x)  #endif +void acquire_spin_lock_slow_path(SpinLock * p); +  // acquire spin lock  INLINE_HEADER void ACQUIRE_SPIN_LOCK(SpinLock * p)  { -    do { -        for (uint32_t i = 0; i < SPIN_COUNT; i++) { -            StgWord32 r = cas((StgVolatilePtr)&(p->lock), 1, 0); -            if (r != 0) return; -            IF_PROF_SPIN(__atomic_fetch_add(&p->spin, 1, __ATOMIC_RELAXED)); -            busy_wait_nop(); -        } -        IF_PROF_SPIN(__atomic_fetch_add(&p->yield, 1, __ATOMIC_RELAXED)); -        yieldThread(); -    } while (1); +    StgWord32 r = cas((StgVolatilePtr)&(p->lock), 1, 0); +    if (RTS_UNLIKELY(r == 0)) +        acquire_spin_lock_slow_path(p);  }  // release spin lock diff --git a/includes/rts/StablePtr.h b/includes/rts/StablePtr.h index f42c353d2b..56113b9f81 100644 --- a/includes/rts/StablePtr.h +++ b/includes/rts/StablePtr.h @@ -31,5 +31,9 @@ extern DLL_IMPORT_RTS spEntry *stable_ptr_table;  EXTERN_INLINE  StgPtr deRefStablePtr(StgStablePtr sp)  { -    return stable_ptr_table[(StgWord)sp].addr; +    // acquire load to ensure that we see the new SPT if it has been recently +    // enlarged. +    const spEntry *spt = ACQUIRE_LOAD(&stable_ptr_table); +    // acquire load to ensure that the referenced object is visible. 
+    return ACQUIRE_LOAD(&spt[(StgWord)sp].addr);  } diff --git a/includes/rts/storage/GC.h b/includes/rts/storage/GC.h index 9f4a0dde07..e8dc05048a 100644 --- a/includes/rts/storage/GC.h +++ b/includes/rts/storage/GC.h @@ -247,9 +247,9 @@ extern bool keepCAFs;  INLINE_HEADER void initBdescr(bdescr *bd, generation *gen, generation *dest)  { -    bd->gen     = gen; -    bd->gen_no  = gen->no; -    bd->dest_no = dest->no; +    RELAXED_STORE(&bd->gen, gen); +    RELAXED_STORE(&bd->gen_no, gen->no); +    RELAXED_STORE(&bd->dest_no, dest->no);  #if !IN_STG_CODE      /* See Note [RtsFlags is a pointer in STG code] */ diff --git a/includes/stg/SMP.h b/includes/stg/SMP.h index 9390c00eb1..8eff276e60 100644 --- a/includes/stg/SMP.h +++ b/includes/stg/SMP.h @@ -467,6 +467,7 @@ EXTERN_INLINE void load_load_barrier () {} /* nothing */  // Relaxed atomic operations  #define RELAXED_LOAD(ptr) *ptr  #define RELAXED_STORE(ptr,val) *ptr = val +#define RELAXED_ADD(ptr,val) *ptr += val  // Acquire/release atomic operations  #define ACQUIRE_LOAD(ptr) *ptr diff --git a/rts/Capability.h b/rts/Capability.h index bc2e48412a..8c5b1e814e 100644 --- a/rts/Capability.h +++ b/rts/Capability.h @@ -419,14 +419,16 @@ recordMutableCap (const StgClosure *p, Capability *cap, uint32_t gen)      //    ASSERT(cap->running_task == myTask());      // NO: assertion is violated by performPendingThrowTos()      bd = cap->mut_lists[gen]; -    if (bd->free >= bd->start + BLOCK_SIZE_W) { +    if (RELAXED_LOAD(&bd->free) >= bd->start + BLOCK_SIZE_W) {          bdescr *new_bd;          new_bd = allocBlockOnNode_lock(cap->node);          new_bd->link = bd; +        new_bd->free = new_bd->start;          bd = new_bd;          cap->mut_lists[gen] = bd;      } -    *bd->free++ = (StgWord)p; +    RELAXED_STORE(bd->free, (StgWord) p); +    NONATOMIC_ADD(&bd->free, 1);  }  EXTERN_INLINE void diff --git a/rts/SMPClosureOps.h b/rts/SMPClosureOps.h index c73821a782..3191a8c600 100644 --- a/rts/SMPClosureOps.h +++ b/rts/SMPClosureOps.h @@ -119,9 +119,8 @@ tryLockClosure(StgClosure *p)  EXTERN_INLINE void unlockClosure(StgClosure *p, const StgInfoTable *info)  { -    // This is a strictly ordered write, so we need a write_barrier(): -    write_barrier(); -    p->header.info = info; +    // This is a strictly ordered write, so we need a RELEASE ordering. +    RELEASE_STORE(&p->header.info, info);  }  #endif /* CMINUSMINUS */ diff --git a/rts/Schedule.c b/rts/Schedule.c index 52d89a08fb..b97da30848 100644 --- a/rts/Schedule.c +++ b/rts/Schedule.c @@ -435,7 +435,7 @@ run_thread:      RELAXED_STORE(&cap->interrupt, false);      cap->in_haskell = true; -    cap->idle = 0; +    RELAXED_STORE(&cap->idle, false);      dirty_TSO(cap,t);      dirty_STACK(cap,t->stackobj); @@ -1793,7 +1793,7 @@ scheduleDoGC (Capability **pcap, Task *task USED_IF_THREADS,          debugTrace(DEBUG_sched, "%d idle caps", n_idle_caps);          for (i=0; i < n_capabilities; i++) { -            capabilities[i]->idle++; +            NONATOMIC_ADD(&capabilities[i]->idle, 1);          }          // For all capabilities participating in this GC, wait until diff --git a/rts/SpinLock.c b/rts/SpinLock.c new file mode 100644 index 0000000000..5289694aa7 --- /dev/null +++ b/rts/SpinLock.c @@ -0,0 +1,41 @@ +/* ---------------------------------------------------------------------------- + * + * (c) The GHC Team, 2006-2009 + * + * Spin locks + * + * These are simple spin-only locks as opposed to Mutexes which + * probably spin for a while before blocking in the kernel.  
We use + * these when we are sure that all our threads are actively running on + * a CPU, eg. in the GC. + * + * TODO: measure whether we really need these, or whether Mutexes + * would do (and be a bit safer if a CPU becomes loaded). + * + * Do not #include this file directly: #include "Rts.h" instead. + * + * To understand the structure of the RTS headers, see the wiki: + *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * -------------------------------------------------------------------------- */ + +#include "PosixSource.h" +#include "Rts.h" + +#if defined(THREADED_RTS) + +void acquire_spin_lock_slow_path(SpinLock * p) +{ +    do { +        for (uint32_t i = 0; i < SPIN_COUNT; i++) { +            StgWord32 r = cas((StgVolatilePtr)&(p->lock), 1, 0); +            if (r != 0) return; +            IF_PROF_SPIN(RELAXED_ADD(&p->spin, 1)); +            busy_wait_nop(); +        } +        IF_PROF_SPIN(RELAXED_ADD(&p->yield, 1)); +        yieldThread(); +    } while (1); +} + +#endif diff --git a/rts/StablePtr.c b/rts/StablePtr.c index edcd863183..469a17a5b9 100644 --- a/rts/StablePtr.c +++ b/rts/StablePtr.c @@ -191,9 +191,10 @@ enlargeStablePtrTable(void)      /* When using the threaded RTS, the update of stable_ptr_table is assumed to       * be atomic, so that another thread simultaneously dereferencing a stable -     * pointer will always read a valid address. +     * pointer will always read a valid address. Release ordering to ensure +     * that the new table is visible to others.       */ -    stable_ptr_table = new_stable_ptr_table; +    RELEASE_STORE(&stable_ptr_table, new_stable_ptr_table);      initSpEntryFreeList(stable_ptr_table + old_SPT_size, old_SPT_size, NULL);  } @@ -247,7 +248,7 @@ exitStablePtrTable(void)  STATIC_INLINE void  freeSpEntry(spEntry *sp)  { -    sp->addr = (P_)stable_ptr_free; +    RELAXED_STORE(&sp->addr, (P_)stable_ptr_free);      stable_ptr_free = sp;  } @@ -279,7 +280,7 @@ getStablePtr(StgPtr p)    if (!stable_ptr_free) enlargeStablePtrTable();    sp = stable_ptr_free - stable_ptr_table;    stable_ptr_free  = (spEntry*)(stable_ptr_free->addr); -  stable_ptr_table[sp].addr = p; +  RELAXED_STORE(&stable_ptr_table[sp].addr, p);    stablePtrUnlock();    return (StgStablePtr)(sp);  } diff --git a/rts/Updates.h b/rts/Updates.h index 608aaff524..aa5fbe0133 100644 --- a/rts/Updates.h +++ b/rts/Updates.h @@ -76,7 +76,6 @@ INLINE_HEADER void updateWithIndirection (Capability *cap,      /* not necessarily true: ASSERT( !closure_IND(p1) ); */      /* occurs in RaiseAsync.c:raiseAsync() */      /* See Note [Heap memory barriers] in SMP.h */ -    write_barrier();      bdescr *bd = Bdescr((StgPtr)p1);      if (bd->gen_no != 0) {        IF_NONMOVING_WRITE_BARRIER_ENABLED { @@ -88,9 +87,8 @@ INLINE_HEADER void updateWithIndirection (Capability *cap,          TICK_UPD_NEW_IND();      }      OVERWRITING_CLOSURE(p1); -    ((StgInd *)p1)->indirectee = p2; -    write_barrier(); -    SET_INFO(p1, &stg_BLACKHOLE_info); +    RELEASE_STORE(&((StgInd *)p1)->indirectee, p2); +    SET_INFO_RELEASE(p1, &stg_BLACKHOLE_info);      LDV_RECORD_CREATE(p1);  } diff --git a/rts/Weak.c b/rts/Weak.c index fe4516794a..0adf5a8b92 100644 --- a/rts/Weak.c +++ b/rts/Weak.c @@ -57,8 +57,7 @@ runAllCFinalizers(StgWeak *list)          // If there's no major GC between the time that the finalizer for the          // object from the oldest generation is manually called and shutdown          // we end up running the same finalizer twice. See #7170. 
-        const StgInfoTable *winfo = w->header.info; -        load_load_barrier(); +        const StgInfoTable *winfo = ACQUIRE_LOAD(&w->header.info);          if (winfo != &stg_DEAD_WEAK_info) {              runCFinalizers((StgCFinalizerList *)w->cfinalizers);          } @@ -93,10 +92,10 @@ scheduleFinalizers(Capability *cap, StgWeak *list)      StgWord size;      uint32_t n, i; -    // This assertion does not hold with non-moving collection because -    // non-moving collector does not wait for the list to be consumed (by -    // doIdleGcWork()) before appending the list with more finalizers. -    ASSERT(RtsFlags.GcFlags.useNonmoving || n_finalizers == 0); +    // n_finalizers is not necessarily zero under non-moving collection +    // because non-moving collector does not wait for the list to be consumed +    // (by doIdleGcWork()) before appending the list with more finalizers. +    ASSERT(RtsFlags.GcFlags.useNonmoving || SEQ_CST_LOAD(&n_finalizers) == 0);      // Append finalizer_list with the new list. TODO: Perhaps cache tail of the      // list for faster append. NOTE: We can't append `list` here! Otherwise we @@ -105,7 +104,7 @@ scheduleFinalizers(Capability *cap, StgWeak *list)      while (*tl) {          tl = &(*tl)->link;      } -    *tl = list; +    SEQ_CST_STORE(tl, list);      // Traverse the list and      //  * count the number of Haskell finalizers @@ -140,7 +139,7 @@ scheduleFinalizers(Capability *cap, StgWeak *list)          SET_HDR(w, &stg_DEAD_WEAK_info, w->header.prof.ccs);      } -    n_finalizers += i; +    SEQ_CST_ADD(&n_finalizers, i);      // No Haskell finalizers to run?      if (n == 0) return; @@ -226,7 +225,7 @@ static volatile StgWord finalizer_lock = 0;  //  bool runSomeFinalizers(bool all)  { -    if (n_finalizers == 0) +    if (RELAXED_LOAD(&n_finalizers) == 0)          return false;      if (cas(&finalizer_lock, 0, 1) != 0) { @@ -252,17 +251,15 @@ bool runSomeFinalizers(bool all)          if (!all && count >= finalizer_chunk) break;      } -    finalizer_list = w; -    n_finalizers -= count; +    RELAXED_STORE(&finalizer_list, w); +    SEQ_CST_ADD(&n_finalizers, -count);      if (task != NULL) {          task->running_finalizers = false;      }      debugTrace(DEBUG_sched, "ran %d C finalizers", count); - -    write_barrier(); -    finalizer_lock = 0; - -    return n_finalizers != 0; +    bool ret = n_finalizers != 0; +    RELEASE_STORE(&finalizer_lock, 0); +    return ret;  } diff --git a/rts/posix/OSThreads.c b/rts/posix/OSThreads.c index c51ccfcafb..6347e8ce7a 100644 --- a/rts/posix/OSThreads.c +++ b/rts/posix/OSThreads.c @@ -398,6 +398,14 @@ interruptOSThread (OSThreadId id)      pthread_kill(id, SIGPIPE);  } +void +joinOSThread (OSThreadId id) +{ +    if (pthread_join(id, NULL) != 0) { +        sysErrorBelch("joinOSThread: error %d", errno); +    } +} +  KernelThreadId kernelThreadId (void)  {  #if defined(linux_HOST_OS) diff --git a/rts/rts.cabal.in b/rts/rts.cabal.in index 08ebd3d7bf..12a4d68e4a 100644 --- a/rts/rts.cabal.in +++ b/rts/rts.cabal.in @@ -462,6 +462,7 @@ library                 STM.c                 Schedule.c                 Sparks.c +               SpinLock.c                 StableName.c                 StablePtr.c                 StaticPtrTable.c diff --git a/rts/sm/BlockAlloc.c b/rts/sm/BlockAlloc.c index 2bf497197e..451c182ac3 100644 --- a/rts/sm/BlockAlloc.c +++ b/rts/sm/BlockAlloc.c @@ -787,6 +787,26 @@ free_mega_group (bdescr *mg)  } +/* Note [Data races in freeGroup] + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * freeGroup commits a 
rather serious concurrency sin in its block coalescence + * logic: When freeing a block it looks at bd->free of the previous/next block + * to see whether it is allocated. However, the free'ing thread likely does not + * own the previous/next block, nor do we make any attempt to synchronize with + * the thread that *does* own it; this makes this access a data race. + * + * The original design argued that this was correct because `bd->free` will + * only take a value of -1 when the block is free and thereby owned by the + * storage manager. However, this is nevertheless unsafe under the C11 data + * model, which guarantees no particular semantics for data races. + * + * We currently assume (and hope) we won't see torn values and consequently + * we will never see `bd->free == -1` for an allocated block which we do not + * own. However, this is all extremely dodgy. + * + * This is tracked as #18913. + */ +  void  freeGroup(bdescr *p)  { @@ -796,7 +816,7 @@ freeGroup(bdescr *p)    // not true in multithreaded GC:    // ASSERT_SM_LOCK(); -  ASSERT(p->free != (P_)-1); +  ASSERT(RELAXED_LOAD(&p->free) != (P_)-1);  #if defined(DEBUG)    for (uint32_t i=0; i < p->blocks; i++) { @@ -806,9 +826,9 @@ freeGroup(bdescr *p)    node = p->node; -  p->free = (void *)-1;  /* indicates that this block is free */ -  p->gen = NULL; -  p->gen_no = 0; +  RELAXED_STORE(&p->free, (void *) -1);  /* indicates that this block is free */ +  RELAXED_STORE(&p->gen, NULL); +  RELAXED_STORE(&p->gen_no, 0);    /* fill the block group with garbage if sanity checking is on */    IF_DEBUG(zero_on_gc, memset(p->start, 0xaa, (W_)p->blocks * BLOCK_SIZE)); @@ -834,7 +854,11 @@ freeGroup(bdescr *p)    {        bdescr *next;        next = p + p->blocks; -      if (next <= LAST_BDESCR(MBLOCK_ROUND_DOWN(p)) && next->free == (P_)-1) + +      // See Note [Data races in freeGroup]. +      TSAN_ANNOTATE_BENIGN_RACE(&next->free, "freeGroup"); +      if (next <= LAST_BDESCR(MBLOCK_ROUND_DOWN(p)) +          && RELAXED_LOAD(&next->free) == (P_)-1)        {            p->blocks += next->blocks;            ln = log_2(next->blocks); @@ -855,7 +879,9 @@ freeGroup(bdescr *p)        prev = p - 1;        if (prev->blocks == 0) prev = prev->link; // find the head -      if (prev->free == (P_)-1) +      // See Note [Data races in freeGroup]. +      TSAN_ANNOTATE_BENIGN_RACE(&prev->free, "freeGroup"); +      if (RELAXED_LOAD(&prev->free) == (P_)-1)        {            ln = log_2(prev->blocks);            dbl_link_remove(prev, &free_list[node][ln]); diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c index 0ece06016a..b324a59179 100644 --- a/rts/sm/Evac.c +++ b/rts/sm/Evac.c @@ -171,7 +171,11 @@ copy_tag(StgClosure **p, const StgInfoTable *info,  #endif              return evacuate(p); // does the failed_to_evac stuff          } else { -            *p = TAG_CLOSURE(tag,(StgClosure*)to); +            // This doesn't need to have RELEASE ordering since we are guaranteed +            // to scavenge the to-space object on the current core therefore +            // no-one else will follow this pointer (FIXME: Is this true in +            // light of the selector optimization?). +            RELEASE_STORE(p, TAG_CLOSURE(tag,(StgClosure*)to));          }      }  #else @@ -206,9 +210,9 @@ copy_tag_nolock(StgClosure **p, const StgInfoTable *info,      // if somebody else reads the forwarding pointer, we better make      // sure there's a closure at the end of it. 
-    write_barrier(); -    *p = TAG_CLOSURE(tag,(StgClosure*)to); -    src->header.info = (const StgInfoTable *)MK_FORWARDING_PTR(to); +    RELEASE_STORE(p, TAG_CLOSURE(tag,(StgClosure*)to)); +    RELEASE_STORE(&src->header.info, \ +                  (const StgInfoTable *)MK_FORWARDING_PTR(to));  //  if (to+size+2 < bd->start + BLOCK_SIZE_W) {  //      __builtin_prefetch(to + size + 2, 1); @@ -245,7 +249,7 @@ spin:              goto spin;          }      if (IS_FORWARDING_PTR(info)) { -        src->header.info = (const StgInfoTable *)info; +        RELEASE_STORE(&src->header.info, (const StgInfoTable *)info);          evacuate(p); // does the failed_to_evac stuff          return false;      } @@ -261,9 +265,8 @@ spin:          to[i] = from[i];      } -    write_barrier(); -    *p = (StgClosure *)to; -    src->header.info = (const StgInfoTable*)MK_FORWARDING_PTR(to); +    RELEASE_STORE(p, (StgClosure *) to); +    RELEASE_STORE(&src->header.info, (const StgInfoTable*)MK_FORWARDING_PTR(to));  #if defined(PROFILING)      // We store the size of the just evacuated object in the LDV word so that @@ -306,12 +309,12 @@ evacuate_large(StgPtr p)    gen_workspace *ws;    bd = Bdescr(p); -  gen = bd->gen; -  gen_no = bd->gen_no; +  gen = RELAXED_LOAD(&bd->gen); +  gen_no = RELAXED_LOAD(&bd->gen_no);    ACQUIRE_SPIN_LOCK(&gen->sync);    // already evacuated? -  if (bd->flags & BF_EVACUATED) { +  if (RELAXED_LOAD(&bd->flags) & BF_EVACUATED) {      /* Don't forget to set the gct->failed_to_evac flag if we didn't get       * the desired destination (see comments in evacuate()).       */ @@ -344,9 +347,9 @@ evacuate_large(StgPtr p)    ws = &gct->gens[new_gen_no];    new_gen = &generations[new_gen_no]; -  bd->flags |= BF_EVACUATED; +  __atomic_fetch_or(&bd->flags, BF_EVACUATED, __ATOMIC_ACQ_REL);    if (RTS_UNLIKELY(RtsFlags.GcFlags.useNonmoving && new_gen == oldest_gen)) { -      bd->flags |= BF_NONMOVING; +      __atomic_fetch_or(&bd->flags, BF_NONMOVING, __ATOMIC_ACQ_REL);    }    initBdescr(bd, new_gen, new_gen->to); @@ -354,7 +357,7 @@ evacuate_large(StgPtr p)    // these objects, because they aren't allowed to contain any outgoing    // pointers.  For these blocks, we skip the scavenge stage and put    // them straight on the scavenged_large_objects list. -  if (bd->flags & BF_PINNED) { +  if (RELAXED_LOAD(&bd->flags) & BF_PINNED) {        ASSERT(get_itbl((StgClosure *)p)->type == ARR_WORDS);        if (new_gen != gen) { ACQUIRE_SPIN_LOCK(&new_gen->sync); } @@ -389,7 +392,7 @@ evacuate_static_object (StgClosure **link_field, StgClosure *q)          return;      } -    StgWord link = (StgWord)*link_field; +    StgWord link = RELAXED_LOAD((StgWord*) link_field);      // See Note [STATIC_LINK fields] for how the link field bits work      if (((link & STATIC_BITS) | prev_static_flag) != 3) { @@ -435,7 +438,7 @@ evacuate_compact (StgPtr p)      bd = Bdescr((StgPtr)str);      gen_no = bd->gen_no; -    if (bd->flags & BF_NONMOVING) { +    if (RELAXED_LOAD(&bd->flags) & BF_NONMOVING) {          // We may have evacuated the block to the nonmoving generation. If so          // we need to make sure it is added to the mark queue since the only          // reference to it may be from the moving heap. 
@@ -500,7 +503,7 @@ evacuate_compact (StgPtr p)      // in the GC, and that should never see blocks other than the first)      bd->flags |= BF_EVACUATED;      if (RTS_UNLIKELY(RtsFlags.GcFlags.useNonmoving && new_gen == oldest_gen)) { -        bd->flags |= BF_NONMOVING; +      __atomic_fetch_or(&bd->flags, BF_NONMOVING, __ATOMIC_RELAXED);      }      initBdescr(bd, new_gen, new_gen->to); @@ -581,7 +584,7 @@ evacuate(StgClosure **p)    const StgInfoTable *info;    StgWord tag; -  q = *p; +  q = RELAXED_LOAD(p);  loop:    /* The tag and the pointer are split, to be merged after evacing */ @@ -638,10 +641,11 @@ loop:    bd = Bdescr((P_)q); -  if ((bd->flags & (BF_LARGE | BF_MARKED | BF_EVACUATED | BF_COMPACT | BF_NONMOVING)) != 0) { +  uint16_t flags = RELAXED_LOAD(&bd->flags); +  if ((flags & (BF_LARGE | BF_MARKED | BF_EVACUATED | BF_COMPACT | BF_NONMOVING)) != 0) {        // Pointer to non-moving heap. Non-moving heap is collected using        // mark-sweep so this object should be marked and then retained in sweep. -      if (RTS_UNLIKELY(bd->flags & BF_NONMOVING)) { +      if (RTS_UNLIKELY(RELAXED_LOAD(&bd->flags) & BF_NONMOVING)) {            // NOTE: large objects in nonmoving heap are also marked with            // BF_NONMOVING. Those are moved to scavenged_large_objects list in            // mark phase. @@ -656,11 +660,11 @@ loop:        // happen often, but allowing it makes certain things a bit        // easier; e.g. scavenging an object is idempotent, so it's OK to        // have an object on the mutable list multiple times. -      if (bd->flags & BF_EVACUATED) { +      if (flags & BF_EVACUATED) {            // We aren't copying this object, so we have to check            // whether it is already in the target generation.  (this is            // the write barrier). -          if (bd->gen_no < gct->evac_gen_no) { +          if (RELAXED_LOAD(&bd->gen_no) < gct->evac_gen_no) {                gct->failed_to_evac = true;                TICK_GC_FAILED_PROMOTION();            } @@ -671,20 +675,20 @@ loop:        // right thing for objects that are half way in the middle of the first        // block of a compact (and would be treated as large objects even though        // they are not) -      if (bd->flags & BF_COMPACT) { +      if (flags & BF_COMPACT) {            evacuate_compact((P_)q);            return;        }        /* evacuate large objects by re-linking them onto a different list.         */ -      if (bd->flags & BF_LARGE) { +      if (flags & BF_LARGE) {            evacuate_large((P_)q);            // We may have evacuated the block to the nonmoving generation. If so            // we need to make sure it is added to the mark queue since the only            // reference to it may be from the moving heap. -          if (major_gc && bd->flags & BF_NONMOVING && !deadlock_detect_gc) { +          if (major_gc && flags & BF_NONMOVING && !deadlock_detect_gc) {                markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, q);            }            return; @@ -702,7 +706,7 @@ loop:    gen_no = bd->dest_no; -  info = q->header.info; +  info = ACQUIRE_LOAD(&q->header.info);    if (IS_FORWARDING_PTR(info))    {      /* Already evacuated, just return the forwarding address. @@ -722,9 +726,12 @@ loop:       * check if gen is too low.       
*/        StgClosure *e = (StgClosure*)UN_FORWARDING_PTR(info); -      *p = TAG_CLOSURE(tag,e); +      RELAXED_STORE(p, TAG_CLOSURE(tag,e));        if (gen_no < gct->evac_gen_no) {  // optimisation -          if (Bdescr((P_)e)->gen_no < gct->evac_gen_no) { +          // The ACQUIRE here is necessary to ensure that we see gen_no if the +          // evacuted object lives in a block newly-allocated by a GC thread on +          // another core. +          if (ACQUIRE_LOAD(&Bdescr((P_)e)->gen_no) < gct->evac_gen_no) {                gct->failed_to_evac = true;                TICK_GC_FAILED_PROMOTION();            } @@ -752,15 +759,17 @@ loop:        if (info == Czh_con_info &&            // unsigned, so always true:  (StgChar)w >= MIN_CHARLIKE &&            (StgChar)w <= MAX_CHARLIKE) { -          *p =  TAG_CLOSURE(tag, -                            (StgClosure *)CHARLIKE_CLOSURE((StgChar)w) -                           ); +          RELAXED_STORE(p, \ +                        TAG_CLOSURE(tag, \ +                                    (StgClosure *)CHARLIKE_CLOSURE((StgChar)w) +                                   ));        }        else if (info == Izh_con_info &&            (StgInt)w >= MIN_INTLIKE && (StgInt)w <= MAX_INTLIKE) { -          *p = TAG_CLOSURE(tag, -                             (StgClosure *)INTLIKE_CLOSURE((StgInt)w) -                             ); +          RELAXED_STORE(p, \ +                        TAG_CLOSURE(tag, \ +                                    (StgClosure *)INTLIKE_CLOSURE((StgInt)w) +                                   ));        }        else {            copy_tag_nolock(p,info,q,sizeofW(StgHeader)+1,gen_no,tag); @@ -814,10 +823,10 @@ loop:        const StgInfoTable *i;        r = ((StgInd*)q)->indirectee;        if (GET_CLOSURE_TAG(r) == 0) { -          i = r->header.info; +          i = ACQUIRE_LOAD(&r->header.info);            if (IS_FORWARDING_PTR(i)) {                r = (StgClosure *)UN_FORWARDING_PTR(i); -              i = r->header.info; +              i = ACQUIRE_LOAD(&r->header.info);            }            if (i == &stg_TSO_info                || i == &stg_WHITEHOLE_info @@ -842,7 +851,7 @@ loop:            ASSERT(i != &stg_IND_info);        }        q = r; -      *p = r; +      RELEASE_STORE(p, r);        goto loop;    } @@ -868,8 +877,8 @@ loop:    case IND:      // follow chains of indirections, don't evacuate them -    q = ((StgInd*)q)->indirectee; -    *p = q; +    q = RELAXED_LOAD(&((StgInd*)q)->indirectee); +    RELAXED_STORE(p, q);      goto loop;    case RET_BCO: @@ -983,11 +992,12 @@ evacuate_BLACKHOLE(StgClosure **p)      ASSERT(GET_CLOSURE_TAG(q) == 0);      bd = Bdescr((P_)q); +    const uint16_t flags = RELAXED_LOAD(&bd->flags);      // blackholes can't be in a compact -    ASSERT((bd->flags & BF_COMPACT) == 0); +    ASSERT((flags & BF_COMPACT) == 0); -    if (RTS_UNLIKELY(bd->flags & BF_NONMOVING)) { +    if (RTS_UNLIKELY(RELAXED_LOAD(&bd->flags) & BF_NONMOVING)) {          if (major_gc && !deadlock_detect_gc)              markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, q);          return; @@ -996,18 +1006,18 @@ evacuate_BLACKHOLE(StgClosure **p)      // blackholes *can* be in a large object: when raiseAsync() creates an      // AP_STACK the payload might be large enough to create a large object.      // See #14497. 
-    if (bd->flags & BF_LARGE) { +    if (flags & BF_LARGE) {          evacuate_large((P_)q);          return;      } -    if (bd->flags & BF_EVACUATED) { +    if (flags & BF_EVACUATED) {          if (bd->gen_no < gct->evac_gen_no) {              gct->failed_to_evac = true;              TICK_GC_FAILED_PROMOTION();          }          return;      } -    if (bd->flags & BF_MARKED) { +    if (flags & BF_MARKED) {          if (!is_marked((P_)q,bd)) {              mark((P_)q,bd);              push_mark_stack((P_)q); @@ -1015,13 +1025,13 @@ evacuate_BLACKHOLE(StgClosure **p)          return;      }      gen_no = bd->dest_no; -    info = q->header.info; +    info = ACQUIRE_LOAD(&q->header.info);      if (IS_FORWARDING_PTR(info))      {          StgClosure *e = (StgClosure*)UN_FORWARDING_PTR(info);          *p = e;          if (gen_no < gct->evac_gen_no) {  // optimisation -            if (Bdescr((P_)e)->gen_no < gct->evac_gen_no) { +            if (ACQUIRE_LOAD(&Bdescr((P_)e)->gen_no) < gct->evac_gen_no) {                  gct->failed_to_evac = true;                  TICK_GC_FAILED_PROMOTION();              } @@ -1090,13 +1100,11 @@ unchain_thunk_selectors(StgSelector *p, StgClosure *val)              // XXX we do not have BLACKHOLEs any more; replace with              // a THUNK_SELECTOR again.  This will go into a loop if it is              // entered, and should result in a NonTermination exception. -            ((StgThunk *)p)->payload[0] = val; -            write_barrier(); -            SET_INFO((StgClosure *)p, &stg_sel_0_upd_info); +            RELAXED_STORE(&((StgThunk *)p)->payload[0], val); +            SET_INFO_RELEASE((StgClosure *)p, &stg_sel_0_upd_info);          } else { -            ((StgInd *)p)->indirectee = val; -            write_barrier(); -            SET_INFO((StgClosure *)p, &stg_IND_info); +            RELAXED_STORE(&((StgInd *)p)->indirectee, val); +            SET_INFO_RELEASE((StgClosure *)p, &stg_IND_info);          }          // For the purposes of LDV profiling, we have created an @@ -1143,7 +1151,7 @@ selector_chain:          // save any space in any case, and updating with an indirection is          // trickier in a non-collected gen: we would have to update the          // mutable list. -        if (bd->flags & (BF_EVACUATED | BF_NONMOVING)) { +        if (RELAXED_LOAD(&bd->flags) & (BF_EVACUATED | BF_NONMOVING)) {              unchain_thunk_selectors(prev_thunk_selector, (StgClosure *)p);              *q = (StgClosure *)p;              // shortcut, behave as for:  if (evac) evacuate(q); @@ -1198,8 +1206,7 @@ selector_chain:              //     need the write-barrier stuff.              //   - undo the chain we've built to point to p.              SET_INFO((StgClosure *)p, (const StgInfoTable *)info_ptr); -            write_barrier(); -            *q = (StgClosure *)p; +            RELEASE_STORE(q, (StgClosure *) p);              if (evac) evacuate(q);              unchain_thunk_selectors(prev_thunk_selector, (StgClosure *)p);              return; @@ -1225,7 +1232,7 @@ selector_loop:      // from-space during marking, for example.  We rely on the property      // that evacuate() doesn't mind if it gets passed a to-space pointer. 
-    info = (StgInfoTable*)selectee->header.info; +    info = RELAXED_LOAD((StgInfoTable**) &selectee->header.info);      if (IS_FORWARDING_PTR(info)) {          // We don't follow pointers into to-space; the constructor @@ -1252,7 +1259,7 @@ selector_loop:                                            info->layout.payload.nptrs));                // Select the right field from the constructor -              StgClosure *val = selectee->payload[field]; +              StgClosure *val = RELAXED_LOAD(&selectee->payload[field]);  #if defined(PROFILING)                // For the purposes of LDV profiling, we have destroyed @@ -1278,19 +1285,19 @@ selector_loop:                // evaluating until we find the real value, and then                // update the whole chain to point to the value.            val_loop: -              info_ptr = (StgWord)UNTAG_CLOSURE(val)->header.info; +              info_ptr = ACQUIRE_LOAD((StgWord*) &UNTAG_CLOSURE(val)->header.info);                if (!IS_FORWARDING_PTR(info_ptr))                {                    info = INFO_PTR_TO_STRUCT((StgInfoTable *)info_ptr);                    switch (info->type) {                    case IND:                    case IND_STATIC: -                      val = ((StgInd *)val)->indirectee; +                      val = RELAXED_LOAD(&((StgInd *)val)->indirectee);                        goto val_loop;                    case THUNK_SELECTOR:                        // Use payload to make a list of thunk selectors, to be                        // used in unchain_thunk_selectors -                      ((StgClosure*)p)->payload[0] = (StgClosure *)prev_thunk_selector; +                      RELAXED_STORE(&((StgClosure*)p)->payload[0], (StgClosure *)prev_thunk_selector);                        prev_thunk_selector = p;                        p = (StgSelector*)val;                        goto selector_chain; @@ -1298,7 +1305,7 @@ selector_loop:                        break;                    }                } -              ((StgClosure*)p)->payload[0] = (StgClosure *)prev_thunk_selector; +              RELAXED_STORE(&((StgClosure*)p)->payload[0], (StgClosure *)prev_thunk_selector);                prev_thunk_selector = p;                *q = val; @@ -1320,22 +1327,22 @@ selector_loop:        case IND:        case IND_STATIC:            // Again, we might need to untag a constructor. -          selectee = UNTAG_CLOSURE( ((StgInd *)selectee)->indirectee ); +          selectee = UNTAG_CLOSURE( RELAXED_LOAD(&((StgInd *)selectee)->indirectee) );            goto selector_loop;        case BLACKHOLE:        {            StgClosure *r;            const StgInfoTable *i; -          r = ((StgInd*)selectee)->indirectee; +          r = ACQUIRE_LOAD(&((StgInd*)selectee)->indirectee);            // establish whether this BH has been updated, and is now an            // indirection, as in evacuate().            
if (GET_CLOSURE_TAG(r) == 0) { -              i = r->header.info; +              i = ACQUIRE_LOAD(&r->header.info);                if (IS_FORWARDING_PTR(i)) {                    r = (StgClosure *)UN_FORWARDING_PTR(i); -                  i = r->header.info; +                  i = RELAXED_LOAD(&r->header.info);                }                if (i == &stg_TSO_info                    || i == &stg_WHITEHOLE_info @@ -1346,7 +1353,7 @@ selector_loop:                ASSERT(i != &stg_IND_info);            } -          selectee = UNTAG_CLOSURE( ((StgInd *)selectee)->indirectee ); +          selectee = UNTAG_CLOSURE( RELAXED_LOAD(&((StgInd *)selectee)->indirectee) );            goto selector_loop;        } diff --git a/rts/sm/GC.c b/rts/sm/GC.c index 0fa927f2ad..8a8acb1b53 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -112,14 +112,8 @@ static W_ g0_pcnt_kept = 30; // percentage of g0 live at last minor GC  /* Mut-list stats */  #if defined(DEBUG) -uint32_t mutlist_MUTVARS, -    mutlist_MUTARRS, -    mutlist_MVARS, -    mutlist_TVAR, -    mutlist_TVAR_WATCH_QUEUE, -    mutlist_TREC_CHUNK, -    mutlist_TREC_HEADER, -    mutlist_OTHERS; +// For lack of a better option we protect mutlist_scav_stats with oldest_gen->sync +MutListScavStats mutlist_scav_stats;  #endif  /* Thread-local data for each GC thread @@ -184,6 +178,36 @@ bdescr *mark_stack_top_bd; // topmost block in the mark stack  bdescr *mark_stack_bd;     // current block in the mark stack  StgPtr mark_sp;            // pointer to the next unallocated mark stack entry + +/* ----------------------------------------------------------------------------- +   Statistics from mut_list scavenging +   -------------------------------------------------------------------------- */ + +#if defined(DEBUG) +void +zeroMutListScavStats(MutListScavStats *src) +{ +    memset(src, 0, sizeof(MutListScavStats)); +} + +void +addMutListScavStats(const MutListScavStats *src, +                    MutListScavStats *dest) +{ +#define ADD_STATS(field) dest->field += src->field; +    ADD_STATS(n_MUTVAR); +    ADD_STATS(n_MUTARR); +    ADD_STATS(n_MVAR); +    ADD_STATS(n_TVAR); +    ADD_STATS(n_TREC_CHUNK); +    ADD_STATS(n_TVAR_WATCH_QUEUE); +    ADD_STATS(n_TREC_HEADER); +    ADD_STATS(n_OTHERS); +#undef ADD_STATS +} +#endif /* DEBUG */ + +  /* -----------------------------------------------------------------------------     GarbageCollect: the main entry point to the garbage collector. 
@@ -250,14 +274,7 @@ GarbageCollect (uint32_t collect_gen,    stablePtrLock();  #if defined(DEBUG) -  mutlist_MUTVARS = 0; -  mutlist_MUTARRS = 0; -  mutlist_MVARS = 0; -  mutlist_TVAR = 0; -  mutlist_TVAR_WATCH_QUEUE = 0; -  mutlist_TREC_CHUNK = 0; -  mutlist_TREC_HEADER = 0; -  mutlist_OTHERS = 0; +  zeroMutListScavStats(&mutlist_scav_stats);  #endif    // attribute any costs to CCS_GC @@ -520,37 +537,37 @@ GarbageCollect (uint32_t collect_gen,        const gc_thread* thread;        for (i=0; i < n_gc_threads; i++) { -          copied += gc_threads[i]->copied; +          copied += RELAXED_LOAD(&gc_threads[i]->copied);        }        for (i=0; i < n_gc_threads; i++) {            thread = gc_threads[i];            if (n_gc_threads > 1) {                debugTrace(DEBUG_gc,"thread %d:", i);                debugTrace(DEBUG_gc,"   copied           %ld", -                         thread->copied * sizeof(W_)); +                         RELAXED_LOAD(&thread->copied) * sizeof(W_));                debugTrace(DEBUG_gc,"   scanned          %ld", -                         thread->scanned * sizeof(W_)); +                         RELAXED_LOAD(&thread->scanned) * sizeof(W_));                debugTrace(DEBUG_gc,"   any_work         %ld", -                         thread->any_work); +                         RELAXED_LOAD(&thread->any_work));                debugTrace(DEBUG_gc,"   no_work          %ld", -                         thread->no_work); +                         RELAXED_LOAD(&thread->no_work));                debugTrace(DEBUG_gc,"   scav_find_work %ld", -                         thread->scav_find_work); +                         RELAXED_LOAD(&thread->scav_find_work));  #if defined(THREADED_RTS) && defined(PROF_SPIN) -              gc_spin_spin += thread->gc_spin.spin; -              gc_spin_yield += thread->gc_spin.yield; -              mut_spin_spin += thread->mut_spin.spin; -              mut_spin_yield += thread->mut_spin.yield; +              gc_spin_spin += RELAXED_LOAD(&thread->gc_spin.spin); +              gc_spin_yield += RELAXED_LOAD(&thread->gc_spin.yield); +              mut_spin_spin += RELAXED_LOAD(&thread->mut_spin.spin); +              mut_spin_yield += RELAXED_LOAD(&thread->mut_spin.yield);  #endif -              any_work += thread->any_work; -              no_work += thread->no_work; -              scav_find_work += thread->scav_find_work; +              any_work += RELAXED_LOAD(&thread->any_work); +              no_work += RELAXED_LOAD(&thread->no_work); +              scav_find_work += RELAXED_LOAD(&thread->scav_find_work); -              par_max_copied = stg_max(gc_threads[i]->copied, par_max_copied); +              par_max_copied = stg_max(RELAXED_LOAD(&thread->copied), par_max_copied);                par_balanced_copied_acc += -                  stg_min(n_gc_threads * gc_threads[i]->copied, copied); +                  stg_min(n_gc_threads * RELAXED_LOAD(&thread->copied), copied);            }        }        if (n_gc_threads > 1) { @@ -590,10 +607,14 @@ GarbageCollect (uint32_t collect_gen,          debugTrace(DEBUG_gc,                     "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d TVARs, %d TVAR_WATCH_QUEUEs, %d TREC_CHUNKs, %d TREC_HEADERs, %d others)",                     (unsigned long)(mut_list_size * sizeof(W_)), -                   mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, -                   mutlist_TVAR, mutlist_TVAR_WATCH_QUEUE, -                   mutlist_TREC_CHUNK, mutlist_TREC_HEADER, -                   mutlist_OTHERS); +                   
mutlist_scav_stats.n_MUTVAR, +                   mutlist_scav_stats.n_MUTARR, +                   mutlist_scav_stats.n_MVAR, +                   mutlist_scav_stats.n_TVAR, +                   mutlist_scav_stats.n_TVAR_WATCH_QUEUE, +                   mutlist_scav_stats.n_TREC_CHUNK, +                   mutlist_scav_stats.n_TREC_HEADER, +                   mutlist_scav_stats.n_OTHERS);      }      bdescr *next, *prev; @@ -1109,7 +1130,7 @@ inc_running (void)  static StgWord  dec_running (void)  { -    ASSERT(gc_running_threads != 0); +    ASSERT(RELAXED_LOAD(&gc_running_threads) != 0);      return atomic_dec(&gc_running_threads);  } @@ -1119,7 +1140,7 @@ any_work (void)      int g;      gen_workspace *ws; -    gct->any_work++; +    NONATOMIC_ADD(&gct->any_work, 1);      write_barrier(); @@ -1152,7 +1173,7 @@ any_work (void)      }  #endif -    gct->no_work++; +    __atomic_fetch_add(&gct->no_work, 1, __ATOMIC_RELAXED);  #if defined(THREADED_RTS)      yieldThread();  #endif @@ -1193,7 +1214,7 @@ loop:      debugTrace(DEBUG_gc, "%d GC threads still running", r); -    while (gc_running_threads != 0) { +    while (SEQ_CST_LOAD(&gc_running_threads) != 0) {          // usleep(1);          if (any_work()) {              inc_running(); @@ -1230,7 +1251,7 @@ gcWorkerThread (Capability *cap)      //    measurements more accurate on Linux, perhaps because it syncs      //    the CPU time across the multiple cores.  Without this, CPU time      //    is heavily skewed towards GC rather than MUT. -    gct->wakeup = GC_THREAD_STANDING_BY; +    SEQ_CST_STORE(&gct->wakeup, GC_THREAD_STANDING_BY);      debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);      ACQUIRE_SPIN_LOCK(&gct->gc_spin); @@ -1257,10 +1278,13 @@ gcWorkerThread (Capability *cap)      // Wait until we're told to continue      RELEASE_SPIN_LOCK(&gct->gc_spin); -    gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;      debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",                 gct->thread_index);      stat_endGCWorker (cap, gct); +    // This must come *after* stat_endGCWorker since it serves to +    // synchronize us with the GC leader, which will later aggregate the +    // GC statistics. 
+    SEQ_CST_STORE(&gct->wakeup, GC_THREAD_WAITING_TO_CONTINUE);      ACQUIRE_SPIN_LOCK(&gct->mut_spin);      debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index); @@ -1285,7 +1309,7 @@ waitForGcThreads (Capability *cap USED_IF_THREADS, bool idle_cap[])      while(retry) {          for (i=0; i < n_threads; i++) {              if (i == me || idle_cap[i]) continue; -            if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) { +            if (SEQ_CST_LOAD(&gc_threads[i]->wakeup) != GC_THREAD_STANDING_BY) {                  prodCapability(capabilities[i], cap->running_task);              }          } @@ -1295,7 +1319,7 @@ waitForGcThreads (Capability *cap USED_IF_THREADS, bool idle_cap[])                  if (i == me || idle_cap[i]) continue;                  write_barrier();                  interruptCapability(capabilities[i]); -                if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) { +                if (SEQ_CST_LOAD(&gc_threads[i]->wakeup) != GC_THREAD_STANDING_BY) {                      retry = true;                  }              } @@ -1352,10 +1376,10 @@ wakeup_gc_threads (uint32_t me USED_IF_THREADS,          if (i == me || idle_cap[i]) continue;          inc_running();          debugTrace(DEBUG_gc, "waking up gc thread %d", i); -        if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) +        if (SEQ_CST_LOAD(&gc_threads[i]->wakeup) != GC_THREAD_STANDING_BY)              barf("wakeup_gc_threads"); -        gc_threads[i]->wakeup = GC_THREAD_RUNNING; +        SEQ_CST_STORE(&gc_threads[i]->wakeup, GC_THREAD_RUNNING);          ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);          RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);      } @@ -1376,9 +1400,8 @@ shutdown_gc_threads (uint32_t me USED_IF_THREADS,      for (i=0; i < n_gc_threads; i++) {          if (i == me || idle_cap[i]) continue; -        while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) { +        while (SEQ_CST_LOAD(&gc_threads[i]->wakeup) != GC_THREAD_WAITING_TO_CONTINUE) {              busy_wait_nop(); -            write_barrier();          }      }  #endif @@ -1393,10 +1416,10 @@ releaseGCThreads (Capability *cap USED_IF_THREADS, bool idle_cap[])      uint32_t i;      for (i=0; i < n_threads; i++) {          if (i == me || idle_cap[i]) continue; -        if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) +        if (SEQ_CST_LOAD(&gc_threads[i]->wakeup) != GC_THREAD_WAITING_TO_CONTINUE)              barf("releaseGCThreads"); -        gc_threads[i]->wakeup = GC_THREAD_INACTIVE; +        SEQ_CST_STORE(&gc_threads[i]->wakeup, GC_THREAD_INACTIVE);          ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);          RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);      } @@ -1412,7 +1435,7 @@ static void  stash_mut_list (Capability *cap, uint32_t gen_no)  {      cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no]; -    cap->mut_lists[gen_no] = allocBlockOnNode_sync(cap->node); +    RELEASE_STORE(&cap->mut_lists[gen_no], allocBlockOnNode_sync(cap->node));  }  /* ---------------------------------------------------------------------------- @@ -1438,9 +1461,11 @@ prepare_collected_gen (generation *gen)          // mutable list always has at least one block; this means we can avoid          // a check for NULL in recordMutable().          
for (i = 0; i < n_capabilities; i++) { -            freeChain(capabilities[i]->mut_lists[g]); -            capabilities[i]->mut_lists[g] = -                allocBlockOnNode(capNoToNumaNode(i)); +            bdescr *old = RELAXED_LOAD(&capabilities[i]->mut_lists[g]); +            freeChain(old); + +            bdescr *new = allocBlockOnNode(capNoToNumaNode(i)); +            RELAXED_STORE(&capabilities[i]->mut_lists[g], new);          }      } @@ -1654,7 +1679,7 @@ collect_pinned_object_blocks (void)          bdescr *last = NULL;          if (use_nonmoving && gen == oldest_gen) {              // Mark objects as belonging to the nonmoving heap -            for (bdescr *bd = capabilities[n]->pinned_object_blocks; bd != NULL; bd = bd->link) { +            for (bdescr *bd = RELAXED_LOAD(&capabilities[n]->pinned_object_blocks); bd != NULL; bd = bd->link) {                  bd->flags |= BF_NONMOVING;                  bd->gen = oldest_gen;                  bd->gen_no = oldest_gen->no; @@ -1673,8 +1698,8 @@ collect_pinned_object_blocks (void)              if (gen->large_objects != NULL) {                  gen->large_objects->u.back = last;              } -            gen->large_objects = capabilities[n]->pinned_object_blocks; -            capabilities[n]->pinned_object_blocks = NULL; +            g0->large_objects = RELAXED_LOAD(&capabilities[n]->pinned_object_blocks); +            RELAXED_STORE(&capabilities[n]->pinned_object_blocks, NULL);          }      }  } diff --git a/rts/sm/GC.h b/rts/sm/GC.h index bde006913b..c5d5f6ac81 100644 --- a/rts/sm/GC.h +++ b/rts/sm/GC.h @@ -42,20 +42,32 @@ extern StgPtr mark_sp;  extern bool work_stealing; -#if defined(DEBUG) -extern uint32_t mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS, -    mutlist_TVAR, -    mutlist_TVAR_WATCH_QUEUE, -    mutlist_TREC_CHUNK, -    mutlist_TREC_HEADER; -#endif -  #if defined(PROF_SPIN) && defined(THREADED_RTS)  extern volatile StgWord64 whitehole_gc_spin;  extern volatile StgWord64 waitForGcThreads_spin;  extern volatile StgWord64 waitForGcThreads_yield;  #endif +// mutable list scavenging statistics +#if defined(DEBUG) +typedef struct { +    StgWord n_MUTVAR; +    StgWord n_MUTARR; +    StgWord n_MVAR; +    StgWord n_TVAR; +    StgWord n_TREC_CHUNK; +    StgWord n_TVAR_WATCH_QUEUE; +    StgWord n_TREC_HEADER; +    StgWord n_OTHERS; +} MutListScavStats; + +extern MutListScavStats mutlist_scav_stats; + +void zeroMutListScavStats(MutListScavStats *src); +void addMutListScavStats(const MutListScavStats *src, +                         MutListScavStats *dest); +#endif /* DEBUG */ +  void gcWorkerThread (Capability *cap);  void initGcThreads (uint32_t from, uint32_t to);  void freeGcThreads (void); diff --git a/rts/sm/GCAux.c b/rts/sm/GCAux.c index 11080c1f22..55b4f99596 100644 --- a/rts/sm/GCAux.c +++ b/rts/sm/GCAux.c @@ -83,7 +83,7 @@ isAlive(StgClosure *p)          return p;      } -    info = q->header.info; +    info = RELAXED_LOAD(&q->header.info);      if (IS_FORWARDING_PTR(info)) {          // alive! 
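The DEBUG-only mutable-list counters are also restructured in this merge: the eight global `mutlist_*` counters become a single `MutListScavStats` record, each scavenging thread accumulates into a local copy (the `// Local accumulator` in `scavenge_mutable_list` below), and the comment added to `GC.c` notes that the shared `mutlist_scav_stats` is protected by `oldest_gen->sync`. The stand-alone sketch below illustrates that accumulate-locally-then-merge-under-a-lock pattern; the pthread mutex, the driver code and the merge call site are assumptions for illustration, not GHC's actual synchronisation (the captured diff is cut off before the point where the local accumulator would be folded back in).

```c
/* Hedged sketch of the accumulate-locally, merge-under-a-lock pattern used
 * for the new MutListScavStats.  A pthread mutex stands in for the
 * oldest_gen->sync spin lock mentioned in GC.c; field names follow the
 * struct added to rts/sm/GC.h. */
#include <pthread.h>
#include <stdio.h>
#include <string.h>

typedef struct {
    unsigned long n_MUTVAR, n_MUTARR, n_MVAR, n_TVAR,
                  n_TREC_CHUNK, n_TVAR_WATCH_QUEUE, n_TREC_HEADER, n_OTHERS;
} MutListScavStats;

static MutListScavStats mutlist_scav_stats;      /* shared, lock-protected */
static pthread_mutex_t  stats_lock = PTHREAD_MUTEX_INITIALIZER;

static void addMutListScavStats(const MutListScavStats *src,
                                MutListScavStats *dest)
{
#define ADD_STATS(f) dest->f += src->f;
    ADD_STATS(n_MUTVAR) ADD_STATS(n_MUTARR) ADD_STATS(n_MVAR) ADD_STATS(n_TVAR)
    ADD_STATS(n_TREC_CHUNK) ADD_STATS(n_TVAR_WATCH_QUEUE)
    ADD_STATS(n_TREC_HEADER) ADD_STATS(n_OTHERS)
#undef ADD_STATS
}

/* Each scavenging thread keeps a private accumulator for the duration of
 * its mutable-list walk and merges it into the shared record exactly once,
 * so the hot loop touches no shared memory. */
static void scavenge_one_mut_list(unsigned long mutvars_seen)
{
    MutListScavStats local;
    memset(&local, 0, sizeof(local));            /* cf. zeroMutListScavStats */
    local.n_MUTVAR += mutvars_seen;              /* stand-in for real work   */

    pthread_mutex_lock(&stats_lock);
    addMutListScavStats(&local, &mutlist_scav_stats);
    pthread_mutex_unlock(&stats_lock);
}

int main(void)
{
    scavenge_one_mut_list(3);
    scavenge_one_mut_list(4);
    printf("MUT_VARs scavenged: %lu\n", mutlist_scav_stats.n_MUTVAR);
    return 0;
}
```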
diff --git a/rts/sm/GCUtils.c b/rts/sm/GCUtils.c index 02c26ddf5e..d58fdc48ae 100644 --- a/rts/sm/GCUtils.c +++ b/rts/sm/GCUtils.c @@ -249,8 +249,8 @@ todo_block_full (uint32_t size, gen_workspace *ws)          return p;      } -    gct->copied += ws->todo_free - bd->free; -    bd->free = ws->todo_free; +    gct->copied += ws->todo_free - RELAXED_LOAD(&bd->free); +    RELAXED_STORE(&bd->free, ws->todo_free);      ASSERT(bd->u.scan >= bd->start && bd->u.scan <= bd->free); @@ -330,10 +330,11 @@ alloc_todo_block (gen_workspace *ws, uint32_t size)                  gct->free_blocks = bd->link;              }          } -        // blocks in to-space get the BF_EVACUATED flag. -        bd->flags = BF_EVACUATED; -        bd->u.scan = bd->start;          initBdescr(bd, ws->gen, ws->gen->to); +        RELAXED_STORE(&bd->u.scan, RELAXED_LOAD(&bd->start)); +        // blocks in to-space get the BF_EVACUATED flag. +        // RELEASE here to ensure that bd->gen is visible to other cores. +        RELEASE_STORE(&bd->flags, BF_EVACUATED);      }      bd->link = NULL; @@ -345,7 +346,7 @@ alloc_todo_block (gen_workspace *ws, uint32_t size)                       // See Note [big objects]      debugTrace(DEBUG_gc, "alloc new todo block %p for gen  %d", -               bd->free, ws->gen->no); +               RELAXED_LOAD(&bd->free), ws->gen->no);      return ws->todo_free;  } diff --git a/rts/sm/GCUtils.h b/rts/sm/GCUtils.h index a71d6dcb92..798a795deb 100644 --- a/rts/sm/GCUtils.h +++ b/rts/sm/GCUtils.h @@ -67,7 +67,9 @@ recordMutableGen_GC (StgClosure *p, uint32_t gen_no)          bd = new_bd;          gct->mut_lists[gen_no] = bd;      } -    *bd->free++ = (StgWord)p; +    *bd->free++ = (StgWord) p; +    // N.B. we are allocating into our Capability-local mut_list, therefore +    // we don't need an atomic increment.  }  #include "EndPrivate.h" diff --git a/rts/sm/MarkWeak.c b/rts/sm/MarkWeak.c index 65b1338f10..b8d120823c 100644 --- a/rts/sm/MarkWeak.c +++ b/rts/sm/MarkWeak.c @@ -414,14 +414,13 @@ markWeakPtrList ( void )          StgWeak *w, **last_w;          last_w = &gen->weak_ptr_list; -        for (w = gen->weak_ptr_list; w != NULL; w = w->link) { +        for (w = gen->weak_ptr_list; w != NULL; w = RELAXED_LOAD(&w->link)) {              // w might be WEAK, EVACUATED, or DEAD_WEAK (actually CON_STATIC) here  #if defined(DEBUG)              {   // careful to do this assertion only reading the info ptr                  // once, because during parallel GC it might change under our feet. 
-                const StgInfoTable *info; -                info = w->header.info; +                const StgInfoTable *info = RELAXED_LOAD(&w->header.info);                  ASSERT(IS_FORWARDING_PTR(info)                         || info == &stg_DEAD_WEAK_info                         || INFO_PTR_TO_STRUCT(info)->type == WEAK); diff --git a/rts/sm/NonMoving.c b/rts/sm/NonMoving.c index 388ceae2fd..3eafd6be98 100644 --- a/rts/sm/NonMoving.c +++ b/rts/sm/NonMoving.c @@ -726,6 +726,7 @@ void nonmovingStop(void)                     "waiting for nonmoving collector thread to terminate");          ACQUIRE_LOCK(&concurrent_coll_finished_lock);          waitCondition(&concurrent_coll_finished, &concurrent_coll_finished_lock); +        joinOSThread(mark_thread);      }  #endif  } diff --git a/rts/sm/Scav.c b/rts/sm/Scav.c index dd9a96adf8..9fe2c6006e 100644 --- a/rts/sm/Scav.c +++ b/rts/sm/Scav.c @@ -65,6 +65,8 @@  #include "sm/NonMoving.h" // for nonmoving_set_closure_mark_bit  #include "sm/NonMovingScav.h" +#include <string.h> /* for memset */ +  static void scavenge_large_bitmap (StgPtr p,                                     StgLargeBitmap *large_bitmap,                                     StgWord size ); @@ -201,9 +203,9 @@ scavenge_compact(StgCompactNFData *str)      gct->eager_promotion = saved_eager;      if (gct->failed_to_evac) { -        ((StgClosure *)str)->header.info = &stg_COMPACT_NFDATA_DIRTY_info; +        RELEASE_STORE(&((StgClosure *)str)->header.info, &stg_COMPACT_NFDATA_DIRTY_info);      } else { -        ((StgClosure *)str)->header.info = &stg_COMPACT_NFDATA_CLEAN_info; +        RELEASE_STORE(&((StgClosure *)str)->header.info, &stg_COMPACT_NFDATA_CLEAN_info);      }  } @@ -464,9 +466,9 @@ scavenge_block (bdescr *bd)          gct->eager_promotion = saved_eager_promotion;          if (gct->failed_to_evac) { -            mvar->header.info = &stg_MVAR_DIRTY_info; +            RELEASE_STORE(&mvar->header.info, &stg_MVAR_DIRTY_info);          } else { -            mvar->header.info = &stg_MVAR_CLEAN_info; +            RELEASE_STORE(&mvar->header.info, &stg_MVAR_CLEAN_info);          }          p += sizeofW(StgMVar);          break; @@ -481,9 +483,9 @@ scavenge_block (bdescr *bd)          gct->eager_promotion = saved_eager_promotion;          if (gct->failed_to_evac) { -            tvar->header.info = &stg_TVAR_DIRTY_info; +            RELEASE_STORE(&tvar->header.info, &stg_TVAR_DIRTY_info);          } else { -            tvar->header.info = &stg_TVAR_CLEAN_info; +            RELEASE_STORE(&tvar->header.info, &stg_TVAR_CLEAN_info);          }          p += sizeofW(StgTVar);          break; @@ -615,9 +617,9 @@ scavenge_block (bdescr *bd)          gct->eager_promotion = saved_eager_promotion;          if (gct->failed_to_evac) { -            ((StgClosure *)q)->header.info = &stg_MUT_VAR_DIRTY_info; +            RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_MUT_VAR_DIRTY_info);          } else { -            ((StgClosure *)q)->header.info = &stg_MUT_VAR_CLEAN_info; +            RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_MUT_VAR_CLEAN_info);          }          p += sizeofW(StgMutVar);          break; @@ -634,9 +636,9 @@ scavenge_block (bdescr *bd)          gct->eager_promotion = saved_eager_promotion;          if (gct->failed_to_evac) { -            bq->header.info = &stg_BLOCKING_QUEUE_DIRTY_info; +            RELEASE_STORE(&bq->header.info, &stg_BLOCKING_QUEUE_DIRTY_info);          } else { -            bq->header.info = &stg_BLOCKING_QUEUE_CLEAN_info; +            
RELEASE_STORE(&bq->header.info, &stg_BLOCKING_QUEUE_CLEAN_info);          }          p += sizeofW(StgBlockingQueue);          break; @@ -686,9 +688,9 @@ scavenge_block (bdescr *bd)          p = scavenge_mut_arr_ptrs((StgMutArrPtrs*)p);          if (gct->failed_to_evac) { -            ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_DIRTY_info; +            RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_MUT_ARR_PTRS_DIRTY_info);          } else { -            ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_CLEAN_info; +            RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_MUT_ARR_PTRS_CLEAN_info);          }          gct->eager_promotion = saved_eager_promotion; @@ -703,9 +705,9 @@ scavenge_block (bdescr *bd)          p = scavenge_mut_arr_ptrs((StgMutArrPtrs*)p);          if (gct->failed_to_evac) { -            ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_DIRTY_info; +            RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_MUT_ARR_PTRS_FROZEN_DIRTY_info);          } else { -            ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_CLEAN_info; +            RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_MUT_ARR_PTRS_FROZEN_CLEAN_info);          }          break;      } @@ -728,9 +730,9 @@ scavenge_block (bdescr *bd)          gct->eager_promotion = saved_eager_promotion;          if (gct->failed_to_evac) { -            ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_DIRTY_info; +            RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_SMALL_MUT_ARR_PTRS_DIRTY_info);          } else { -            ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_CLEAN_info; +            RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_SMALL_MUT_ARR_PTRS_CLEAN_info);          }          gct->failed_to_evac = true; // always put it on the mutable list. 
@@ -749,9 +751,9 @@ scavenge_block (bdescr *bd)          }          if (gct->failed_to_evac) { -            ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY_info; +            RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY_info);          } else { -            ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN_info; +            RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN_info);          }          break;      } @@ -834,7 +836,7 @@ scavenge_block (bdescr *bd)    if (p > bd->free)  {        gct->copied += ws->todo_free - bd->free; -      bd->free = p; +      RELEASE_STORE(&bd->free, p);    }    debugTrace(DEBUG_gc, "   scavenged %ld bytes", @@ -889,9 +891,9 @@ scavenge_mark_stack(void)              gct->eager_promotion = saved_eager_promotion;              if (gct->failed_to_evac) { -                mvar->header.info = &stg_MVAR_DIRTY_info; +                RELEASE_STORE(&mvar->header.info, &stg_MVAR_DIRTY_info);              } else { -                mvar->header.info = &stg_MVAR_CLEAN_info; +                RELEASE_STORE(&mvar->header.info, &stg_MVAR_CLEAN_info);              }              break;          } @@ -905,9 +907,9 @@ scavenge_mark_stack(void)              gct->eager_promotion = saved_eager_promotion;              if (gct->failed_to_evac) { -                tvar->header.info = &stg_TVAR_DIRTY_info; +                RELEASE_STORE(&tvar->header.info, &stg_TVAR_DIRTY_info);              } else { -                tvar->header.info = &stg_TVAR_CLEAN_info; +                RELEASE_STORE(&tvar->header.info, &stg_TVAR_CLEAN_info);              }              break;          } @@ -1011,9 +1013,9 @@ scavenge_mark_stack(void)              gct->eager_promotion = saved_eager_promotion;              if (gct->failed_to_evac) { -                ((StgClosure *)q)->header.info = &stg_MUT_VAR_DIRTY_info; +                RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_MUT_VAR_DIRTY_info);              } else { -                ((StgClosure *)q)->header.info = &stg_MUT_VAR_CLEAN_info; +                RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_MUT_VAR_CLEAN_info);              }              break;          } @@ -1030,9 +1032,9 @@ scavenge_mark_stack(void)              gct->eager_promotion = saved_eager_promotion;              if (gct->failed_to_evac) { -                bq->header.info = &stg_BLOCKING_QUEUE_DIRTY_info; +                RELEASE_STORE(&bq->header.info, &stg_BLOCKING_QUEUE_DIRTY_info);              } else { -                bq->header.info = &stg_BLOCKING_QUEUE_CLEAN_info; +                RELEASE_STORE(&bq->header.info, &stg_BLOCKING_QUEUE_CLEAN_info);              }              break;          } @@ -1078,9 +1080,9 @@ scavenge_mark_stack(void)              scavenge_mut_arr_ptrs((StgMutArrPtrs *)p);              if (gct->failed_to_evac) { -                ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_DIRTY_info; +                RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_MUT_ARR_PTRS_DIRTY_info);              } else { -                ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_CLEAN_info; +                RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_MUT_ARR_PTRS_CLEAN_info);              }              gct->eager_promotion = saved_eager_promotion; @@ -1097,9 +1099,9 @@ scavenge_mark_stack(void)              scavenge_mut_arr_ptrs((StgMutArrPtrs *)p);              if (gct->failed_to_evac) { -                ((StgClosure 
@@ -1097,9 +1099,9 @@ scavenge_mark_stack(void)
             scavenge_mut_arr_ptrs((StgMutArrPtrs *)p);
 
             if (gct->failed_to_evac) {
-                ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_DIRTY_info;
+                RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_MUT_ARR_PTRS_FROZEN_DIRTY_info);
             } else {
-                ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_CLEAN_info;
+                RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_MUT_ARR_PTRS_FROZEN_CLEAN_info);
             }
             break;
         }
@@ -1124,9 +1126,9 @@ scavenge_mark_stack(void)
             gct->eager_promotion = saved_eager;
 
             if (gct->failed_to_evac) {
-                ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_DIRTY_info;
+                RELEASE_STORE(&((StgClosure *)q)->header.info, &stg_SMALL_MUT_ARR_PTRS_DIRTY_info);
             } else {
-                ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_CLEAN_info;
+                RELEASE_STORE(&((StgClosure *)q)->header.info, &stg_SMALL_MUT_ARR_PTRS_CLEAN_info);
             }
 
             gct->failed_to_evac = true; // mutable anyhow.
@@ -1145,9 +1147,9 @@ scavenge_mark_stack(void)
             }
 
             if (gct->failed_to_evac) {
-                ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY_info;
+                RELEASE_STORE(&((StgClosure *)q)->header.info, &stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY_info);
             } else {
-                ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN_info;
+                RELEASE_STORE(&((StgClosure *)q)->header.info, &stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN_info);
             }
             break;
         }
@@ -1251,9 +1253,9 @@ scavenge_one(StgPtr p)
         gct->eager_promotion = saved_eager_promotion;
 
         if (gct->failed_to_evac) {
-            mvar->header.info = &stg_MVAR_DIRTY_info;
+            RELEASE_STORE(&mvar->header.info, &stg_MVAR_DIRTY_info);
         } else {
-            mvar->header.info = &stg_MVAR_CLEAN_info;
+            RELEASE_STORE(&mvar->header.info, &stg_MVAR_CLEAN_info);
         }
         break;
     }
@@ -1267,9 +1269,9 @@ scavenge_one(StgPtr p)
         gct->eager_promotion = saved_eager_promotion;
 
         if (gct->failed_to_evac) {
-            tvar->header.info = &stg_TVAR_DIRTY_info;
+            RELEASE_STORE(&tvar->header.info, &stg_TVAR_DIRTY_info);
         } else {
-            tvar->header.info = &stg_TVAR_CLEAN_info;
+            RELEASE_STORE(&tvar->header.info, &stg_TVAR_CLEAN_info);
         }
         break;
     }
@@ -1331,9 +1333,9 @@ scavenge_one(StgPtr p)
         gct->eager_promotion = saved_eager_promotion;
 
         if (gct->failed_to_evac) {
-            ((StgClosure *)q)->header.info = &stg_MUT_VAR_DIRTY_info;
+            RELEASE_STORE(&((StgClosure *)q)->header.info, &stg_MUT_VAR_DIRTY_info);
         } else {
-            ((StgClosure *)q)->header.info = &stg_MUT_VAR_CLEAN_info;
+            RELEASE_STORE(&((StgClosure *)q)->header.info, &stg_MUT_VAR_CLEAN_info);
         }
         break;
     }
@@ -1350,9 +1352,9 @@ scavenge_one(StgPtr p)
         gct->eager_promotion = saved_eager_promotion;
 
         if (gct->failed_to_evac) {
-            bq->header.info = &stg_BLOCKING_QUEUE_DIRTY_info;
+            RELEASE_STORE(&bq->header.info, &stg_BLOCKING_QUEUE_DIRTY_info);
         } else {
-            bq->header.info = &stg_BLOCKING_QUEUE_CLEAN_info;
+            RELEASE_STORE(&bq->header.info, &stg_BLOCKING_QUEUE_CLEAN_info);
         }
         break;
     }
@@ -1398,9 +1400,9 @@ scavenge_one(StgPtr p)
         scavenge_mut_arr_ptrs((StgMutArrPtrs *)p);
 
         if (gct->failed_to_evac) {
-            ((StgClosure *)p)->header.info = &stg_MUT_ARR_PTRS_DIRTY_info;
+            RELEASE_STORE(&((StgClosure *)p)->header.info, &stg_MUT_ARR_PTRS_DIRTY_info);
         } else {
-            ((StgClosure *)p)->header.info = &stg_MUT_ARR_PTRS_CLEAN_info;
+            RELEASE_STORE(&((StgClosure *)p)->header.info, &stg_MUT_ARR_PTRS_CLEAN_info);
         }
 
         gct->eager_promotion = saved_eager_promotion;
@@ -1415,9 +1417,9 @@ scavenge_one(StgPtr p)
         scavenge_mut_arr_ptrs((StgMutArrPtrs *)p);
 
         if (gct->failed_to_evac) {
-            ((StgClosure *)p)->header.info = &stg_MUT_ARR_PTRS_FROZEN_DIRTY_info;
+            RELEASE_STORE(&((StgClosure *)p)->header.info, &stg_MUT_ARR_PTRS_FROZEN_DIRTY_info);
         } else {
-            ((StgClosure *)p)->header.info = &stg_MUT_ARR_PTRS_FROZEN_CLEAN_info;
+            RELEASE_STORE(&((StgClosure *)p)->header.info, &stg_MUT_ARR_PTRS_FROZEN_CLEAN_info);
         }
         break;
     }
@@ -1442,9 +1444,9 @@ scavenge_one(StgPtr p)
         gct->eager_promotion = saved_eager;
 
         if (gct->failed_to_evac) {
-            ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_DIRTY_info;
+            RELEASE_STORE(&((StgClosure *)q)->header.info, &stg_SMALL_MUT_ARR_PTRS_DIRTY_info);
         } else {
-            ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_CLEAN_info;
+            RELEASE_STORE(&((StgClosure *)q)->header.info, &stg_SMALL_MUT_ARR_PTRS_CLEAN_info);
         }
 
         gct->failed_to_evac = true;
@@ -1463,9 +1465,9 @@ scavenge_one(StgPtr p)
         }
 
         if (gct->failed_to_evac) {
-            ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY_info;
+            RELEASE_STORE(&((StgClosure *)q)->header.info, &stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY_info);
         } else {
-            ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN_info;
+            RELEASE_STORE(&((StgClosure *)q)->header.info, &stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN_info);
         }
         break;
     }
@@ -1583,6 +1585,10 @@ static void
 scavenge_mutable_list(bdescr *bd, generation *gen)
 {
     StgPtr p, q;
+#if defined(DEBUG)
+    MutListScavStats stats; // Local accumulator
+    zeroMutListScavStats(&stats);
+#endif
     uint32_t gen_no = gen->no;
 
     gct->evac_gen_no = gen_no;
@@ -1598,31 +1604,31 @@ scavenge_mutable_list(bdescr *bd, generation *gen)
             case MUT_VAR_CLEAN:
                 // can happen due to concurrent writeMutVars
             case MUT_VAR_DIRTY:
-                mutlist_MUTVARS++; break;
+                stats.n_MUTVAR++; break;
             case MUT_ARR_PTRS_CLEAN:
             case MUT_ARR_PTRS_DIRTY:
             case MUT_ARR_PTRS_FROZEN_CLEAN:
             case MUT_ARR_PTRS_FROZEN_DIRTY:
-                mutlist_MUTARRS++; break;
+                stats.n_MUTARR++; break;
             case MVAR_CLEAN:
                 barf("MVAR_CLEAN on mutable list");
             case MVAR_DIRTY:
-                mutlist_MVARS++; break;
+                stats.n_MVAR++; break;
             case TVAR:
-                mutlist_TVAR++; break;
+                stats.n_TVAR++; break;
             case TREC_CHUNK:
-                mutlist_TREC_CHUNK++; break;
+                stats.n_TREC_CHUNK++; break;
             case MUT_PRIM:
                 pinfo = ((StgClosure*)p)->header.info;
                 if (pinfo == &stg_TVAR_WATCH_QUEUE_info)
-                    mutlist_TVAR_WATCH_QUEUE++;
+                    stats.n_TVAR_WATCH_QUEUE++;
                 else if (pinfo == &stg_TREC_HEADER_info)
-                    mutlist_TREC_HEADER++;
+                    stats.n_TREC_HEADER++;
                 else
-                    mutlist_OTHERS++;
+                    stats.n_OTHERS++;
                 break;
             default:
-                mutlist_OTHERS++; break;
+                stats.n_OTHERS++; break;
             }
 #endif
 
@@ -1647,9 +1653,9 @@ scavenge_mutable_list(bdescr *bd, generation *gen)
                 scavenge_mut_arr_ptrs_marked((StgMutArrPtrs *)p);
 
                 if (gct->failed_to_evac) {
-                    ((StgClosure *)p)->header.info = &stg_MUT_ARR_PTRS_DIRTY_info;
+                    RELEASE_STORE(&((StgClosure *)p)->header.info, &stg_MUT_ARR_PTRS_DIRTY_info);
                 } else {
-                    ((StgClosure *)p)->header.info = &stg_MUT_ARR_PTRS_CLEAN_info;
+                    RELEASE_STORE(&((StgClosure *)p)->header.info, &stg_MUT_ARR_PTRS_CLEAN_info);
                 }
 
                 gct->eager_promotion = saved_eager_promotion;
@@ -1671,6 +1677,13 @@ scavenge_mutable_list(bdescr *bd, generation *gen)
             }
         }
     }
+
+#if defined(DEBUG)
+    // For lack of a better option we protect mutlist_scav_stats with oldest_gen->sync
+    ACQUIRE_SPIN_LOCK(&oldest_gen->sync);
+    addMutListScavStats(&stats, &mutlist_scav_stats);
+    RELEASE_SPIN_LOCK(&oldest_gen->sync);
+#endif
 }
 
 void
@@ -1740,8 +1753,9 @@ scavenge_static(void)
     /* Take this object *off* the static_objects list,
      * and put it on the scavenged_static_objects list.
      */
-    gct->static_objects = *STATIC_LINK(info,p);
-    *STATIC_LINK(info,p) = gct->scavenged_static_objects;
+    StgClosure **link = STATIC_LINK(info,p);
+    gct->static_objects = RELAXED_LOAD(link);
+    RELAXED_STORE(link, gct->scavenged_static_objects);
     gct->scavenged_static_objects = flagged_p;
 
     switch (info -> type) {
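
The DEBUG-only census in scavenge_mutable_list above no longer bumps global mutlist counters directly, which would race once several GC threads walk their mutable lists; each thread instead accumulates into a stack-allocated MutListScavStats and folds it into the shared copy once, under oldest_gen->sync. A compressed sketch of that accumulate-then-merge pattern, with hypothetical names and a C11 spin lock standing in for the RTS spin lock:

/* Sketch: per-thread accumulation, one locked merge per list walked.
 * Names are illustrative; atomic_flag stands in for oldest_gen->sync. */
#include <stdatomic.h>

typedef struct { unsigned long n_mutvar, n_mutarr, n_others; } ScavStats;

static ScavStats global_stats;              /* shared, protected by the lock */
static atomic_flag stats_lock = ATOMIC_FLAG_INIT;

static void stats_zero(ScavStats *s) { s->n_mutvar = s->n_mutarr = s->n_others = 0; }

static void stats_add(const ScavStats *src, ScavStats *dst)
{
    dst->n_mutvar += src->n_mutvar;
    dst->n_mutarr += src->n_mutarr;
    dst->n_others += src->n_others;
}

/* Each GC thread counts locally while walking its list, then takes the lock
 * exactly once to fold the local totals into the shared ones. */
void scavenge_list_census(const int *kinds, unsigned n)
{
    ScavStats local;
    stats_zero(&local);

    for (unsigned i = 0; i < n; i++) {
        switch (kinds[i]) {
        case 0:  local.n_mutvar++; break;
        case 1:  local.n_mutarr++; break;
        default: local.n_others++; break;
        }
    }

    while (atomic_flag_test_and_set_explicit(&stats_lock, memory_order_acquire))
        ;                                    /* spin: merges are rare and short */
    stats_add(&local, &global_stats);
    atomic_flag_clear_explicit(&stats_lock, memory_order_release);
}

Taking the lock once per list keeps the counting loop itself free of shared writes while still producing a consistent global total.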
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 96bc133d02..251353de6d 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -445,7 +445,7 @@ lockCAF (StgRegTable *reg, StgIndStatic *caf)
     Capability *cap = regTableToCapability(reg);
     StgInd *bh;
 
-    orig_info = caf->header.info;
+    orig_info = RELAXED_LOAD(&caf->header.info);
 
 #if defined(THREADED_RTS)
     const StgInfoTable *cur_info;
@@ -501,12 +501,11 @@ lockCAF (StgRegTable *reg, StgIndStatic *caf)
     }
     bh->indirectee = (StgClosure *)cap->r.rCurrentTSO;
     SET_HDR(bh, &stg_CAF_BLACKHOLE_info, caf->header.prof.ccs);
-    // Ensure that above writes are visible before we introduce reference as CAF indirectee.
-    write_barrier();
-    caf->indirectee = (StgClosure *)bh;
-    write_barrier();
-    SET_INFO((StgClosure*)caf,&stg_IND_STATIC_info);
+    // RELEASE ordering to ensure that above writes are visible before we
+    // introduce reference as CAF indirectee.
+    RELEASE_STORE(&caf->indirectee, (StgClosure *) bh);
+    SET_INFO_RELEASE((StgClosure*)caf, &stg_IND_STATIC_info);
 
     return bh;
 }
 
@@ -1033,8 +1032,8 @@ allocateMightFail (Capability *cap, W_ n)
         g0->n_new_large_words += n;
         RELEASE_SM_LOCK;
         initBdescr(bd, g0, g0);
-        bd->flags = BF_LARGE;
-        bd->free = bd->start + n;
+        RELAXED_STORE(&bd->flags, BF_LARGE);
+        RELAXED_STORE(&bd->free, bd->start + n);
         cap->total_allocated += n;
         return bd->start;
     }
@@ -1300,8 +1299,8 @@ dirty_MUT_VAR(StgRegTable *reg, StgMutVar *mvar, StgClosure *old)
     Capability *cap = regTableToCapability(reg);
     // No barrier required here as no other heap object fields are read. See
     // note [Heap memory barriers] in SMP.h.
-    if (mvar->header.info == &stg_MUT_VAR_CLEAN_info) {
-        mvar->header.info = &stg_MUT_VAR_DIRTY_info;
+    if (RELAXED_LOAD(&mvar->header.info) == &stg_MUT_VAR_CLEAN_info) {
+        SET_INFO((StgClosure*) mvar, &stg_MUT_VAR_DIRTY_info);
         recordClosureMutated(cap, (StgClosure *) mvar);
         IF_NONMOVING_WRITE_BARRIER_ENABLED {
             // See Note [Dirty flags in the non-moving collector] in NonMoving.c
@@ -1323,8 +1322,8 @@ dirty_TVAR(Capability *cap, StgTVar *p,
 {
     // No barrier required here as no other heap object fields are read. See
     // note [Heap memory barriers] in SMP.h.
-    if (p->header.info == &stg_TVAR_CLEAN_info) {
-        p->header.info = &stg_TVAR_DIRTY_info;
+    if (RELAXED_LOAD(&p->header.info) == &stg_TVAR_CLEAN_info) {
+        SET_INFO((StgClosure*) p, &stg_TVAR_DIRTY_info);
         recordClosureMutated(cap,(StgClosure*)p);
         IF_NONMOVING_WRITE_BARRIER_ENABLED {
             // See Note [Dirty flags in the non-moving collector] in NonMoving.c
@@ -1341,8 +1340,8 @@ dirty_TVAR(Capability *cap, StgTVar *p,
 void
 setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target)
 {
-    if (tso->dirty == 0) {
-        tso->dirty = 1;
+    if (RELAXED_LOAD(&tso->dirty) == 0) {
+        RELAXED_STORE(&tso->dirty, 1);
         recordClosureMutated(cap,(StgClosure*)tso);
         IF_NONMOVING_WRITE_BARRIER_ENABLED {
             updateRemembSetPushClosure(cap, (StgClosure *) tso->_link);
@@ -1354,8 +1353,8 @@ setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target)
 void
 setTSOPrev (Capability *cap, StgTSO *tso, StgTSO *target)
 {
-    if (tso->dirty == 0) {
-        tso->dirty = 1;
+    if (RELAXED_LOAD(&tso->dirty) == 0) {
+        RELAXED_STORE(&tso->dirty, 1);
         recordClosureMutated(cap,(StgClosure*)tso);
         IF_NONMOVING_WRITE_BARRIER_ENABLED {
             updateRemembSetPushClosure(cap, (StgClosure *) tso->block_info.prev);
@@ -1367,8 +1366,8 @@ setTSOPrev (Capability *cap, StgTSO *tso, StgTSO *target)
 void
 dirty_TSO (Capability *cap, StgTSO *tso)
 {
-    if (tso->dirty == 0) {
-        tso->dirty = 1;
+    if (RELAXED_LOAD(&tso->dirty) == 0) {
+        RELAXED_STORE(&tso->dirty, 1);
         recordClosureMutated(cap,(StgClosure*)tso);
     }
 
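
The dirty_MUT_VAR, dirty_TVAR, setTSOLink, setTSOPrev and dirty_TSO hunks above, and dirty_STACK just below, all share one write-barrier fast path: a relaxed read of the clean/dirty mark, and only on the first mutation since the last GC is the object marked dirty and pushed onto the capability's mutable list. As the in-diff comments note, relaxed ordering suffices because nothing else in the object is read on this path. A toy sketch of that shape, with hypothetical types rather than the RTS's StgMutVar or recordClosureMutated:

/* Sketch only: a generational write barrier that tolerates a racy dirty
 * flag.  RELAXED_* assumed to wrap the __atomic builtins, as earlier. */
#define RELAXED_LOAD(ptr)       __atomic_load_n(ptr, __ATOMIC_RELAXED)
#define RELAXED_STORE(ptr, val) __atomic_store_n(ptr, val, __ATOMIC_RELAXED)

enum { CLEAN = 0, DIRTY = 1 };

struct obj {
    int dirty;                     /* clean/dirty mark, raced on deliberately */
};

struct capability {
    struct obj *mut_list[64];      /* toy capability-local remembered set */
    unsigned    mut_count;
};

/* First mutation after a GC marks the object dirty and remembers it so the
 * next GC scavenges it.  Only the mark itself is touched, so relaxed
 * accesses are enough in this sketch; duplicate entries are tolerated here
 * because the toy remembered set is only ever scanned, never deduplicated. */
void dirty_obj(struct capability *cap, struct obj *o)
{
    if (RELAXED_LOAD(&o->dirty) == CLEAN) {
        RELAXED_STORE(&o->dirty, DIRTY);
        if (cap->mut_count < 64)
            cap->mut_list[cap->mut_count++] = o;
    }
}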
@@ -1386,8 +1385,8 @@ dirty_STACK (Capability *cap, StgStack *stack)
         updateRemembSetPushStack(cap, stack);
     }
 
-    if (!(stack->dirty & STACK_DIRTY)) {
-        stack->dirty = STACK_DIRTY;
+    if (RELAXED_LOAD(&stack->dirty) == 0) {
+        RELAXED_STORE(&stack->dirty, 1);
         recordClosureMutated(cap,(StgClosure*)stack);
     }
 
@@ -1562,10 +1561,13 @@ calcNeeded (bool force_major, memcount *blocks_needed)
 
     for (uint32_t g = 0; g < RtsFlags.GcFlags.generations; g++) {
         generation *gen = &generations[g];
-
         W_ blocks = gen->live_estimate ? (gen->live_estimate / BLOCK_SIZE_W) : gen->n_blocks;
-        blocks += gen->n_large_blocks
-                + gen->n_compact_blocks;
+
+        // This can race with allocate() and compactAllocateBlockInternal()
+        // but only needs to be approximate
+        TSAN_ANNOTATE_BENIGN_RACE(&gen->n_large_blocks, "n_large_blocks");
+        blocks += RELAXED_LOAD(&gen->n_large_blocks)
+                + RELAXED_LOAD(&gen->n_compact_blocks);
 
         // we need at least this much space
         needed += blocks;
diff --git a/rts/sm/Storage.h b/rts/sm/Storage.h
index 8d90c3ba5f..48ddcf35f5 100644
--- a/rts/sm/Storage.h
+++ b/rts/sm/Storage.h
@@ -72,8 +72,11 @@ bool     getNewNursery        (Capability *cap);
 INLINE_HEADER
 bool doYouWantToGC(Capability *cap)
 {
+    // This is necessarily approximate since otherwise we would need to take
+    // SM_LOCK to safely look at n_new_large_words.
+    TSAN_ANNOTATE_BENIGN_RACE(&g0->n_new_large_words, "doYouWantToGC(n_new_large_words)");
     return ((cap->r.rCurrentNursery->link == NULL && !getNewNursery(cap)) ||
-            g0->n_new_large_words >= large_alloc_lim);
+            RELAXED_LOAD(&g0->n_new_large_words) >= large_alloc_lim);
 }
 
 /* -----------------------------------------------------------------------------
@@ -91,7 +94,7 @@ INLINE_HEADER void finishedNurseryBlock (Capability *cap, bdescr *bd) {
 }
 
 INLINE_HEADER void newNurseryBlock (bdescr *bd) {
-    bd->free = bd->start;
+    RELAXED_STORE(&bd->free, bd->start);
 }
 
 void     updateNurseriesStats (void);
diff --git a/rts/win32/OSThreads.c b/rts/win32/OSThreads.c
index f3bdefd998..ed8a598e51 100644
--- a/rts/win32/OSThreads.c
+++ b/rts/win32/OSThreads.c
@@ -444,6 +444,15 @@ interruptOSThread (OSThreadId id)
     CloseHandle(hdl);
 }
 
+void
+joinOSThread (OSThreadId id)
+{
+    int ret = WaitForSingleObject(id, INFINITE);
+    if (ret != WAIT_OBJECT_0) {
+        sysErrorBelch("joinOSThread: error %d", ret);
+    }
+}
+
 void setThreadNode (uint32_t node)
 {
     if (osNumaAvailable())
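
The new Windows joinOSThread above blocks on the thread handle with WaitForSingleObject. The POSIX build presumably exposes the same entry point on top of pthread_join; the sketch below is an assumption for illustration, not the contents of rts/posix/OSThreads.c.

/* Hypothetical POSIX counterpart, for illustration only; the real
 * implementation lives in rts/posix/OSThreads.c and is not shown here. */
#include <pthread.h>
#include <stdio.h>

typedef pthread_t OSThreadId;

void joinOSThread(OSThreadId id)
{
    int ret = pthread_join(id, NULL);   /* 0 on success, errno value otherwise */
    if (ret != 0) {
        /* The RTS would report this via sysErrorBelch; plain stderr keeps
         * the sketch self-contained. */
        fprintf(stderr, "joinOSThread: error %d\n", ret);
    }
}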
