summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDouglas Wilson <douglas.wilson@gmail.com>2021-01-01 12:45:06 +0000
committerMarge Bot <ben+marge-bot@smart-cactus.org>2021-01-09 21:21:02 -0500
commitf49d6fb27336297d6d7a46269a22dd98c131b4a8 (patch)
tree19c14462560b3c421f46279ecb34ed90917c48e2
parente07ba458156c7d038dba2e7b9e1372c3bbed9994 (diff)
downloadhaskell-f49d6fb27336297d6d7a46269a22dd98c131b4a8.tar.gz
rts: stats: Some fixes to stats for sequential gcs
Solves #19147. When n_capabilities > 1 we were not correctly accounting for gc time for sequential collections. In this case par_n_gcthreads == 1, however it is not guaranteed that the single gc thread is capability 0. A similar issue for copied is addressed as well.
-rw-r--r--rts/Stats.c16
-rw-r--r--rts/sm/GC.c35
2 files changed, 37 insertions, 14 deletions
diff --git a/rts/Stats.c b/rts/Stats.c
index 55aff39196..654edebb55 100644
--- a/rts/Stats.c
+++ b/rts/Stats.c
@@ -507,10 +507,18 @@ stat_endGC (Capability *cap, gc_thread *initiating_gct, W_ live, W_ copied, W_ s
initiating_gct->gc_start_elapsed - initiating_gct->gc_sync_start_elapsed;
stats.gc.elapsed_ns = current_elapsed - initiating_gct->gc_start_elapsed;
stats.gc.cpu_ns = 0;
- for (unsigned int i=0; i < par_n_threads; i++) {
- gc_thread *gct = gc_threads[i];
- ASSERT(gct->gc_end_cpu >= gct->gc_start_cpu);
- stats.gc.cpu_ns += gct->gc_end_cpu - gct->gc_start_cpu;
+ // see Note [n_gc_threads]
+ // par_n_threads is set to n_gc_threads at the single callsite of this
+ // function
+ if (par_n_threads == 1) {
+ ASSERT(initiating_gct->gc_end_cpu >= initiating_gct->gc_start_cpu);
+ stats.gc.cpu_ns += initiating_gct->gc_end_cpu - initiating_gct->gc_start_cpu;
+ } else {
+ for (unsigned int i=0; i < par_n_threads; i++) {
+ gc_thread *gct = gc_threads[i];
+ ASSERT(gct->gc_end_cpu >= gct->gc_start_cpu);
+ stats.gc.cpu_ns += gct->gc_end_cpu - gct->gc_start_cpu;
+ }
}
}
// -------------------------------------------------
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index 78be848c3f..d8ce3a1377 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -136,8 +136,22 @@ StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)]
ATTRIBUTE_ALIGNED(64);
#endif
-// Number of threads running in *this* GC. Affects how many
-// step->todos[] lists we have to look in to find work.
+/* Note [n_gc_threads]
+This is a global variable that originally tracked the number of threads
+participating in the current gc. It's meaing has diverged from this somewhate.
+In practise, it now takes one of the values {1, n_capabilities}. Don't be
+tricked into thinking this means garbage collections must have 1 or
+n_capabilities participating: idle capabilities (idle_cap[cap->no]) are included
+in the n_gc_thread count.
+
+Clearly this is in need of some tidying up, but for now we tread carefully. We
+check n_gc_threads > 1 to see whether we are in a parallel or sequential. We
+ensure n_gc_threads > 1 before iterating over gc_threads a la:
+
+for(i=0;i<n_gc_threads;++i) { foo(gc_thread[i]; )}
+
+Omitting this check has led to issues such as #19147.
+*/
uint32_t n_gc_threads;
// For stats:
@@ -552,12 +566,13 @@ GarbageCollect (uint32_t collect_gen,
uint64_t par_balanced_copied_acc = 0;
const gc_thread* thread;
- for (i=0; i < n_gc_threads; i++) {
- copied += RELAXED_LOAD(&gc_threads[i]->copied);
- }
- for (i=0; i < n_gc_threads; i++) {
- thread = gc_threads[i];
- if (n_gc_threads > 1) {
+ // see Note [n_gc_threads]
+ if (n_gc_threads > 1) {
+ for (i=0; i < n_gc_threads; i++) {
+ copied += RELAXED_LOAD(&gc_threads[i]->copied);
+ }
+ for (i=0; i < n_gc_threads; i++) {
+ thread = gc_threads[i];
debugTrace(DEBUG_gc,"thread %d:", i);
debugTrace(DEBUG_gc," copied %ld",
RELAXED_LOAD(&thread->copied) * sizeof(W_));
@@ -585,12 +600,12 @@ GarbageCollect (uint32_t collect_gen,
par_balanced_copied_acc +=
stg_min(n_gc_threads * RELAXED_LOAD(&thread->copied), copied);
}
- }
- if (n_gc_threads > 1) {
// See Note [Work Balance] for an explanation of this computation
par_balanced_copied =
(par_balanced_copied_acc - copied + (n_gc_threads - 1) / 2) /
(n_gc_threads - 1);
+ } else {
+ copied += gct->copied;
}
}