3 files changed, 46 insertions, 14 deletions
diff --git a/docs/users_guide/9.6.1-notes.rst b/docs/users_guide/9.6.1-notes.rst
index 797ff72f3a..9fd0ca2d75 100644
--- a/docs/users_guide/9.6.1-notes.rst
+++ b/docs/users_guide/9.6.1-notes.rst
@@ -129,6 +129,9 @@ Runtime system
   Previously only live blocks were taken into account.
   This makes it more likely to trigger promptly when the heap is highly fragmented.
 
+- Fixed a bug that sometimes caused live sparks to be GC'ed too early either during
+  minor GC or major GC with workstealing disabled. See #22528.
+
 
 ``base`` library
 ~~~~~~~~~~~~~~~~
@@ -149,9 +152,9 @@ Runtime system
 
 - Updated to `Unicode 15.0.0 <https://www.unicode.org/versions/Unicode15.0.0/>`_.
 
-- Add standard Unicode case predicates :base-ref:`Data.Char.isUpperCase` and 
-  :base-ref:`Data.Char.isLowerCase`. These predicates use the standard Unicode 
-  case properties and are more intuitive than :base-ref:`Data.Char.isUpper` and 
+- Add standard Unicode case predicates :base-ref:`Data.Char.isUpperCase` and
+  :base-ref:`Data.Char.isLowerCase`. These predicates use the standard Unicode
+  case properties and are more intuitive than :base-ref:`Data.Char.isUpper` and
   :base-ref:`Data.Char.isLower`.
 
 ``ghc-prim`` library
diff --git a/rts/Sparks.c b/rts/Sparks.c
index 6e2c0f9fb6..3f8980485a 100644
--- a/rts/Sparks.c
+++ b/rts/Sparks.c
@@ -79,6 +79,34 @@ newSpark (StgRegTable *reg, StgClosure *p)
     return 1;
 }
 
+/* Note [Pruning the spark pool]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+pruneSparkQueue checks if closures have been evacuated to know weither or
+not a spark can be GCed. If it was evacuated it's live and we keep the spark
+alive. If it hasn't been evacuated at the end of GC we can assume it's dead and
+remove the spark from the pool.
+
+To make this sound we must ensure GC has finished evacuating live objects before
+we prune the spark pool. Otherwise we might GC a spark before it has been evaluated.
+
+* If we run sequential GC then the GC Lead simply prunes after
+everything has been evacuated.
+
+* If we run parallel gc without work stealing then GC workers are not synchronized
+at any point before the worker returns. So we leave it to the GC Lead to prune
+sparks once evacuation has been finished and all workers returned.
+
+* If work stealing is enabled all GC threads will be running
+scavenge_until_all_done until regular heap marking is done. After which
+all workers will converge on a synchronization point. This means
+we can perform spark pruning inside the GC workers at this point.
+The only wart is that if we prune sparks locally we might
+miss sparks reachable via weak pointers as these are marked in the main
+thread concurrently to the calls to pruneSparkQueue. To fix this problem would
+require a GC barrier but that seems to high a price to pay.
+*/
+
+
 /* --------------------------------------------------------------------------
  * Remove all sparks from the spark queues which should not spark any
  * more.  Called after GC. We assume exclusive access to the structure
@@ -181,7 +209,7 @@ pruneSparkQueue (bool nonmovingMarkFinished, Capability *cap)
           cap->spark_stats.fizzled++;
           traceEventSparkFizzle(cap);
       } else {
-          info = spark->header.info;
+          info = RELAXED_LOAD(&spark->header.info);
           load_load_barrier();
           if (IS_FORWARDING_PTR(info)) {
               tmp = (StgClosure*)UN_FORWARDING_PTR(info);
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index 5bfee440a3..2438ad2816 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -292,6 +292,7 @@ GarbageCollect (uint32_t collect_gen,
       any_work, scav_find_work, max_n_todo_overflow;
 #if defined(THREADED_RTS)
   gc_thread *saved_gct;
+  bool gc_sparks_all_caps;
 #endif
   uint32_t g, n;
   // The time we should report our heap census as occurring at, if necessary.
@@ -559,6 +560,9 @@ GarbageCollect (uint32_t collect_gen,
   StgTSO *resurrected_threads = END_TSO_QUEUE;
   // must be last...  invariant is that everything is fully
   // scavenged at this point.
+#if defined(THREADED_RTS)
+  gc_sparks_all_caps = !work_stealing || !is_par_gc();
+#endif
   work_stealing = false;
   while (traverseWeakPtrList(&dead_weak_ptr_list, &resurrected_threads))
   {
@@ -571,8 +575,9 @@ GarbageCollect (uint32_t collect_gen,
   gcStableNameTable();
 
 #if defined(THREADED_RTS)
-  if (!is_par_gc()) {
-      for (n = 0; n < getNumCapabilities(); n++) {
+  // See Note [Pruning the spark pool]
+  if(gc_sparks_all_caps) {
+      for (n = 0; n < n_capabilities; n++) {
           pruneSparkQueue(false, getCapability(n));
       }
   } else {
@@ -1379,7 +1384,6 @@ void
 gcWorkerThread (Capability *cap)
 {
     gc_thread *saved_gct;
-
     // necessary if we stole a callee-saves register for gct:
     saved_gct = gct;
 
@@ -1410,13 +1414,10 @@ gcWorkerThread (Capability *cap)
     scavenge_until_all_done();
 
 #if defined(THREADED_RTS)
-    // Now that the whole heap is marked, we discard any sparks that
-    // were found to be unreachable.  The main GC thread is currently
-    // marking heap reachable via weak pointers, so it is
-    // non-deterministic whether a spark will be retained if it is
-    // only reachable via weak pointers.  To fix this problem would
-    // require another GC barrier, which is too high a price.
-    pruneSparkQueue(false, cap);
+    // See Note [Pruning the spark pool]
+    if(work_stealing && is_par_gc()) {
+        pruneSparkQueue(false, cap);
+    }
 #endif
 
     // Wait until we're told to continue