author     Simon Marlow <marlowsd@gmail.com>    2011-04-14 08:59:39 +0100
committer  Simon Marlow <marlowsd@gmail.com>    2011-04-14 09:01:52 +0100
commit     cc2ea98ac4a15e40a15e89de9e47f33e191ba393 (patch)
tree       f3ed770c0908c21b13d9f86afb326d5e01e3d28c /rts/sm
parent     25297a13bd6be722a74ee87237498a5626b298ef (diff)
Avoid accumulating slop in the pinned_object_block.
The pinned_object_block is where we allocate small pinned ByteArray# objects. At a GC the pinned_object_block was being treated like other large objects and promoted to the next step/generation, even if it was only partly full. Under some ByteString-heavy workloads this would accumulate on average 2k of slop per GC, and this memory is never released until the ByteArray# objects in the block are freed.

So now, we keep allocating into the pinned_object_block until it is completely full, at which point it is handed over to the GC as before. The pinned_object_block might therefore contain objects with a large range of ages, but I don't think this is any worse than the situation before. We still have the fragmentation issue in general, but the new scheme can improve the memory overhead for some workloads dramatically.
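In outline, the allocation path after this change behaves as sketched below. This is a simplified, consolidated view of the Storage.c hunk further down, not the verbatim RTS source; the names used (bdescr, allocBlock, dbl_link_onto, initBdescr, BLOCK_SIZE_W) are the ones that appear in the patch itself.

/* Simplified sketch of allocatePinned() after this patch: the capability
 * keeps filling its pinned_object_block across GCs, and only a completely
 * full block is handed over to the GC. */
StgPtr
allocatePinned (Capability *cap, lnat n)
{
    StgPtr p;
    bdescr *bd = cap->pinned_object_block;

    // No block yet, or the current one cannot hold n more words?
    if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
        ACQUIRE_SM_LOCK;
        if (bd != NULL) {
            // The old block is full: hand it to the GC as a large object.
            dbl_link_onto(bd, &g0->large_objects);
            g0->n_large_blocks++;
            g0->n_new_large_words += bd->free - bd->start;
        }
        cap->pinned_object_block = bd = allocBlock();
        RELEASE_SM_LOCK;
        initBdescr(bd, g0, g0);
        // BF_EVACUATED stops the GC from touching the block while it is
        // still being filled.
        bd->flags = BF_PINNED | BF_LARGE | BF_EVACUATED;
        bd->free  = bd->start;
    }

    p = bd->free;
    bd->free += n;
    return p;
}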
Diffstat (limited to 'rts/sm')
-rw-r--r--   rts/sm/GC.c        5
-rw-r--r--   rts/sm/Sanity.c    4
-rw-r--r--   rts/sm/Storage.c   25
3 files changed, 24 insertions(+), 10 deletions(-)
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index 05bc8f22fb..30361401cc 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -597,11 +597,6 @@ GarbageCollect (rtsBool force_major_gc,
// update the max size of older generations after a major GC
resize_generations();
- // Start a new pinned_object_block
- for (n = 0; n < n_capabilities; n++) {
- capabilities[n].pinned_object_block = NULL;
- }
-
// Free the mark stack.
if (mark_stack_top_bd != NULL) {
debugTrace(DEBUG_gc, "mark stack: %d blocks",
diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c
index 8ebb9a2513..0ec552c047 100644
--- a/rts/sm/Sanity.c
+++ b/rts/sm/Sanity.c
@@ -789,6 +789,7 @@ findMemoryLeak (void)
for (i = 0; i < n_capabilities; i++) {
markBlocks(nurseries[i].blocks);
+ markBlocks(capabilities[i].pinned_object_block);
}
#ifdef PROFILING
@@ -880,6 +881,9 @@ memInventory (rtsBool show)
for (i = 0; i < n_capabilities; i++) {
ASSERT(countBlocks(nurseries[i].blocks) == nurseries[i].n_blocks);
nursery_blocks += nurseries[i].n_blocks;
+ if (capabilities[i].pinned_object_block != NULL) {
+ nursery_blocks += capabilities[i].pinned_object_block->blocks;
+ }
}
retainer_blocks = 0;
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index ae3433a9b2..f8a9e559bf 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -657,17 +657,32 @@ allocatePinned (Capability *cap, lnat n)
// If we don't have a block of pinned objects yet, or the current
// one isn't large enough to hold the new object, allocate a new one.
if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
+ // The pinned_object_block remains attached to the capability
+ // until it is full, even if a GC occurs. We want this
+ // behaviour because otherwise the unallocated portion of the
+ // block would be forever slop, and under certain workloads
+ // (allocating a few ByteStrings per GC) we accumulate a lot
+ // of slop.
+ //
+ // So, the pinned_object_block is initially marked
+ // BF_EVACUATED so the GC won't touch it. When it is full,
+ // we place it on the large_objects list, and at the start of
+ // the next GC the BF_EVACUATED flag will be cleared, and the
+ // block will be promoted as usual (if anything in it is
+ // live).
ACQUIRE_SM_LOCK;
- cap->pinned_object_block = bd = allocBlock();
- dbl_link_onto(bd, &g0->large_objects);
- g0->n_large_blocks++;
+ if (bd != NULL) {
+ dbl_link_onto(bd, &g0->large_objects);
+ g0->n_large_blocks++;
+ g0->n_new_large_words += bd->free - bd->start;
+ }
+ cap->pinned_object_block = bd = allocBlock();
RELEASE_SM_LOCK;
initBdescr(bd, g0, g0);
- bd->flags = BF_PINNED | BF_LARGE;
+ bd->flags = BF_PINNED | BF_LARGE | BF_EVACUATED;
bd->free = bd->start;
}
- g0->n_new_large_words += n;
p = bd->free;
bd->free += n;
return p;
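One consequence of the Storage.c change is worth spelling out: the per-allocation update of n_new_large_words moves to block-retirement time. A rough before/after comparison follows; it is illustrative only, not RTS code.

/* Accounting sketch (illustrative): the total number of words reported to
 * the GC is unchanged, only the timing moves.
 *
 * Before the patch, every pinned allocation was counted immediately:
 *     g0->n_new_large_words += n;
 *
 * After the patch, the words are counted in bulk when a full block is
 * handed over to the GC:
 *     g0->n_new_large_words += bd->free - bd->start;
 *
 * Since bd->free - bd->start is the sum of all the n's allocated into that
 * block, the totals agree once the block is retired; words sitting in the
 * capability's partially-filled block are simply not reported until then. */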