| author    | Simon Marlow <marlowsd@gmail.com>        | 2012-02-13 11:17:50 +0000 |
|-----------|------------------------------------------|---------------------------|
| committer | Simon Marlow <marlowsd@gmail.com>        | 2012-02-13 12:26:10 +0000 |
| commit    | 67f4ab7e6b7705a9d617c6109a8c5434ede13cae |                           |
| tree      | 8f1ed63f526c3a88a4f234c9a3d5b5ac2a9eb0c6 |                           |
| parent    | 86ebfef9a5acc60b7a2ce3c8f025e6e707f17f87 |                           |

Allocate pinned object blocks from the nursery, not the global allocator.
Prompted by a benchmark posted to parallel-haskell@haskell.org by
Andreas Voellmy <andreas.voellmy@gmail.com>.  This program exhibits
contention for the block allocator when run with -N2 and greater
without the fix:

```haskell
{-# LANGUAGE MagicHash, UnboxedTuples, BangPatterns #-}
module Main where

import Control.Monad
import Control.Concurrent
import System.Environment
import GHC.IO
import GHC.Exts
import GHC.Conc

main :: IO ()
main = do
  [m] <- fmap (fmap read) getArgs
  n <- getNumCapabilities
  ms <- replicateM n newEmptyMVar
  sequence_ [ forkIO $ busyWorkerB (m `quot` n) >> putMVar mv () | mv <- ms ]
  mapM_ takeMVar ms

busyWorkerB :: Int -> IO ()
busyWorkerB n_loops = go 0
  where go !n | n >= n_loops = return ()
              | otherwise    =
          do _ <- IO $ \s ->
                    case newPinnedByteArray# 1024# s of
                      (# s', _mbarr# #) -> (# s', () #)
             go (n+1)
```
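
The commit message does not spell out how to run this; one plausible invocation is to compile with `ghc -O2 -threaded -rtsopts Main.hs` and run as `./Main 10000000 +RTS -N4 -s`. Without the fix, each 1024-byte pinned allocation that fills the current block takes the global storage-manager lock, so the `+RTS -s` summary shows the run scaling poorly beyond `-N1`.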

| mode       | file             | lines changed |
|------------|------------------|---------------|
| -rw-r--r-- | rts/Capability.c | 1             |
| -rw-r--r-- | rts/Capability.h | 2             |
| -rw-r--r-- | rts/sm/GC.c      | 42            |
| -rw-r--r-- | rts/sm/Sanity.c  | 3             |
| -rw-r--r-- | rts/sm/Storage.c | 59            |

5 files changed, 94 insertions, 13 deletions
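
Before reading the diff, it may help to recall how `allocatePinned()` is structured: small pinned allocations are served by bumping a free pointer inside a capability-local block, and a slow path runs only when the current block fills up. This patch changes only where the slow path obtains its next block. The following is a minimal schematic of the bump-pointer fast path, with illustrative types and sizes rather than the RTS's own definitions:

```c
#include <stddef.h>
#include <stdio.h>

typedef unsigned long W_;         // one heap word, as in the RTS
#define BLOCK_SIZE_W 1024         // words per block (illustrative value)

// A cut-down block descriptor: just the block start and the bump pointer.
typedef struct {
    W_ *start;
    W_ *free;
} bdescr;

// Fast path of a pinned allocator: bump bd->free when the request fits.
// Returning NULL stands in for the slow path -- fetching a fresh block --
// which is the part this patch changes in the real allocatePinned().
static W_ *alloc_pinned(bdescr *bd, size_t n) {
    if (bd->free + n > bd->start + BLOCK_SIZE_W)
        return NULL;              // block full: slow path takes over
    W_ *p = bd->free;
    bd->free += n;
    return p;
}

int main(void) {
    static W_ mem[BLOCK_SIZE_W];
    bdescr bd = { mem, mem };
    W_ *a = alloc_pinned(&bd, 128);   // fits: bumps free by 128 words
    W_ *b = alloc_pinned(&bd, 1000);  // 128 + 1000 > 1024: returns NULL
    printf("a=%p b=%p used=%ld words\n",
           (void *)a, (void *)b, (long)(bd.free - bd.start));
    return 0;
}
```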

```diff
diff --git a/rts/Capability.c b/rts/Capability.c
index 54f9196b99..2cb3042088 100644
--- a/rts/Capability.c
+++ b/rts/Capability.c
@@ -273,6 +273,7 @@ initCapability( Capability *cap, nat i )
     cap->transaction_tokens = 0;
     cap->context_switch = 0;
     cap->pinned_object_block = NULL;
+    cap->pinned_object_blocks = NULL;
 
 #ifdef PROFILING
     cap->r.rCCCS = CCS_SYSTEM;
diff --git a/rts/Capability.h b/rts/Capability.h
index 2ae2fcf6d7..2f616b5c6f 100644
--- a/rts/Capability.h
+++ b/rts/Capability.h
@@ -76,6 +76,8 @@ struct Capability_ {
     // block for allocating pinned objects into
     bdescr *pinned_object_block;
 
+    // full pinned object blocks allocated since the last GC
+    bdescr *pinned_object_blocks;
 
     // Context switch flag.  When non-zero, this means: stop running
     // Haskell code, and switch threads.
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index aeadf6f42f..86231948c1 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -150,6 +150,7 @@ static StgWord dec_running          (void);
 static void wakeup_gc_threads       (nat me);
 static void shutdown_gc_threads     (nat me);
 static void collect_gct_blocks      (void);
+static lnat collect_pinned_object_blocks (void);
 
 #if 0 && defined(DEBUG)
 static void gcCAFs                  (void);
@@ -285,6 +286,10 @@ GarbageCollect (rtsBool force_major_gc,
   // check sanity *before* GC
   IF_DEBUG(sanity, checkSanity(rtsFalse /* before GC */, major_gc));
 
+  // gather blocks allocated using allocatePinned() from each capability
+  // and put them on the g0->large_objects list.
+  collect_pinned_object_blocks();
+
   // Initialise all the generations/steps that we're collecting.
   for (g = 0; g <= N; g++) {
       prepare_collected_gen(&generations[g]);
@@ -1422,6 +1427,43 @@ collect_gct_blocks (void)
 }
 
 /* -----------------------------------------------------------------------------
+   During mutation, any blocks that are filled by allocatePinned() are
+   stashed on the local pinned_object_blocks list, to avoid needing to
+   take a global lock.  Here we collect those blocks from the
+   cap->pinned_object_blocks lists and put them on the
+   main g0->large_objects list.
+
+   Returns: the number of words allocated this way, for stats
+   purposes.
+   -------------------------------------------------------------------------- */
+
+static lnat
+collect_pinned_object_blocks (void)
+{
+    nat n;
+    bdescr *bd, *prev;
+    lnat allocated = 0;
+
+    for (n = 0; n < n_capabilities; n++) {
+        prev = NULL;
+        for (bd = capabilities[n].pinned_object_blocks; bd != NULL; bd = bd->link) {
+            allocated += bd->free - bd->start;
+            prev = bd;
+        }
+        if (prev != NULL) {
+            prev->link = g0->large_objects;
+            if (g0->large_objects != NULL) {
+                g0->large_objects->u.back = prev;
+            }
+            g0->large_objects = capabilities[n].pinned_object_blocks;
+            capabilities[n].pinned_object_blocks = 0;
+        }
+    }
+
+    return allocated;
+}
+
+/* -----------------------------------------------------------------------------
    Initialise a gc_thread before GC
    -------------------------------------------------------------------------- */
 
diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c
index b6c5926ab8..78ecc96e0a 100644
--- a/rts/sm/Sanity.c
+++ b/rts/sm/Sanity.c
@@ -869,7 +869,7 @@ memInventory (rtsBool show)
           gen_blocks[g] += countBlocks(gc_threads[i]->gens[g].part_list);
           gen_blocks[g] += countBlocks(gc_threads[i]->gens[g].scavd_list);
           gen_blocks[g] += countBlocks(gc_threads[i]->gens[g].todo_bd);
-      }	  
+      }
       gen_blocks[g] += genBlocks(&generations[g]);
   }
 
@@ -880,6 +880,7 @@ memInventory (rtsBool show)
       if (capabilities[i].pinned_object_block != NULL) {
           nursery_blocks += capabilities[i].pinned_object_block->blocks;
       }
+      nursery_blocks += countBlocks(capabilities[i].pinned_object_blocks);
   }
 
   retainer_blocks = 0;
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 012ba514db..0ff37d2582 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -744,8 +744,54 @@ allocatePinned (Capability *cap, lnat n)
     bd = cap->pinned_object_block;
 
     // If we don't have a block of pinned objects yet, or the current
-    // one isn't large enough to hold the new object, allocate a new one.
+    // one isn't large enough to hold the new object, get a new one.
     if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
+
+        // stash the old block on cap->pinned_object_blocks.  On the
+        // next GC cycle these objects will be moved to
+        // g0->large_objects.
+        if (bd != NULL) {
+            dbl_link_onto(bd, &cap->pinned_object_blocks);
+        }
+
+        // We need to find another block.  We could just allocate one,
+        // but that means taking a global lock and we really want to
+        // avoid that (benchmarks that allocate a lot of pinned
+        // objects scale really badly if we do this).
+        //
+        // So first, we try taking the next block from the nursery, in
+        // the same way as allocate(), but note that we can only take
+        // an *empty* block, because we're about to mark it as
+        // BF_PINNED | BF_LARGE.
+        bd = cap->r.rCurrentNursery->link;
+        if (bd == NULL || bd->free != bd->start) { // must be empty!
+            // The nursery is empty, or the next block is non-empty:
+            // allocate a fresh block (we can't fail here).
+
+            // XXX in the case when the next nursery block is
+            // non-empty we aren't exerting any pressure to GC soon,
+            // so if this case ever happens then we could in theory
+            // keep allocating for ever without calling the GC. We
+            // can't bump g0->n_new_large_words because that will be
+            // counted towards allocation, and we're already counting
+            // our pinned objects as allocation in
+            // collect_pinned_object_blocks in the GC.
+            ACQUIRE_SM_LOCK;
+            bd = allocBlock();
+            RELEASE_SM_LOCK;
+            initBdescr(bd, g0, g0);
+        } else {
+            // we have a block in the nursery: steal it
+            cap->r.rCurrentNursery->link = bd->link;
+            if (bd->link != NULL) {
+                bd->link->u.back = cap->r.rCurrentNursery;
+            }
+            cap->r.rNursery->n_blocks--;
+        }
+
+        cap->pinned_object_block = bd;
+        bd->flags  = BF_PINNED | BF_LARGE | BF_EVACUATED;
+
         // The pinned_object_block remains attached to the capability
         // until it is full, even if a GC occurs.  We want this
         // behaviour because otherwise the unallocated portion of the
@@ -759,17 +805,6 @@ allocatePinned (Capability *cap, lnat n)
         // the next GC the BF_EVACUATED flag will be cleared, and the
         // block will be promoted as usual (if anything in it is
        // live).
-        ACQUIRE_SM_LOCK;
-        if (bd != NULL) {
-            dbl_link_onto(bd, &g0->large_objects);
-            g0->n_large_blocks++;
-            g0->n_new_large_words += bd->free - bd->start;
-        }
-        cap->pinned_object_block = bd = allocBlock();
-        RELEASE_SM_LOCK;
-        initBdescr(bd, g0, g0);
-        bd->flags  = BF_PINNED | BF_LARGE | BF_EVACUATED;
-	bd->free   = bd->start;
     }
 
     p = bd->free;
```
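
Two details of the patch are easier to follow with the supporting pieces in view. First, the stash onto `cap->pinned_object_blocks` uses `dbl_link_onto()`, a small helper in rts/sm/Storage.c that pushes a block onto the front of a doubly-linked `bdescr` list; its shape is roughly:

```c
// Roughly the shape of dbl_link_onto() in rts/sm/Storage.c: push bd
// onto the front of the doubly-linked list headed by *list.
void dbl_link_onto (bdescr *bd, bdescr **list)
{
    bd->link = *list;             // new head points at the old head
    bd->u.back = NULL;            // nothing precedes the new head
    if (*list != NULL) {
        (*list)->u.back = bd;     // old head's back pointer now points at bd
    }
    *list = bd;                   // the list now starts at bd
}
```

Second, the pattern that makes the benchmark scale — each mutator pushes onto a private intrusive list with no locking, and a stop-the-world phase splices every private list onto the global one — stands on its own outside the RTS. Below is a self-contained sketch of that pattern; all names (`cap_list`, `push_local`, `splice_all`) are illustrative, not RTS API, and the per-capability pushes are shown single-threaded for brevity:

```c
#include <stddef.h>
#include <stdio.h>

// A block descriptor reduced to the fields the pattern needs.
typedef struct block_ {
    struct block_ *link;    // forward pointer
    struct block_ *back;    // back pointer, fixed up when splicing
    size_t used;            // words used, for the allocation count
} block;

#define N_CAPS 4
static block *cap_list[N_CAPS]; // per-capability lists: no lock on the hot path
static block *global_list;      // global list: touched only at "GC" time

// Hot path: the owning capability pushes onto its own list, lock-free.
static void push_local(int cap, block *bd) {
    bd->link = cap_list[cap];
    bd->back = NULL;
    if (cap_list[cap] != NULL) cap_list[cap]->back = bd;
    cap_list[cap] = bd;
}

// Stop-the-world phase: splice each per-capability list onto the global
// list, mirroring collect_pinned_object_blocks() in the patch above.
static size_t splice_all(void) {
    size_t allocated = 0;
    for (int n = 0; n < N_CAPS; n++) {
        block *prev = NULL;
        for (block *bd = cap_list[n]; bd != NULL; bd = bd->link) {
            allocated += bd->used;   // count words for the stats
            prev = bd;               // remember the tail of this list
        }
        if (prev != NULL) {          // non-empty: hook tail to old global head
            prev->link = global_list;
            if (global_list != NULL) global_list->back = prev;
            global_list = cap_list[n];
            cap_list[n] = NULL;
        }
    }
    return allocated;
}

int main(void) {
    block blocks[6] = {0};
    for (int i = 0; i < 6; i++) {
        blocks[i].used = 100 + i;          // pretend each block holds some words
        push_local(i % N_CAPS, &blocks[i]);
    }
    printf("spliced %zu words onto the global list\n", splice_all()); // 615
    return 0;
}
```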
