/* ---------------------------------------------------------------------------
 *
 * (c) The GHC Team, 2000-2008
 *
 * Sparking support for THREADED_RTS version of the RTS.
 * -------------------------------------------------------------------------*/

#include "rts/PosixSource.h"
#include "Rts.h"

#include "Schedule.h"
#include "RtsUtils.h"
#include "Trace.h"
#include "Prelude.h"
#include "Sparks.h"
#include "ThreadLabels.h"
#include "sm/NonMovingMark.h"
#include "sm/HeapAlloc.h"

#if defined(THREADED_RTS)

SparkPool *
allocSparkPool( void )
{
    return newWSDeque(RtsFlags.ParFlags.maxLocalSparks);
}

void
freeSparkPool (SparkPool *pool)
{
    freeWSDeque(pool);
}

/* -----------------------------------------------------------------------------
 *
 * Turn a spark into a real thread
 *
 * -------------------------------------------------------------------------- */

void
createSparkThread (Capability *cap)
{
    StgTSO *tso;

    tso = createIOThread (cap, RtsFlags.GcFlags.initialStkSize,
                          (StgClosure *)runSparks_closure);
    setThreadLabel(cap, tso, "spark evaluator");
    traceEventCreateSparkThread(cap, tso->id);

    appendToRunQueue(cap,tso);
}

/* --------------------------------------------------------------------------
 * newSpark: create a new spark, as a result of calling "par"
 * Called directly from STG.
 * -------------------------------------------------------------------------- */

StgInt
newSpark (StgRegTable *reg, StgClosure *p)
{
    Capability *cap = regTableToCapability(reg);
    SparkPool *pool = cap->sparks;

    if (!fizzledSpark(p)) {
        if (pushWSDeque(pool,p)) {
            cap->spark_stats.created++;
            traceEventSparkCreate(cap);
        } else {
            /* overflowing the spark pool */
            cap->spark_stats.overflowed++;
            traceEventSparkOverflow(cap);
        }
    } else {
        cap->spark_stats.dud++;
        traceEventSparkDud(cap);
    }

    return 1;
}

/* Note [Pruning the spark pool]
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   pruneSparkQueue checks whether closures have been evacuated in order to
   decide whether a spark can be GCed. If a spark's closure was evacuated
   it is live, and we keep the spark alive. If it has not been evacuated by
   the end of GC we can assume it is dead and remove the spark from the
   pool.

   To make this sound we must ensure that GC has finished evacuating live
   objects before we prune the spark pool. Otherwise we might GC a spark
   before it has been evaluated.

   * If we run sequential GC then the GC lead simply prunes after
     everything has been evacuated.

   * If we run parallel GC without work stealing then GC workers are not
     synchronized at any point before each worker returns, so we leave it
     to the GC lead to prune sparks once evacuation has finished and all
     workers have returned.

   * If work stealing is enabled, all GC threads run
     scavenge_until_all_done until regular heap marking is done, after
     which all workers converge on a synchronization point. This means we
     can perform spark pruning inside the GC workers at this point. The
     only wart is that if we prune sparks locally we might miss sparks
     reachable via weak pointers, as these are marked in the main thread
     concurrently with the calls to pruneSparkQueue. Fixing this would
     require a GC barrier, which seems too high a price to pay.
*/

/* --------------------------------------------------------------------------
 * Remove all sparks from the spark queues which should not spark any
 * more.  Called after GC. We assume exclusive access to the structure
 * and replace all sparks in the queue; see explanation below. At exit,
 * the spark pool only contains sparkable closures.
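 *
 * As an illustration only (a simplified sketch, not code the RTS runs),
 * the compaction pass in pruneSparkQueue below amounts to this walk,
 * where 'live' is a hypothetical stand-in for the evacuation/mark checks
 * made in the real loop, and the array size is a power of two so masking
 * with moduloSize wraps an index:
 *
 *     oldBotInd = botInd = bottom & moduloSize;      // write cursor
 *     currInd = top & moduloSize;                    // read cursor
 *     while (currInd != oldBotInd) {
 *         if (live(elements[currInd])) {             // hypothetical check
 *             elements[botInd] = elements[currInd];  // keep the spark
 *             botInd = (botInd + 1) & moduloSize;
 *         }
 *         currInd = (currInd + 1) & moduloSize;
 *     }
 *     top = oldBotInd;   // live sparks now occupy [oldBotInd, botInd)
 *     bottom = botInd;   // wrap-around corrected in the real code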
 * -------------------------------------------------------------------------- */

void
pruneSparkQueue (bool nonmovingMarkFinished, Capability *cap)
{
    SparkPool *pool;
    StgClosurePtr spark, tmp, *elements;
    uint32_t n, pruned_sparks; // stats only
    StgInt botInd, oldBotInd, currInd; // indices in array (always < size)
    const StgInfoTable *info;

    n = 0;
    pruned_sparks = 0;

    pool = cap->sparks;

    // it is possible that top > bottom, indicating an empty pool.  We
    // fix that here; this is only necessary because the loop below
    // assumes it.
    if (pool->top > pool->bottom)
        pool->top = pool->bottom;

    // Take this opportunity to reset top/bottom modulo the size of
    // the array, to avoid overflow.  This is only possible because no
    // stealing is happening during GC.
    pool->bottom -= pool->top & ~pool->moduloSize;
    pool->top    &= pool->moduloSize;

    debugTrace(DEBUG_sparks,
               "pruneSparkQueue: current spark queue len=%ld; (hd=%ld; tl=%ld)",
               sparkPoolSize(pool), pool->bottom, pool->top);

    ASSERT_WSDEQUE_INVARIANTS(pool);

    elements = (StgClosurePtr *)pool->elements;

    /* We have exclusive access to the structure here, so we can reset
       bottom and top counters, and prune invalid sparks. Contents are
       copied in-place if they are valuable, otherwise discarded. The
       routine uses "real" indices t and b, computed as top and bottom
       modulo the array size.

       Copying:

       At the beginning, the pool structure can look like this
       ( bottom % size >= top % size , no wrap-around):

                  t          b
       ___________***********_________________

       or like this ( bottom % size < top % size, wrap-around ):

                  b         t
       ***********__________******************

       As we need to remove useless sparks anyway, we make one pass
       between t and b, moving valuable content to b and subsequent
       cells (wrapping around when the size is reached).

                  b         t
       ***********OOO_______XX_X__X?**********
                     ^____move?____/

       After this movement, botInd becomes the new bottom, and the old
       bottom becomes the new top index, both as indices in the array
       size range.
    */

    // starting here
    currInd = (pool->top) & (pool->moduloSize); // mod

    // copies of evacuated closures go to space from botInd on
    // we keep oldBotInd to know when to stop
    oldBotInd = botInd = (pool->bottom) & (pool->moduloSize); // mod

    // on entry to loop, we are within the bounds
    ASSERT( currInd < pool->size && botInd < pool->size );

    while (currInd != oldBotInd ) {
        /* must use != here, wrap-around at size
           subtle: loop not entered if queue empty
         */

        /* check element at currInd. if valuable, evacuate and move to
           botInd, otherwise move on */
        spark = elements[currInd];

        // We have to be careful here: in the parallel GC, another
        // thread might evacuate this closure while we're looking at it,
        // so grab the info pointer just once.
        if (GET_CLOSURE_TAG(spark) != 0) {
            // Tagged pointer is a value, so the spark has fizzled.  It
            // probably never happens that we get a tagged pointer in
            // the spark pool, because we would have pruned the spark
            // during the previous GC cycle if it turned out to be
            // evaluated, but it doesn't hurt to have this check for
            // robustness.
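            //
            // (Background: GHC uses pointer tagging, in which the low
            // bits of a pointer to a closure encode information such as
            // "this closure is already evaluated"; a non-zero tag
            // therefore means there is no useful work left for the spark
            // to do.  See the pointer-tagging notes in the compiler's
            // code generator for details.)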
            pruned_sparks++;
            cap->spark_stats.fizzled++;
            traceEventSparkFizzle(cap);
        } else {
            info = RELAXED_LOAD(&spark->header.info);
            load_load_barrier();
            if (IS_FORWARDING_PTR(info)) {
                tmp = (StgClosure*)UN_FORWARDING_PTR(info);
                /* if valuable work: shift inside the pool */
                if (closure_SHOULD_SPARK(tmp)) {
                    elements[botInd] = tmp; // keep entry (new address)
                    botInd++;
                    n++;
                } else {
                    pruned_sparks++; // discard spark
                    cap->spark_stats.fizzled++;
                    traceEventSparkFizzle(cap);
                }
            } else if (HEAP_ALLOCED(spark)) {
                bdescr *spark_bd = Bdescr((P_) spark);
                bool is_alive = false;
                if (nonmovingMarkFinished) {
                    // See Note [Spark management under the nonmoving
                    // collector] in NonMoving.c.
                    // If we just concluded concurrent marking then we can
                    // rely on the mark bitmap to reflect whether sparks
                    // living in the non-moving heap have died.
                    if (spark_bd->flags & BF_NONMOVING) {
                        is_alive = nonmovingIsAlive(spark);
                    } else {
                        // The nonmoving collector doesn't collect anything
                        // outside of the non-moving heap.
                        is_alive = true;
                    }
                } else if (spark_bd->flags & (BF_EVACUATED | BF_NONMOVING)) {
                    is_alive = true;
                }

                if (is_alive) {
                    if (closure_SHOULD_SPARK(spark)) {
                        elements[botInd] = spark; // keep entry (new address)
                        botInd++;
                        n++;
                    } else {
                        pruned_sparks++; // discard spark
                        cap->spark_stats.fizzled++;
                        traceEventSparkFizzle(cap);
                    }
                } else {
                    pruned_sparks++; // discard spark
                    cap->spark_stats.gcd++;
                    traceEventSparkGC(cap);
                }
            } else {
                if (INFO_PTR_TO_STRUCT(info)->type == THUNK_STATIC) {
                    // We can't tell whether a THUNK_STATIC is garbage
                    // or not.
                    // See also Note [STATIC_LINK fields].
                    // isAlive() also ignores static closures (see GCAux.c).
                    elements[botInd] = spark; // keep entry (new address)
                    botInd++;
                    n++;
                } else {
                    pruned_sparks++; // discard spark
                    cap->spark_stats.fizzled++;
                    traceEventSparkFizzle(cap);
                }
            }
        }

        currInd++;

        // in the loop, we may reach the bounds, and instantly wrap around
        ASSERT( currInd <= pool->size && botInd <= pool->size );
        if ( currInd == pool->size ) { currInd = 0; }
        if ( botInd == pool->size )  { botInd = 0;  }

    } // while-loop over spark pool elements

    ASSERT(currInd == oldBotInd);

    pool->top = oldBotInd; // where we started writing
    // first free place we did not use (corrected by wraparound):
    pool->bottom = (oldBotInd <= botInd) ? botInd : (botInd + pool->size);

    debugTrace(DEBUG_sparks, "pruned %d sparks", pruned_sparks);

    debugTrace(DEBUG_sparks,
               "new spark queue len=%ld; (hd=%ld; tl=%ld)",
               sparkPoolSize(pool), pool->bottom, pool->top);

    ASSERT_WSDEQUE_INVARIANTS(pool);
}

/* GC for the spark pool, called inside Capability.c for all capabilities
   in turn. Blindly "evac"s the complete spark pool. */
void
traverseSparkQueue (evac_fn evac, void *user, Capability *cap)
{
    StgClosure **sparkp;
    SparkPool *pool;
    StgWord top, bottom, modMask;

    pool = cap->sparks;

    ASSERT_WSDEQUE_INVARIANTS(pool);

    top = pool->top;
    bottom = pool->bottom;
    sparkp = (StgClosurePtr*)pool->elements;
    modMask = pool->moduloSize;

    while (top < bottom) {
        /* call evac for all closures in range (wrap-around via modulo)
         * In GHC-6.10, evac takes an additional 1st argument to hold a
         * GC-specific register, see rts/sm/GC.c::mark_root()
         */
        evac( user, sparkp + (top & modMask) );
        top++;
    }

    debugTrace(DEBUG_sparks,
               "traversed spark queue, len=%ld; (hd=%ld; tl=%ld)",
               sparkPoolSize(pool), pool->bottom, pool->top);
}

#else

StgInt
newSpark (StgRegTable *reg STG_UNUSED, StgClosure *p STG_UNUSED)
{
    /* nothing */
    return 1;
}

#endif /* THREADED_RTS */
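/* Usage illustration (a sketch only, not part of the RTS): an evac_fn
 * passed to traverseSparkQueue receives a pointer to each spark slot and
 * may update the closure pointer in place.  For example, with a
 * hypothetical function that merely counts roots:
 *
 *     static void countRoot (void *user, StgClosure **root)
 *     {
 *         (void)root;              // a real evac_fn would evacuate *root
 *         (*(StgWord *)user)++;    // and update the slot; we just count
 *     }
 *
 *     StgWord n_sparks = 0;
 *     traverseSparkQueue(countRoot, &n_sparks, cap);
 */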