diff options
author | Ömer Sinan Ağacan <omeragacan@gmail.com> | 2019-09-09 14:54:21 +0300 |
---|---|---|
committer | Ömer Sinan Ağacan <omeragacan@gmail.com> | 2019-09-11 11:14:58 +0300 |
commit | 7a5e3725a2d1da706c34582edac7bb147c7a5247 (patch) | |
tree | 29fb3bb4429700fc884ed42037a3fdd5513f73fd | |
parent | 270fbe8512f04b6107755fa22bdec62205c0a567 (diff) | |
download | haskell-wip/osa1/min_payload_size.tar.gz |
Remove min payload size restriction from the compiler and RTSwip/osa1/min_payload_size
Previously we'd require every heap object to have at least one word
space in the payload. This was *incorrectly* documented as follows in
the source code: (now deleted)
Minimum closure sizes
This is the minimum number of words in the payload of a
heap-allocated closure, so that the closure has enough room to be
overwritten with a forwarding pointer during garbage collection.
This comment was wrong: the GC doesn't require a location in the
payload, forwarding pointers are made using the info table pointer
location.
The real reason why we needed a word in the payload is because the
mark-compact collector allocates one bit per word in the heap for its
bitmap, but needs two bits per object to work efficiently. Details are
explained in the new note Note [Mark bits in mark-compact collector] in
Compact.h.
We now allocate two bits in the bitmap per word and remove the
restriction. Heap objects can now have size 1, as demonstrated in the
updated tests.
Nofib results:
- 0.0% change in binary sizes and TotalMem
- "constraints" now allocates 0.1% less. No other changes in
allocations.
- Runtime/Elapsed results are noisy and inconclusive
-rw-r--r-- | compiler/cmm/CmmParse.y | 11 | ||||
-rw-r--r-- | compiler/cmm/SMRep.hs | 22 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmClosure.hs | 4 | ||||
-rw-r--r-- | compiler/ghci/ByteCodeItbls.hs | 6 | ||||
-rw-r--r-- | includes/rts/Constants.h | 10 | ||||
-rw-r--r-- | libraries/ghc-heap/tests/closure_size.hs | 3 | ||||
-rw-r--r-- | libraries/ghc-heap/tests/heap_all.hs | 4 | ||||
-rw-r--r-- | rts/Interpreter.c | 2 | ||||
-rw-r--r-- | rts/sm/Compact.c | 19 | ||||
-rw-r--r-- | rts/sm/Compact.h | 75 | ||||
-rw-r--r-- | rts/sm/Evac.c | 8 | ||||
-rw-r--r-- | rts/sm/GC.c | 15 | ||||
-rw-r--r-- | rts/sm/GCAux.c | 2 | ||||
-rw-r--r-- | rts/sm/Sanity.c | 4 | ||||
-rw-r--r-- | utils/deriveConstants/Main.hs | 4 |
15 files changed, 105 insertions, 84 deletions
diff --git a/compiler/cmm/CmmParse.y b/compiler/cmm/CmmParse.y index f563145250..50bb15930b 100644 --- a/compiler/cmm/CmmParse.y +++ b/compiler/cmm/CmmParse.y @@ -466,8 +466,7 @@ info :: { CmmParse (CLabel, Maybe CmmInfoTable, [LocalReg]) } do dflags <- getDynFlags let prof = profilingInfo dflags $11 $13 rep = mkRTSRep (fromIntegral $9) $ - mkHeapRep dflags False (fromIntegral $5) - (fromIntegral $7) Thunk + mkHeapRep False (fromIntegral $5) (fromIntegral $7) Thunk -- not really Thunk, but that makes the info table -- we want. return (mkCmmEntryLabel pkg $3, @@ -484,8 +483,7 @@ info :: { CmmParse (CLabel, Maybe CmmInfoTable, [LocalReg]) } ty = Fun 0 (ArgSpec (fromIntegral $15)) -- Arity zero, arg_type $15 rep = mkRTSRep (fromIntegral $9) $ - mkHeapRep dflags False (fromIntegral $5) - (fromIntegral $7) ty + mkHeapRep False (fromIntegral $5) (fromIntegral $7) ty return (mkCmmEntryLabel pkg $3, Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3 , cit_rep = rep @@ -502,8 +500,7 @@ info :: { CmmParse (CLabel, Maybe CmmInfoTable, [LocalReg]) } ty = Constr (fromIntegral $9) -- Tag (BS8.pack $13) rep = mkRTSRep (fromIntegral $11) $ - mkHeapRep dflags False (fromIntegral $5) - (fromIntegral $7) ty + mkHeapRep False (fromIntegral $5) (fromIntegral $7) ty return (mkCmmEntryLabel pkg $3, Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3 , cit_rep = rep @@ -520,7 +517,7 @@ info :: { CmmParse (CLabel, Maybe CmmInfoTable, [LocalReg]) } let prof = profilingInfo dflags $9 $11 ty = ThunkSelector (fromIntegral $5) rep = mkRTSRep (fromIntegral $7) $ - mkHeapRep dflags False 0 0 ty + mkHeapRep False 0 0 ty return (mkCmmEntryLabel pkg $3, Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3 , cit_rep = rep diff --git a/compiler/cmm/SMRep.hs b/compiler/cmm/SMRep.hs index 49137eff25..f2db1b136e 100644 --- a/compiler/cmm/SMRep.hs +++ b/compiler/cmm/SMRep.hs @@ -222,20 +222,9 @@ data ArgDescr ----------------------------------------------------------------------------- -- 
Construction -mkHeapRep :: DynFlags -> IsStatic -> WordOff -> WordOff -> ClosureTypeInfo - -> SMRep -mkHeapRep dflags is_static ptr_wds nonptr_wds cl_type_info - = HeapRep is_static - ptr_wds - (nonptr_wds + slop_wds) - cl_type_info - where - slop_wds - | is_static = 0 - | otherwise = max 0 (minClosureSize dflags - (hdr_size + payload_size)) - - hdr_size = closureTypeHdrSize dflags cl_type_info - payload_size = ptr_wds + nonptr_wds +mkHeapRep :: IsStatic -> WordOff -> WordOff -> ClosureTypeInfo -> SMRep +mkHeapRep is_static ptr_wds nonptr_wds cl_type_info + = HeapRep is_static ptr_wds nonptr_wds cl_type_info mkRTSRep :: Int -> SMRep -> SMRep mkRTSRep = RTSRep @@ -310,11 +299,6 @@ profHdrSize dflags | gopt Opt_SccProfilingOn dflags = pROF_HDR_SIZE dflags | otherwise = 0 --- | The garbage collector requires that every closure is at least as --- big as this. -minClosureSize :: DynFlags -> WordOff -minClosureSize dflags = fixedHdrSizeW dflags + mIN_PAYLOAD_SIZE dflags - arrWordsHdrSize :: DynFlags -> ByteOff arrWordsHdrSize dflags = fixedHdrSize dflags + sIZEOF_StgArrBytes_NoHdr dflags diff --git a/compiler/codeGen/StgCmmClosure.hs b/compiler/codeGen/StgCmmClosure.hs index ac8db1268f..d47ea3a760 100644 --- a/compiler/codeGen/StgCmmClosure.hs +++ b/compiler/codeGen/StgCmmClosure.hs @@ -698,7 +698,7 @@ mkClosureInfo dflags is_static id lf_info tot_wds ptr_wds val_descr , closureProf = prof } -- (we don't have an SRT yet) where name = idName id - sm_rep = mkHeapRep dflags is_static ptr_wds nonptr_wds (lfClosureType lf_info) + sm_rep = mkHeapRep is_static ptr_wds nonptr_wds (lfClosureType lf_info) prof = mkProfilingInfo dflags id val_descr nonptr_wds = tot_wds - ptr_wds @@ -966,7 +966,7 @@ mkDataConInfoTable dflags data_con is_static ptr_wds nonptr_wds where name = dataConName data_con info_lbl = mkConInfoTableLabel name NoCafRefs - sm_rep = mkHeapRep dflags is_static ptr_wds nonptr_wds cl_type + sm_rep = mkHeapRep is_static ptr_wds nonptr_wds cl_type cl_type = Constr 
(dataConTagZ data_con) (dataConIdentity data_con) -- We keep the *zero-indexed* tag in the srt_len field -- of the info table of a data constructor. diff --git a/compiler/ghci/ByteCodeItbls.hs b/compiler/ghci/ByteCodeItbls.hs index 7381c8f926..84041437be 100644 --- a/compiler/ghci/ByteCodeItbls.hs +++ b/compiler/ghci/ByteCodeItbls.hs @@ -13,7 +13,6 @@ import GhcPrelude import ByteCodeTypes import GHCi -import DynFlags import HscTypes import Name ( Name, getName ) import NameEnv @@ -65,12 +64,9 @@ make_constr_itbls hsc_env cons = ptrs' = ptr_wds nptrs' = tot_wds - ptr_wds - nptrs_really - | ptrs' + nptrs' >= mIN_PAYLOAD_SIZE dflags = nptrs' - | otherwise = mIN_PAYLOAD_SIZE dflags - ptrs' descr = dataConIdentity dcon - r <- iservCmd hsc_env (MkConInfoTable ptrs' nptrs_really + r <- iservCmd hsc_env (MkConInfoTable ptrs' nptrs' conNo (tagForCon dflags dcon) descr) return (getName dcon, ItblPtr r) diff --git a/includes/rts/Constants.h b/includes/rts/Constants.h index 15ff2a43d9..c3f777f1c4 100644 --- a/includes/rts/Constants.h +++ b/includes/rts/Constants.h @@ -20,16 +20,6 @@ #pragma once /* ----------------------------------------------------------------------------- - Minimum closure sizes - - This is the minimum number of words in the payload of a - heap-allocated closure, so that the closure has enough room to be - overwritten with a forwarding pointer during garbage collection. - -------------------------------------------------------------------------- */ - -#define MIN_PAYLOAD_SIZE 1 - -/* ----------------------------------------------------------------------------- Constants to do with specialised closure types. 
-------------------------------------------------------------------------- */ diff --git a/libraries/ghc-heap/tests/closure_size.hs b/libraries/ghc-heap/tests/closure_size.hs index d760f22efa..d5bace42e2 100644 --- a/libraries/ghc-heap/tests/closure_size.hs +++ b/libraries/ghc-heap/tests/closure_size.hs @@ -18,9 +18,10 @@ data APC a = APC a main :: IO () main = do + assertSize False 1 + assertSize (Nothing :: Maybe ()) 1 assertSize 'a' 2 assertSize (Just ()) 2 - assertSize (Nothing :: Maybe ()) 2 assertSize ((1,2) :: (Int,Int)) 3 assertSize ((1,2,3) :: (Int,Int,Int)) 4 diff --git a/libraries/ghc-heap/tests/heap_all.hs b/libraries/ghc-heap/tests/heap_all.hs index 1560d4d9e8..a234b34db1 100644 --- a/libraries/ghc-heap/tests/heap_all.hs +++ b/libraries/ghc-heap/tests/heap_all.hs @@ -44,9 +44,9 @@ exConstrClosure = ConstrClosure exFunClosure :: Closure exFunClosure = FunClosure - { info = exItbl{tipe=FUN_0_1, ptrs=0, nptrs=1} + { info = exItbl{tipe=FUN, ptrs=0, nptrs=0} , ptrArgs = [] - , dataArgs = [0] + , dataArgs = [] } exThunkClosure :: Closure diff --git a/rts/Interpreter.c b/rts/Interpreter.c index 2a886ff8a4..53d6a92cec 100644 --- a/rts/Interpreter.c +++ b/rts/Interpreter.c @@ -148,7 +148,7 @@ STATIC_INLINE StgPtr allocate_NONUPD (Capability *cap, int n_words) { - return allocate(cap, stg_max(sizeofW(StgHeader)+MIN_PAYLOAD_SIZE, n_words)); + return allocate(cap, stg_max(sizeofW(StgHeader), n_words)); } int rts_stop_next_breakpoint = 0; diff --git a/rts/sm/Compact.c b/rts/sm/Compact.c index 3bfefa7ceb..6ccb5ad444 100644 --- a/rts/sm/Compact.c +++ b/rts/sm/Compact.c @@ -787,7 +787,7 @@ update_fwd_compact( bdescr *blocks ) while (p < bd->free ) { - while ( p < bd->free && !is_marked(p,bd) ) { + while ( p < bd->free && !is_marked_live(p,bd) ) { p++; } if (p >= bd->free) { @@ -830,15 +830,16 @@ update_fwd_compact( bdescr *blocks ) size = p - q; if (free + size > free_bd->start + BLOCK_SIZE_W) { - // set the next bit in the bitmap to indicate that - // this object 
needs to be pushed into the next - // block. This saves us having to run down the - // threaded info pointer list twice during the next pass. - mark(q+1,bd); + // set the next bit in the bitmap to indicate that this object + // needs to be pushed into the next block. This saves us having + // to run down the threaded info pointer list twice during the + // next pass. See Note [Mark bits in mark-compact collector] in + // Compact.h. + mark_too_large(q,bd); free_bd = free_bd->link; free = free_bd->start; } else { - ASSERT(!is_marked(q+1,bd)); + ASSERT(!is_marked_too_large(q,bd)); } unthread(q,(StgWord)free + GET_CLOSURE_TAG((StgClosure *)iptr)); @@ -873,7 +874,7 @@ update_bkwd_compact( generation *gen ) while (p < bd->free ) { - while ( p < bd->free && !is_marked(p,bd) ) { + while ( p < bd->free && !is_marked_live(p,bd) ) { p++; } if (p >= bd->free) { @@ -898,7 +899,7 @@ update_bkwd_compact( generation *gen ) } #endif - if (is_marked(p+1,bd)) { + if (is_marked_too_large(p,bd)) { // don't forget to update the free ptr in the block desc. free_bd->free = free; free_bd = free_bd->link; diff --git a/rts/sm/Compact.h b/rts/sm/Compact.h index b0521122df..cfff313670 100644 --- a/rts/sm/Compact.h +++ b/rts/sm/Compact.h @@ -15,26 +15,85 @@ #include "BeginPrivate.h" +/* ----------------------------------------------------------------------------- + Note [Mark bits in mark-compact collector] + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + In mark-compact collector each closure has two mark bits: + + - Liveness bit: not marked == unreachable (dead) + + - "Too large" bit: when this is set it means that the closure won't fit in + the current heap block and we need to move it to the next chain in the + heap. + + Why do we need the second bit? We only know a closure's size *before* + threading it, because after threading the info table pointer will be end of + the chain. 
So by the time we do the second pass to move the closures and + unthread chains we'd have to do two passes, one for to get the info table + pointer at the end of the chain to compute the closure size and update the + free pointer if it's too large to fit in the current block, and then another + pass to actually unthread. + + To avoid this we update the second bit when we first visit an object (in the + "forward" pass) and realize that it won't fit in the current block, and check + that bit in the second pass (where we actually move the object and update all + references). If the bit is set we move the object to the free location in the + next block in heap chain, otherwise we use the free pointer in the current + block. + -------------------------------------------------------------------------- */ + +#define MARK_COMPACT_LIVE 0 +#define MARK_COMPACT_TOO_LARGE 1 + INLINE_HEADER void -mark(StgPtr p, bdescr *bd) +mark(StgPtr p, bdescr *bd, int offset) { + ASSERT(offset == MARK_COMPACT_LIVE || offset == MARK_COMPACT_TOO_LARGE); uint32_t offset_within_block = p - bd->start; // in words - StgPtr bitmap_word = (StgPtr)bd->u.bitmap + - (offset_within_block / BITS_IN(W_)); - StgWord bit_mask = (StgWord)1 << (offset_within_block & (BITS_IN(W_) - 1)); + uint32_t offset_in_bitmap = offset_within_block * 2 + offset; // 2 per object + StgPtr bitmap_word = (StgPtr)bd->u.bitmap + (offset_in_bitmap / BITS_IN(W_)); + StgWord bit_mask = (StgWord)1 << (offset_in_bitmap & (BITS_IN(W_) - 1)); *bitmap_word |= bit_mask; } INLINE_HEADER StgWord -is_marked(StgPtr p, bdescr *bd) +is_marked(StgPtr p, bdescr *bd, int offset) { + // offset 0: liveness bit + // offset 1: "too large" bit + ASSERT(offset == MARK_COMPACT_LIVE || offset == MARK_COMPACT_TOO_LARGE); uint32_t offset_within_block = p - bd->start; // in words - StgPtr bitmap_word = (StgPtr)bd->u.bitmap + - (offset_within_block / BITS_IN(W_)); - StgWord bit_mask = (StgWord)1 << (offset_within_block & (BITS_IN(W_)- 1)); + uint32_t 
offset_in_bitmap = offset_within_block * 2 + offset; // 2 per object + StgPtr bitmap_word = (StgPtr)bd->u.bitmap + (offset_in_bitmap / BITS_IN(W_)); + StgWord bit_mask = (StgWord)1 << (offset_in_bitmap & (BITS_IN(W_)- 1)); return (*bitmap_word & bit_mask); } +INLINE_HEADER void +mark_live(StgPtr p, bdescr *bd) +{ + mark(p, bd, MARK_COMPACT_LIVE); +} + +INLINE_HEADER StgWord +is_marked_live(StgPtr p, bdescr *bd) +{ + return is_marked(p, bd, MARK_COMPACT_LIVE); +} + +INLINE_HEADER void +mark_too_large(StgPtr p, bdescr *bd) +{ + mark(p, bd, MARK_COMPACT_TOO_LARGE); +} + +INLINE_HEADER StgWord +is_marked_too_large(StgPtr p, bdescr *bd) +{ + return is_marked(p, bd, MARK_COMPACT_TOO_LARGE); +} + void compact (StgClosure *static_objects, StgWeak **dead_weak_ptr_list, StgTSO **resurrected_threads); diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c index 53a473d26c..23e50c28bc 100644 --- a/rts/sm/Evac.c +++ b/rts/sm/Evac.c @@ -600,8 +600,8 @@ loop: /* If the object is in a gen that we're compacting, then we * need to use an alternative evacuate procedure. 
*/ - if (!is_marked((P_)q,bd)) { - mark((P_)q,bd); + if (!is_marked_live((P_)q,bd)) { + mark_live((P_)q,bd); push_mark_stack((P_)q); } return; @@ -909,8 +909,8 @@ evacuate_BLACKHOLE(StgClosure **p) return; } if (bd->flags & BF_MARKED) { - if (!is_marked((P_)q,bd)) { - mark((P_)q,bd); + if (!is_marked_live((P_)q,bd)) { + mark_live((P_)q,bd); push_mark_stack((P_)q); } return; diff --git a/rts/sm/GC.c b/rts/sm/GC.c index 92a5e229a1..53c0d53e92 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -1414,17 +1414,14 @@ prepare_collected_gen (generation *gen) // for a compacted generation, we need to allocate the bitmap if (gen->mark) { - StgWord bitmap_size; // in bytes - bdescr *bitmap_bdescr; - StgWord *bitmap; - - bitmap_size = gen->n_old_blocks * BLOCK_SIZE / BITS_IN(W_); + // in bytes + StgWord bitmap_size = (gen->n_old_blocks * BLOCK_SIZE / BITS_IN(W_)) * 2; if (bitmap_size > 0) { - bitmap_bdescr = allocGroup((StgWord)BLOCK_ROUND_UP(bitmap_size) - / BLOCK_SIZE); + bdescr *bitmap_bdescr = + allocGroup((StgWord)BLOCK_ROUND_UP(bitmap_size) / BLOCK_SIZE); gen->bitmap = bitmap_bdescr; - bitmap = bitmap_bdescr->start; + StgWord *bitmap = bitmap_bdescr->start; debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p", bitmap_size, bitmap); @@ -1436,7 +1433,7 @@ prepare_collected_gen (generation *gen) // block descriptor. for (bd=gen->old_blocks; bd != NULL; bd = bd->link) { bd->u.bitmap = bitmap; - bitmap += BLOCK_SIZE_W / BITS_IN(W_); + bitmap += (BLOCK_SIZE_W / BITS_IN(W_)) * 2; // Also at this point we set the BF_MARKED flag // for this block. 
The invariant is that diff --git a/rts/sm/GCAux.c b/rts/sm/GCAux.c index 650dc2c1df..9292df0a21 100644 --- a/rts/sm/GCAux.c +++ b/rts/sm/GCAux.c @@ -71,7 +71,7 @@ isAlive(StgClosure *p) } // check the mark bit for compacted generations - if ((bd->flags & BF_MARKED) && is_marked((P_)q,bd)) { + if ((bd->flags & BF_MARKED) && is_marked_live((P_)q,bd)) { return p; } diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c index 3585bd93b4..99e580e978 100644 --- a/rts/sm/Sanity.c +++ b/rts/sm/Sanity.c @@ -467,7 +467,7 @@ void checkHeapChain (bdescr *bd) while (p < bd->free) { uint32_t size = checkClosure((StgClosure *)p); /* This is the smallest size of closure that can live in the heap */ - ASSERT( size >= MIN_PAYLOAD_SIZE + sizeofW(StgHeader) ); + ASSERT( size >= sizeofW(StgHeader) ); p += size; /* skip over slop */ @@ -488,7 +488,7 @@ checkHeapChunk(StgPtr start, StgPtr end) ASSERT(LOOKS_LIKE_INFO_PTR(*p)); size = checkClosure((StgClosure *)p); /* This is the smallest size of closure that can live in the heap. */ - ASSERT( size >= MIN_PAYLOAD_SIZE + sizeofW(StgHeader) ); + ASSERT( size >= sizeofW(StgHeader) ); } } diff --git a/utils/deriveConstants/Main.hs b/utils/deriveConstants/Main.hs index 54533254dd..2b30ca8e48 100644 --- a/utils/deriveConstants/Main.hs +++ b/utils/deriveConstants/Main.hs @@ -621,10 +621,6 @@ wanteds os = concat ,constantWord Haskell "MAX_SPEC_SELECTEE_SIZE" "MAX_SPEC_SELECTEE_SIZE" ,constantWord Haskell "MAX_SPEC_AP_SIZE" "MAX_SPEC_AP_SIZE" - -- closure sizes: these do NOT include the header (see below for - -- header sizes) - ,constantWord Haskell "MIN_PAYLOAD_SIZE" "MIN_PAYLOAD_SIZE" - ,constantInt Haskell "MIN_INTLIKE" "MIN_INTLIKE" ,constantWord Haskell "MAX_INTLIKE" "MAX_INTLIKE" |