author     Ömer Sinan Ağacan <omeragacan@gmail.com>    2019-09-09 14:54:21 +0300
committer  Ömer Sinan Ağacan <omeragacan@gmail.com>    2019-09-11 11:14:58 +0300
commit     7a5e3725a2d1da706c34582edac7bb147c7a5247 (patch)
tree       29fb3bb4429700fc884ed42037a3fdd5513f73fd
parent     270fbe8512f04b6107755fa22bdec62205c0a567 (diff)
download   haskell-wip/osa1/min_payload_size.tar.gz

Remove min payload size restriction from the compiler and RTS (branch: wip/osa1/min_payload_size)
Previously we required every heap object to have at least one word of space
in the payload. This was *incorrectly* documented as follows in the source
code (now deleted):

    Minimum closure sizes

    This is the minimum number of words in the payload of a heap-allocated
    closure, so that the closure has enough room to be overwritten with a
    forwarding pointer during garbage collection.

This comment was wrong: the GC doesn't require a location in the payload;
forwarding pointers are written over the info table pointer in the header.
The real reason we needed a word in the payload is that the mark-compact
collector allocates one bit per heap word for its bitmap, but needs two bits
per object to work efficiently. Details are explained in the new
Note [Mark bits in mark-compact collector] in Compact.h.

We now allocate two bits per word in the bitmap and remove the restriction.
Heap objects can now have size 1 (header only), as demonstrated in the
updated tests.

Nofib results:

- 0.0% change in binary sizes and TotalMem
- "constraints" now allocates 0.1% less; no other changes in allocations
- Runtime/Elapsed results are noisy and inconclusive
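The point about forwarding pointers can be illustrated with a small standalone sketch. This is not RTS code: DemoClosure and the MK_FWD/IS_FWD/UN_FWD macros are invented for the example, in the spirit of how the copying collector tags the overwritten info pointer word.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* A closure is a header (info table pointer) followed by zero or more
 * payload words. Zero payload words is fine for this purpose. */
typedef struct { uintptr_t info; } DemoClosure;

/* Info tables are word-aligned, so the low bit of a real info pointer is
 * always clear; a set low bit marks the word as a forwarding pointer. */
#define MK_FWD(p) ((uintptr_t)(p) | 1)
#define IS_FWD(w) ((w) & 1)
#define UN_FWD(w) ((DemoClosure *)((w) & ~(uintptr_t)1))

int main(void)
{
    DemoClosure to   = { .info = 0x1000 };  /* the evacuated copy              */
    DemoClosure from = { .info = 0x1000 };  /* original, about to be forwarded */

    /* Forwarding overwrites only the header word; no payload space needed. */
    from.info = MK_FWD(&to);

    assert(IS_FWD(from.info));
    assert(UN_FWD(from.info) == &to);
    printf("forwarded to %p\n", (void *)UN_FWD(from.info));
    return 0;
}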
-rw-r--r--  compiler/cmm/CmmParse.y                    11
-rw-r--r--  compiler/cmm/SMRep.hs                      22
-rw-r--r--  compiler/codeGen/StgCmmClosure.hs           4
-rw-r--r--  compiler/ghci/ByteCodeItbls.hs              6
-rw-r--r--  includes/rts/Constants.h                   10
-rw-r--r--  libraries/ghc-heap/tests/closure_size.hs    3
-rw-r--r--  libraries/ghc-heap/tests/heap_all.hs        4
-rw-r--r--  rts/Interpreter.c                           2
-rw-r--r--  rts/sm/Compact.c                           19
-rw-r--r--  rts/sm/Compact.h                           75
-rw-r--r--  rts/sm/Evac.c                               8
-rw-r--r--  rts/sm/GC.c                                15
-rw-r--r--  rts/sm/GCAux.c                              2
-rw-r--r--  rts/sm/Sanity.c                             4
-rw-r--r--  utils/deriveConstants/Main.hs               4
15 files changed, 105 insertions, 84 deletions
diff --git a/compiler/cmm/CmmParse.y b/compiler/cmm/CmmParse.y
index f563145250..50bb15930b 100644
--- a/compiler/cmm/CmmParse.y
+++ b/compiler/cmm/CmmParse.y
@@ -466,8 +466,7 @@ info :: { CmmParse (CLabel, Maybe CmmInfoTable, [LocalReg]) }
do dflags <- getDynFlags
let prof = profilingInfo dflags $11 $13
rep = mkRTSRep (fromIntegral $9) $
- mkHeapRep dflags False (fromIntegral $5)
- (fromIntegral $7) Thunk
+ mkHeapRep False (fromIntegral $5) (fromIntegral $7) Thunk
-- not really Thunk, but that makes the info table
-- we want.
return (mkCmmEntryLabel pkg $3,
@@ -484,8 +483,7 @@ info :: { CmmParse (CLabel, Maybe CmmInfoTable, [LocalReg]) }
ty = Fun 0 (ArgSpec (fromIntegral $15))
-- Arity zero, arg_type $15
rep = mkRTSRep (fromIntegral $9) $
- mkHeapRep dflags False (fromIntegral $5)
- (fromIntegral $7) ty
+ mkHeapRep False (fromIntegral $5) (fromIntegral $7) ty
return (mkCmmEntryLabel pkg $3,
Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3
, cit_rep = rep
@@ -502,8 +500,7 @@ info :: { CmmParse (CLabel, Maybe CmmInfoTable, [LocalReg]) }
ty = Constr (fromIntegral $9) -- Tag
(BS8.pack $13)
rep = mkRTSRep (fromIntegral $11) $
- mkHeapRep dflags False (fromIntegral $5)
- (fromIntegral $7) ty
+ mkHeapRep False (fromIntegral $5) (fromIntegral $7) ty
return (mkCmmEntryLabel pkg $3,
Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3
, cit_rep = rep
@@ -520,7 +517,7 @@ info :: { CmmParse (CLabel, Maybe CmmInfoTable, [LocalReg]) }
let prof = profilingInfo dflags $9 $11
ty = ThunkSelector (fromIntegral $5)
rep = mkRTSRep (fromIntegral $7) $
- mkHeapRep dflags False 0 0 ty
+ mkHeapRep False 0 0 ty
return (mkCmmEntryLabel pkg $3,
Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3
, cit_rep = rep
diff --git a/compiler/cmm/SMRep.hs b/compiler/cmm/SMRep.hs
index 49137eff25..f2db1b136e 100644
--- a/compiler/cmm/SMRep.hs
+++ b/compiler/cmm/SMRep.hs
@@ -222,20 +222,9 @@ data ArgDescr
-----------------------------------------------------------------------------
-- Construction
-mkHeapRep :: DynFlags -> IsStatic -> WordOff -> WordOff -> ClosureTypeInfo
- -> SMRep
-mkHeapRep dflags is_static ptr_wds nonptr_wds cl_type_info
- = HeapRep is_static
- ptr_wds
- (nonptr_wds + slop_wds)
- cl_type_info
- where
- slop_wds
- | is_static = 0
- | otherwise = max 0 (minClosureSize dflags - (hdr_size + payload_size))
-
- hdr_size = closureTypeHdrSize dflags cl_type_info
- payload_size = ptr_wds + nonptr_wds
+mkHeapRep :: IsStatic -> WordOff -> WordOff -> ClosureTypeInfo -> SMRep
+mkHeapRep is_static ptr_wds nonptr_wds cl_type_info
+ = HeapRep is_static ptr_wds nonptr_wds cl_type_info
mkRTSRep :: Int -> SMRep -> SMRep
mkRTSRep = RTSRep
@@ -310,11 +299,6 @@ profHdrSize dflags
| gopt Opt_SccProfilingOn dflags = pROF_HDR_SIZE dflags
| otherwise = 0
--- | The garbage collector requires that every closure is at least as
--- big as this.
-minClosureSize :: DynFlags -> WordOff
-minClosureSize dflags = fixedHdrSizeW dflags + mIN_PAYLOAD_SIZE dflags
-
arrWordsHdrSize :: DynFlags -> ByteOff
arrWordsHdrSize dflags
= fixedHdrSize dflags + sIZEOF_StgArrBytes_NoHdr dflags
diff --git a/compiler/codeGen/StgCmmClosure.hs b/compiler/codeGen/StgCmmClosure.hs
index ac8db1268f..d47ea3a760 100644
--- a/compiler/codeGen/StgCmmClosure.hs
+++ b/compiler/codeGen/StgCmmClosure.hs
@@ -698,7 +698,7 @@ mkClosureInfo dflags is_static id lf_info tot_wds ptr_wds val_descr
, closureProf = prof } -- (we don't have an SRT yet)
where
name = idName id
- sm_rep = mkHeapRep dflags is_static ptr_wds nonptr_wds (lfClosureType lf_info)
+ sm_rep = mkHeapRep is_static ptr_wds nonptr_wds (lfClosureType lf_info)
prof = mkProfilingInfo dflags id val_descr
nonptr_wds = tot_wds - ptr_wds
@@ -966,7 +966,7 @@ mkDataConInfoTable dflags data_con is_static ptr_wds nonptr_wds
where
name = dataConName data_con
info_lbl = mkConInfoTableLabel name NoCafRefs
- sm_rep = mkHeapRep dflags is_static ptr_wds nonptr_wds cl_type
+ sm_rep = mkHeapRep is_static ptr_wds nonptr_wds cl_type
cl_type = Constr (dataConTagZ data_con) (dataConIdentity data_con)
-- We keep the *zero-indexed* tag in the srt_len field
-- of the info table of a data constructor.
diff --git a/compiler/ghci/ByteCodeItbls.hs b/compiler/ghci/ByteCodeItbls.hs
index 7381c8f926..84041437be 100644
--- a/compiler/ghci/ByteCodeItbls.hs
+++ b/compiler/ghci/ByteCodeItbls.hs
@@ -13,7 +13,6 @@ import GhcPrelude
import ByteCodeTypes
import GHCi
-import DynFlags
import HscTypes
import Name ( Name, getName )
import NameEnv
@@ -65,12 +64,9 @@ make_constr_itbls hsc_env cons =
ptrs' = ptr_wds
nptrs' = tot_wds - ptr_wds
- nptrs_really
- | ptrs' + nptrs' >= mIN_PAYLOAD_SIZE dflags = nptrs'
- | otherwise = mIN_PAYLOAD_SIZE dflags - ptrs'
descr = dataConIdentity dcon
- r <- iservCmd hsc_env (MkConInfoTable ptrs' nptrs_really
+ r <- iservCmd hsc_env (MkConInfoTable ptrs' nptrs'
conNo (tagForCon dflags dcon) descr)
return (getName dcon, ItblPtr r)
diff --git a/includes/rts/Constants.h b/includes/rts/Constants.h
index 15ff2a43d9..c3f777f1c4 100644
--- a/includes/rts/Constants.h
+++ b/includes/rts/Constants.h
@@ -20,16 +20,6 @@
#pragma once
/* -----------------------------------------------------------------------------
- Minimum closure sizes
-
- This is the minimum number of words in the payload of a
- heap-allocated closure, so that the closure has enough room to be
- overwritten with a forwarding pointer during garbage collection.
- -------------------------------------------------------------------------- */
-
-#define MIN_PAYLOAD_SIZE 1
-
-/* -----------------------------------------------------------------------------
Constants to do with specialised closure types.
-------------------------------------------------------------------------- */
diff --git a/libraries/ghc-heap/tests/closure_size.hs b/libraries/ghc-heap/tests/closure_size.hs
index d760f22efa..d5bace42e2 100644
--- a/libraries/ghc-heap/tests/closure_size.hs
+++ b/libraries/ghc-heap/tests/closure_size.hs
@@ -18,9 +18,10 @@ data APC a = APC a
main :: IO ()
main = do
+ assertSize False 1
+ assertSize (Nothing :: Maybe ()) 1
assertSize 'a' 2
assertSize (Just ()) 2
- assertSize (Nothing :: Maybe ()) 2
assertSize ((1,2) :: (Int,Int)) 3
assertSize ((1,2,3) :: (Int,Int,Int)) 4
diff --git a/libraries/ghc-heap/tests/heap_all.hs b/libraries/ghc-heap/tests/heap_all.hs
index 1560d4d9e8..a234b34db1 100644
--- a/libraries/ghc-heap/tests/heap_all.hs
+++ b/libraries/ghc-heap/tests/heap_all.hs
@@ -44,9 +44,9 @@ exConstrClosure = ConstrClosure
exFunClosure :: Closure
exFunClosure = FunClosure
- { info = exItbl{tipe=FUN_0_1, ptrs=0, nptrs=1}
+ { info = exItbl{tipe=FUN, ptrs=0, nptrs=0}
, ptrArgs = []
- , dataArgs = [0]
+ , dataArgs = []
}
exThunkClosure :: Closure
diff --git a/rts/Interpreter.c b/rts/Interpreter.c
index 2a886ff8a4..53d6a92cec 100644
--- a/rts/Interpreter.c
+++ b/rts/Interpreter.c
@@ -148,7 +148,7 @@
STATIC_INLINE StgPtr
allocate_NONUPD (Capability *cap, int n_words)
{
- return allocate(cap, stg_max(sizeofW(StgHeader)+MIN_PAYLOAD_SIZE, n_words));
+ return allocate(cap, stg_max(sizeofW(StgHeader), n_words));
}
int rts_stop_next_breakpoint = 0;
diff --git a/rts/sm/Compact.c b/rts/sm/Compact.c
index 3bfefa7ceb..6ccb5ad444 100644
--- a/rts/sm/Compact.c
+++ b/rts/sm/Compact.c
@@ -787,7 +787,7 @@ update_fwd_compact( bdescr *blocks )
while (p < bd->free ) {
- while ( p < bd->free && !is_marked(p,bd) ) {
+ while ( p < bd->free && !is_marked_live(p,bd) ) {
p++;
}
if (p >= bd->free) {
@@ -830,15 +830,16 @@ update_fwd_compact( bdescr *blocks )
size = p - q;
if (free + size > free_bd->start + BLOCK_SIZE_W) {
- // set the next bit in the bitmap to indicate that
- // this object needs to be pushed into the next
- // block. This saves us having to run down the
- // threaded info pointer list twice during the next pass.
- mark(q+1,bd);
+ // set the next bit in the bitmap to indicate that this object
+ // needs to be pushed into the next block. This saves us having
+ // to run down the threaded info pointer list twice during the
+ // next pass. See Note [Mark bits in mark-compact collector] in
+ // Compact.h.
+ mark_too_large(q,bd);
free_bd = free_bd->link;
free = free_bd->start;
} else {
- ASSERT(!is_marked(q+1,bd));
+ ASSERT(!is_marked_too_large(q,bd));
}
unthread(q,(StgWord)free + GET_CLOSURE_TAG((StgClosure *)iptr));
@@ -873,7 +874,7 @@ update_bkwd_compact( generation *gen )
while (p < bd->free ) {
- while ( p < bd->free && !is_marked(p,bd) ) {
+ while ( p < bd->free && !is_marked_live(p,bd) ) {
p++;
}
if (p >= bd->free) {
@@ -898,7 +899,7 @@ update_bkwd_compact( generation *gen )
}
#endif
- if (is_marked(p+1,bd)) {
+ if (is_marked_too_large(p,bd)) {
// don't forget to update the free ptr in the block desc.
free_bd->free = free;
free_bd = free_bd->link;
diff --git a/rts/sm/Compact.h b/rts/sm/Compact.h
index b0521122df..cfff313670 100644
--- a/rts/sm/Compact.h
+++ b/rts/sm/Compact.h
@@ -15,26 +15,85 @@
#include "BeginPrivate.h"
+/* -----------------------------------------------------------------------------
+ Note [Mark bits in mark-compact collector]
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ In the mark-compact collector each closure has two mark bits:
+
+ - Liveness bit: not marked == unreachable (dead)
+
+ - "Too large" bit: when this is set it means that the closure won't fit in
+ the current heap block and we need to move it to the next chain in the
+ heap.
+
+ Why do we need the second bit? We only know a closure's size *before*
+ threading it, because after threading the info table pointer sits at the
+ end of the chain. Without this bit the second pass (which moves closures
+ and unthreads chains) would have to walk each chain twice: once to reach
+ the info table pointer, compute the closure size, and advance the free
+ pointer to the next block if the closure doesn't fit in the current one,
+ and then a second time to actually unthread.
+
+ To avoid this we set the second bit when we first visit an object (in the
+ "forward" pass) and realize that it won't fit in the current block, and we
+ check that bit in the second pass (where we actually move the object and
+ update all references). If the bit is set we move the object to the free
+ location in the next block of the heap chain, otherwise we use the free
+ pointer in the current block.
+ -------------------------------------------------------------------------- */
+
+#define MARK_COMPACT_LIVE 0
+#define MARK_COMPACT_TOO_LARGE 1
+
INLINE_HEADER void
-mark(StgPtr p, bdescr *bd)
+mark(StgPtr p, bdescr *bd, int offset)
{
+ ASSERT(offset == MARK_COMPACT_LIVE || offset == MARK_COMPACT_TOO_LARGE);
uint32_t offset_within_block = p - bd->start; // in words
- StgPtr bitmap_word = (StgPtr)bd->u.bitmap +
- (offset_within_block / BITS_IN(W_));
- StgWord bit_mask = (StgWord)1 << (offset_within_block & (BITS_IN(W_) - 1));
+ uint32_t offset_in_bitmap = offset_within_block * 2 + offset; // 2 per object
+ StgPtr bitmap_word = (StgPtr)bd->u.bitmap + (offset_in_bitmap / BITS_IN(W_));
+ StgWord bit_mask = (StgWord)1 << (offset_in_bitmap & (BITS_IN(W_) - 1));
*bitmap_word |= bit_mask;
}
INLINE_HEADER StgWord
-is_marked(StgPtr p, bdescr *bd)
+is_marked(StgPtr p, bdescr *bd, int offset)
{
+ // offset 0: liveness bit
+ // offset 1: "too large" bit
+ ASSERT(offset == MARK_COMPACT_LIVE || offset == MARK_COMPACT_TOO_LARGE);
uint32_t offset_within_block = p - bd->start; // in words
- StgPtr bitmap_word = (StgPtr)bd->u.bitmap +
- (offset_within_block / BITS_IN(W_));
- StgWord bit_mask = (StgWord)1 << (offset_within_block & (BITS_IN(W_)- 1));
+ uint32_t offset_in_bitmap = offset_within_block * 2 + offset; // 2 per object
+ StgPtr bitmap_word = (StgPtr)bd->u.bitmap + (offset_in_bitmap / BITS_IN(W_));
+ StgWord bit_mask = (StgWord)1 << (offset_in_bitmap & (BITS_IN(W_)- 1));
return (*bitmap_word & bit_mask);
}
+INLINE_HEADER void
+mark_live(StgPtr p, bdescr *bd)
+{
+ mark(p, bd, MARK_COMPACT_LIVE);
+}
+
+INLINE_HEADER StgWord
+is_marked_live(StgPtr p, bdescr *bd)
+{
+ return is_marked(p, bd, MARK_COMPACT_LIVE);
+}
+
+INLINE_HEADER void
+mark_too_large(StgPtr p, bdescr *bd)
+{
+ mark(p, bd, MARK_COMPACT_TOO_LARGE);
+}
+
+INLINE_HEADER StgWord
+is_marked_too_large(StgPtr p, bdescr *bd)
+{
+ return is_marked(p, bd, MARK_COMPACT_TOO_LARGE);
+}
+
void compact (StgClosure *static_objects,
StgWeak **dead_weak_ptr_list,
StgTSO **resurrected_threads);
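The two-bits-per-object indexing described in Note [Mark bits in mark-compact collector] and used by the new mark()/is_marked() above can be exercised outside the RTS with a short sketch. The names demo_mark, demo_is_marked and WORD_BITS are illustrative only; the real functions index into bd->u.bitmap relative to bd->start rather than a local array.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define WORD_BITS (8 * sizeof(uintptr_t))
#define LIVE      0  /* corresponds to MARK_COMPACT_LIVE      */
#define TOO_LARGE 1  /* corresponds to MARK_COMPACT_TOO_LARGE */

/* Set one of the two mark bits for the object at word offset `obj` in a block. */
static void demo_mark(uintptr_t *bitmap, size_t obj, int which)
{
    size_t bit = obj * 2 + which;  /* two bits per heap word */
    bitmap[bit / WORD_BITS] |= (uintptr_t)1 << (bit % WORD_BITS);
}

static int demo_is_marked(const uintptr_t *bitmap, size_t obj, int which)
{
    size_t bit = obj * 2 + which;
    return (bitmap[bit / WORD_BITS] >> (bit % WORD_BITS)) & 1;
}

int main(void)
{
    uintptr_t bitmap[16] = {0};        /* covers a 512-word block on 64-bit */

    demo_mark(bitmap, 100, LIVE);      /* object at word 100 is reachable   */
    demo_mark(bitmap, 100, TOO_LARGE); /* ...and spills into the next block */
    demo_mark(bitmap, 101, LIVE);

    assert( demo_is_marked(bitmap, 100, TOO_LARGE));
    assert(!demo_is_marked(bitmap, 101, TOO_LARGE));
    printf("ok\n");
    return 0;
}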
diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c
index 53a473d26c..23e50c28bc 100644
--- a/rts/sm/Evac.c
+++ b/rts/sm/Evac.c
@@ -600,8 +600,8 @@ loop:
/* If the object is in a gen that we're compacting, then we
* need to use an alternative evacuate procedure.
*/
- if (!is_marked((P_)q,bd)) {
- mark((P_)q,bd);
+ if (!is_marked_live((P_)q,bd)) {
+ mark_live((P_)q,bd);
push_mark_stack((P_)q);
}
return;
@@ -909,8 +909,8 @@ evacuate_BLACKHOLE(StgClosure **p)
return;
}
if (bd->flags & BF_MARKED) {
- if (!is_marked((P_)q,bd)) {
- mark((P_)q,bd);
+ if (!is_marked_live((P_)q,bd)) {
+ mark_live((P_)q,bd);
push_mark_stack((P_)q);
}
return;
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index 92a5e229a1..53c0d53e92 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -1414,17 +1414,14 @@ prepare_collected_gen (generation *gen)
// for a compacted generation, we need to allocate the bitmap
if (gen->mark) {
- StgWord bitmap_size; // in bytes
- bdescr *bitmap_bdescr;
- StgWord *bitmap;
-
- bitmap_size = gen->n_old_blocks * BLOCK_SIZE / BITS_IN(W_);
+ // in bytes
+ StgWord bitmap_size = (gen->n_old_blocks * BLOCK_SIZE / BITS_IN(W_)) * 2;
if (bitmap_size > 0) {
- bitmap_bdescr = allocGroup((StgWord)BLOCK_ROUND_UP(bitmap_size)
- / BLOCK_SIZE);
+ bdescr *bitmap_bdescr =
+ allocGroup((StgWord)BLOCK_ROUND_UP(bitmap_size) / BLOCK_SIZE);
gen->bitmap = bitmap_bdescr;
- bitmap = bitmap_bdescr->start;
+ StgWord *bitmap = bitmap_bdescr->start;
debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
bitmap_size, bitmap);
@@ -1436,7 +1433,7 @@ prepare_collected_gen (generation *gen)
// block descriptor.
for (bd=gen->old_blocks; bd != NULL; bd = bd->link) {
bd->u.bitmap = bitmap;
- bitmap += BLOCK_SIZE_W / BITS_IN(W_);
+ bitmap += (BLOCK_SIZE_W / BITS_IN(W_)) * 2;
// Also at this point we set the BF_MARKED flag
// for this block. The invariant is that
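As a rough cross-check of the bitmap overhead implied by the hunk above, and assuming the usual 4096-byte blocks and 64-bit words (both are configuration-dependent), each block's slice of the bitmap grows from 64 to 128 bytes, i.e. from roughly 1.6% to 3.1% of the block:

#include <stdio.h>

int main(void)
{
    unsigned block_size   = 4096;            /* bytes per heap block (assumed) */
    unsigned word_bits    = 64;              /* BITS_IN(W_) on a 64-bit target */
    unsigned block_size_w = block_size / 8;  /* 512 words per block            */

    unsigned old_bytes = block_size / word_bits;         /* one bit per word:  64  */
    unsigned new_bytes = (block_size / word_bits) * 2;   /* two bits per word: 128 */
    unsigned stride_w  = (block_size_w / word_bits) * 2; /* per-block stride: 16 words */

    printf("bitmap per block: %u -> %u bytes, stride %u words\n",
           old_bytes, new_bytes, stride_w);
    return 0;
}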
diff --git a/rts/sm/GCAux.c b/rts/sm/GCAux.c
index 650dc2c1df..9292df0a21 100644
--- a/rts/sm/GCAux.c
+++ b/rts/sm/GCAux.c
@@ -71,7 +71,7 @@ isAlive(StgClosure *p)
}
// check the mark bit for compacted generations
- if ((bd->flags & BF_MARKED) && is_marked((P_)q,bd)) {
+ if ((bd->flags & BF_MARKED) && is_marked_live((P_)q,bd)) {
return p;
}
diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c
index 3585bd93b4..99e580e978 100644
--- a/rts/sm/Sanity.c
+++ b/rts/sm/Sanity.c
@@ -467,7 +467,7 @@ void checkHeapChain (bdescr *bd)
while (p < bd->free) {
uint32_t size = checkClosure((StgClosure *)p);
/* This is the smallest size of closure that can live in the heap */
- ASSERT( size >= MIN_PAYLOAD_SIZE + sizeofW(StgHeader) );
+ ASSERT( size >= sizeofW(StgHeader) );
p += size;
/* skip over slop */
@@ -488,7 +488,7 @@ checkHeapChunk(StgPtr start, StgPtr end)
ASSERT(LOOKS_LIKE_INFO_PTR(*p));
size = checkClosure((StgClosure *)p);
/* This is the smallest size of closure that can live in the heap. */
- ASSERT( size >= MIN_PAYLOAD_SIZE + sizeofW(StgHeader) );
+ ASSERT( size >= sizeofW(StgHeader) );
}
}
diff --git a/utils/deriveConstants/Main.hs b/utils/deriveConstants/Main.hs
index 54533254dd..2b30ca8e48 100644
--- a/utils/deriveConstants/Main.hs
+++ b/utils/deriveConstants/Main.hs
@@ -621,10 +621,6 @@ wanteds os = concat
,constantWord Haskell "MAX_SPEC_SELECTEE_SIZE" "MAX_SPEC_SELECTEE_SIZE"
,constantWord Haskell "MAX_SPEC_AP_SIZE" "MAX_SPEC_AP_SIZE"
- -- closure sizes: these do NOT include the header (see below for
- -- header sizes)
- ,constantWord Haskell "MIN_PAYLOAD_SIZE" "MIN_PAYLOAD_SIZE"
-
,constantInt Haskell "MIN_INTLIKE" "MIN_INTLIKE"
,constantWord Haskell "MAX_INTLIKE" "MAX_INTLIKE"