summaryrefslogtreecommitdiff
path: root/compiler
diff options
context:
space:
mode:
authorSimon Marlow <marlowsd@gmail.com>2014-03-10 21:43:15 +0000
committerJohan Tibell <johan.tibell@gmail.com>2014-03-11 20:01:54 +0100
commitb684f27ec7b3538ffd9401de70ef5685b8b71978 (patch)
tree2c5f095d4bff3b51a328231c7ce3fb367113e7df /compiler
parenta70e7b4762c75812254f7781bcd48139c4ec40dd (diff)
downloadhaskell-b684f27ec7b3538ffd9401de70ef5685b8b71978.tar.gz
Refactor inline array allocation
- Move array representation knowledge into SMRep - Separate out low-level heap-object allocation so that we can reuse it from doNewArrayOp - remove card-table initialisation, we can safely ignore the card table for newly allocated arrays.
Diffstat (limited to 'compiler')
-rw-r--r--compiler/cmm/SMRep.lhs65
-rw-r--r--compiler/codeGen/StgCmmHeap.hs84
-rw-r--r--compiler/codeGen/StgCmmPrim.hs75
-rw-r--r--compiler/codeGen/StgCmmProf.hs2
-rw-r--r--compiler/codeGen/StgCmmTicky.hs2
5 files changed, 120 insertions, 108 deletions
diff --git a/compiler/cmm/SMRep.lhs b/compiler/cmm/SMRep.lhs
index 6c7b70015c..7fb0a2b4f5 100644
--- a/compiler/cmm/SMRep.lhs
+++ b/compiler/cmm/SMRep.lhs
@@ -25,21 +25,24 @@ module SMRep (
ConstrDescription,
-- ** Construction
- mkHeapRep, blackHoleRep, indStaticRep, mkStackRep, mkRTSRep,
+ mkHeapRep, blackHoleRep, indStaticRep, mkStackRep, mkRTSRep, arrPtrsRep,
-- ** Predicates
isStaticRep, isConRep, isThunkRep, isFunRep, isStaticNoCafCon,
isStackRep,
-- ** Size-related things
- heapClosureSize,
- fixedHdrSize, arrWordsHdrSize, arrPtrsHdrSize,
- profHdrSize, thunkHdrSize, nonHdrSize,
+ heapClosureSizeW,
+ fixedHdrSize, arrWordsHdrSize, arrPtrsHdrSize, arrPtrsHdrSizeW,
+ profHdrSize, thunkHdrSize, nonHdrSizeW,
-- ** RTS closure types
rtsClosureType, rET_SMALL, rET_BIG,
aRG_GEN, aRG_GEN_BIG,
+ -- ** Arrays
+ card, cardRoundUp, cardTableSizeB, cardTableSizeW,
+
-- * Operations over [Word8] strings that don't belong here
pprWord8String, stringToWord8s
) where
@@ -150,6 +153,10 @@ data SMRep
!WordOff -- # non-ptr words INCLUDING SLOP (see mkHeapRep below)
ClosureTypeInfo -- type-specific info
+ | ArrayPtrsRep
+ !WordOff -- # ptr words
+ !WordOff -- # card table words
+
| StackRep -- Stack frame (RET_SMALL or RET_BIG)
Liveness
@@ -231,13 +238,16 @@ blackHoleRep = HeapRep False 0 0 BlackHole
indStaticRep :: SMRep
indStaticRep = HeapRep True 1 0 IndStatic
+arrPtrsRep :: DynFlags -> WordOff -> SMRep
+arrPtrsRep dflags elems = ArrayPtrsRep elems (cardTableSizeW dflags elems)
+
-----------------------------------------------------------------------------
-- Predicates
isStaticRep :: SMRep -> IsStatic
isStaticRep (HeapRep is_static _ _ _) = is_static
-isStaticRep (StackRep {}) = False
isStaticRep (RTSRep _ rep) = isStaticRep rep
+isStaticRep _ = False
isStackRep :: SMRep -> Bool
isStackRep StackRep{} = True
@@ -293,6 +303,11 @@ arrPtrsHdrSize :: DynFlags -> ByteOff
arrPtrsHdrSize dflags
= fixedHdrSize dflags * wORD_SIZE dflags + sIZEOF_StgMutArrPtrs_NoHdr dflags
+arrPtrsHdrSizeW :: DynFlags -> WordOff
+arrPtrsHdrSizeW dflags =
+ fixedHdrSize dflags +
+ (sIZEOF_StgMutArrPtrs_NoHdr dflags `quot` wORD_SIZE dflags)
+
-- Thunks have an extra header word on SMP, so the update doesn't
-- splat the payload.
thunkHdrSize :: DynFlags -> WordOff
@@ -300,15 +315,18 @@ thunkHdrSize dflags = fixedHdrSize dflags + smp_hdr
where smp_hdr = sIZEOF_StgSMPThunkHeader dflags `quot` wORD_SIZE dflags
-nonHdrSize :: SMRep -> WordOff
-nonHdrSize (HeapRep _ p np _) = p + np
-nonHdrSize (StackRep bs) = length bs
-nonHdrSize (RTSRep _ rep) = nonHdrSize rep
+nonHdrSizeW :: SMRep -> WordOff
+nonHdrSizeW (HeapRep _ p np _) = p + np
+nonHdrSizeW (ArrayPtrsRep elems ct) = elems + ct
+nonHdrSizeW (StackRep bs) = length bs
+nonHdrSizeW (RTSRep _ rep) = nonHdrSizeW rep
-heapClosureSize :: DynFlags -> SMRep -> WordOff
-heapClosureSize dflags (HeapRep _ p np ty)
+heapClosureSizeW :: DynFlags -> SMRep -> WordOff
+heapClosureSizeW dflags (HeapRep _ p np ty)
= closureTypeHdrSize dflags ty + p + np
-heapClosureSize _ _ = panic "SMRep.heapClosureSize"
+heapClosureSizeW dflags (ArrayPtrsRep elems ct)
+ = arrPtrsHdrSizeW dflags + elems + ct
+heapClosureSizeW _ _ = panic "SMRep.heapClosureSize"
closureTypeHdrSize :: DynFlags -> ClosureTypeInfo -> WordOff
closureTypeHdrSize dflags ty = case ty of
@@ -323,6 +341,27 @@ closureTypeHdrSize dflags ty = case ty of
-- difference. If we ever have significant numbers of non-
-- updatable thunks, it might be worth fixing this.
+-- ---------------------------------------------------------------------------
+-- Arrays
+
+-- | The byte offset into the card table of the card for a given element
+card :: DynFlags -> Int -> Int
+card dflags i = i `shiftR` mUT_ARR_PTRS_CARD_BITS dflags
+
+-- | Convert a number of elements to a number of cards, rounding up
+cardRoundUp :: DynFlags -> Int -> Int
+cardRoundUp dflags i =
+ card dflags (i + ((1 `shiftL` mUT_ARR_PTRS_CARD_BITS dflags) - 1))
+
+-- | The size of a card table, in bytes
+cardTableSizeB :: DynFlags -> Int -> ByteOff
+cardTableSizeB dflags elems = cardRoundUp dflags elems
+
+-- | The size of a card table, in words
+cardTableSizeW :: DynFlags -> Int -> WordOff
+cardTableSizeW dflags elems =
+ bytesToWordsRoundUp dflags (cardTableSizeB dflags elems)
+
-----------------------------------------------------------------------------
-- deriving the RTS closure type from an SMRep
@@ -413,6 +452,8 @@ instance Outputable SMRep where
pp_n _ 0 = empty
pp_n s n = int n <+> text s
+ ppr (ArrayPtrsRep size _) = ptext (sLit "ArrayPtrsRep") <+> ppr size
+
ppr (StackRep bs) = ptext (sLit "StackRep") <+> ppr bs
ppr (RTSRep ty rep) = ptext (sLit "tag:") <> ppr ty <+> ppr rep
diff --git a/compiler/codeGen/StgCmmHeap.hs b/compiler/codeGen/StgCmmHeap.hs
index 75ad8b40f4..2a0eaf9da7 100644
--- a/compiler/codeGen/StgCmmHeap.hs
+++ b/compiler/codeGen/StgCmmHeap.hs
@@ -16,7 +16,7 @@ module StgCmmHeap (
mkStaticClosureFields, mkStaticClosure,
- allocDynClosure, allocDynClosureCmm,
+ allocDynClosure, allocDynClosureCmm, allocHeapClosure,
emitSetDynHdr
) where
@@ -88,61 +88,69 @@ allocDynClosureCmm
-- significant - see test T4801.
-allocDynClosure mb_id info_tbl lf_info use_cc _blame_cc args_w_offsets
- = do { let (args, offsets) = unzip args_w_offsets
- ; cmm_args <- mapM getArgAmode args -- No void args
- ; allocDynClosureCmm mb_id info_tbl lf_info
- use_cc _blame_cc (zip cmm_args offsets)
- }
+allocDynClosure mb_id info_tbl lf_info use_cc _blame_cc args_w_offsets = do
+ let (args, offsets) = unzip args_w_offsets
+ cmm_args <- mapM getArgAmode args -- No void args
+ allocDynClosureCmm mb_id info_tbl lf_info
+ use_cc _blame_cc (zip cmm_args offsets)
-allocDynClosureCmm mb_id info_tbl lf_info use_cc _blame_cc amodes_w_offsets
- = do { virt_hp <- getVirtHp
- -- SAY WHAT WE ARE ABOUT TO DO
- ; let rep = cit_rep info_tbl
- ; tickyDynAlloc mb_id rep lf_info
- ; profDynAlloc rep use_cc
+allocDynClosureCmm mb_id info_tbl lf_info use_cc _blame_cc amodes_w_offsets = do
+ -- SAY WHAT WE ARE ABOUT TO DO
+ let rep = cit_rep info_tbl
+ tickyDynAlloc mb_id rep lf_info
+ profDynAlloc rep use_cc
+ let info_ptr = CmmLit (CmmLabel (cit_lbl info_tbl))
+ allocHeapClosure rep info_ptr use_cc amodes_w_offsets
- -- FIND THE OFFSET OF THE INFO-PTR WORD
- ; let info_offset = virt_hp + 1
- -- info_offset is the VirtualHpOffset of the first
- -- word of the new object
- -- Remember, virtHp points to last allocated word,
- -- ie 1 *before* the info-ptr word of new object.
- info_ptr = CmmLit (CmmLabel (cit_lbl info_tbl))
+-- | Low-level heap object allocation.
+allocHeapClosure
+ :: SMRep -- ^ representation of the object
+ -> CmmExpr -- ^ info pointer
+ -> CmmExpr -- ^ cost centre
+ -> [(CmmExpr,ByteOff)] -- ^ payload
+ -> FCode CmmExpr -- ^ returns the address of the object
+allocHeapClosure rep info_ptr use_cc payload = do
+ virt_hp <- getVirtHp
- -- ALLOCATE THE OBJECT
- ; base <- getHpRelOffset info_offset
- ; emitComment $ mkFastString "allocDynClosure"
- ; emitSetDynHdr base info_ptr use_cc
- ; let (cmm_args, offsets) = unzip amodes_w_offsets
- ; hpStore base cmm_args offsets
+ -- Find the offset of the info-ptr word
+ let info_offset = virt_hp + 1
+ -- info_offset is the VirtualHpOffset of the first
+ -- word of the new object
+ -- Remember, virtHp points to last allocated word,
+ -- ie 1 *before* the info-ptr word of new object.
- -- BUMP THE VIRTUAL HEAP POINTER
- ; dflags <- getDynFlags
- ; setVirtHp (virt_hp + heapClosureSize dflags rep)
+ base <- getHpRelOffset info_offset
+ emitComment $ mkFastString "allocDynClosure"
+ emitSetDynHdr base info_ptr use_cc
+
+ -- Fill in the fields
+ hpStore base payload
+
+ -- Bump the virtual heap pointer
+ dflags <- getDynFlags
+ setVirtHp (virt_hp + heapClosureSizeW dflags rep)
+
+ return base
- ; getHpRelOffset info_offset
- }
emitSetDynHdr :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
emitSetDynHdr base info_ptr ccs
= do dflags <- getDynFlags
- hpStore base (header dflags) [0, wORD_SIZE dflags ..]
+ hpStore base (zip (header dflags) [0, wORD_SIZE dflags ..])
where
header :: DynFlags -> [CmmExpr]
header dflags = [info_ptr] ++ dynProfHdr dflags ccs
-- ToDof: Parallel stuff
-- No ticky header
-hpStore :: CmmExpr -> [CmmExpr] -> [ByteOff] -> FCode ()
-- Store the item (expr,off) in base[off]
-hpStore base vals offs
- = do dflags <- getDynFlags
- let mk_store val off = mkStore (cmmOffsetB dflags base off) val
- emit (catAGraphs (zipWith mk_store vals offs))
-
+hpStore :: CmmExpr -> [(CmmExpr, ByteOff)] -> FCode ()
+hpStore base vals = do
+ dflags <- getDynFlags
+ sequence_ $
+ [ emitStore (cmmOffsetB dflags base off) val | (val,off) <- vals ]
-----------------------------------------------------------
-- Layout of static closures
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
index 504510c359..a4327c4064 100644
--- a/compiler/codeGen/StgCmmPrim.hs
+++ b/compiler/codeGen/StgCmmPrim.hs
@@ -90,10 +90,11 @@ cgOpApp (StgPrimOp primop) args res_ty = do
dflags <- getDynFlags
cmm_args <- getNonVoidArgAmodes args
case shouldInlinePrimOp dflags primop cmm_args of
- Nothing -> do let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop))
- emitCall (NativeNodeCall, NativeReturn) fun cmm_args
+ Nothing -> do -- out-of-line
+ let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop))
+ emitCall (NativeNodeCall, NativeReturn) fun cmm_args
- Just f
+ Just f -- inline
| ReturnsPrim VoidRep <- result_info
-> do f []
emitReturn []
@@ -1533,36 +1534,24 @@ doNewArrayOp :: CmmFormal -> Integer -> CmmExpr -> FCode ()
doNewArrayOp res_r n init = do
dflags <- getDynFlags
- let card_bytes = cardRoundUp dflags (fromInteger n)
- size = fromInteger n + bytesToWordsRoundUp dflags card_bytes
- words = arrPtrsHdrSizeWords dflags + size
-
- -- If the allocation is of small, statically-known size, we reuse
- -- the existing heap check to allocate inline.
- virt_hp <- getVirtHp
-
- -- FIND THE OFFSET OF THE INFO-PTR WORD
- let info_offset = virt_hp + 1
- -- info_offset is the VirtualHpOffset of the first
- -- word of the new object
- -- Remember, virtHp points to last allocated word,
- -- ie 1 *before* the info-ptr word of new object.
- base <- getHpRelOffset info_offset
- setVirtHp (virt_hp + fromIntegral words) -- check n < big
- arr <- CmmLocal `fmap` newTemp (bWord dflags)
- emit $ mkAssign arr base
+ let info_ptr = mkLblExpr mkMAP_DIRTY_infoLabel
+
+ -- ToDo: this probably isn't right (card size?)
tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags))
- (cmmMulWord dflags (mkIntExpr dflags (fromInteger n)) (wordSize dflags))
+ (mkIntExpr dflags (fromInteger n * wORD_SIZE dflags))
(zeroExpr dflags)
- emitSetDynHdr base (mkLblExpr mkMAP_DIRTY_infoLabel) curCCS
- emit $ mkStore (cmmOffsetB dflags base
- (fixedHdrSize dflags * wORD_SIZE dflags +
- oFFSET_StgMutArrPtrs_ptrs dflags))
- (mkIntExpr dflags (fromInteger n))
- emit $ mkStore (cmmOffsetB dflags base
- (fixedHdrSize dflags * wORD_SIZE dflags +
- oFFSET_StgMutArrPtrs_size dflags)) (mkIntExpr dflags size)
+ let rep = arrPtrsRep dflags (fromIntegral n)
+ hdr_size = fixedHdrSize dflags * wORD_SIZE dflags
+ base <- allocHeapClosure rep info_ptr curCCS
+ [ (mkIntExpr dflags (fromInteger n),
+ hdr_size + oFFSET_StgMutArrPtrs_ptrs dflags)
+ , (mkIntExpr dflags (nonHdrSizeW rep),
+ hdr_size + oFFSET_StgMutArrPtrs_size dflags)
+ ]
+
+ arr <- CmmLocal `fmap` newTemp (bWord dflags)
+ emit $ mkAssign arr base
-- Initialise all elements of the the array
p <- assignTemp $ cmmOffsetB dflags (CmmReg arr) (arrPtrsHdrSize dflags)
@@ -1577,26 +1566,12 @@ doNewArrayOp res_r n init = do
(cmmOffsetW dflags (CmmReg arr) (fromInteger n)))
(catAGraphs loopBody)
- -- Initialise the mark bits with 0. This will be unrolled in the
- -- backend to e.g. a single assignment since the arguments are
- -- statically known.
- emitMemsetCall
- (cmmOffsetExprW dflags (CmmReg (CmmLocal p))
- (mkIntExpr dflags (fromInteger n)))
- (mkIntExpr dflags 0)
- (mkIntExpr dflags card_bytes)
- (mkIntExpr dflags (wORD_SIZE dflags))
emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
-- | The inline allocation limit is 128 bytes, expressed in words.
maxInlineAllocThreshold :: DynFlags -> Integer
maxInlineAllocThreshold dflags = toInteger (128 `quot` wORD_SIZE dflags)
-arrPtrsHdrSizeWords :: DynFlags -> WordOff
-arrPtrsHdrSizeWords dflags =
- fixedHdrSize dflags +
- (sIZEOF_StgMutArrPtrs_NoHdr dflags `div` wORD_SIZE dflags)
-
-- ----------------------------------------------------------------------------
-- Copying pointer arrays
@@ -1724,18 +1699,6 @@ emitCloneArray info_p res_r src0 src_off0 n0 = do
(mkIntExpr dflags (wORD_SIZE dflags))
emit $ mkAssign (CmmLocal res_r) arr
-card :: DynFlags -> Int -> Int
-card dflags i = i `shiftR` mUT_ARR_PTRS_CARD_BITS dflags
-
--- Convert a number of elements to a number of cards, rounding up
-cardRoundUp :: DynFlags -> Int -> Int
-cardRoundUp dflags i =
- card dflags (i + ((1 `shiftL` mUT_ARR_PTRS_CARD_BITS dflags) - 1))
-
-bytesToWordsRoundUp :: DynFlags -> Int -> Int
-bytesToWordsRoundUp dflags e =
- (e + wORD_SIZE dflags - 1) `quot` (wORD_SIZE dflags)
-
-- | Takes and offset in the destination array, the base address of
-- the card table, and the number of elements affected (*not* the
-- number of cards). The number of elements may not be zero.
diff --git a/compiler/codeGen/StgCmmProf.hs b/compiler/codeGen/StgCmmProf.hs
index e8a2a10fdd..f858c5a0b6 100644
--- a/compiler/codeGen/StgCmmProf.hs
+++ b/compiler/codeGen/StgCmmProf.hs
@@ -149,7 +149,7 @@ profDynAlloc :: SMRep -> CmmExpr -> FCode ()
profDynAlloc rep ccs
= ifProfiling $
do dflags <- getDynFlags
- profAlloc (mkIntExpr dflags (heapClosureSize dflags rep)) ccs
+ profAlloc (mkIntExpr dflags (heapClosureSizeW dflags rep)) ccs
-- | Record the allocation of a closure (size is given by a CmmExpr)
-- The size must be in words, because the allocation counter in a CCS counts
diff --git a/compiler/codeGen/StgCmmTicky.hs b/compiler/codeGen/StgCmmTicky.hs
index 3f3c3c5a19..50112f1ef8 100644
--- a/compiler/codeGen/StgCmmTicky.hs
+++ b/compiler/codeGen/StgCmmTicky.hs
@@ -415,7 +415,7 @@ tickyDynAlloc :: Maybe Id -> SMRep -> LambdaFormInfo -> FCode ()
--
-- TODO what else to count while we're here?
tickyDynAlloc mb_id rep lf = ifTicky $ getDynFlags >>= \dflags ->
- let bytes = wORD_SIZE dflags * heapClosureSize dflags rep
+ let bytes = wORD_SIZE dflags * heapClosureSizeW dflags rep
countGlobal tot ctr = do
bumpTickyCounterBy tot bytes