diff options
-rw-r--r-- | compiler/cmm/CLabel.hs | 5 | ||||
-rw-r--r-- | compiler/cmm/SMRep.lhs | 24 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmPrim.hs | 49 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_run/StaticByteArraySize.hs | 52 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_run/StaticByteArraySize.stdout | 1 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_run/all.T | 1 | ||||
-rw-r--r-- | testsuite/tests/perf/should_run/InlineByteArrayAlloc.hs | 16 | ||||
-rw-r--r-- | testsuite/tests/perf/should_run/all.T | 7 | ||||
-rw-r--r-- | utils/deriveConstants/DeriveConstants.hs | 2 |
9 files changed, 143 insertions, 14 deletions
diff --git a/compiler/cmm/CLabel.hs b/compiler/cmm/CLabel.hs index 1b86f3d6b4..022792f1f4 100644 --- a/compiler/cmm/CLabel.hs +++ b/compiler/cmm/CLabel.hs @@ -56,6 +56,7 @@ module CLabel ( mkMAP_FROZEN_infoLabel, mkMAP_DIRTY_infoLabel, mkEMPTY_MVAR_infoLabel, + mkArrWords_infoLabel, mkTopTickyCtrLabel, mkCAFBlackHoleInfoTableLabel, @@ -402,7 +403,8 @@ mkDirty_MUT_VAR_Label, mkSplitMarkerLabel, mkUpdInfoLabel, mkBHUpdInfoLabel, mkIndStaticInfoLabel, mkMainCapabilityLabel, mkMAP_FROZEN_infoLabel, mkMAP_DIRTY_infoLabel, mkEMPTY_MVAR_infoLabel, mkTopTickyCtrLabel, - mkCAFBlackHoleInfoTableLabel, mkCAFBlackHoleEntryLabel :: CLabel + mkCAFBlackHoleInfoTableLabel, mkCAFBlackHoleEntryLabel, + mkArrWords_infoLabel :: CLabel mkDirty_MUT_VAR_Label = mkForeignLabel (fsLit "dirty_MUT_VAR") Nothing ForeignLabelInExternalPackage IsFunction mkSplitMarkerLabel = CmmLabel rtsPackageId (fsLit "__stg_split_marker") CmmCode mkUpdInfoLabel = CmmLabel rtsPackageId (fsLit "stg_upd_frame") CmmInfo @@ -415,6 +417,7 @@ mkEMPTY_MVAR_infoLabel = CmmLabel rtsPackageId (fsLit "stg_EMPTY_MVAR") mkTopTickyCtrLabel = CmmLabel rtsPackageId (fsLit "top_ct") CmmData mkCAFBlackHoleInfoTableLabel = CmmLabel rtsPackageId (fsLit "stg_CAF_BLACKHOLE") CmmInfo mkCAFBlackHoleEntryLabel = CmmLabel rtsPackageId (fsLit "stg_CAF_BLACKHOLE") CmmEntry +mkArrWords_infoLabel = CmmLabel rtsPackageId (fsLit "stg_ARR_WORDS") CmmInfo ----- mkCmmInfoLabel, mkCmmEntryLabel, mkCmmRetInfoLabel, mkCmmRetLabel, diff --git a/compiler/cmm/SMRep.lhs b/compiler/cmm/SMRep.lhs index 7fb0a2b4f5..a5a8c903c6 100644 --- a/compiler/cmm/SMRep.lhs +++ b/compiler/cmm/SMRep.lhs @@ -26,6 +26,7 @@ module SMRep ( -- ** Construction mkHeapRep, blackHoleRep, indStaticRep, mkStackRep, mkRTSRep, arrPtrsRep, + arrWordsRep, -- ** Predicates isStaticRep, isConRep, isThunkRep, isFunRep, isStaticNoCafCon, @@ -33,8 +34,8 @@ module SMRep ( -- ** Size-related things heapClosureSizeW, - fixedHdrSize, arrWordsHdrSize, arrPtrsHdrSize, arrPtrsHdrSizeW, - profHdrSize, thunkHdrSize, nonHdrSizeW, + fixedHdrSize, arrWordsHdrSize, arrWordsHdrSizeW, arrPtrsHdrSize, + arrPtrsHdrSizeW, profHdrSize, thunkHdrSize, nonHdrSize, nonHdrSizeW, -- ** RTS closure types rtsClosureType, rET_SMALL, rET_BIG, @@ -157,6 +158,9 @@ data SMRep !WordOff -- # ptr words !WordOff -- # card table words + | ArrayWordsRep + !WordOff -- # bytes expressed in words, rounded up + | StackRep -- Stack frame (RET_SMALL or RET_BIG) Liveness @@ -241,6 +245,9 @@ indStaticRep = HeapRep True 1 0 IndStatic arrPtrsRep :: DynFlags -> WordOff -> SMRep arrPtrsRep dflags elems = ArrayPtrsRep elems (cardTableSizeW dflags elems) +arrWordsRep :: DynFlags -> ByteOff -> SMRep +arrWordsRep dflags bytes = ArrayWordsRep (bytesToWordsRoundUp dflags bytes) + ----------------------------------------------------------------------------- -- Predicates @@ -299,6 +306,11 @@ arrWordsHdrSize :: DynFlags -> ByteOff arrWordsHdrSize dflags = fixedHdrSize dflags * wORD_SIZE dflags + sIZEOF_StgArrWords_NoHdr dflags +arrWordsHdrSizeW :: DynFlags -> WordOff +arrWordsHdrSizeW dflags = + fixedHdrSize dflags + + (sIZEOF_StgArrWords_NoHdr dflags `quot` wORD_SIZE dflags) + arrPtrsHdrSize :: DynFlags -> ByteOff arrPtrsHdrSize dflags = fixedHdrSize dflags * wORD_SIZE dflags + sIZEOF_StgMutArrPtrs_NoHdr dflags @@ -314,18 +326,24 @@ thunkHdrSize :: DynFlags -> WordOff thunkHdrSize dflags = fixedHdrSize dflags + smp_hdr where smp_hdr = sIZEOF_StgSMPThunkHeader dflags `quot` wORD_SIZE dflags +nonHdrSize :: DynFlags -> SMRep -> ByteOff +nonHdrSize dflags rep = wordsToBytes dflags (nonHdrSizeW rep) nonHdrSizeW :: SMRep -> WordOff nonHdrSizeW (HeapRep _ p np _) = p + np nonHdrSizeW (ArrayPtrsRep elems ct) = elems + ct +nonHdrSizeW (ArrayWordsRep words) = words nonHdrSizeW (StackRep bs) = length bs nonHdrSizeW (RTSRep _ rep) = nonHdrSizeW rep +-- | The total size of the closure, in words. heapClosureSizeW :: DynFlags -> SMRep -> WordOff heapClosureSizeW dflags (HeapRep _ p np ty) = closureTypeHdrSize dflags ty + p + np heapClosureSizeW dflags (ArrayPtrsRep elems ct) = arrPtrsHdrSizeW dflags + elems + ct +heapClosureSizeW dflags (ArrayWordsRep words) + = arrWordsHdrSizeW dflags + words heapClosureSizeW _ _ = panic "SMRep.heapClosureSize" closureTypeHdrSize :: DynFlags -> ClosureTypeInfo -> WordOff @@ -454,6 +472,8 @@ instance Outputable SMRep where ppr (ArrayPtrsRep size _) = ptext (sLit "ArrayPtrsRep") <+> ppr size + ppr (ArrayWordsRep words) = ptext (sLit "ArrayWordsRep") <+> ppr words + ppr (StackRep bs) = ptext (sLit "StackRep") <+> ppr bs ppr (RTSRep ty rep) = ptext (sLit "tag:") <> ppr ty <+> ppr rep diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs index 22f6ec103d..28d50c1094 100644 --- a/compiler/codeGen/StgCmmPrim.hs +++ b/compiler/codeGen/StgCmmPrim.hs @@ -132,9 +132,12 @@ shouldInlinePrimOp :: DynFlags -> PrimOp -- ^ The primop -> [CmmExpr] -- ^ The primop arguments -> Maybe ([LocalReg] -> FCode ()) +shouldInlinePrimOp _ NewByteArrayOp_Char [(CmmLit (CmmInt n _))] + | fromInteger n <= maxInlineAllocThreshold = + Just $ \ [res] -> doNewByteArrayOp res (fromInteger n) shouldInlinePrimOp dflags NewArrayOp [(CmmLit (CmmInt n _)), init] - | n <= maxInlineAllocThreshold dflags = - Just $ \ [res] -> doNewArrayOp res n init + | wordsToBytes dflags (fromInteger n) <= maxInlineAllocThreshold = + Just $ \ [res] -> doNewArrayOp res (fromInteger n) init shouldInlinePrimOp dflags primop args | primOpOutOfLine primop = Nothing | otherwise = Just $ \ regs -> emitPrimOp dflags regs primop args @@ -1437,6 +1440,32 @@ mkBasicPrefetch locality off res base idx _ -> panic "StgCmmPrim: mkBasicPrefetch" -- ---------------------------------------------------------------------------- +-- Allocating byte arrays + +-- | Takes a register to return the newly allocated array in and the +-- size of the new array in bytes. Allocates a new +-- 'MutableByteArray#'. +doNewByteArrayOp :: CmmFormal -> ByteOff -> FCode () +doNewByteArrayOp res_r n = do + dflags <- getDynFlags + + let info_ptr = mkLblExpr mkArrWords_infoLabel + rep = arrWordsRep dflags n + + tickyAllocPrim (mkIntExpr dflags (arrWordsHdrSize dflags)) + (mkIntExpr dflags (nonHdrSize dflags rep)) + (zeroExpr dflags) + + let hdr_size = wordsToBytes dflags (fixedHdrSize dflags) + + base <- allocHeapClosure rep info_ptr curCCS + [ (mkIntExpr dflags n, + hdr_size + oFFSET_StgArrWords_bytes dflags) + ] + + emit $ mkAssign (CmmLocal res_r) base + +-- ---------------------------------------------------------------------------- -- Copying byte arrays -- | Takes a source 'ByteArray#', an offset in the source array, a @@ -1530,21 +1559,21 @@ doSetByteArrayOp ba off len c -- | Takes a register to return the newly allocated array in, the size -- of the new array, and an initial value for the elements. Allocates -- a new 'MutableArray#'. -doNewArrayOp :: CmmFormal -> Integer -> CmmExpr -> FCode () +doNewArrayOp :: CmmFormal -> WordOff -> CmmExpr -> FCode () doNewArrayOp res_r n init = do dflags <- getDynFlags let info_ptr = mkLblExpr mkMAP_DIRTY_infoLabel - rep = arrPtrsRep dflags (fromIntegral n) + rep = arrPtrsRep dflags n tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags)) - (mkIntExpr dflags (wordsToBytes dflags (heapClosureSizeW dflags rep))) + (mkIntExpr dflags (nonHdrSize dflags rep)) (zeroExpr dflags) let hdr_size = wordsToBytes dflags (fixedHdrSize dflags) base <- allocHeapClosure rep info_ptr curCCS - [ (mkIntExpr dflags (fromInteger n), + [ (mkIntExpr dflags n, hdr_size + oFFSET_StgMutArrPtrs_ptrs dflags) , (mkIntExpr dflags (nonHdrSizeW rep), hdr_size + oFFSET_StgMutArrPtrs_size dflags) @@ -1564,14 +1593,14 @@ doNewArrayOp res_r n init = do emit =<< mkCmmIfThen (cmmULtWord dflags (CmmReg (CmmLocal p)) (cmmOffsetW dflags (CmmReg arr) - (arrPtrsHdrSizeW dflags + fromInteger n))) + (arrPtrsHdrSizeW dflags + n))) (catAGraphs loopBody) emit $ mkAssign (CmmLocal res_r) (CmmReg arr) --- | The inline allocation limit is 128 bytes, expressed in words. -maxInlineAllocThreshold :: DynFlags -> Integer -maxInlineAllocThreshold dflags = toInteger (128 `quot` wORD_SIZE dflags) +-- | The inline allocation limit is 128 bytes. +maxInlineAllocThreshold :: ByteOff +maxInlineAllocThreshold = 128 -- ---------------------------------------------------------------------------- -- Copying pointer arrays diff --git a/testsuite/tests/codeGen/should_run/StaticByteArraySize.hs b/testsuite/tests/codeGen/should_run/StaticByteArraySize.hs new file mode 100644 index 0000000000..c2d666049e --- /dev/null +++ b/testsuite/tests/codeGen/should_run/StaticByteArraySize.hs @@ -0,0 +1,52 @@ +{-# LANGUAGE MagicHash, UnboxedTuples #-} + +-- Test allocation of statically sized byte arrays. There's an +-- optimization that targets these and we want to make sure that the +-- code generated in the optimized case is correct. +-- +-- The tests proceeds by allocating a bunch of byte arrays of +-- different sizes, to try to provoke GC crashes, which would be a +-- symptom of the optimization not generating correct code. +module Main where + +import GHC.Exts +import GHC.IO + +main :: IO () +main = do + loop 1000 + putStrLn "success" + where + loop :: Int -> IO () + loop 0 = return () + loop i = do + -- Sizes have been picked to match the triggering of the + -- optimization and to match boundary conditions. Sizes are + -- given explicitly as to not rely on other optimizations to + -- make the static size known to the compiler. + newByteArray 0 + newByteArray 1 + newByteArray 2 + newByteArray 3 + newByteArray 4 + newByteArray 5 + newByteArray 6 + newByteArray 7 + newByteArray 8 + newByteArray 9 + newByteArray 10 + newByteArray 11 + newByteArray 12 + newByteArray 13 + newByteArray 14 + newByteArray 15 + newByteArray 16 + newByteArray 64 + newByteArray 128 + newByteArray 129 + loop (i-1) + +newByteArray :: Int -> IO () +newByteArray (I# sz#) = IO $ \s -> case newByteArray# sz# s of + (# s', _ #) -> (# s', () #) +{-# INLINE newByteArray #-} -- to make sure optimization triggers diff --git a/testsuite/tests/codeGen/should_run/StaticByteArraySize.stdout b/testsuite/tests/codeGen/should_run/StaticByteArraySize.stdout new file mode 100644 index 0000000000..2e9ba477f8 --- /dev/null +++ b/testsuite/tests/codeGen/should_run/StaticByteArraySize.stdout @@ -0,0 +1 @@ +success diff --git a/testsuite/tests/codeGen/should_run/all.T b/testsuite/tests/codeGen/should_run/all.T index a8b013ec99..23393cd7c0 100644 --- a/testsuite/tests/codeGen/should_run/all.T +++ b/testsuite/tests/codeGen/should_run/all.T @@ -117,3 +117,4 @@ test('T7953', reqlib('random'), compile_and_run, ['']) test('T8256', reqlib('vector'), compile_and_run, ['']) test('T6084',normal, compile_and_run, ['-O2']) test('StaticArraySize', normal, compile_and_run, ['-O2']) +test('StaticByteArraySize', normal, compile_and_run, ['-O2']) diff --git a/testsuite/tests/perf/should_run/InlineByteArrayAlloc.hs b/testsuite/tests/perf/should_run/InlineByteArrayAlloc.hs new file mode 100644 index 0000000000..fa4883fa58 --- /dev/null +++ b/testsuite/tests/perf/should_run/InlineByteArrayAlloc.hs @@ -0,0 +1,16 @@ +{-# LANGUAGE MagicHash, UnboxedTuples #-} +module Main where + +import GHC.Exts +import GHC.IO + +main :: IO () +main = loop 10000000 + where + loop :: Int -> IO () + loop 0 = return () + loop i = newByteArray >> loop (i-1) + +newByteArray :: IO () +newByteArray = IO $ \s -> case newByteArray# 128# s of + (# s', _ #) -> (# s', () #) diff --git a/testsuite/tests/perf/should_run/all.T b/testsuite/tests/perf/should_run/all.T index ea1ba8f59e..14be74ed9d 100644 --- a/testsuite/tests/perf/should_run/all.T +++ b/testsuite/tests/perf/should_run/all.T @@ -337,3 +337,10 @@ test('InlineArrayAlloc', only_ways(['normal'])], compile_and_run, ['-O2']) + +test('InlineByteArrayAlloc', + [stats_num_field('bytes allocated', + [ (wordsize(64), 1440040960, 5)]), + only_ways(['normal'])], + compile_and_run, + ['-O2']) diff --git a/utils/deriveConstants/DeriveConstants.hs b/utils/deriveConstants/DeriveConstants.hs index 10df61ca7d..293fe65492 100644 --- a/utils/deriveConstants/DeriveConstants.hs +++ b/utils/deriveConstants/DeriveConstants.hs @@ -392,7 +392,7 @@ wanteds = concat ,closureField Both "StgMutArrPtrs" "size" ,closureSize Both "StgArrWords" - ,closureField C "StgArrWords" "bytes" + ,closureField Both "StgArrWords" "bytes" ,closurePayload C "StgArrWords" "payload" ,closureField C "StgTSO" "_link" |