diff options
Diffstat (limited to 'compiler/codeGen')
| -rw-r--r-- | compiler/codeGen/StgCmmExpr.hs | 33 | ||||
| -rw-r--r-- | compiler/codeGen/StgCmmPrim.hs | 132 |
2 files changed, 74 insertions, 91 deletions
diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs index d94eca493e..9b9d6397c4 100644 --- a/compiler/codeGen/StgCmmExpr.hs +++ b/compiler/codeGen/StgCmmExpr.hs @@ -422,8 +422,8 @@ cgCase scrut bndr alt_type alts ; up_hp_usg <- getVirtHp -- Upstream heap usage ; let ret_bndrs = chooseReturnBndrs bndr alt_type alts alt_regs = map (idToReg dflags) ret_bndrs - simple_scrut = isSimpleScrut scrut alt_type - do_gc | not simple_scrut = True + ; simple_scrut <- isSimpleScrut scrut alt_type + ; let do_gc | not simple_scrut = True | isSingleton alts = False | up_hp_usg > 0 = False | otherwise = True @@ -450,6 +450,13 @@ recover any unused heap before passing control to the sequel. If we don't do this, then any unused heap will become slop because the heap check will reset the heap usage. Slop in the heap breaks LDV profiling (+RTS -hb) which needs to do a linear sweep through the nursery. + + +Note [Inlining out-of-line primops and heap checks] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If shouldInlinePrimOp returns True when called from StgCmmExpr for the +purpose of heap check placement, we *must* inline the primop later in +StgCmmPrim. If we don't things will go wrong. -} ----------------- @@ -460,21 +467,25 @@ maybeSaveCostCentre simple_scrut ----------------- -isSimpleScrut :: StgExpr -> AltType -> Bool +isSimpleScrut :: StgExpr -> AltType -> FCode Bool -- Simple scrutinee, does not block or allocate; hence safe to amalgamate -- heap usage from alternatives into the stuff before the case -- NB: if you get this wrong, and claim that the expression doesn't allocate -- when it does, you'll deeply mess up allocation -isSimpleScrut (StgOpApp op _ _) _ = isSimpleOp op -isSimpleScrut (StgLit _) _ = True -- case 1# of { 0# -> ..; ... } -isSimpleScrut (StgApp _ []) (PrimAlt _) = True -- case x# of { 0# -> ..; ... } -isSimpleScrut _ _ = False +isSimpleScrut (StgOpApp op args _) _ = isSimpleOp op args +isSimpleScrut (StgLit _) _ = return True -- case 1# of { 0# -> ..; ... } +isSimpleScrut (StgApp _ []) (PrimAlt _) = return True -- case x# of { 0# -> ..; ... } +isSimpleScrut _ _ = return False -isSimpleOp :: StgOp -> Bool +isSimpleOp :: StgOp -> [StgArg] -> FCode Bool -- True iff the op cannot block or allocate -isSimpleOp (StgFCallOp (CCall (CCallSpec _ _ safe)) _) = not (playSafe safe) -isSimpleOp (StgPrimOp op) = not (primOpOutOfLine op) -isSimpleOp (StgPrimCallOp _) = False +isSimpleOp (StgFCallOp (CCall (CCallSpec _ _ safe)) _) _ = return $! not (playSafe safe) +isSimpleOp (StgPrimOp op) stg_args = do + arg_exprs <- getNonVoidArgAmodes stg_args + dflags <- getDynFlags + -- See Note [Inlining out-of-line primops and heap checks] + return $! isJust $ shouldInlinePrimOp dflags op arg_exprs +isSimpleOp (StgPrimCallOp _) _ = return False ----------------- chooseReturnBndrs :: Id -> AltType -> [StgAlt] -> [NonVoid Id] diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs index 28d50c1094..9a748da736 100644 --- a/compiler/codeGen/StgCmmPrim.hs +++ b/compiler/codeGen/StgCmmPrim.hs @@ -8,8 +8,9 @@ module StgCmmPrim ( cgOpApp, - cgPrimOp -- internal(ish), used by cgCase to get code for a - -- comparison without also turning it into a Bool. + cgPrimOp, -- internal(ish), used by cgCase to get code for a + -- comparison without also turning it into a Bool. + shouldInlinePrimOp ) where #include "HsVersions.h" @@ -41,7 +42,6 @@ import Outputable import Util import Control.Monad (liftM, when) -import Data.Bits ------------------------------------------------------------------------ -- Primitive operations and foreign calls @@ -132,12 +132,31 @@ shouldInlinePrimOp :: DynFlags -> PrimOp -- ^ The primop -> [CmmExpr] -- ^ The primop arguments -> Maybe ([LocalReg] -> FCode ()) -shouldInlinePrimOp _ NewByteArrayOp_Char [(CmmLit (CmmInt n _))] - | fromInteger n <= maxInlineAllocThreshold = + +shouldInlinePrimOp dflags NewByteArrayOp_Char [(CmmLit (CmmInt n _))] + | fromInteger n <= maxInlineAllocSize dflags = Just $ \ [res] -> doNewByteArrayOp res (fromInteger n) + shouldInlinePrimOp dflags NewArrayOp [(CmmLit (CmmInt n _)), init] - | wordsToBytes dflags (fromInteger n) <= maxInlineAllocThreshold = + | wordsToBytes dflags (fromInteger n) <= maxInlineAllocSize dflags = Just $ \ [res] -> doNewArrayOp res (fromInteger n) init + +shouldInlinePrimOp dflags CloneArrayOp [src, src_off, (CmmLit (CmmInt n _))] + | wordsToBytes dflags (fromInteger n) <= maxInlineAllocSize dflags = + Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_infoLabel res src src_off (fromInteger n) + +shouldInlinePrimOp dflags CloneMutableArrayOp [src, src_off, (CmmLit (CmmInt n _))] + | wordsToBytes dflags (fromInteger n) <= maxInlineAllocSize dflags = + Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n) + +shouldInlinePrimOp dflags FreezeArrayOp [src, src_off, (CmmLit (CmmInt n _))] + | wordsToBytes dflags (fromInteger n) <= maxInlineAllocSize dflags = + Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_infoLabel res src src_off (fromInteger n) + +shouldInlinePrimOp dflags ThawArrayOp [src, src_off, (CmmLit (CmmInt n _))] + | wordsToBytes dflags (fromInteger n) <= maxInlineAllocSize dflags = + Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n) + shouldInlinePrimOp dflags primop args | primOpOutOfLine primop = Nothing | otherwise = Just $ \ regs -> emitPrimOp dflags regs primop args @@ -328,11 +347,11 @@ emitPrimOp dflags [res] DataToTagOp [arg] -- } emitPrimOp _ [res] UnsafeFreezeArrayOp [arg] = emit $ catAGraphs - [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_infoLabel)), + [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN0_infoLabel)), mkAssign (CmmLocal res) arg ] emitPrimOp _ [res] UnsafeFreezeArrayArrayOp [arg] = emit $ catAGraphs - [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_infoLabel)), + [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN0_infoLabel)), mkAssign (CmmLocal res) arg ] -- #define unsafeFreezzeByteArrayzh(r,a) r=(a) @@ -345,15 +364,6 @@ emitPrimOp _ [] CopyArrayOp [src,src_off,dst,dst_off,n] = doCopyArrayOp src src_off dst dst_off n emitPrimOp _ [] CopyMutableArrayOp [src,src_off,dst,dst_off,n] = doCopyMutableArrayOp src src_off dst dst_off n -emitPrimOp _ [res] CloneArrayOp [src,src_off,n] = - emitCloneArray mkMAP_FROZEN_infoLabel res src src_off n -emitPrimOp _ [res] CloneMutableArrayOp [src,src_off,n] = - emitCloneArray mkMAP_DIRTY_infoLabel res src src_off n -emitPrimOp _ [res] FreezeArrayOp [src,src_off,n] = - emitCloneArray mkMAP_FROZEN_infoLabel res src src_off n -emitPrimOp _ [res] ThawArrayOp [src,src_off,n] = - emitCloneArray mkMAP_DIRTY_infoLabel res src src_off n - emitPrimOp _ [] CopyArrayArrayOp [src,src_off,dst,dst_off,n] = doCopyArrayOp src src_off dst dst_off n emitPrimOp _ [] CopyMutableArrayArrayOp [src,src_off,dst,dst_off,n] = @@ -1598,10 +1608,6 @@ doNewArrayOp res_r n init = do emit $ mkAssign (CmmLocal res_r) (CmmReg arr) --- | The inline allocation limit is 128 bytes. -maxInlineAllocThreshold :: ByteOff -maxInlineAllocThreshold = 128 - -- ---------------------------------------------------------------------------- -- Copying pointer arrays @@ -1689,45 +1695,40 @@ emitCopyArray copy src0 src_off0 dst0 dst_off0 n0 = do -- allocated array in, a source array, an offset in the source array, -- and the number of elements to copy. Allocates a new array and -- initializes it from the source array. -emitCloneArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> CmmExpr +emitCloneArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> WordOff -> FCode () -emitCloneArray info_p res_r src0 src_off0 n0 = do +emitCloneArray info_p res_r src src_off n = do dflags <- getDynFlags - let arrPtrsHdrSizeW dflags = mkIntExpr dflags (fixedHdrSize dflags + - (sIZEOF_StgMutArrPtrs_NoHdr dflags `div` wORD_SIZE dflags)) - myCapability = cmmSubWord dflags (CmmReg baseReg) (mkIntExpr dflags (oFFSET_Capability_r dflags)) - -- Passed as arguments (be careful) - src <- assignTempE src0 - src_off <- assignTempE src_off0 - n <- assignTempE n0 - card_bytes <- assignTempE $ cardRoundUpCmm dflags n - size <- assignTempE $ cmmAddWord dflags n (bytesToWordsRoundUpCmm dflags card_bytes) - words <- assignTempE $ cmmAddWord dflags (arrPtrsHdrSizeW dflags) size + let info_ptr = mkLblExpr info_p + rep = arrPtrsRep dflags n - arr_r <- newTemp (bWord dflags) - emitAllocateCall arr_r myCapability words - tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags)) (cmmMulWord dflags n (wordSize dflags)) - (zeroExpr dflags) + tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags)) + (mkIntExpr dflags (nonHdrSize dflags rep)) + (zeroExpr dflags) - let arr = CmmReg (CmmLocal arr_r) - emitSetDynHdr arr (CmmLit (CmmLabel info_p)) curCCS - emit $ mkStore (cmmOffsetB dflags arr (fixedHdrSize dflags * wORD_SIZE dflags + - oFFSET_StgMutArrPtrs_ptrs dflags)) n - emit $ mkStore (cmmOffsetB dflags arr (fixedHdrSize dflags * wORD_SIZE dflags + - oFFSET_StgMutArrPtrs_size dflags)) size + let hdr_size = wordsToBytes dflags (fixedHdrSize dflags) - dst_p <- assignTempE $ cmmOffsetB dflags arr (arrPtrsHdrSize dflags) - src_p <- assignTempE $ cmmOffsetExprW dflags (cmmOffsetB dflags src (arrPtrsHdrSize dflags)) - src_off + base <- allocHeapClosure rep info_ptr curCCS + [ (mkIntExpr dflags n, + hdr_size + oFFSET_StgMutArrPtrs_ptrs dflags) + , (mkIntExpr dflags (nonHdrSizeW rep), + hdr_size + oFFSET_StgMutArrPtrs_size dflags) + ] - emitMemcpyCall dst_p src_p (cmmMulWord dflags n (wordSize dflags)) (mkIntExpr dflags (wORD_SIZE dflags)) + arr <- CmmLocal `fmap` newTemp (bWord dflags) + emit $ mkAssign arr base - emitMemsetCall (cmmOffsetExprW dflags dst_p n) - (mkIntExpr dflags 1) - card_bytes + dst_p <- assignTempE $ cmmOffsetB dflags (CmmReg arr) + (arrPtrsHdrSize dflags) + src_p <- assignTempE $ cmmOffsetExprW dflags src + (cmmAddWord dflags + (mkIntExpr dflags (arrPtrsHdrSizeW dflags)) src_off) + + emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n)) (mkIntExpr dflags (wORD_SIZE dflags)) - emit $ mkAssign (CmmLocal res_r) arr + + emit $ mkAssign (CmmLocal res_r) (CmmReg arr) -- | Takes and offset in the destination array, the base address of -- the card table, and the number of elements affected (*not* the @@ -1748,22 +1749,6 @@ cardCmm :: DynFlags -> CmmExpr -> CmmExpr cardCmm dflags i = cmmUShrWord dflags i (mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags)) --- Convert a number of elements to a number of cards, rounding up -cardRoundUpCmm :: DynFlags -> CmmExpr -> CmmExpr -cardRoundUpCmm dflags i = - cardCmm dflags (cmmAddWord dflags i - (mkIntExpr dflags - ((1 `shiftL` mUT_ARR_PTRS_CARD_BITS dflags) - 1))) - -bytesToWordsRoundUpCmm :: DynFlags -> CmmExpr -> CmmExpr -bytesToWordsRoundUpCmm dflags e = - cmmQuotWord dflags (cmmAddWord dflags e - (mkIntExpr dflags - (wORD_SIZE dflags - 1))) (wordSize dflags) - -wordSize :: DynFlags -> CmmExpr -wordSize dflags = mkIntExpr dflags (wORD_SIZE dflags) - -- | Emit a call to @memcpy@. emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode () emitMemcpyCall dst src n align = do @@ -1789,19 +1774,6 @@ emitMemsetCall dst c n align = do MO_Memset [ dst, c, n, align ] --- | Emit a call to @allocate@. -emitAllocateCall :: LocalReg -> CmmExpr -> CmmExpr -> FCode () -emitAllocateCall res cap n = do - emitCCall - [ (res, AddrHint) ] - allocate - [ (cap, AddrHint) - , (n, NoHint) - ] - where - allocate = CmmLit (CmmLabel (mkForeignLabel (fsLit "allocate") Nothing - ForeignLabelInExternalPackage IsFunction)) - emitBSwapCall :: LocalReg -> CmmExpr -> Width -> FCode () emitBSwapCall res x width = do emitPrimCall |
