author    Simon Marlow <marlowsd@gmail.com>    2012-10-03 09:30:56 +0100
committer Simon Marlow <marlowsd@gmail.com>    2012-10-08 09:04:40 +0100
commit    a7c0387d20c1c9994d1100b14fbb8fb4e28a259e (patch)
tree      b95d0a512f951a4a463f1aa5178b0cd5c4fdb410 /compiler/codeGen/StgCmmHeap.hs
parent    aed37acd4d157791381800d5de960a2461bcbef3 (diff)
download  haskell-a7c0387d20c1c9994d1100b14fbb8fb4e28a259e.tar.gz
Produce new-style Cmm from the Cmm parser
The main change here is that the Cmm parser now allows high-level cmm code with argument-passing and function calls. For example:

    foo ( gcptr a, bits32 b )
    {
      if (b > 0) {
         // we can make tail calls passing arguments:
         jump stg_ap_0_fast(a);
      }
      return (x,y);
    }

More details on the new cmm syntax are in Note [Syntax of .cmm files] in CmmParse.y.

The old syntax is still more-or-less supported for those occasional code fragments that really need to explicitly manipulate the stack. However, there are a couple of differences: it is now obligatory to give a list of live GlobalRegs on every jump, e.g.

    jump %ENTRY_CODE(Sp(0)) [R1];

Again, more details are in Note [Syntax of .cmm files].

I have rewritten most of the .cmm files in the RTS into the new syntax, except for AutoApply.cmm, which is generated by the genapply program: that file could be generated in the new syntax instead, and would probably be better off for it, but I ran out of enthusiasm.

Some other changes in this batch:

 - The PrimOp calling convention is gone; primops now use the ordinary
   NativeNodeCall convention. This means that primops and "foreign import
   prim" code must be written in high-level cmm, but they can now take
   more than 10 arguments.

 - CmmSink now does constant-folding (should fix #7219).

 - .cmm files now go through the cmmPipeline, and as a result we generate
   better code in many cases. All the object files generated for the RTS
   .cmm files are now smaller. Performance should be better too, but I
   haven't measured it yet.

 - RET_DYN frames are removed from the RTS, and lots of code goes away.

 - We now have some more canned GC points to cover unboxed tuples with
   2-4 pointers, which will reduce code size a little.
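To illustrate the new primop convention, here is a minimal sketch of a primop written in the high-level syntax (a hypothetical example, not taken from this patch; the name stg_mySwapzh and its behaviour are invented): arguments are declared in the header and received via the ordinary NativeNodeCall convention, and results are returned as a tuple, so arity is no longer capped by a fixed set of argument registers.

    stg_mySwapzh ( gcptr a, gcptr b )
    {
        // hypothetical primop: arguments arrive via the ordinary
        // NativeNodeCall convention rather than the old PrimOp one
        return (b, a);
    }

Under the old convention, code like this would instead have had to read its arguments out of R1 and R2 by hand.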
Diffstat (limited to 'compiler/codeGen/StgCmmHeap.hs')
-rw-r--r--  compiler/codeGen/StgCmmHeap.hs  161
1 file changed, 84 insertions, 77 deletions
diff --git a/compiler/codeGen/StgCmmHeap.hs b/compiler/codeGen/StgCmmHeap.hs
index b7cca48f5a..c133ab00d4 100644
--- a/compiler/codeGen/StgCmmHeap.hs
+++ b/compiler/codeGen/StgCmmHeap.hs
@@ -11,6 +11,8 @@ module StgCmmHeap (
getHpRelOffset, hpRel,
entryHeapCheck, altHeapCheck, noEscapeHeapCheck, altHeapCheckReturnsTo,
+ heapStackCheckGen,
+ entryHeapCheck',
mkVirtHeapOffsets, mkVirtConstrOffsets,
mkStaticClosureFields, mkStaticClosure,
@@ -47,6 +49,7 @@ import FastString( mkFastString, fsLit )
import Util
import Control.Monad (when)
+import Data.Maybe (isJust)
-----------------------------------------------------------
-- Initialise dynamic heap objects
@@ -334,16 +337,28 @@ entryHeapCheck :: ClosureInfo
-> FCode ()
entryHeapCheck cl_info nodeSet arity args code
+ = entryHeapCheck' is_fastf node arity args code
+ where
+ node = case nodeSet of
+ Just r -> CmmReg (CmmLocal r)
+ Nothing -> CmmLit (CmmLabel $ staticClosureLabel cl_info)
+
+ is_fastf = case closureFunInfo cl_info of
+ Just (_, ArgGen _) -> False
+ _otherwise -> True
+
+-- | lower-level version for CmmParse
+entryHeapCheck' :: Bool -- is a known function pattern
+ -> CmmExpr -- expression for the closure pointer
+ -> Int -- Arity -- not same as len args b/c of voids
+ -> [LocalReg] -- Non-void args (empty for thunk)
+ -> FCode ()
+ -> FCode ()
+entryHeapCheck' is_fastf node arity args code
= do dflags <- getDynFlags
let is_thunk = arity == 0
- is_fastf = case closureFunInfo cl_info of
- Just (_, ArgGen _) -> False
- _otherwise -> True
args' = map (CmmReg . CmmLocal) args
- node = case nodeSet of
- Just r -> CmmReg (CmmLocal r)
- Nothing -> CmmLit (CmmLabel $ staticClosureLabel cl_info)
stg_gc_fun = CmmReg (CmmGlobal GCFun)
stg_gc_enter1 = CmmReg (CmmGlobal GCEnter1)
@@ -373,50 +388,6 @@ entryHeapCheck cl_info nodeSet arity args code
emitLabel loop_id
heapCheck True True (gc_call updfr_sz <*> mkBranch loop_id) code
-{-
- -- This code is slightly outdated now and we could easily keep the above
- -- GC methods. However, there may be some performance gains to be made by
- -- using more specialised GC entry points. Since the semi generic GCFun
- -- entry needs to check the node and figure out what registers to save...
- -- if we provided and used more specialised GC entry points then these
- -- runtime decisions could be turned into compile time decisions.
-
- args' = case fun of Just f -> f : args
- Nothing -> args
- arg_exprs = map (CmmReg . CmmLocal) args'
- gc_call updfr_sz
- | arity == 0 = mkJumpGC (CmmReg (CmmGlobal GCEnter1)) arg_exprs updfr_sz
- | otherwise =
- case gc_lbl args' of
- Just _lbl -> panic "StgCmmHeap.entryHeapCheck: not finished"
- -- mkJumpGC (CmmLit (CmmLabel (mkRtsCodeLabel lbl)))
- -- arg_exprs updfr_sz
- Nothing -> mkCall generic_gc (GC, GC) [] [] updfr_sz
-
- gc_lbl :: [LocalReg] -> Maybe FastString
- gc_lbl [reg]
- | isGcPtrType ty = Just (sLit "stg_gc_unpt_r1") -- "stg_gc_fun_1p"
- | isFloatType ty = case width of
- W32 -> Just (sLit "stg_gc_f1")
- W64 -> Just (sLit "stg_gc_d1")
- _other -> Nothing
- | width == wordWidth dflags = Just (mkGcLabel "stg_gc_unbx_r1")
- | width == W64 = Just (mkGcLabel "stg_gc_l1")
- | otherwise = Nothing
- where
- ty = localRegType reg
- width = typeWidth ty
-
- gc_lbl regs = gc_lbl_ptrs (map (isGcPtrType . localRegType) regs)
-
- gc_lbl_ptrs :: [Bool] -> Maybe FastString
- -- JD: TEMPORARY -- UNTIL THESE FUNCTIONS EXIST...
- --gc_lbl_ptrs [True,True] = Just (sLit "stg_gc_fun_2p")
- --gc_lbl_ptrs [True,True,True] = Just (sLit "stg_gc_fun_3p")
- gc_lbl_ptrs _ = Nothing
--}
-
-
-- ------------------------------------------------------------
-- A heap/stack check in a case alternative
@@ -445,7 +416,7 @@ altOrNoEscapeHeapCheck checkYield regs code = do
Nothing -> genericGC checkYield code
Just gc -> do
lret <- newLabelC
- let (off, copyin) = copyInOflow dflags NativeReturn (Young lret) regs
+ let (off, copyin) = copyInOflow dflags NativeReturn (Young lret) regs []
lcont <- newLabelC
emitOutOfLine lret (copyin <*> mkBranch lcont)
emitLabel lcont
@@ -475,23 +446,29 @@ cannedGCReturnsTo checkYield cont_on_stack gc regs lret off code
reg_exprs = map (CmmReg . CmmLocal) regs
-- Note [stg_gc arguments]
+ -- NB. we use the NativeReturn convention for passing arguments
+ -- to the canned heap-check routines, because we are in a case
+ -- alternative and hence the [LocalReg] was passed to us in the
+ -- NativeReturn convention.
gc_call dflags label sp
- | cont_on_stack = mkJumpReturnsTo dflags label GC reg_exprs lret off sp
- | otherwise = mkCallReturnsTo dflags label GC reg_exprs lret off sp (0,[])
+ | cont_on_stack
+ = mkJumpReturnsTo dflags label NativeReturn reg_exprs lret off sp
+ | otherwise
+ = mkCallReturnsTo dflags label NativeReturn reg_exprs lret off sp []
genericGC :: Bool -> FCode a -> FCode a
genericGC checkYield code
= do updfr_sz <- getUpdFrameOff
lretry <- newLabelC
emitLabel lretry
- call <- mkCall generic_gc (GC, GC) [] [] updfr_sz (0,[])
+ call <- mkCall generic_gc (GC, GC) [] [] updfr_sz []
heapCheck False checkYield (call <*> mkBranch lretry) code
cannedGCEntryPoint :: DynFlags -> [LocalReg] -> Maybe CmmExpr
cannedGCEntryPoint dflags regs
- = case regs of
+ = case map localRegType regs of
[] -> Just (mkGcLabel "stg_gc_noregs")
- [reg]
+ [ty]
| isGcPtrType ty -> Just (mkGcLabel "stg_gc_unpt_r1")
| isFloatType ty -> case width of
W32 -> Just (mkGcLabel "stg_gc_f1")
@@ -502,8 +479,19 @@ cannedGCEntryPoint dflags regs
| width == W64 -> Just (mkGcLabel "stg_gc_l1")
| otherwise -> Nothing
where
- ty = localRegType reg
width = typeWidth ty
+ [ty1,ty2]
+ | isGcPtrType ty1
+ && isGcPtrType ty2 -> Just (mkGcLabel "stg_gc_pp")
+ [ty1,ty2,ty3]
+ | isGcPtrType ty1
+ && isGcPtrType ty2
+ && isGcPtrType ty3 -> Just (mkGcLabel "stg_gc_ppp")
+ [ty1,ty2,ty3,ty4]
+ | isGcPtrType ty1
+ && isGcPtrType ty2
+ && isGcPtrType ty3
+ && isGcPtrType ty4 -> Just (mkGcLabel "stg_gc_pppp")
_otherwise -> Nothing
-- Note [stg_gc arguments]
@@ -538,51 +526,70 @@ heapCheck checkStack checkYield do_gc code
= getHeapUsage $ \ hpHw ->
-- Emit heap checks, but be sure to do it lazily so
-- that the conditionals on hpHw don't cause a black hole
- do { codeOnly $ do_checks checkStack checkYield hpHw do_gc
+ do { dflags <- getDynFlags
+ ; let mb_alloc_bytes
+ | hpHw > 0 = Just (mkIntExpr dflags (hpHw * (wORD_SIZE dflags)))
+ | otherwise = Nothing
+ stk_hwm | checkStack = Just (CmmLit CmmHighStackMark)
+ | otherwise = Nothing
+ ; codeOnly $ do_checks stk_hwm checkYield mb_alloc_bytes do_gc
; tickyAllocHeap hpHw
; doGranAllocate hpHw
; setRealHp hpHw
; code }
-do_checks :: Bool -- Should we check the stack?
+heapStackCheckGen :: Maybe CmmExpr -> Maybe CmmExpr -> FCode ()
+heapStackCheckGen stk_hwm mb_bytes
+ = do updfr_sz <- getUpdFrameOff
+ lretry <- newLabelC
+ emitLabel lretry
+ call <- mkCall generic_gc (GC, GC) [] [] updfr_sz []
+ do_checks stk_hwm False mb_bytes (call <*> mkBranch lretry)
+
+do_checks :: Maybe CmmExpr -- Should we check the stack?
-> Bool -- Should we check for preemption?
- -> WordOff -- Heap headroom
+ -> Maybe CmmExpr -- Heap headroom (bytes)
-> CmmAGraph -- What to do on failure
-> FCode ()
-do_checks checkStack checkYield alloc do_gc = do
+do_checks mb_stk_hwm checkYield mb_alloc_lit do_gc = do
dflags <- getDynFlags
+ gc_id <- newLabelC
+
let
- alloc_lit = mkIntExpr dflags (alloc * wORD_SIZE dflags) -- Bytes
- bump_hp = cmmOffsetExprB dflags (CmmReg hpReg) alloc_lit
+ Just alloc_lit = mb_alloc_lit
+
+ bump_hp = cmmOffsetExprB dflags (CmmReg hpReg) alloc_lit
-- Sp overflow if (Sp - CmmHighStack < SpLim)
- sp_oflo = CmmMachOp (mo_wordULt dflags)
+ sp_oflo sp_hwm =
+ CmmMachOp (mo_wordULt dflags)
[CmmMachOp (MO_Sub (typeWidth (cmmRegType dflags spReg)))
- [CmmReg spReg, CmmLit CmmHighStackMark],
+ [CmmReg spReg, sp_hwm],
CmmReg spLimReg]
-- Hp overflow if (Hp > HpLim)
-- (Hp has been incremented by now)
-- HpLim points to the LAST WORD of valid allocation space.
hp_oflo = CmmMachOp (mo_wordUGt dflags)
- [CmmReg hpReg, CmmReg (CmmGlobal HpLim)]
-
- -- Yielding if HpLim == 0
- yielding = CmmMachOp (mo_wordEq dflags)
- [CmmReg (CmmGlobal HpLim), CmmLit (zeroCLit dflags)]
+ [CmmReg hpReg, CmmReg (CmmGlobal HpLim)]
alloc_n = mkAssign (CmmGlobal HpAlloc) alloc_lit
- gc_id <- newLabelC
- when checkStack $ do
- emit =<< mkCmmIfGoto sp_oflo gc_id
+ case mb_stk_hwm of
+ Nothing -> return ()
+ Just stk_hwm -> emit =<< mkCmmIfGoto (sp_oflo stk_hwm) gc_id
- if (alloc /= 0)
+ if (isJust mb_alloc_lit)
then do
- emitAssign hpReg bump_hp
- emit =<< mkCmmIfThen hp_oflo (alloc_n <*> mkBranch gc_id)
+ emitAssign hpReg bump_hp
+ emit =<< mkCmmIfThen hp_oflo (alloc_n <*> mkBranch gc_id)
else do
- when (not (dopt Opt_OmitYields dflags) && checkYield) (emit =<< mkCmmIfGoto yielding gc_id)
+ when (not (dopt Opt_OmitYields dflags) && checkYield) $ do
+ -- Yielding if HpLim == 0
+ let yielding = CmmMachOp (mo_wordEq dflags)
+ [CmmReg (CmmGlobal HpLim),
+ CmmLit (zeroCLit dflags)]
+ emit =<< mkCmmIfGoto yielding gc_id
emitOutOfLine gc_id $
do_gc -- this is expected to jump back somewhere
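For reference, the code that the new do_checks emits for a function needing both checks corresponds roughly to the following schematic cmm (BYTES stands for the computed allocation in bytes and HWM for the stack high-water mark passed as mb_stk_hwm; neither name appears in the source):

    // schematic only: stack check first, then heap check
    if ((Sp - HWM) < SpLim) { goto gc; }           // Sp overflow?
    Hp = Hp + BYTES;
    if (Hp > HpLim) { HpAlloc = BYTES; goto gc; }  // Hp overflow?
    // fall through to the actual code; the "gc" block is emitted
    // out of line and is expected to jump back here afterwards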