diff options
60 files changed, 4610 insertions, 2610 deletions
diff --git a/compiler/basicTypes/UniqSupply.lhs b/compiler/basicTypes/UniqSupply.lhs index bb40be7ac1..f3fb28ac21 100644 --- a/compiler/basicTypes/UniqSupply.lhs +++ b/compiler/basicTypes/UniqSupply.lhs @@ -109,7 +109,7 @@ takeUniqFromSupply (MkSplitUniqSupply n s1 _) = (mkUniqueGrimily (iBox n), s1) \begin{code} -- | A monad which just gives the ability to obtain 'Unique's -newtype UniqSM result = USM { unUSM :: UniqSupply -> (result, UniqSupply) } +newtype UniqSM result = USM { unUSM :: UniqSupply -> (# result, UniqSupply #) } instance Monad UniqSM where return = returnUs @@ -118,21 +118,21 @@ instance Monad UniqSM where instance Functor UniqSM where fmap f (USM x) = USM (\us -> case x us of - (r, us') -> (f r, us')) + (# r, us' #) -> (# f r, us' #)) instance Applicative UniqSM where pure = returnUs (USM f) <*> (USM x) = USM $ \us -> case f us of - (ff, us') -> case x us' of - (xx, us'') -> (ff xx, us'') + (# ff, us' #) -> case x us' of + (# xx, us'' #) -> (# ff xx, us'' #) -- | Run the 'UniqSM' action, returning the final 'UniqSupply' initUs :: UniqSupply -> UniqSM a -> (a, UniqSupply) -initUs init_us m = case unUSM m init_us of { (r,us) -> (r,us) } +initUs init_us m = case unUSM m init_us of { (# r, us #) -> (r,us) } -- | Run the 'UniqSM' action, discarding the final 'UniqSupply' initUs_ :: UniqSupply -> UniqSM a -> a -initUs_ init_us m = case unUSM m init_us of { (r, _) -> r } +initUs_ init_us m = case unUSM m init_us of { (# r, _ #) -> r } {-# INLINE thenUs #-} {-# INLINE lazyThenUs #-} @@ -142,27 +142,30 @@ initUs_ init_us m = case unUSM m init_us of { (r, _) -> r } @thenUs@ is where we split the @UniqSupply@. 
\begin{code} +liftUSM :: UniqSM a -> UniqSupply -> (a, UniqSupply) +liftUSM (USM m) us = case m us of (# a, us' #) -> (a, us') + instance MonadFix UniqSM where - mfix m = USM (\us -> let (r,us') = unUSM (m r) us in (r,us')) + mfix m = USM (\us -> let (r,us') = liftUSM (m r) us in (# r,us' #)) thenUs :: UniqSM a -> (a -> UniqSM b) -> UniqSM b thenUs (USM expr) cont = USM (\us -> case (expr us) of - (result, us') -> unUSM (cont result) us') + (# result, us' #) -> unUSM (cont result) us') lazyThenUs :: UniqSM a -> (a -> UniqSM b) -> UniqSM b -lazyThenUs (USM expr) cont - = USM (\us -> let (result, us') = expr us in unUSM (cont result) us') +lazyThenUs expr cont + = USM (\us -> let (result, us') = liftUSM expr us in unUSM (cont result) us') thenUs_ :: UniqSM a -> UniqSM b -> UniqSM b thenUs_ (USM expr) (USM cont) - = USM (\us -> case (expr us) of { (_, us') -> cont us' }) + = USM (\us -> case (expr us) of { (# _, us' #) -> cont us' }) returnUs :: a -> UniqSM a -returnUs result = USM (\us -> (result, us)) +returnUs result = USM (\us -> (# result, us #)) getUs :: UniqSM UniqSupply -getUs = USM (\us -> splitUniqSupply us) +getUs = USM (\us -> case splitUniqSupply us of (us1,us2) -> (# us1, us2 #)) -- | A monad for generating unique identifiers class Monad m => MonadUnique m where @@ -177,17 +180,17 @@ class Monad m => MonadUnique m where getUniquesM = liftM uniqsFromSupply getUniqueSupplyM instance MonadUnique UniqSM where - getUniqueSupplyM = USM (\us -> splitUniqSupply us) + getUniqueSupplyM = getUs getUniqueM = getUniqueUs getUniquesM = getUniquesUs getUniqueUs :: UniqSM Unique getUniqueUs = USM (\us -> case splitUniqSupply us of - (us1,us2) -> (uniqFromSupply us1, us2)) + (us1,us2) -> (# uniqFromSupply us1, us2 #)) getUniquesUs :: UniqSM [Unique] getUniquesUs = USM (\us -> case splitUniqSupply us of - (us1,us2) -> (uniqsFromSupply us1, us2)) + (us1,us2) -> (# uniqsFromSupply us1, us2 #)) \end{code} \begin{code} diff --git a/compiler/cmm/BlockId.hs 
b/compiler/cmm/BlockId.hs index feeacb553d..4aedcb7074 100644 --- a/compiler/cmm/BlockId.hs +++ b/compiler/cmm/BlockId.hs @@ -15,7 +15,7 @@ import Outputable import Unique import Compiler.Hoopl as Hoopl hiding (Unique) -import Compiler.Hoopl.GHC (uniqueToInt, uniqueToLbl, lblToUnique) +import Compiler.Hoopl.Internals (uniqueToLbl) ---------------------------------------------------------------- --- Block Ids, their environments, and their sets @@ -31,15 +31,9 @@ compilation unit in which it appears. type BlockId = Hoopl.Label -instance Uniquable BlockId where - getUnique label = getUnique (uniqueToInt $ lblToUnique label) - mkBlockId :: Unique -> BlockId mkBlockId unique = uniqueToLbl $ intToUnique $ getKey unique -instance Outputable BlockId where - ppr label = ppr (getUnique label) - retPtLbl :: BlockId -> CLabel retPtLbl label = mkReturnPtLabel $ getUnique label diff --git a/compiler/cmm/Cmm.hs b/compiler/cmm/Cmm.hs index f1318c1dc9..1c77409e49 100644 --- a/compiler/cmm/Cmm.hs +++ b/compiler/cmm/Cmm.hs @@ -32,9 +32,9 @@ module Cmm ( import CLabel import BlockId import CmmNode -import OptimizationFuel as F import SMRep import CmmExpr +import UniqSupply import Compiler.Hoopl import Data.Word ( Word8 ) @@ -69,8 +69,6 @@ type CmmGroup = GenCmmGroup CmmStatics CmmTopInfo CmmGraph -- (a) C--, i.e. populated with various C-- constructs -- (Cmm and RawCmm in OldCmm.hs) -- (b) Native code, populated with data/instructions --- --- A second family of instances based on Hoopl is in Cmm.hs. -- | A top-level chunk, abstracted over the type of the contents of -- the basic blocks (Cmm or instructions are the likely instantiations). 
@@ -95,19 +93,23 @@ data GenCmmGraph n = CmmGraph { g_entry :: BlockId, g_graph :: Graph n C C } type CmmBlock = Block CmmNode C C type CmmReplGraph e x = GenCmmReplGraph CmmNode e x -type GenCmmReplGraph n e x = FuelUniqSM (Maybe (Graph n e x)) -type CmmFwdRewrite f = FwdRewrite FuelUniqSM CmmNode f -type CmmBwdRewrite f = BwdRewrite FuelUniqSM CmmNode f +type GenCmmReplGraph n e x = UniqSM (Maybe (Graph n e x)) +type CmmFwdRewrite f = FwdRewrite UniqSM CmmNode f +type CmmBwdRewrite f = BwdRewrite UniqSM CmmNode f ----------------------------------------------------------------------------- -- Info Tables ----------------------------------------------------------------------------- -data CmmTopInfo = TopInfo {info_tbl :: CmmInfoTable, stack_info :: CmmStackInfo} +data CmmTopInfo = TopInfo { info_tbl :: CmmInfoTable + , stack_info :: CmmStackInfo } data CmmStackInfo = StackInfo { - arg_space :: ByteOff, -- XXX: comment? + arg_space :: ByteOff, + -- number of bytes of arguments on the stack on entry to the + -- the proc. This is filled in by StgCmm.codeGen, and used + -- by the stack allocator later. updfr_space :: Maybe ByteOff -- XXX: comment? } diff --git a/compiler/cmm/CmmBuildInfoTables.hs b/compiler/cmm/CmmBuildInfoTables.hs index 011947f55d..ebe755219b 100644 --- a/compiler/cmm/CmmBuildInfoTables.hs +++ b/compiler/cmm/CmmBuildInfoTables.hs @@ -14,169 +14,53 @@ {-# OPTIONS_GHC -fno-warn-warnings-deprecations #-} module CmmBuildInfoTables ( CAFSet, CAFEnv, cafAnal, localCAFInfo, mkTopCAFInfo - , setInfoTableSRT, setInfoTableStackMap + , setInfoTableSRT , TopSRT, emptySRT, srtToData , bundleCAFs - , lowerSafeForeignCalls - , cafTransfers, liveSlotTransfers - , mkLiveness ) + , cafTransfers ) where #include "HsVersions.h" -- These should not be imported here! 
-import StgCmmForeign import StgCmmUtils -import Constants import Digraph import qualified Prelude as P import Prelude hiding (succ) -import Util import BlockId import Bitmap import CLabel import Cmm import CmmUtils -import CmmStackLayout -import Module -import FastString -import ForeignCall import IdInfo import Data.List import Maybes -import MkGraph as M -import Control.Monad import Name -import OptimizationFuel import Outputable import SMRep import UniqSupply -import Compiler.Hoopl +import Hoopl import Data.Map (Map) import qualified Data.Map as Map -import qualified FiniteMap as Map +import Data.Set (Set) +import qualified Data.Set as Set + +foldSet :: (a -> b -> b) -> b -> Set a -> b +#if __GLASGOW_HASKELL__ < 704 +foldSet = Set.fold +#else +foldSet = Set.foldr +#endif ---------------------------------------------------------------- -- Building InfoTables ----------------------------------------------------------------------- --- Stack Maps - --- Given a block ID, we return a representation of the layout of the stack, --- as suspended before entering that block. --- (For a return site to a function call, the layout does not include the --- parameter passing area (or the "return address" on the stack)). --- If the element is `Nothing`, then it represents a word of the stack that --- does not contain a live pointer. --- If the element is `Just` a register, then it represents a live spill slot --- for a pointer; we assume that a pointer is the size of a word. --- The head of the list represents the young end of the stack where the infotable --- pointer for the block `Bid` is stored. --- The infotable pointer itself is not included in the list. --- Call areas are also excluded from the list: besides the stuff in the update --- frame (and the return infotable), call areas should never be live across --- function calls. - --- RTS Invariant: All pointers must be word-aligned because each bit in the bitmap --- represents a word. 
Consequently, we have to be careful when we see a live slot --- on the stack: if we have packed multiple sub-word values into a word, --- we have to make sure that we only mark the entire word as a non-pointer. - --- Also, don't forget to stop at the old end of the stack (oldByte), --- which may differ depending on whether there is an update frame. - -type RegSlotInfo - = ( Int -- Offset from oldest byte of Old area - , LocalReg -- The register - , Int) -- Width of the register - -live_ptrs :: ByteOff -> BlockEnv SubAreaSet -> AreaMap -> BlockId -> StackLayout -live_ptrs oldByte slotEnv areaMap bid = - -- pprTrace "live_ptrs for" (ppr bid <+> text (show oldByte ++ "-" ++ show youngByte) <+> - -- ppr liveSlots) $ - -- pprTrace ("stack layout for " ++ show bid ++ ": ") (ppr res) $ res - res - where - res = mkLiveness (reverse $ slotsToList youngByte liveSlots []) - - slotsToList :: Int -> [RegSlotInfo] -> [Maybe LocalReg] -> [Maybe LocalReg] - -- n starts at youngByte and is decremented down to oldByte - -- Returns a list, one element per word, with - -- (Just r) meaning 'pointer register r is saved here', - -- Nothing meaning 'non-pointer or empty' - - slotsToList n [] results | n == oldByte = results -- at old end of stack frame - - slotsToList n (s : _) _ | n == oldByte = - pprPanic "slot left off live_ptrs" (ppr s <+> ppr oldByte <+> - ppr n <+> ppr liveSlots <+> ppr youngByte) - - slotsToList n _ _ | n < oldByte = - panic "stack slots not allocated on word boundaries?" 
- - slotsToList n l@((n', r, w) : rst) results = - if n == (n' + w) then -- slot's young byte is at n - ASSERT (not (isPtr r) || - (n `mod` wORD_SIZE == 0 && w == wORD_SIZE)) -- ptrs must be aligned - slotsToList next (dropWhile (non_ptr_younger_than next) rst) - (stack_rep : results) - else slotsToList next (dropWhile (non_ptr_younger_than next) l) - (Nothing : results) - where next = n - wORD_SIZE - stack_rep = if isPtr r then Just r else Nothing - - slotsToList n [] results = slotsToList (n - wORD_SIZE) [] (Nothing : results) - - non_ptr_younger_than next (n', r, w) = - n' + w > next && - ASSERT (not (isPtr r)) - True - isPtr = isGcPtrType . localRegType - - liveSlots :: [RegSlotInfo] - liveSlots = sortBy (\ (off,_,_) (off',_,_) -> compare off' off) - (Map.foldRightWithKey (\_ -> flip $ foldl add_slot) [] slots) - - add_slot :: [RegSlotInfo] -> SubArea -> [RegSlotInfo] - add_slot rst (a@(RegSlot r@(LocalReg _ ty)), off, w) = - if off == w && widthInBytes (typeWidth ty) == w then - (expectJust "add_slot" (Map.lookup a areaMap), r, w) : rst - else panic "live_ptrs: only part of a variable live at a proc point" - add_slot rst (CallArea Old, _, _) = - rst -- the update frame (or return infotable) should be live - -- would be nice to check that only that part of the callarea is live... - add_slot rst ((CallArea _), _, _) = - rst - -- JD: THIS ISN'T CURRENTLY A CORRECTNESS PROBLEM, BUT WE SHOULD REALLY - -- MAKE LIVENESS INFO AROUND CALLS MORE PRECISE -- FOR NOW, A 32-BIT - -- FLOAT PADS OUT TO 64 BITS, BUT WE ASSUME THE WHOLE PARAMETER-PASSING - -- AREA IS LIVE (WHICH IT ISN'T...). WE SHOULD JUST PUT THE LIVE AREAS - -- IN THE CALL NODES, WHICH SHOULD EVENTUALLY HAVE LIVE REGISTER AS WELL, - -- SO IT'S ALL GOING IN THE SAME DIRECTION. 
- -- pprPanic "CallAreas must not be live across function calls" (ppr bid <+> ppr c) - - slots :: SubAreaSet -- The SubAreaSet for 'bid' - slots = expectJust "live_ptrs slots" $ mapLookup bid slotEnv - youngByte = expectJust "live_ptrs bid_pos" $ Map.lookup (CallArea (Young bid)) areaMap - --- Construct the stack maps for a procedure _if_ it needs an infotable. --- When wouldn't a procedure need an infotable? If it is a procpoint that --- is not the successor of a call. -setInfoTableStackMap :: SlotEnv -> AreaMap -> CmmDecl -> CmmDecl -setInfoTableStackMap slotEnv areaMap - t@(CmmProc (TopInfo {stack_info=StackInfo {updfr_space = Just updfr_off}}) _ - (CmmGraph {g_entry = eid})) - = updInfo (const (live_ptrs updfr_off slotEnv areaMap eid)) id t -setInfoTableStackMap _ _ t = t - - - ------------------------------------------------------------------------ -- SRTs -- WE NEED AN EXAMPLE HERE. @@ -191,14 +75,14 @@ setInfoTableStackMap _ _ t = t ----------------------------------------------------------------------- -- Finding the CAFs used by a procedure -type CAFSet = Map CLabel () +type CAFSet = Set CLabel type CAFEnv = BlockEnv CAFSet -- First, an analysis to find live CAFs. 
cafLattice :: DataflowLattice CAFSet -cafLattice = DataflowLattice "live cafs" Map.empty add - where add _ (OldFact old) (NewFact new) = case old `Map.union` new of - new' -> (changeIf $ Map.size new' > Map.size old, new') +cafLattice = DataflowLattice "live cafs" Set.empty add + where add _ (OldFact old) (NewFact new) = case old `Set.union` new of + new' -> (changeIf $ Set.size new' > Set.size old, new') cafTransfers :: BwdTransfer CmmNode CAFSet cafTransfers = mkBTransfer3 first middle last @@ -210,11 +94,11 @@ cafTransfers = mkBTransfer3 first middle last CmmLit (CmmLabelOff c _) -> add c set CmmLit (CmmLabelDiffOff c1 c2 _) -> add c1 $ add c2 set _ -> set - add l s = if hasCAF l then Map.insert (toClosureLbl l) () s + add l s = if hasCAF l then Set.insert (toClosureLbl l) s else s -cafAnal :: CmmGraph -> FuelUniqSM CAFEnv -cafAnal g = liftM snd $ dataflowPassBwd g [] $ analBwd cafLattice cafTransfers +cafAnal :: CmmGraph -> CAFEnv +cafAnal g = dataflowAnalBwd g [] $ analBwd cafLattice cafTransfers ----------------------------------------------------------------------- -- Building the SRTs @@ -264,15 +148,15 @@ srtToData srt = [CmmData RelocatableReadOnlyData (Statics (lbl srt) tbl)] -- we make sure they're all close enough to the bottom of the table that the -- bitmap will be able to cover all of them. buildSRTs :: TopSRT -> Map CLabel CAFSet -> CAFSet -> - FuelUniqSM (TopSRT, Maybe CmmDecl, C_SRT) + UniqSM (TopSRT, Maybe CmmDecl, C_SRT) buildSRTs topSRT topCAFMap cafs = - do let liftCAF lbl () z = -- get CAFs for functions without static closures - case Map.lookup lbl topCAFMap of Just cafs -> z `Map.union` cafs - Nothing -> Map.insert lbl () z + do let liftCAF lbl z = -- get CAFs for functions without static closures + case Map.lookup lbl topCAFMap of Just cafs -> z `Set.union` cafs + Nothing -> Set.insert lbl z -- For each label referring to a function f without a static closure, -- replace it with the CAFs that are reachable from f. 
sub_srt topSRT localCafs = - let cafs = Map.keys (Map.foldRightWithKey liftCAF Map.empty localCafs) + let cafs = Set.elems (foldSet liftCAF Set.empty localCafs) mkSRT topSRT = do localSRTs <- procpointSRT (lbl topSRT) (elt_map topSRT) cafs return (topSRT, localSRTs) @@ -307,7 +191,7 @@ buildSRTs topSRT topCAFMap cafs = -- Construct an SRT bitmap. -- Adapted from simpleStg/SRT.lhs, which expects Id's. procpointSRT :: CLabel -> Map CLabel Int -> [CLabel] -> - FuelUniqSM (Maybe CmmDecl, C_SRT) + UniqSM (Maybe CmmDecl, C_SRT) procpointSRT _ _ [] = return (Nothing, NoC_SRT) procpointSRT top_srt top_table entries = @@ -325,7 +209,7 @@ maxBmpSize :: Int maxBmpSize = widthInBits wordWidth `div` 2 -- Adapted from codeGen/StgCmmUtils, which converts from SRT to C_SRT. -to_SRT :: CLabel -> Int -> Int -> Bitmap -> FuelUniqSM (Maybe CmmDecl, C_SRT) +to_SRT :: CLabel -> Int -> Int -> Bitmap -> UniqSM (Maybe CmmDecl, C_SRT) to_SRT top_srt off len bmp | len > maxBmpSize || bmp == [fromIntegral srt_escape] = do id <- getUniqueM @@ -373,30 +257,30 @@ mkTopCAFInfo localCAFs = foldl addToTop Map.empty g Map.insert l (flatten env cafset) env addToTop env (CyclicSCC nodes) = let (lbls, cafsets) = unzip nodes - cafset = lbls `Map.deleteList` foldl Map.union Map.empty cafsets + cafset = foldr Set.delete (foldl Set.union Set.empty cafsets) lbls in foldl (\env l -> Map.insert l (flatten env cafset) env) env lbls - flatten env cafset = Map.foldRightWithKey (lookup env) Map.empty cafset - lookup env caf () cafset' = - case Map.lookup caf env of Just cafs -> Map.foldRightWithKey add cafset' cafs - Nothing -> add caf () cafset' - add caf () cafset' = Map.insert caf () cafset' + flatten env cafset = foldSet (lookup env) Set.empty cafset + lookup env caf cafset' = + case Map.lookup caf env of Just cafs -> foldSet add cafset' cafs + Nothing -> add caf cafset' + add caf cafset' = Set.insert caf cafset' g = stronglyConnCompFromEdgedVertices - (map (\n@(l, cafs) -> (n, l, Map.keys cafs)) localCAFs) + 
(map (\n@(l, cafs) -> (n, l, Set.elems cafs)) localCAFs) -- Bundle the CAFs used at a procpoint. bundleCAFs :: CAFEnv -> CmmDecl -> (CAFSet, CmmDecl) bundleCAFs cafEnv t@(CmmProc _ _ (CmmGraph {g_entry=entry})) = (expectJust "bundleCAFs" (mapLookup entry cafEnv), t) -bundleCAFs _ t = (Map.empty, t) +bundleCAFs _ t = (Set.empty, t) -- Construct the SRTs for the given procedure. setInfoTableSRT :: Map CLabel CAFSet -> TopSRT -> (CAFSet, CmmDecl) -> - FuelUniqSM (TopSRT, [CmmDecl]) + UniqSM (TopSRT, [CmmDecl]) setInfoTableSRT topCAFMap topSRT (cafs, t) = setSRT cafs topCAFMap topSRT t setSRT :: CAFSet -> Map CLabel CAFSet -> TopSRT -> - CmmDecl -> FuelUniqSM (TopSRT, [CmmDecl]) + CmmDecl -> UniqSM (TopSRT, [CmmDecl]) setSRT cafs topCAFMap topSRT t = do (topSRT, cafTable, srt) <- buildSRTs topSRT topCAFMap cafs let t' = updInfo id (const srt) t @@ -418,91 +302,3 @@ updInfoTbl toVars toSrt info_tbl@(CmmInfoTable {}) StackRep ls -> StackRep (toVars ls) other -> other } updInfoTbl _ _ t@CmmNonInfoTable = t - ----------------------------------------------------------------- --- Safe foreign calls: We need to insert the code that suspends and resumes --- the thread before and after a safe foreign call. --- Why do we do this so late in the pipeline? --- Because we need this code to appear without interrruption: you can't rely on the --- value of the stack pointer between the call and resetting the thread state; --- you need to have an infotable on the young end of the stack both when --- suspending the thread and making the foreign call. --- All of this is much easier if we insert the suspend and resume calls here. - --- At the same time, we prepare for the stages of the compiler that --- build the proc points. We have to do this at the same time because --- the safe foreign calls need special treatment with respect to infotables. --- A safe foreign call needs an infotable even though it isn't --- a procpoint. 
The following datatype captures the information --- needed to generate the infotables along with the Cmm data and procedures. - --- JD: Why not do this while splitting procedures? -lowerSafeForeignCalls :: AreaMap -> CmmDecl -> FuelUniqSM CmmDecl -lowerSafeForeignCalls _ t@(CmmData _ _) = return t -lowerSafeForeignCalls areaMap (CmmProc info l g@(CmmGraph {g_entry=entry})) = do - let block b mblocks = mblocks >>= lowerSafeCallBlock entry areaMap b - blocks <- foldGraphBlocks block (return mapEmpty) g - return $ CmmProc info l (ofBlockMap entry blocks) - --- If the block ends with a safe call in the block, lower it to an unsafe --- call (with appropriate saves and restores before and after). -lowerSafeCallBlock :: BlockId -> AreaMap -> CmmBlock -> BlockEnv CmmBlock - -> FuelUniqSM (BlockEnv CmmBlock) -lowerSafeCallBlock entry areaMap b blocks = - case blockToNodeList b of - (JustC (CmmEntry id), m, JustC l@(CmmForeignCall {})) -> lowerSafeForeignCall entry areaMap blocks id m l - _ -> return $ insertBlock b blocks - --- Late in the code generator, we want to insert the code necessary --- to lower a safe foreign call to a sequence of unsafe calls. 
-lowerSafeForeignCall :: BlockId -> AreaMap -> BlockEnv CmmBlock -> BlockId -> [CmmNode O O] -> CmmNode O C - -> FuelUniqSM (BlockEnv CmmBlock) -lowerSafeForeignCall entry areaMap blocks bid m - (CmmForeignCall {tgt=tgt, res=rs, args=as, succ=succ, updfr = updfr_off, intrbl = intrbl}) = - do let newTemp rep = getUniqueM >>= \u -> return (LocalReg u rep) - -- Both 'id' and 'new_base' are KindNonPtr because they're - -- RTS-only objects and are not subject to garbage collection - id <- newTemp bWord - new_base <- newTemp (cmmRegType (CmmGlobal BaseReg)) - let (caller_save, caller_load) = callerSaveVolatileRegs - load_tso <- newTemp gcWord -- TODO FIXME NOW - load_stack <- newTemp gcWord -- TODO FIXME NOW - let (<**>) = (M.<*>) - let suspendThread = foreignLbl "suspendThread" - resumeThread = foreignLbl "resumeThread" - foreignLbl name = CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit name))) - suspend = saveThreadState <**> - caller_save <**> - mkUnsafeCall (ForeignTarget suspendThread - (ForeignConvention CCallConv [AddrHint, NoHint] [AddrHint])) - [id] [CmmReg (CmmGlobal BaseReg), CmmLit (CmmInt (fromIntegral (fromEnum intrbl)) wordWidth)] - midCall = mkUnsafeCall tgt rs as - resume = mkUnsafeCall (ForeignTarget resumeThread - (ForeignConvention CCallConv [AddrHint] [AddrHint])) - [new_base] [CmmReg (CmmLocal id)] <**> - -- Assign the result to BaseReg: we - -- might now have a different Capability! - mkAssign (CmmGlobal BaseReg) (CmmReg (CmmLocal new_base)) <**> - caller_load <**> - loadThreadState load_tso load_stack - -- We have to save the return value on the stack because its next use - -- may appear in a different procedure due to procpoint splitting... - saveRetVals = foldl (<**>) emptyAGraph $ map (M.mkMiddle . 
spill) rs - spill r = CmmStore (regSlot r) (CmmReg $ CmmLocal r) - regSlot r@(LocalReg _ _) = CmmRegOff (CmmGlobal Sp) (sp_off - offset) - where offset = w + expectJust "lowerForeign" (Map.lookup (RegSlot r) areaMap) - sp_off = wORD_SIZE + expectJust "lowerForeign" (Map.lookup (CallArea area) areaMap) - area = if succ == entry then Old else Young succ - w = widthInBytes $ typeWidth $ localRegType r - -- Note: The successor must be a procpoint, and we have already split, - -- so we use a jump, not a branch. - succLbl = CmmLit (CmmLabel (infoTblLbl succ)) - jump = CmmCall { cml_target = succLbl, cml_cont = Nothing - , cml_args = widthInBytes wordWidth ,cml_ret_args = 0 - , cml_ret_off = updfr_off} - graph' <- liftUniq $ labelAGraph bid $ catAGraphs (map M.mkMiddle m) <**> - suspend <**> midCall <**> - resume <**> saveRetVals <**> M.mkLast jump - return $ blocks `mapUnion` toBlockMap graph' -lowerSafeForeignCall _ _ _ _ _ _ = panic "lowerSafeForeignCall was passed something else" - diff --git a/compiler/cmm/CmmCallConv.hs b/compiler/cmm/CmmCallConv.hs index d3d9ba4b41..484e89cd9b 100644 --- a/compiler/cmm/CmmCallConv.hs +++ b/compiler/cmm/CmmCallConv.hs @@ -53,7 +53,6 @@ assignArgumentsPos conv arg_ty reps = assignments ([_], PrimOpReturn) -> allRegs (_, PrimOpReturn) -> getRegsWithNode (_, Slow) -> noRegs - _ -> pprPanic "Unknown calling convention" (ppr conv) -- The calling conventions first assign arguments to registers, -- then switch to the stack when we first run out of registers -- (even if there are still available registers for args of a different type). 
diff --git a/compiler/cmm/CmmCommonBlockElim.hs b/compiler/cmm/CmmCommonBlockElim.hs index abbfd01156..eafa2a00f3 100644 --- a/compiler/cmm/CmmCommonBlockElim.hs +++ b/compiler/cmm/CmmCommonBlockElim.hs @@ -13,22 +13,22 @@ where import BlockId import Cmm import CmmUtils +import CmmContFlowOpt import Prelude hiding (iterate, succ, unzip, zip) -import Compiler.Hoopl +import Hoopl hiding (ChangeFlag) import Data.Bits import qualified Data.List as List import Data.Word -import FastString -import Control.Monad import Outputable import UniqFM -import Unique my_trace :: String -> SDoc -> a -> a my_trace = if False then pprTrace else \_ _ a -> a --- Eliminate common blocks: +-- ----------------------------------------------------------------------------- +-- Eliminate common blocks + -- If two blocks are identical except for the label on the first node, -- then we can eliminate one of the blocks. To ensure that the semantics -- of the program are preserved, we have to rewrite each predecessor of the @@ -42,59 +42,50 @@ my_trace = if False then pprTrace else \_ _ a -> a -- TODO: Use optimization fuel elimCommonBlocks :: CmmGraph -> CmmGraph -elimCommonBlocks g = - upd_graph g . 
snd $ iterate common_block reset hashed_blocks - (emptyUFM, mapEmpty) - where hashed_blocks = map (\b -> (hash_block b, b)) (reverse (postorderDfs g)) - reset (_, subst) = (emptyUFM, subst) +elimCommonBlocks g = replaceLabels env g + where + env = iterate hashed_blocks mapEmpty + hashed_blocks = map (\b -> (hash_block b, b)) $ postorderDfs g -- Iterate over the blocks until convergence -iterate :: (t -> a -> (Bool, t)) -> (t -> t) -> [a] -> t -> t -iterate upd reset blocks state = - case foldl upd' (False, state) blocks of - (True, state') -> iterate upd reset blocks (reset state') - (False, state') -> state' - where upd' (b, s) a = let (b', s') = upd s a in (b || b', s') -- lift to track changes +iterate :: [(HashCode,CmmBlock)] -> BlockEnv BlockId -> BlockEnv BlockId +iterate blocks subst = + case foldl common_block (False, emptyUFM, subst) blocks of + (changed, _, subst) + | changed -> iterate blocks subst + | otherwise -> subst + +type State = (ChangeFlag, UniqFM [CmmBlock], BlockEnv BlockId) + +type ChangeFlag = Bool +type HashCode = Int -- Try to find a block that is equal (or ``common'') to b. 
-type BidMap = BlockEnv BlockId -type State = (UniqFM [CmmBlock], BidMap) -common_block :: (Outputable h, Uniquable h) => State -> (h, CmmBlock) -> (Bool, State) -common_block (bmap, subst) (hash, b) = +common_block :: State -> (HashCode, CmmBlock) -> State +common_block (old_change, bmap, subst) (hash, b) = case lookupUFM bmap hash of Just bs -> case (List.find (eqBlockBodyWith (eqBid subst) b) bs, mapLookup bid subst) of (Just b', Nothing) -> addSubst b' (Just b', Just b'') | entryLabel b' /= b'' -> addSubst b' - _ -> (False, (addToUFM bmap hash (b : bs), subst)) - Nothing -> (False, (addToUFM bmap hash [b], subst)) + | otherwise -> (old_change, bmap, subst) + _ -> (old_change, addToUFM bmap hash (b : bs), subst) + Nothing -> (old_change, addToUFM bmap hash [b], subst) where bid = entryLabel b - addSubst b' = my_trace "found new common block" (ppr (entryLabel b')) $ - (True, (bmap, mapInsert bid (entryLabel b') subst)) - --- Given the map ``subst'' from BlockId -> BlockId, we rewrite the graph. 
-upd_graph :: CmmGraph -> BidMap -> CmmGraph -upd_graph g subst = mapGraphNodes (id, middle, last) g - where middle = mapExpDeep exp - last l = last' (mapExpDeep exp l) - last' :: CmmNode O C -> CmmNode O C - last' (CmmBranch bid) = CmmBranch $ sub bid - last' (CmmCondBranch p t f) = cond p (sub t) (sub f) - last' (CmmCall t (Just bid) a r o) = CmmCall t (Just $ sub bid) a r o - last' l@(CmmCall _ Nothing _ _ _) = l - last' (CmmForeignCall t r a bid u i) = CmmForeignCall t r a (sub bid) u i - last' (CmmSwitch e bs) = CmmSwitch e $ map (liftM sub) bs - cond p t f = if t == f then CmmBranch t else CmmCondBranch p t f - exp (CmmStackSlot (CallArea (Young id)) off) = - CmmStackSlot (CallArea (Young (sub id))) off - exp (CmmLit (CmmBlock id)) = CmmLit (CmmBlock (sub id)) - exp e = e - sub = lookupBid subst + addSubst b' = my_trace "found new common block" (ppr bid <> char '=' <> ppr (entryLabel b')) $ + (True, bmap, mapInsert bid (entryLabel b') subst) + + +-- ----------------------------------------------------------------------------- +-- Hashing and equality on blocks + +-- Below here is mostly boilerplate: hashing blocks ignoring labels, +-- and comparing blocks modulo a label mapping. -- To speed up comparisons, we hash each basic block modulo labels. -- The hashing is a bit arbitrary (the numbers are completely arbitrary), -- but it should be fast and good enough. -hash_block :: CmmBlock -> Int +hash_block :: CmmBlock -> HashCode hash_block block = fromIntegral (foldBlockNodesB3 (hash_fst, hash_mid, hash_lst) block (0 :: Word32) .&. 
(0x7fffffff :: Word32)) -- UniqFM doesn't like negative Ints @@ -103,11 +94,11 @@ hash_block block = hash_lst m h = hash_node m + h `shiftL` 1 hash_node :: CmmNode O x -> Word32 - hash_node (CmmComment (FastString u _ _ _ _)) = cvt u + hash_node (CmmComment _) = 0 -- don't care hash_node (CmmAssign r e) = hash_reg r + hash_e e hash_node (CmmStore e e') = hash_e e + hash_e e' hash_node (CmmUnsafeForeignCall t _ as) = hash_tgt t + hash_list hash_e as - hash_node (CmmBranch _) = 23 -- would be great to hash these properly + hash_node (CmmBranch _) = 23 -- NB. ignore the label hash_node (CmmCondBranch p _ _) = hash_e p hash_node (CmmCall e _ _ _ _) = hash_e e hash_node (CmmForeignCall t _ _ _ _ _) = hash_tgt t @@ -143,18 +134,60 @@ hash_block block = -- Utilities: equality and substitution on the graph. -- Given a map ``subst'' from BlockID -> BlockID, we define equality. -eqBid :: BidMap -> BlockId -> BlockId -> Bool +eqBid :: BlockEnv BlockId -> BlockId -> BlockId -> Bool eqBid subst bid bid' = lookupBid subst bid == lookupBid subst bid' -lookupBid :: BidMap -> BlockId -> BlockId +lookupBid :: BlockEnv BlockId -> BlockId -> BlockId lookupBid subst bid = case mapLookup bid subst of Just bid -> lookupBid subst bid Nothing -> bid --- Equality on the body of a block, modulo a function mapping block IDs to block IDs. +-- Middle nodes and expressions can contain BlockIds, in particular in +-- CmmStackSlot and CmmBlock, so we have to use a special equality for +-- these. 
+-- +eqMiddleWith :: (BlockId -> BlockId -> Bool) + -> CmmNode O O -> CmmNode O O -> Bool +eqMiddleWith _ (CmmComment _) (CmmComment _) = True +eqMiddleWith eqBid (CmmAssign r1 e1) (CmmAssign r2 e2) + = r1 == r2 && eqExprWith eqBid e1 e2 +eqMiddleWith eqBid (CmmStore l1 r1) (CmmStore l2 r2) + = eqExprWith eqBid l1 l2 && eqExprWith eqBid r1 r2 +eqMiddleWith eqBid (CmmUnsafeForeignCall t1 r1 a1) + (CmmUnsafeForeignCall t2 r2 a2) + = t1 == t2 && r1 == r2 && and (zipWith (eqExprWith eqBid) a1 a2) +eqMiddleWith _ _ _ = False + +eqExprWith :: (BlockId -> BlockId -> Bool) + -> CmmExpr -> CmmExpr -> Bool +eqExprWith eqBid = eq + where + CmmLit l1 `eq` CmmLit l2 = eqLit l1 l2 + CmmLoad e1 _ `eq` CmmLoad e2 _ = e1 `eq` e2 + CmmReg r1 `eq` CmmReg r2 = r1==r2 + CmmRegOff r1 i1 `eq` CmmRegOff r2 i2 = r1==r2 && i1==i2 + CmmMachOp op1 es1 `eq` CmmMachOp op2 es2 = op1==op2 && es1 `eqs` es2 + CmmStackSlot a1 i1 `eq` CmmStackSlot a2 i2 = eqArea a1 a2 && i1==i2 + _e1 `eq` _e2 = False + + xs `eqs` ys = and (zipWith eq xs ys) + + eqLit (CmmBlock id1) (CmmBlock id2) = eqBid id1 id2 + eqLit l1 l2 = l1 == l2 + + eqArea Old Old = True + eqArea (Young id1) (Young id2) = eqBid id1 id2 + eqArea _ _ = False + +-- Equality on the body of a block, modulo a function mapping block +-- IDs to block IDs. 
eqBlockBodyWith :: (BlockId -> BlockId -> Bool) -> CmmBlock -> CmmBlock -> Bool -eqBlockBodyWith eqBid block block' = middles == middles' && eqLastWith eqBid last last' - where (_, middles , JustC last :: MaybeC C (CmmNode O C)) = blockToNodeList block - (_, middles', JustC last' :: MaybeC C (CmmNode O C)) = blockToNodeList block' +eqBlockBodyWith eqBid block block' + = and (zipWith (eqMiddleWith eqBid) (blockToList m) (blockToList m')) && + eqLastWith eqBid l l' + where (_,m,l) = blockSplit block + (_,m',l') = blockSplit block' + + eqLastWith :: (BlockId -> BlockId -> Bool) -> CmmNode O C -> CmmNode O C -> Bool eqLastWith eqBid (CmmBranch bid1) (CmmBranch bid2) = eqBid bid1 bid2 diff --git a/compiler/cmm/CmmContFlowOpt.hs b/compiler/cmm/CmmContFlowOpt.hs index 73ce57e93f..3fabf33f97 100644 --- a/compiler/cmm/CmmContFlowOpt.hs +++ b/compiler/cmm/CmmContFlowOpt.hs @@ -2,19 +2,19 @@ {-# OPTIONS_GHC -fno-warn-warnings-deprecations -fno-warn-incomplete-patterns #-} module CmmContFlowOpt - ( runCmmContFlowOpts - , removeUnreachableBlocks, replaceBranches + ( cmmCfgOpts + , cmmCfgOptsProc + , removeUnreachableBlocks + , replaceLabels ) where import BlockId import Cmm import CmmUtils -import Digraph import Maybes -import Outputable -import Compiler.Hoopl +import Hoopl import Control.Monad import Prelude hiding (succ, unzip, zip) @@ -24,104 +24,158 @@ import Prelude hiding (succ, unzip, zip) -- ----------------------------------------------------------------------------- -runCmmContFlowOpts :: CmmGroup -> CmmGroup -runCmmContFlowOpts = map (optProc cmmCfgOpts) - cmmCfgOpts :: CmmGraph -> CmmGraph -cmmCfgOpts = removeUnreachableBlocks . blockConcat . branchChainElim - -- Here branchChainElim can ultimately be replaced - -- with a more exciting combination of optimisations +cmmCfgOpts = removeUnreachableBlocks . 
blockConcat + +cmmCfgOptsProc :: CmmDecl -> CmmDecl +cmmCfgOptsProc = optProc cmmCfgOpts optProc :: (g -> g) -> GenCmmDecl d h g -> GenCmmDecl d h g optProc opt (CmmProc info lbl g) = CmmProc info lbl (opt g) optProc _ top = top + ----------------------------------------------------------------------------- -- --- Branch Chain Elimination +-- Block concatenation -- ----------------------------------------------------------------------------- --- | Remove any basic block of the form L: goto L', and replace L with --- L' everywhere else, unless L is the successor of a call instruction --- and L' is the entry block. You don't want to set the successor of a --- function call to the entry block because there is no good way to --- store both the infotables for the call and from the callee, while --- putting the stack pointer in a consistent place. +-- This optimisation does two things: +-- - If a block finishes with an unconditional branch, then we may +-- be able to concatenate the block it points to and remove the +-- branch. We do this either if the destination block is small +-- (e.g. just another branch), or if this is the only jump to +-- this particular destination block. +-- +-- - If a block finishes in a call whose continuation block is a +-- goto, then we can shortcut the destination, making the +-- continuation block the destination of the goto. +-- +-- Both transformations are improved by working from the end of the +-- graph towards the beginning, because we may be able to perform many +-- shortcuts in one go. + + +-- We need to walk over the blocks from the end back to the +-- beginning. We are going to maintain the "current" graph +-- (BlockEnv CmmBlock) as we go, and also a mapping from BlockId +-- to BlockId, representing continuation labels that we have +-- renamed. This latter mapping is important because we might +-- shortcut a CmmCall continuation. 
For example: +-- +-- Sp[0] = L +-- call g returns to L +-- +-- L: goto M -- --- JD isn't quite sure when it's safe to share continuations for different --- function calls -- have to think about where the SP will be, --- so we'll table that problem for now by leaving all call successors alone. - -branchChainElim :: CmmGraph -> CmmGraph -branchChainElim g - | null lone_branch_blocks = g -- No blocks to remove - | otherwise = {- pprTrace "branchChainElim" (ppr forest) $ -} - replaceLabels (mapFromList edges) g +-- M: ... +-- +-- So when we shortcut the L block, we need to replace not only +-- the continuation of the call, but also references to L in the +-- code (e.g. the assignment Sp[0] = L). So we keep track of +-- which labels we have renamed and apply the mapping at the end +-- with replaceLabels. + +blockConcat :: CmmGraph -> CmmGraph +blockConcat g@CmmGraph { g_entry = entry_id } + = replaceLabels shortcut_map $ ofBlockMap new_entry new_blocks where - blocks = toBlockList g - - lone_branch_blocks :: [(BlockId, BlockId)] - -- each (L,K) is a block of the form - -- L : goto K - lone_branch_blocks = mapCatMaybes isLoneBranch blocks - - call_succs = foldl add emptyBlockSet blocks - where add :: BlockSet -> CmmBlock -> BlockSet - add succs b = - case lastNode b of - (CmmCall _ (Just k) _ _ _) -> setInsert k succs - (CmmForeignCall {succ=k}) -> setInsert k succs - _ -> succs - - isLoneBranch :: CmmBlock -> Maybe (BlockId, BlockId) - isLoneBranch block - | (JustC (CmmEntry id), [], JustC (CmmBranch target)) <- blockToNodeList block - , not (setMember id call_succs) - = Just (id,target) - | otherwise - = Nothing - - -- We build a graph from lone_branch_blocks (every node has only - -- one out edge). Then we - -- - topologically sort the graph: if from A we can reach B, - -- then A occurs before B in the result list. - -- - depth-first search starting from the nodes in this list. - -- This gives us a [[node]], in which each list is a dependency - -- chain. 
- -- - for each list [a1,a2,...an] replace branches to ai with an. - -- - -- This approach nicely deals with cycles by ignoring them. - -- Branches in a cycle will be redirected to somewhere in the - -- cycle, but we don't really care where. A cycle should be dead code, - -- and so will be eliminated by removeUnreachableBlocks. - -- - fromNode (b,_) = b - toNode a = (a,a) - - all_block_ids :: LabelSet - all_block_ids = setFromList (map fst lone_branch_blocks) - `setUnion` - setFromList (map snd lone_branch_blocks) - - forest = dfsTopSortG $ graphFromVerticesAndAdjacency nodes lone_branch_blocks - where nodes = map toNode $ setElems $ all_block_ids - - edges = [ (fromNode y, fromNode x) - | (x:xs) <- map reverse forest, y <- xs ] + -- we might be able to shortcut the entry BlockId itself + new_entry + | Just entry_blk <- mapLookup entry_id new_blocks + , Just dest <- canShortcut entry_blk + = dest + | otherwise + = entry_id ----------------------------------------------------------------- + blocks = postorderDfs g + + (new_blocks, shortcut_map) = + foldr maybe_concat (toBlockMap g, mapEmpty) blocks + + maybe_concat :: CmmBlock + -> (BlockEnv CmmBlock, BlockEnv BlockId) + -> (BlockEnv CmmBlock, BlockEnv BlockId) + maybe_concat block (blocks, shortcut_map) + | CmmBranch b' <- last + , Just blk' <- mapLookup b' blocks + , shouldConcatWith b' blk' + = (mapInsert bid (splice head blk') blocks, shortcut_map) + + -- calls: if we can shortcut the continuation label, then + -- we must *also* remember to substitute for the label in the + -- code, because we will push it somewhere. + | Just b' <- callContinuation_maybe last + , Just blk' <- mapLookup b' blocks + , Just dest <- canShortcut blk' + = (blocks, mapInsert b' dest shortcut_map) + -- replaceLabels will substitute dest for b' everywhere, later + + -- non-calls: see if we can shortcut any of the successors. 
+ | Nothing <- callContinuation_maybe last + = ( mapInsert bid (blockJoinTail head shortcut_last) blocks + , shortcut_map ) + + | otherwise + = (blocks, shortcut_map) + where + (head, last) = blockSplitTail block + bid = entryLabel block + shortcut_last = mapSuccessors shortcut last + shortcut l = + case mapLookup l blocks of + Just b | Just dest <- canShortcut b -> dest + _otherwise -> l + + shouldConcatWith b block + | num_preds b == 1 = True -- only one predecessor: go for it + | okToDuplicate block = True -- short enough to duplicate + | otherwise = False + where num_preds bid = mapLookup bid backEdges `orElse` 0 + + canShortcut :: CmmBlock -> Maybe BlockId + canShortcut block + | (_, middle, CmmBranch dest) <- blockSplit block + , isEmptyBlock middle + = Just dest + | otherwise + = Nothing + + backEdges :: BlockEnv Int -- number of predecessors for each block + backEdges = mapInsertWith (+) entry_id 1 $ -- add 1 for the entry id + mapMap setSize $ predMap blocks + + splice :: Block CmmNode C O -> CmmBlock -> CmmBlock + splice head rest = head `blockAppend` snd (blockSplitHead rest) + + +callContinuation_maybe :: CmmNode O C -> Maybe BlockId +callContinuation_maybe (CmmCall { cml_cont = Just b }) = Just b +callContinuation_maybe (CmmForeignCall { succ = b }) = Just b +callContinuation_maybe _ = Nothing + +okToDuplicate :: CmmBlock -> Bool +okToDuplicate block + = case blockSplit block of (_, m, _) -> isEmptyBlock m + -- cheap and cheerful; we might expand this in the future to + -- e.g. spot blocks that represent a single instruction or two + +------------------------------------------------------------------------ +-- Map over the CmmGraph, replacing each label with its mapping in the +-- supplied BlockEnv. replaceLabels :: BlockEnv BlockId -> CmmGraph -> CmmGraph -replaceLabels env = - replace_eid . 
mapGraphNodes1 txnode +replaceLabels env g + | mapNull env = g + | otherwise = replace_eid $ mapGraphNodes1 txnode g where replace_eid g = g {g_entry = lookup (g_entry g)} lookup id = mapLookup id env `orElse` id txnode :: CmmNode e x -> CmmNode e x txnode (CmmBranch bid) = CmmBranch (lookup bid) - txnode (CmmCondBranch p t f) = CmmCondBranch (exp p) (lookup t) (lookup f) + txnode (CmmCondBranch p t f) = mkCmmCondBranch (exp p) (lookup t) (lookup f) txnode (CmmSwitch e arms) = CmmSwitch (exp e) (map (liftM lookup) arms) txnode (CmmCall t k a res r) = CmmCall (exp t) (liftM lookup k) a res r txnode fc@CmmForeignCall{} = fc{ args = map exp (args fc) @@ -130,90 +184,25 @@ replaceLabels env = exp :: CmmExpr -> CmmExpr exp (CmmLit (CmmBlock bid)) = CmmLit (CmmBlock (lookup bid)) - exp (CmmStackSlot (CallArea (Young id)) i) = CmmStackSlot (CallArea (Young (lookup id))) i + exp (CmmStackSlot (Young id) i) = CmmStackSlot (Young (lookup id)) i exp e = e - -replaceBranches :: BlockEnv BlockId -> CmmGraph -> CmmGraph -replaceBranches env g = mapGraphNodes (id, id, last) g - where - last :: CmmNode O C -> CmmNode O C - last (CmmBranch id) = CmmBranch (lookup id) - last (CmmCondBranch e ti fi) = CmmCondBranch e (lookup ti) (lookup fi) - last (CmmSwitch e tbl) = CmmSwitch e (map (fmap lookup) tbl) - last l@(CmmCall {}) = l - last l@(CmmForeignCall {}) = l - lookup id = fmap lookup (mapLookup id env) `orElse` id - -- XXX: this is a recursive lookup, it follows chains until the lookup - -- returns Nothing, at which point we return the last BlockId +mkCmmCondBranch :: CmmExpr -> Label -> Label -> CmmNode O C +mkCmmCondBranch p t f = if t == f then CmmBranch t else CmmCondBranch p t f ---------------------------------------------------------------- -- Build a map from a block to its set of predecessors. Very useful. 
+ predMap :: [CmmBlock] -> BlockEnv BlockSet predMap blocks = foldr add_preds mapEmpty blocks -- find the back edges where add_preds block env = foldl (add (entryLabel block)) env (successors block) add bid env b' = mapInsert b' (setInsert bid (mapLookup b' env `orElse` setEmpty)) env ------------------------------------------------------------------------------ --- --- Block concatenation --- ------------------------------------------------------------------------------ - --- If a block B branches to a label L, L is not the entry block, --- and L has no other predecessors, --- then we can splice the block starting with L onto the end of B. --- Order matters, so we work bottom up (reverse postorder DFS). --- This optimization can be inhibited by unreachable blocks, but --- the reverse postorder DFS returns only reachable blocks. --- --- To ensure correctness, we have to make sure that the BlockId of the block --- we are about to eliminate is not named in another instruction. --- --- Note: This optimization does _not_ subsume branch chain elimination. - -blockConcat :: CmmGraph -> CmmGraph -blockConcat g@(CmmGraph {g_entry=eid}) = - replaceLabels concatMap $ ofBlockMap (g_entry g) blocks' - where - blocks = postorderDfs g - - (blocks', concatMap) = - foldr maybe_concat (toBlockMap g, mapEmpty) $ blocks - - maybe_concat :: CmmBlock -> (LabelMap CmmBlock, LabelMap Label) -> (LabelMap CmmBlock, LabelMap Label) - maybe_concat b unchanged@(blocks', concatMap) = - let bid = entryLabel b - in case blockToNodeList b of - (JustC h, m, JustC (CmmBranch b')) -> - if canConcatWith b' then - (mapInsert bid (splice blocks' h m b') blocks', - mapInsert b' bid concatMap) - else unchanged - _ -> unchanged - - num_preds bid = liftM setSize (mapLookup bid backEdges) `orElse` 0 - - canConcatWith b' = b' /= eid && num_preds b' == 1 - - backEdges = predMap blocks - - splice :: forall map n e x. 
- IsMap map => - map (Block n e x) -> n C O -> [n O O] -> KeyOf map -> Block n C x - splice blocks' h m bid' = - case mapLookup bid' blocks' of - Nothing -> panic "unknown successor block" - Just block | (_, m', l') <- blockToNodeList block - -> blockOfNodeList (JustC h, (m ++ m'), l') - ----------------------------------------------------------------------------- -- -- Removing unreachable blocks --- ------------------------------------------------------------------------------ removeUnreachableBlocks :: CmmGraph -> CmmGraph removeUnreachableBlocks g diff --git a/compiler/cmm/CmmCvt.hs b/compiler/cmm/CmmCvt.hs index 80c6079aac..e72eee041c 100644 --- a/compiler/cmm/CmmCvt.hs +++ b/compiler/cmm/CmmCvt.hs @@ -12,29 +12,25 @@ import CmmUtils import qualified OldCmm as Old import OldPprCmm () -import Compiler.Hoopl hiding ((<*>), mkLabel, mkBranch) +import Hoopl hiding ((<*>), mkLabel, mkBranch) import Data.Maybe import Maybes import Outputable cmmOfZgraph :: CmmGroup -> Old.CmmGroup cmmOfZgraph tops = map mapTop tops - where mapTop (CmmProc h l g) = CmmProc (Old.CmmInfo Nothing Nothing (info_tbl h)) l (ofZgraph g) + where mapTop (CmmProc h l g) = CmmProc (info_tbl h) l (ofZgraph g) mapTop (CmmData s ds) = CmmData s ds data ValueDirection = Arguments | Results -add_hints :: Convention -> ValueDirection -> [a] -> [Old.CmmHinted a] +add_hints :: ForeignTarget -> ValueDirection -> [a] -> [Old.CmmHinted a] add_hints conv vd args = zipWith Old.CmmHinted args (get_hints conv vd) -get_hints :: Convention -> ValueDirection -> [ForeignHint] -get_hints (Foreign (ForeignConvention _ hints _)) Arguments = hints -get_hints (Foreign (ForeignConvention _ _ hints)) Results = hints -get_hints _other_conv _vd = repeat NoHint - -get_conv :: ForeignTarget -> Convention -get_conv (PrimTarget _) = NativeNodeCall -- JD: SUSPICIOUS -get_conv (ForeignTarget _ fc) = Foreign fc +get_hints :: ForeignTarget -> ValueDirection -> [ForeignHint] +get_hints (ForeignTarget _ (ForeignConvention _ hints 
_)) Arguments = hints +get_hints (ForeignTarget _ (ForeignConvention _ _ hints)) Results = hints +get_hints (PrimTarget _) _vd = repeat NoHint cmm_target :: ForeignTarget -> Old.CmmCallTarget cmm_target (PrimTarget op) = Old.CmmPrim op Nothing @@ -89,8 +85,8 @@ ofZgraph g = Old.ListGraph $ mapMaybe convert_block $ postorderDfs g CmmUnsafeForeignCall (PrimTarget MO_Touch) _ _ -> Old.CmmNop CmmUnsafeForeignCall target ress args -> Old.CmmCall (cmm_target target) - (add_hints (get_conv target) Results ress) - (add_hints (get_conv target) Arguments args) + (add_hints target Results ress) + (add_hints target Arguments args) Old.CmmMayReturn last :: CmmNode O C -> () -> [Old.CmmStmt] diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs index 6eb91e89ba..646ecb5c67 100644 --- a/compiler/cmm/CmmExpr.hs +++ b/compiler/cmm/CmmExpr.hs @@ -14,11 +14,11 @@ module CmmExpr , GlobalReg(..), globalRegType, spReg, hpReg, spLimReg, nodeReg, node, baseReg , VGcPtr(..), vgcFlag -- Temporary! , DefinerOfLocalRegs, UserOfLocalRegs, foldRegsDefd, foldRegsUsed, filterRegsUsed - , DefinerOfSlots, UserOfSlots, foldSlotsDefd, foldSlotsUsed , RegSet, emptyRegSet, elemRegSet, extendRegSet, deleteFromRegSet, mkRegSet - , plusRegSet, minusRegSet, timesRegSet - , regUsedIn, regSlot - , Area(..), AreaId(..), SubArea, SubAreaSet, AreaMap, isStackSlotOf + , plusRegSet, minusRegSet, timesRegSet, sizeRegSet, nullRegSet + , regSetToList + , regUsedIn + , Area(..) 
, module CmmMachOp , module CmmType ) @@ -31,9 +31,9 @@ import CmmMachOp import BlockId import CLabel import Unique -import UniqSet -import Data.Map (Map) +import Data.Set (Set) +import qualified Data.Set as Set ----------------------------------------------------------------------------- -- CmmExpr @@ -42,11 +42,12 @@ import Data.Map (Map) data CmmExpr = CmmLit CmmLit -- Literal - | CmmLoad CmmExpr CmmType -- Read memory location - | CmmReg CmmReg -- Contents of register + | CmmLoad !CmmExpr !CmmType -- Read memory location + | CmmReg !CmmReg -- Contents of register | CmmMachOp MachOp [CmmExpr] -- Machine operation (+, -, *, etc.) - | CmmStackSlot Area Int -- addressing expression of a stack slot - | CmmRegOff CmmReg Int + | CmmStackSlot Area {-# UNPACK #-} !Int + -- addressing expression of a stack slot + | CmmRegOff !CmmReg Int -- CmmRegOff reg i -- ** is shorthand only, meaning ** -- CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt (fromIntegral i) rep)] @@ -62,20 +63,16 @@ instance Eq CmmExpr where -- Equality ignores the types _e1 == _e2 = False data CmmReg - = CmmLocal LocalReg + = CmmLocal {-# UNPACK #-} !LocalReg | CmmGlobal GlobalReg deriving( Eq, Ord ) -- | A stack area is either the stack slot where a variable is spilled -- or the stack space where function arguments and results are passed. data Area - = RegSlot LocalReg - | CallArea AreaId - deriving (Eq, Ord) - -data AreaId = Old -- See Note [Old Area] - | Young BlockId + | Young {-# UNPACK #-} !BlockId -- Invariant: must be a continuation BlockId + -- See Note [Continuation BlockId] in CmmNode. deriving (Eq, Ord) {- Note [Old Area] @@ -94,15 +91,8 @@ necessarily at the young end of the Old area. 
End of note -} -type SubArea = (Area, Int, Int) -- area, offset, width -type SubAreaSet = Map Area [SubArea] - -type AreaMap = Map Area Int - -- Byte offset of the oldest byte of the Area, - -- relative to the oldest byte of the Old Area - data CmmLit - = CmmInt Integer Width + = CmmInt !Integer Width -- Interpretation: the 2's complement representation of the value -- is truncated to the specified size. This is easier than trying -- to keep the value within range, because we don't know whether @@ -120,7 +110,11 @@ data CmmLit -- It is also used inside the NCG during when generating -- position-independent code. | CmmLabelDiffOff CLabel CLabel Int -- label1 - label2 + offset - | CmmBlock BlockId -- Code label + + | CmmBlock {-# UNPACK #-} !BlockId -- Code label + -- Invariant: must be a continuation BlockId + -- See Note [Continuation BlockId] in CmmNode. + | CmmHighStackMark -- stands for the max stack space used during a procedure deriving Eq @@ -163,7 +157,7 @@ maybeInvertCmmExpr _ = Nothing ----------------------------------------------------------------------------- data LocalReg - = LocalReg !Unique CmmType + = LocalReg {-# UNPACK #-} !Unique CmmType -- ^ Parameters: -- 1. Identifier -- 2. Type @@ -189,22 +183,35 @@ localRegType (LocalReg _ rep) = rep ----------------------------------------------------------------------------- -- | Sets of local registers -type RegSet = UniqSet LocalReg + +-- These are used for dataflow facts, and a common operation is taking +-- the union of two RegSets and then asking whether the union is the +-- same as one of the inputs. UniqSet isn't good here, because +-- sizeUniqSet is O(n) whereas Set.size is O(1), so we use ordinary +-- Sets. 
+ +type RegSet = Set LocalReg emptyRegSet :: RegSet +nullRegSet :: RegSet -> Bool elemRegSet :: LocalReg -> RegSet -> Bool extendRegSet :: RegSet -> LocalReg -> RegSet deleteFromRegSet :: RegSet -> LocalReg -> RegSet mkRegSet :: [LocalReg] -> RegSet minusRegSet, plusRegSet, timesRegSet :: RegSet -> RegSet -> RegSet - -emptyRegSet = emptyUniqSet -elemRegSet = elementOfUniqSet -extendRegSet = addOneToUniqSet -deleteFromRegSet = delOneFromUniqSet -mkRegSet = mkUniqSet -minusRegSet = minusUniqSet -plusRegSet = unionUniqSets -timesRegSet = intersectUniqSets +sizeRegSet :: RegSet -> Int +regSetToList :: RegSet -> [LocalReg] + +emptyRegSet = Set.empty +nullRegSet = Set.null +elemRegSet = Set.member +extendRegSet = flip Set.insert +deleteFromRegSet = flip Set.delete +mkRegSet = Set.fromList +minusRegSet = Set.difference +plusRegSet = Set.union +timesRegSet = Set.intersection +sizeRegSet = Set.size +regSetToList = Set.toList class UserOfLocalRegs a where foldRegsUsed :: (b -> LocalReg -> b) -> b -> a -> b @@ -236,7 +243,7 @@ instance DefinerOfLocalRegs LocalReg where foldRegsDefd f z r = f z r instance UserOfLocalRegs RegSet where - foldRegsUsed f = foldUniqSet (flip f) + foldRegsUsed f = Set.fold (flip f) instance UserOfLocalRegs CmmExpr where foldRegsUsed f z e = expr z e @@ -271,49 +278,6 @@ reg `regUsedIn` CmmMachOp _ es = any (reg `regUsedIn`) es _ `regUsedIn` CmmStackSlot _ _ = False ----------------------------------------------------------------------------- --- Stack slots ------------------------------------------------------------------------------ - -isStackSlotOf :: CmmExpr -> LocalReg -> Bool -isStackSlotOf (CmmStackSlot (RegSlot r) _) r' = r == r' -isStackSlotOf _ _ = False - -regSlot :: LocalReg -> CmmExpr -regSlot r = CmmStackSlot (RegSlot r) (widthInBytes $ typeWidth $ localRegType r) - ------------------------------------------------------------------------------ --- Stack slot use information for expressions and other types [_$_] 
------------------------------------------------------------------------------ - --- Fold over the area, the offset into the area, and the width of the subarea. -class UserOfSlots a where - foldSlotsUsed :: (b -> SubArea -> b) -> b -> a -> b - -class DefinerOfSlots a where - foldSlotsDefd :: (b -> SubArea -> b) -> b -> a -> b - -instance UserOfSlots CmmExpr where - foldSlotsUsed f z e = expr z e - where expr z (CmmLit _) = z - expr z (CmmLoad (CmmStackSlot a i) ty) = f z (a, i, widthInBytes $ typeWidth ty) - expr z (CmmLoad addr _) = foldSlotsUsed f z addr - expr z (CmmReg _) = z - expr z (CmmMachOp _ exprs) = foldSlotsUsed f z exprs - expr z (CmmRegOff _ _) = z - expr z (CmmStackSlot _ _) = z - -instance UserOfSlots a => UserOfSlots [a] where - foldSlotsUsed _ set [] = set - foldSlotsUsed f set (x:xs) = foldSlotsUsed f (foldSlotsUsed f set x) xs - -instance DefinerOfSlots a => DefinerOfSlots [a] where - foldSlotsDefd _ set [] = set - foldSlotsDefd f set (x:xs) = foldSlotsDefd f (foldSlotsDefd f set x) xs - -instance DefinerOfSlots SubArea where - foldSlotsDefd f z a = f z a - ------------------------------------------------------------------------------ -- Global STG registers ----------------------------------------------------------------------------- diff --git a/compiler/cmm/CmmInfo.hs b/compiler/cmm/CmmInfo.hs index fe0c104d1c..a171faa057 100644 --- a/compiler/cmm/CmmInfo.hs +++ b/compiler/cmm/CmmInfo.hs @@ -19,6 +19,8 @@ import CmmUtils import CLabel import SMRep import Bitmap +import Stream (Stream) +import qualified Stream import Maybes import Constants @@ -40,10 +42,16 @@ mkEmptyContInfoTable info_lbl , cit_prof = NoProfilingInfo , cit_srt = NoC_SRT } -cmmToRawCmm :: Platform -> [Old.CmmGroup] -> IO [Old.RawCmmGroup] +cmmToRawCmm :: Platform -> Stream IO Old.CmmGroup () + -> IO (Stream IO Old.RawCmmGroup ()) cmmToRawCmm platform cmms = do { uniqs <- mkSplitUniqSupply 'i' - ; return (initUs_ uniqs (mapM (concatMapM (mkInfoTable platform)) cmms)) } + ; let 
do_one uniqs cmm = do + case initUs uniqs $ concatMapM (mkInfoTable platform) cmm of + (b,uniqs') -> return (uniqs',b) + -- NB. strictness fixes a space leak. DO NOT REMOVE. + ; return (Stream.mapAccumL do_one uniqs cmms >> return ()) + } -- Make a concrete info table, represented as a list of CmmStatic -- (it can't be simply a list of Word, because the SRT field is @@ -82,7 +90,7 @@ mkInfoTable :: Platform -> CmmDecl -> UniqSM [RawCmmDecl] mkInfoTable _ (CmmData sec dat) = return [CmmData sec dat] -mkInfoTable platform (CmmProc (CmmInfo _ _ info) entry_label blocks) +mkInfoTable platform (CmmProc info entry_label blocks) | CmmNonInfoTable <- info -- Code without an info table. Easy. = return [CmmProc Nothing entry_label blocks] @@ -91,7 +99,8 @@ mkInfoTable platform (CmmProc (CmmInfo _ _ info) entry_label blocks) ; return (top_decls ++ mkInfoTableAndCode info_lbl info_cts entry_label blocks) } - | otherwise = panic "mkInfoTable" -- Patern match overlap check not clever enough + | otherwise = panic "mkInfoTable" + -- Patern match overlap check not clever enough ----------------------------------------------------- type InfoTableContents = ( [CmmLit] -- The standard part diff --git a/compiler/cmm/CmmLayoutStack.hs b/compiler/cmm/CmmLayoutStack.hs new file mode 100644 index 0000000000..f0dce4a6a1 --- /dev/null +++ b/compiler/cmm/CmmLayoutStack.hs @@ -0,0 +1,1048 @@ +{-# LANGUAGE RecordWildCards, GADTs #-} +module CmmLayoutStack ( + cmmLayoutStack, setInfoTableStackMap, cmmSink + ) where + +import StgCmmUtils ( callerSaveVolatileRegs ) -- XXX +import StgCmmForeign ( saveThreadState, loadThreadState ) -- XXX + +import Cmm +import BlockId +import CLabel +import CmmUtils +import MkGraph +import Module +import ForeignCall +import CmmLive +import CmmProcPoint +import SMRep +import Hoopl hiding ((<*>), mkLast, mkMiddle) +import Constants +import UniqSupply +import Maybes +import UniqFM +import Util + +import FastString +import Outputable +import Data.Map (Map) +import 
qualified Data.Map as Map +import qualified Data.Set as Set +import Control.Monad.Fix +import Data.Array as Array +import Data.Bits +import Data.List (nub, partition) +import Control.Monad (liftM) + +#include "HsVersions.h" + + +data StackSlot = Occupied | Empty + -- Occupied: a return address or part of an update frame + +instance Outputable StackSlot where + ppr Occupied = ptext (sLit "XXX") + ppr Empty = ptext (sLit "---") + +-- All stack locations are expressed as positive byte offsets from the +-- "base", which is defined to be the address above the return address +-- on the stack on entry to this CmmProc. +-- +-- Lower addresses have higher StackLocs. +-- +type StackLoc = ByteOff + +{- + A StackMap describes the stack at any given point. At a continuation + it has a particular layout, like this: + + | | <- base + |-------------| + | ret0 | <- base + 8 + |-------------| + . upd frame . <- base + sm_ret_off + |-------------| + | | + . vars . + . (live/dead) . + | | <- base + sm_sp - sm_args + |-------------| + | ret1 | + . ret vals . <- base + sm_sp (<--- Sp points here) + |-------------| + +Why do we include the final return address (ret0) in our stack map? I +have absolutely no idea, but it seems to be done that way consistently +in the rest of the code generator, so I played along here. --SDM + +Note that we will be constructing an info table for the continuation +(ret1), which needs to describe the stack down to, but not including, +the update frame (or ret0, if there is no update frame). +-} + +data StackMap = StackMap + { sm_sp :: StackLoc + -- ^ the offset of Sp relative to the base on entry + -- to this block. + , sm_args :: ByteOff + -- ^ the number of bytes of arguments in the area for this block + -- Defn: the offset of young(L) relative to the base is given by + -- (sm_sp - sm_args) of the StackMap for block L. 
+ , sm_ret_off :: ByteOff + -- ^ Number of words of stack that we do not describe with an info + -- table, because it contains an update frame. + , sm_regs :: UniqFM (LocalReg,StackLoc) + -- ^ regs on the stack + } + +instance Outputable StackMap where + ppr StackMap{..} = + text "Sp = " <> int sm_sp $$ + text "sm_args = " <> int sm_args $$ + text "sm_ret_off = " <> int sm_ret_off $$ + text "sm_regs = " <> ppr (eltsUFM sm_regs) + + +cmmLayoutStack :: ProcPointSet -> ByteOff -> CmmGraph + -> UniqSM (CmmGraph, BlockEnv StackMap) +cmmLayoutStack procpoints entry_args + graph0@(CmmGraph { g_entry = entry }) + = do + pprTrace "cmmLayoutStack" (ppr entry_args) $ return () + (graph, liveness) <- removeDeadAssignments graph0 + pprTrace "liveness" (ppr liveness) $ return () + let blocks = postorderDfs graph + + (final_stackmaps, final_high_sp, new_blocks) <- + mfix $ \ ~(rec_stackmaps, rec_high_sp, _new_blocks) -> + layout procpoints liveness entry entry_args + rec_stackmaps rec_high_sp blocks + + new_blocks' <- mapM lowerSafeForeignCall new_blocks + + pprTrace ("Sp HWM") (ppr final_high_sp) $ + return (ofBlockList entry new_blocks', final_stackmaps) + + + +layout :: BlockSet -- proc points + -> BlockEnv CmmLive -- liveness + -> BlockId -- entry + -> ByteOff -- stack args on entry + + -> BlockEnv StackMap -- [final] stack maps + -> ByteOff -- [final] Sp high water mark + + -> [CmmBlock] -- [in] blocks + + -> UniqSM + ( BlockEnv StackMap -- [out] stack maps + , ByteOff -- [out] Sp high water mark + , [CmmBlock] -- [out] new blocks + ) + +layout procpoints liveness entry entry_args final_stackmaps final_hwm blocks + = go blocks init_stackmap entry_args [] + where + (updfr, cont_info) = collectContInfo blocks + + init_stackmap = mapSingleton entry StackMap{ sm_sp = entry_args + , sm_args = entry_args + , sm_ret_off = updfr + , sm_regs = emptyUFM + } + + go [] acc_stackmaps acc_hwm acc_blocks + = return (acc_stackmaps, acc_hwm, acc_blocks) + + go (b0 : bs) acc_stackmaps acc_hwm 
acc_blocks + = do + let (entry0@(CmmEntry entry_lbl), middle0, last0) = blockSplit b0 + + let stack0@StackMap { sm_sp = sp0 } + = mapFindWithDefault + (pprPanic "no stack map for" (ppr entry_lbl)) + entry_lbl acc_stackmaps + + pprTrace "layout" (ppr entry_lbl <+> ppr stack0) $ return () + + -- (a) Update the stack map to include the effects of + -- assignments in this block + let stack1 = foldBlockNodesF (procMiddle acc_stackmaps) middle0 stack0 + + -- (b) Insert assignments to reload all the live variables if this + -- block is a proc point + let middle1 = if entry_lbl `setMember` procpoints + then foldr blockCons middle0 (insertReloads stack0) + else middle0 + + -- (c) Look at the last node and if we are making a call or + -- jumping to a proc point, we must save the live + -- variables, adjust Sp, and construct the StackMaps for + -- each of the successor blocks. See handleLastNode for + -- details. + (middle2, sp_off, last1, fixup_blocks, out) + <- handleLastNode procpoints liveness cont_info + acc_stackmaps stack1 middle0 last0 + + pprTrace "layout(out)" (ppr out) $ return () + + -- (d) Manifest Sp: run over the nodes in the block and replace + -- CmmStackSlot with CmmLoad from Sp with a concrete offset. + -- + -- our block: + -- middle1 -- the original middle nodes + -- middle2 -- live variable saves from handleLastNode + -- Sp = Sp + sp_off -- Sp adjustment goes here + -- last1 -- the last node + -- + let middle_pre = blockToList $ foldl blockSnoc middle1 middle2 + + sp_high = final_hwm - entry_args + -- The stack check value is adjusted by the Sp offset on + -- entry to the proc, which is entry_args. We are + -- assuming that we only do a stack check at the + -- beginning of a proc, and we don't modify Sp before the + -- check. 
+ + final_blocks = manifestSp final_stackmaps stack0 sp0 sp_high entry0 + middle_pre sp_off last1 fixup_blocks + + acc_stackmaps' = mapUnion acc_stackmaps out + + hwm' = maximum (acc_hwm : (sp0 - sp_off) : map sm_sp (mapElems out)) + + go bs acc_stackmaps' hwm' (final_blocks ++ acc_blocks) + + +-- ----------------------------------------------------------------------------- + +-- This doesn't seem right somehow. We need to find out whether this +-- proc will push some update frame material at some point, so that we +-- can avoid using that area of the stack for spilling. The +-- updfr_space field of the CmmProc *should* tell us, but it doesn't +-- (I think maybe it gets filled in later when we do proc-point +-- splitting). +-- +-- So we'll just take the max of all the cml_ret_offs. This could be +-- unnecessarily pessimistic, but probably not in the code we +-- generate. + +collectContInfo :: [CmmBlock] -> (ByteOff, BlockEnv ByteOff) +collectContInfo blocks + = (maximum ret_offs, mapFromList (catMaybes mb_argss)) + where + (mb_argss, ret_offs) = mapAndUnzip get_cont blocks + + get_cont b = + case lastNode b of + CmmCall { cml_cont = Just l, .. } + -> (Just (l, cml_ret_args), cml_ret_off) + CmmForeignCall { .. } + -> (Just (succ, 0), updfr) -- ?? + _other -> (Nothing, 0) + + +-- ----------------------------------------------------------------------------- +-- Updating the StackMap from middle nodes + +-- Look for loads from stack slots, and update the StackMap. This is +-- purely for optimisation reasons, so that we can avoid saving a +-- variable back to a different stack slot if it is already on the +-- stack. +-- +-- This happens a lot: for example when function arguments are passed +-- on the stack and need to be immediately saved across a call, we +-- want to just leave them where they are on the stack. 
+-- +procMiddle :: BlockEnv StackMap -> CmmNode e x -> StackMap -> StackMap +procMiddle stackmaps node sm + = case node of + CmmAssign (CmmLocal r) (CmmLoad (CmmStackSlot area off) _) + -> sm { sm_regs = addToUFM (sm_regs sm) r (r,loc) } + where loc = getStackLoc area off stackmaps + CmmAssign (CmmLocal r) _other + -> sm { sm_regs = delFromUFM (sm_regs sm) r } + _other + -> sm + +getStackLoc :: Area -> ByteOff -> BlockEnv StackMap -> StackLoc +getStackLoc Old n _ = n +getStackLoc (Young l) n stackmaps = + case mapLookup l stackmaps of + Nothing -> pprPanic "getStackLoc" (ppr l) + Just sm -> sm_sp sm - sm_args sm + n + + +-- ----------------------------------------------------------------------------- +-- Handling stack allocation for a last node + +-- We take a single last node and turn it into: +-- +-- C1 (some statements) +-- Sp = Sp + N +-- C2 (some more statements) +-- call f() -- the actual last node +-- +-- plus possibly some more blocks (we may have to add some fixup code +-- between the last node and the continuation). +-- +-- C1: is the code for saving the variables across this last node onto +-- the stack, if the continuation is a call or jumps to a proc point. +-- +-- C2: if the last node is a safe foreign call, we have to inject some +-- extra code that goes *after* the Sp adjustment. + +handleLastNode + :: ProcPointSet -> BlockEnv CmmLive -> BlockEnv ByteOff + -> BlockEnv StackMap -> StackMap + -> Block CmmNode O O + -> CmmNode O C + -> UniqSM + ( [CmmNode O O] -- nodes to go *before* the Sp adjustment + , ByteOff -- amount to adjust Sp + , CmmNode O C -- new last node + , [CmmBlock] -- new blocks + , BlockEnv StackMap -- stackmaps for the continuations + ) + +handleLastNode procpoints liveness cont_info stackmaps + stack0@StackMap { sm_sp = sp0 } middle last + = case last of + -- At each return / tail call, + -- adjust Sp to point to the last argument pushed, which + -- is cml_args, after popping any other junk from the stack. 
+ CmmCall{ cml_cont = Nothing, .. } -> do + let sp_off = sp0 - cml_args + return ([], sp_off, last, [], mapEmpty) + + -- At each CmmCall with a continuation: + CmmCall{ cml_cont = Just cont_lbl, .. } -> + return $ lastCall cont_lbl cml_args cml_ret_args cml_ret_off + + CmmForeignCall{ succ = cont_lbl, .. } -> do + return $ lastCall cont_lbl wORD_SIZE wORD_SIZE (sm_ret_off stack0) + -- one word each for args and results: the return address + + CmmBranch{..} -> handleProcPoints + CmmCondBranch{..} -> handleProcPoints + CmmSwitch{..} -> handleProcPoints + + where + -- Calls and ForeignCalls are handled the same way: + lastCall :: BlockId -> ByteOff -> ByteOff -> ByteOff + -> ( [CmmNode O O] + , ByteOff + , CmmNode O C + , [CmmBlock] + , BlockEnv StackMap + ) + lastCall lbl cml_args cml_ret_args cml_ret_off + = ( assignments + , spOffsetForCall sp0 cont_stack cml_args + , last + , [] -- no new blocks + , mapSingleton lbl cont_stack ) + where + (assignments, cont_stack) = prepareStack lbl cml_ret_args cml_ret_off + + + prepareStack lbl cml_ret_args cml_ret_off + | Just cont_stack <- mapLookup lbl stackmaps + -- If we have already seen this continuation before, then + -- we just have to make the stack look the same: + = (fixupStack stack0 cont_stack, cont_stack) + -- Otherwise, we have to allocate the stack frame + | otherwise + = (save_assignments, new_cont_stack) + where + (new_cont_stack, save_assignments) + = setupStackFrame lbl liveness cml_ret_off cml_ret_args stack0 + + + -- For other last nodes (branches), if any of the targets is a + -- proc point, we have to set up the stack to match what the proc + -- point is expecting. 
+ -- + handleProcPoints :: UniqSM ( [CmmNode O O] + , ByteOff + , CmmNode O C + , [CmmBlock] + , BlockEnv StackMap ) + + handleProcPoints + -- Note [diamond proc point] + | Just l <- futureContinuation middle + , (nub $ filter (`setMember` procpoints) $ successors last) == [l] + = do + let cont_args = mapFindWithDefault 0 l cont_info + (assigs, cont_stack) = prepareStack l cont_args (sm_ret_off stack0) + out = mapFromList [ (l', cont_stack) + | l' <- successors last ] + return ( assigs + , spOffsetForCall sp0 cont_stack wORD_SIZE + , last + , [] + , out) + + | otherwise = do + pps <- mapM handleProcPoint (successors last) + let lbl_map :: LabelMap Label + lbl_map = mapFromList [ (l,tmp) | (l,tmp,_,_) <- pps ] + fix_lbl l = mapLookup l lbl_map `orElse` l + return ( [] + , 0 + , mapSuccessors fix_lbl last + , concat [ blk | (_,_,_,blk) <- pps ] + , mapFromList [ (l, sm) | (l,_,sm,_) <- pps ] ) + + -- For each proc point that is a successor of this block + -- (a) if the proc point already has a stackmap, we need to + -- shuffle the current stack to make it look the same. + -- We have to insert a new block to make this happen. 
+ -- (b) otherwise, call "allocate live stack0" to make the + -- stack map for the proc point + handleProcPoint :: BlockId + -> UniqSM (BlockId, BlockId, StackMap, [CmmBlock]) + handleProcPoint l + | not (l `setMember` procpoints) = return (l, l, stack0, []) + | otherwise = do + tmp_lbl <- liftM mkBlockId $ getUniqueM + let + (stack2, assigs) = + case mapLookup l stackmaps of + Just pp_sm -> (pp_sm, fixupStack stack0 pp_sm) + Nothing -> + pprTrace "first visit to proc point" + (ppr l <+> ppr stack1) $ + (stack1, assigs) + where + cont_args = mapFindWithDefault 0 l cont_info + (stack1, assigs) = + setupStackFrame l liveness (sm_ret_off stack0) + cont_args stack0 + + sp_off = sp0 - sm_sp stack2 + + block = blockJoin (CmmEntry tmp_lbl) + (maybeAddSpAdj sp_off (blockFromList assigs)) + (CmmBranch l) + -- + return (l, tmp_lbl, stack2, [block]) + + + +-- Sp is currently pointing to current_sp, +-- we want it to point to +-- (sm_sp cont_stack - sm_args cont_stack + args) +-- so the difference is +-- sp0 - (sm_sp cont_stack - sm_args cont_stack + args) +spOffsetForCall :: ByteOff -> StackMap -> ByteOff -> ByteOff +spOffsetForCall current_sp cont_stack args + = current_sp - (sm_sp cont_stack - sm_args cont_stack + args) + + +-- | create a sequence of assignments to establish the new StackMap, +-- given the old StackMap. 
+fixupStack :: StackMap -> StackMap -> [CmmNode O O] +fixupStack old_stack new_stack = concatMap move new_locs + where + old_map :: Map LocalReg ByteOff + old_map = Map.fromList (stackSlotRegs old_stack) + new_locs = stackSlotRegs new_stack + + move (r,n) + | Just m <- Map.lookup r old_map, n == m = [] + | otherwise = [CmmStore (CmmStackSlot Old n) + (CmmReg (CmmLocal r))] + + + +setupStackFrame + :: BlockId -- label of continuation + -> BlockEnv CmmLive -- liveness + -> ByteOff -- updfr + -> ByteOff -- bytes of return values on stack + -> StackMap -- current StackMap + -> (StackMap, [CmmNode O O]) + +setupStackFrame lbl liveness updfr_off ret_args stack0 + = (cont_stack, assignments) + where + -- get the set of LocalRegs live in the continuation + live = mapFindWithDefault Set.empty lbl liveness + + -- the stack from the base to updfr_off is off-limits. + -- our new stack frame contains: + -- * saved live variables + -- * the return address [young(C) + 8] + -- * the args for the call, + -- which are replaced by the return values at the return + -- point. 
+ + -- everything up to updfr_off is off-limits + -- stack1 contains updfr_off, plus everything we need to save + (stack1, assignments) = allocate updfr_off live stack0 + + -- And the Sp at the continuation is: + -- sm_sp stack1 + ret_args + cont_stack = stack1{ sm_sp = sm_sp stack1 + ret_args + , sm_args = ret_args + , sm_ret_off = updfr_off + } + + +-- ----------------------------------------------------------------------------- +-- Note [diamond proc point] +-- +-- This special case looks for the pattern we get from a typical +-- tagged case expression: +-- +-- Sp[young(L1)] = L1 +-- if (R1 & 7) != 0 goto L1 else goto L2 +-- L2: +-- call [R1] returns to L1 +-- L1: live: {y} +-- x = R1 +-- +-- If we let the generic case handle this, we get +-- +-- Sp[-16] = L1 +-- if (R1 & 7) != 0 goto L1a else goto L2 +-- L2: +-- Sp[-8] = y +-- Sp = Sp - 16 +-- call [R1] returns to L1 +-- L1a: +-- Sp[-8] = y +-- Sp = Sp - 16 +-- goto L1 +-- L1: +-- x = R1 +-- +-- The code for saving the live vars is duplicated in each branch, and +-- furthermore there is an extra jump in the fast path (assuming L1 is +-- a proc point, which it probably is if there is a heap check). +-- +-- So to fix this we want to set up the stack frame before the +-- conditional jump. How do we know when to do this, and when it is +-- safe? The basic idea is, when we see the assignment +-- +-- Sp[young(L)] = L +-- +-- we know that +-- * we are definitely heading for L +-- * there can be no more reads from another stack area, because young(L) +-- overlaps with it. +-- +-- We don't necessarily know that everything live at L is live now +-- (some might be assigned between here and the jump to L). So we +-- simplify and only do the optimisation when we see +-- +-- (1) a block containing an assignment of a return address L +-- (2) ending in a branch where one (and only) continuation goes to L, +-- and no other continuations go to proc points. 
+-- +-- then we allocate the stack frame for L at the end of the block, +-- before the branch. +-- +-- We could generalise (2), but that would make it a bit more +-- complicated to handle, and this currently catches the common case. + +futureContinuation :: Block CmmNode O O -> Maybe BlockId +futureContinuation middle = foldBlockNodesB f middle Nothing + where f :: CmmNode a b -> Maybe BlockId -> Maybe BlockId + f (CmmStore (CmmStackSlot (Young l) _) (CmmLit (CmmBlock _))) _ + = Just l + f _ r = r + +-- ----------------------------------------------------------------------------- +-- Saving live registers + +-- | Given a set of live registers and a StackMap, save all the registers +-- on the stack and return the new StackMap and the assignments to do +-- the saving. +-- +allocate :: ByteOff -> RegSet -> StackMap -> (StackMap, [CmmNode O O]) +allocate ret_off live stackmap@StackMap{ sm_sp = sp0 + , sm_regs = regs0 } + = + pprTrace "allocate" (ppr live $$ ppr stackmap) $ + + -- we only have to save regs that are not already in a slot + let to_save = filter (not . (`elemUFM` regs0)) (Set.elems live) + regs1 = filterUFM (\(r,_) -> elemRegSet r live) regs0 + in + + -- make a map of the stack + let stack = reverse $ Array.elems $ + accumArray (\_ x -> x) Empty (1, toWords (max sp0 ret_off)) $ + ret_words ++ live_words + where ret_words = + [ (x, Occupied) + | x <- [ 1 .. toWords ret_off] ] + live_words = + [ (toWords x, Occupied) + | (r,off) <- eltsUFM regs1, + let w = localRegBytes r, + x <- [ off, off-wORD_SIZE .. off - w + 1] ] + in + + -- Pass over the stack: find slots to save all the new live variables, + -- choosing the oldest slots first (hence a foldr). 
+ let + save slot ([], stack, n, assigs, regs) -- no more regs to save + = ([], slot:stack, n `plusW` 1, assigs, regs) + save slot (to_save, stack, n, assigs, regs) + = case slot of + Occupied -> (to_save, Occupied:stack, n `plusW` 1, assigs, regs) + Empty + | Just (stack', r, to_save') <- + select_save to_save (slot:stack) + -> let assig = CmmStore (CmmStackSlot Old n') + (CmmReg (CmmLocal r)) + n' = n `plusW` 1 + in + (to_save', stack', n', assig : assigs, (r,(r,n')):regs) + + | otherwise + -> (to_save, slot:stack, n `plusW` 1, assigs, regs) + + -- we should do better here: right now we'll fit the smallest first, + -- but it would make more sense to fit the biggest first. + select_save :: [LocalReg] -> [StackSlot] + -> Maybe ([StackSlot], LocalReg, [LocalReg]) + select_save regs stack = go regs [] + where go [] _no_fit = Nothing + go (r:rs) no_fit + | Just rest <- dropEmpty words stack + = Just (replicate words Occupied ++ rest, r, rs++no_fit) + | otherwise + = go rs (r:no_fit) + where words = localRegWords r + + -- fill in empty slots as much as possible + (still_to_save, save_stack, n, save_assigs, save_regs) + = foldr save (to_save, [], 0, [], []) stack + + -- push any remaining live vars on the stack + (push_sp, push_assigs, push_regs) + = foldr push (n, [], []) still_to_save + where + push r (n, assigs, regs) + = (n', assig : assigs, (r,(r,n')) : regs) + where + n' = n + localRegBytes r + assig = CmmStore (CmmStackSlot Old n') + (CmmReg (CmmLocal r)) + + trim_sp + | not (null push_regs) = push_sp + | otherwise + = n `plusW` (- length (takeWhile isEmpty save_stack)) + + final_regs = regs1 `addListToUFM` push_regs + `addListToUFM` save_regs + + in + -- XXX should be an assert + if ( n /= max sp0 ret_off ) then pprPanic "allocate" (ppr n <+> ppr sp0 <+> ppr ret_off) else + + if (trim_sp .&. 
(wORD_SIZE - 1)) /= 0 then pprPanic "allocate2" (ppr trim_sp <+> ppr final_regs <+> ppr push_sp) else + + ( stackmap { sm_regs = final_regs , sm_sp = trim_sp } + , push_assigs ++ save_assigs ) + + +-- ----------------------------------------------------------------------------- +-- Manifesting Sp + +-- | Manifest Sp: turn all the CmmStackSlots into CmmLoads from Sp. The +-- block looks like this: +-- +-- middle_pre -- the middle nodes +-- Sp = Sp + sp_off -- Sp adjustment goes here +-- last -- the last node +-- +-- And we have some extra blocks too (that don't contain Sp adjustments) +-- +-- The adjustment for middle_pre will be different from that for +-- middle_post, because the Sp adjustment intervenes. +-- +manifestSp + :: BlockEnv StackMap -- StackMaps for other blocks + -> StackMap -- StackMap for this block + -> ByteOff -- Sp on entry to the block + -> ByteOff -- SpHigh + -> CmmNode C O -- first node + -> [CmmNode O O] -- middle + -> ByteOff -- sp_off + -> CmmNode O C -- last node + -> [CmmBlock] -- new blocks + -> [CmmBlock] -- final blocks with Sp manifest + +manifestSp stackmaps stack0 sp0 sp_high + first middle_pre sp_off last fixup_blocks + = final_block : fixup_blocks' + where + area_off = getAreaOff stackmaps + + adj_pre_sp, adj_post_sp :: CmmNode e x -> CmmNode e x + adj_pre_sp = mapExpDeep (areaToSp sp0 sp_high area_off) + adj_post_sp = mapExpDeep (areaToSp (sp0 - sp_off) sp_high area_off) + + final_middle = maybeAddSpAdj sp_off $ + blockFromList $ + map adj_pre_sp $ + elimStackStores stack0 stackmaps area_off $ + middle_pre + + final_last = optStackCheck (adj_post_sp last) + + final_block = blockJoin first final_middle final_last + + fixup_blocks' = map (mapBlock3' (id, adj_post_sp, id)) fixup_blocks + + +getAreaOff :: BlockEnv StackMap -> (Area -> StackLoc) +getAreaOff _ Old = 0 +getAreaOff stackmaps (Young l) = + case mapLookup l stackmaps of + Just sm -> sm_sp sm - sm_args sm + Nothing -> pprPanic "getAreaOff" (ppr l) + + +maybeAddSpAdj :: 
ByteOff -> Block CmmNode O O -> Block CmmNode O O +maybeAddSpAdj 0 block = block +maybeAddSpAdj sp_off block + = block `blockSnoc` CmmAssign spReg (cmmOffset (CmmReg spReg) sp_off) + + +{- +Sp(L) is the Sp offset on entry to block L relative to the base of the +OLD area. + +SpArgs(L) is the size of the young area for L, i.e. the number of +arguments. + + - in block L, each reference to [old + N] turns into + [Sp + Sp(L) - N] + + - in block L, each reference to [young(L') + N] turns into + [Sp + Sp(L) - Sp(L') + SpArgs(L') - N] + + - be careful with the last node of each block: Sp has already been adjusted + to be Sp + Sp(L) - Sp(L') +-} + +areaToSp :: ByteOff -> ByteOff -> (Area -> StackLoc) -> CmmExpr -> CmmExpr +areaToSp sp_old _sp_hwm area_off (CmmStackSlot area n) = + cmmOffset (CmmReg spReg) (sp_old - area_off area - n) +areaToSp _ sp_hwm _ (CmmLit CmmHighStackMark) = CmmLit (mkIntCLit sp_hwm) +areaToSp _ _ _ (CmmMachOp (MO_U_Lt _) -- Note [null stack check] + [CmmMachOp (MO_Sub _) + [ CmmReg (CmmGlobal Sp) + , CmmLit (CmmInt 0 _)], + CmmReg (CmmGlobal SpLim)]) = CmmLit (CmmInt 0 wordWidth) +areaToSp _ _ _ other = other + +-- ----------------------------------------------------------------------------- +-- Note [null stack check] +-- +-- If the high-water Sp is zero, then we end up with +-- +-- if (Sp - 0 < SpLim) then .. else .. +-- +-- and possibly some dead code for the failure case. Optimising this +-- away depends on knowing that SpLim <= Sp, so it is really the job +-- of the stack layout algorithm, hence we do it now. This is also +-- convenient because control-flow optimisation later will drop the +-- dead code. 
+ +optStackCheck :: CmmNode O C -> CmmNode O C +optStackCheck n = -- Note [null stack check] + case n of + CmmCondBranch (CmmLit (CmmInt 0 _)) _true false -> CmmBranch false + other -> other + + +-- ----------------------------------------------------------------------------- + +-- | Eliminate stores of the form +-- +-- Sp[area+n] = r +-- +-- when we know that r is already in the same slot as Sp[area+n]. We +-- could do this in a later optimisation pass, but that would involve +-- a separate analysis and we already have the information to hand +-- here. It helps clean up some extra stack stores in common cases. +-- +-- Note that we may have to modify the StackMap as we walk through the +-- code using procMiddle, since an assignment to a variable in the +-- StackMap will invalidate its mapping there. +-- +elimStackStores :: StackMap + -> BlockEnv StackMap + -> (Area -> ByteOff) + -> [CmmNode O O] + -> [CmmNode O O] +elimStackStores stackmap stackmaps area_off nodes + = go stackmap nodes + where + go _stackmap [] = [] + go stackmap (n:ns) + = case n of + CmmStore (CmmStackSlot area m) (CmmReg (CmmLocal r)) + | Just (_,off) <- lookupUFM (sm_regs stackmap) r + , area_off area + m == off + -> pprTrace "eliminated a node!" 
(ppr r) $ go stackmap ns + _otherwise + -> n : go (procMiddle stackmaps n stackmap) ns + + +-- ----------------------------------------------------------------------------- +-- Update info tables to include stack liveness + + +setInfoTableStackMap :: BlockEnv StackMap -> CmmDecl -> CmmDecl +setInfoTableStackMap stackmaps + (CmmProc top_info@TopInfo{..} l g@CmmGraph{g_entry = eid}) + = CmmProc top_info{ info_tbl = fix_info info_tbl } l g + where + fix_info info_tbl@CmmInfoTable{ cit_rep = StackRep _ } = + info_tbl { cit_rep = StackRep (get_liveness eid) } + fix_info other = other + + get_liveness :: BlockId -> Liveness + get_liveness lbl + = case mapLookup lbl stackmaps of + Nothing -> pprPanic "setInfoTableStackMap" (ppr lbl) + Just sm -> stackMapToLiveness sm + +setInfoTableStackMap _ d = d + + +stackMapToLiveness :: StackMap -> Liveness +stackMapToLiveness StackMap{..} = + reverse $ Array.elems $ + accumArray (\_ x -> x) True (toWords sm_ret_off + 1, + toWords (sm_sp - sm_args)) live_words + where + live_words = [ (toWords off, False) + | (r,off) <- eltsUFM sm_regs, isGcPtrType (localRegType r) ] + + +-- ----------------------------------------------------------------------------- +-- Lowering safe foreign calls + +{- +Note [lower safe foreign calls] + +We start with + + Sp[young(L1)] = L1 + ,----------------------- + | r1 = foo(x,y,z) returns to L1 + '----------------------- + L1: + R1 = r1 -- copyIn, inserted by mkSafeCall + ... + +the stack layout algorithm will arrange to save and reload everything +live across the call. Our job now is to expand the call so we get + + Sp[young(L1)] = L1 + ,----------------------- + | SAVE_THREAD_STATE() + | token = suspendThread(BaseReg, interruptible) + | r = foo(x,y,z) + | BaseReg = resumeThread(token) + | LOAD_THREAD_STATE() + | R1 = r -- copyOut + | jump L1 + '----------------------- + L1: + r = R1 -- copyIn, inserted by mkSafeCall + ... 
+ +Note the copyOut, which saves the results in the places that L1 is +expecting them (see Note {safe foreign call convention]). +-} + +lowerSafeForeignCall :: CmmBlock -> UniqSM CmmBlock +lowerSafeForeignCall block + | (entry, middle, CmmForeignCall { .. }) <- blockSplit block + = do + -- Both 'id' and 'new_base' are KindNonPtr because they're + -- RTS-only objects and are not subject to garbage collection + id <- newTemp bWord + new_base <- newTemp (cmmRegType (CmmGlobal BaseReg)) + let (caller_save, caller_load) = callerSaveVolatileRegs + load_tso <- newTemp gcWord + load_stack <- newTemp gcWord + let suspend = saveThreadState <*> + caller_save <*> + mkMiddle (callSuspendThread id intrbl) + midCall = mkUnsafeCall tgt res args + resume = mkMiddle (callResumeThread new_base id) <*> + -- Assign the result to BaseReg: we + -- might now have a different Capability! + mkAssign (CmmGlobal BaseReg) (CmmReg (CmmLocal new_base)) <*> + caller_load <*> + loadThreadState load_tso load_stack + -- Note: The successor must be a procpoint, and we have already split, + -- so we use a jump, not a branch. + succLbl = CmmLit (CmmLabel (infoTblLbl succ)) + + (ret_args, copyout) = copyOutOflow NativeReturn Jump (Young succ) + (map (CmmReg . 
CmmLocal) res) + updfr (0, []) + + jump = CmmCall { cml_target = succLbl + , cml_cont = Just succ + , cml_args = widthInBytes wordWidth + , cml_ret_args = ret_args + , cml_ret_off = updfr } + + graph' <- lgraphOfAGraph $ suspend <*> + midCall <*> + resume <*> + copyout <*> + mkLast jump + + case toBlockList graph' of + [one] -> let (_, middle', last) = blockSplit one + in return (blockJoin entry (middle `blockAppend` middle') last) + _ -> panic "lowerSafeForeignCall0" + + -- Block doesn't end in a safe foreign call: + | otherwise = return block + + +foreignLbl :: FastString -> CmmExpr +foreignLbl name = CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId name)) + +newTemp :: CmmType -> UniqSM LocalReg +newTemp rep = getUniqueM >>= \u -> return (LocalReg u rep) + +callSuspendThread :: LocalReg -> Bool -> CmmNode O O +callSuspendThread id intrbl = + CmmUnsafeForeignCall + (ForeignTarget (foreignLbl (fsLit "suspendThread")) + (ForeignConvention CCallConv [AddrHint, NoHint] [AddrHint])) + [id] [CmmReg (CmmGlobal BaseReg), CmmLit (mkIntCLit (fromEnum intrbl))] + +callResumeThread :: LocalReg -> LocalReg -> CmmNode O O +callResumeThread new_base id = + CmmUnsafeForeignCall + (ForeignTarget (foreignLbl (fsLit "resumeThread")) + (ForeignConvention CCallConv [AddrHint] [AddrHint])) + [new_base] [CmmReg (CmmLocal id)] + +-- ----------------------------------------------------------------------------- + +plusW :: ByteOff -> WordOff -> ByteOff +plusW b w = b + w * wORD_SIZE + +dropEmpty :: WordOff -> [StackSlot] -> Maybe [StackSlot] +dropEmpty 0 ss = Just ss +dropEmpty n (Empty : ss) = dropEmpty (n-1) ss +dropEmpty _ _ = Nothing + +isEmpty :: StackSlot -> Bool +isEmpty Empty = True +isEmpty _ = False + +localRegBytes :: LocalReg -> ByteOff +localRegBytes r = roundUpToWords (widthInBytes (typeWidth (localRegType r))) + +localRegWords :: LocalReg -> WordOff +localRegWords = toWords . 
localRegBytes + +toWords :: ByteOff -> WordOff +toWords x = x `quot` wORD_SIZE + + +insertReloads :: StackMap -> [CmmNode O O] +insertReloads stackmap = + [ CmmAssign (CmmLocal r) (CmmLoad (CmmStackSlot Old sp) + (localRegType r)) + | (r,sp) <- stackSlotRegs stackmap + ] + + +stackSlotRegs :: StackMap -> [(LocalReg, StackLoc)] +stackSlotRegs sm = eltsUFM (sm_regs sm) + +-- ----------------------------------------------------------------------------- + +-- If we do this *before* stack layout, we might be able to avoid +-- saving some things across calls/procpoints. +-- +-- *but*, that will invalidate the liveness analysis, and we'll have +-- to re-do it. + +cmmSink :: CmmGraph -> UniqSM CmmGraph +cmmSink graph = do + let liveness = cmmLiveness graph + return $ cmmSink' liveness graph + +cmmSink' :: BlockEnv CmmLive -> CmmGraph -> CmmGraph +cmmSink' liveness graph + = ofBlockList (g_entry graph) $ sink mapEmpty $ postorderDfs graph + where + + sink :: BlockEnv [(LocalReg, CmmExpr)] -> [CmmBlock] -> [CmmBlock] + sink _ [] = [] + sink sunk (b:bs) = + pprTrace "sink" (ppr l) $ + blockJoin first final_middle last : sink sunk' bs + where + l = entryLabel b + (first, middle, last) = blockSplit b + (middle', assigs) = walk (blockToList middle) emptyBlock + (mapFindWithDefault [] l sunk) + + (dropped_last, assigs') = partition (`conflictsWithLast` last) assigs + + final_middle = foldl blockSnoc middle' (toNodes dropped_last) + + sunk' = mapUnion sunk $ + mapFromList [ (l, filt assigs' (getLive l)) + | l <- successors last ] + where + getLive l = mapFindWithDefault Set.empty l liveness + filt as live = [ (r,e) | (r,e) <- as, r `Set.member` live ] + + +walk :: [CmmNode O O] -> Block CmmNode O O -> [(LocalReg, CmmExpr)] + -> (Block CmmNode O O, [(LocalReg, CmmExpr)]) + +walk [] acc as = (acc, as) +walk (n:ns) acc as + | Just a <- collect_it = walk ns acc (a:as) + | otherwise = walk ns (foldr (flip blockSnoc) acc (n:drop_nodes)) as' + where + collect_it = case n of + CmmAssign 
(CmmLocal r) e@(CmmReg (CmmGlobal _)) -> Just (r,e) +-- CmmAssign (CmmLocal r) e@(CmmLoad addr _) | +-- foldRegsUsed (\b r -> False) True addr -> Just (r,e) + _ -> Nothing + + drop_nodes = toNodes dropped + (dropped, as') = partition should_drop as + where should_drop a = a `conflicts` n + +toNodes :: [(LocalReg,CmmExpr)] -> [CmmNode O O] +toNodes as = [ CmmAssign (CmmLocal r) rhs | (r,rhs) <- as ] + +-- We only sink "r = G" assignments right now, so conflicts is very simple: +conflicts :: (LocalReg,CmmExpr) -> CmmNode O O -> Bool +(_, rhs) `conflicts` CmmAssign reg _ | reg `regUsedIn` rhs = True +--(r, CmmLoad _ _) `conflicts` CmmStore _ _ = True +(r, _) `conflicts` node + = foldRegsUsed (\b r' -> r == r' || b) False node + +conflictsWithLast :: (LocalReg,CmmExpr) -> CmmNode O C -> Bool +(r, _) `conflictsWithLast` node + = foldRegsUsed (\b r' -> r == r' || b) False node diff --git a/compiler/cmm/CmmLint.hs b/compiler/cmm/CmmLint.hs index 01ebac6254..2e24dd7f82 100644 --- a/compiler/cmm/CmmLint.hs +++ b/compiler/cmm/CmmLint.hs @@ -1,67 +1,70 @@ ----------------------------------------------------------------------------- -- --- (c) The University of Glasgow 2004-2006 +-- (c) The University of Glasgow 2011 -- -- CmmLint: checking the correctness of Cmm statements and expressions -- ----------------------------------------------------------------------------- - -{-# OPTIONS -fno-warn-tabs #-} --- The above warning supression flag is a temporary kludge. --- While working on this module you are encouraged to remove it and --- detab the module (please do the detabbing in a separate patch). 
See --- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#TabsvsSpaces --- for details - +{-# LANGUAGE GADTs #-} module CmmLint ( - cmmLint, cmmLintTop + cmmLint, cmmLintGraph ) where +import Hoopl +import Cmm +import CmmUtils +import PprCmm () import BlockId -import OldCmm -import CLabel +import FastString import Outputable -import OldPprCmm() import Constants -import FastString -import Platform import Data.Maybe +-- Things to check: +-- - invariant on CmmBlock in CmmExpr (see comment there) +-- - check for branches to blocks that don't exist +-- - check types + -- ----------------------------------------------------------------------------- -- Exported entry points: cmmLint :: (Outputable d, Outputable h) - => Platform -> GenCmmGroup d h (ListGraph CmmStmt) -> Maybe SDoc -cmmLint platform tops = runCmmLint platform (mapM_ (lintCmmDecl platform)) tops + => GenCmmGroup d h CmmGraph -> Maybe SDoc +cmmLint tops = runCmmLint (mapM_ lintCmmDecl) tops -cmmLintTop :: (Outputable d, Outputable h) - => Platform -> GenCmmDecl d h (ListGraph CmmStmt) -> Maybe SDoc -cmmLintTop platform top = runCmmLint platform (lintCmmDecl platform) top +cmmLintGraph :: CmmGraph -> Maybe SDoc +cmmLintGraph g = runCmmLint lintCmmGraph g -runCmmLint :: Outputable a - => Platform -> (a -> CmmLint b) -> a -> Maybe SDoc -runCmmLint _ l p = +runCmmLint :: Outputable a => (a -> CmmLint b) -> a -> Maybe SDoc +runCmmLint l p = case unCL (l p) of - Left err -> Just (vcat [ptext $ sLit ("Cmm lint error:"), - nest 2 err, - ptext $ sLit ("Program was:"), - nest 2 (ppr p)]) - Right _ -> Nothing - -lintCmmDecl :: Platform -> (GenCmmDecl h i (ListGraph CmmStmt)) -> CmmLint () -lintCmmDecl platform (CmmProc _ lbl (ListGraph blocks)) - = addLintInfo (text "in proc " <> pprCLabel platform lbl) $ - let labels = foldl (\s b -> setInsert (blockId b) s) setEmpty blocks - in mapM_ (lintCmmBlock platform labels) blocks - -lintCmmDecl _ (CmmData {}) + Left err -> Just (vcat [ptext $ sLit ("Cmm lint 
error:"), + nest 2 err, + ptext $ sLit ("Program was:"), + nest 2 (ppr p)]) + Right _ -> Nothing + +lintCmmDecl :: GenCmmDecl h i CmmGraph -> CmmLint () +lintCmmDecl (CmmProc _ lbl g) + = addLintInfo (text "in proc " <> ppr lbl) $ lintCmmGraph g +lintCmmDecl (CmmData {}) = return () -lintCmmBlock :: Platform -> BlockSet -> GenBasicBlock CmmStmt -> CmmLint () -lintCmmBlock platform labels (BasicBlock id stmts) - = addLintInfo (text "in basic block " <> ppr id) $ - mapM_ (lintCmmStmt platform labels) stmts + +lintCmmGraph :: CmmGraph -> CmmLint () +lintCmmGraph g = mapM_ (lintCmmBlock labels) blocks + where + blocks = toBlockList g + labels = setFromList (map entryLabel blocks) + + +lintCmmBlock :: BlockSet -> CmmBlock -> CmmLint () +lintCmmBlock labels block + = addLintInfo (text "in basic block " <> ppr (entryLabel block)) $ do + let (_, middle, last) = blockSplit block + mapM_ lintCmmMiddle (blockToList middle) + lintCmmLast labels last -- ----------------------------------------------------------------------------- -- lintCmmExpr @@ -69,24 +72,24 @@ lintCmmBlock platform labels (BasicBlock id stmts) -- Checks whether a CmmExpr is "type-correct", and check for obvious-looking -- byte/word mismatches. -lintCmmExpr :: Platform -> CmmExpr -> CmmLint CmmType -lintCmmExpr platform (CmmLoad expr rep) = do - _ <- lintCmmExpr platform expr +lintCmmExpr :: CmmExpr -> CmmLint CmmType +lintCmmExpr (CmmLoad expr rep) = do + _ <- lintCmmExpr expr -- Disabled, if we have the inlining phase before the lint phase, -- we can have funny offsets due to pointer tagging. -- EZY -- when (widthInBytes (typeWidth rep) >= wORD_SIZE) $ -- cmmCheckWordAddress expr return rep -lintCmmExpr platform expr@(CmmMachOp op args) = do - tys <- mapM (lintCmmExpr platform) args +lintCmmExpr expr@(CmmMachOp op args) = do + tys <- mapM lintCmmExpr args if map (typeWidth . 
cmmExprType) args == machOpArgReps op - then cmmCheckMachOp op args tys - else cmmLintMachOpErr expr (map cmmExprType args) (machOpArgReps op) -lintCmmExpr platform (CmmRegOff reg offset) - = lintCmmExpr platform (CmmMachOp (MO_Add rep) - [CmmReg reg, CmmLit (CmmInt (fromIntegral offset) rep)]) + then cmmCheckMachOp op args tys + else cmmLintMachOpErr expr (map cmmExprType args) (machOpArgReps op) +lintCmmExpr (CmmRegOff reg offset) + = lintCmmExpr (CmmMachOp (MO_Add rep) + [CmmReg reg, CmmLit (CmmInt (fromIntegral offset) rep)]) where rep = typeWidth (cmmRegType reg) -lintCmmExpr _ expr = +lintCmmExpr expr = return (cmmExprType expr) -- Check for some common byte/word mismatches (eg. Sp + 1) @@ -119,43 +122,61 @@ notNodeReg :: CmmExpr -> Bool notNodeReg (CmmReg reg) | reg == nodeReg = False notNodeReg _ = True -lintCmmStmt :: Platform -> BlockSet -> CmmStmt -> CmmLint () -lintCmmStmt platform labels = lint - where lint (CmmNop) = return () - lint (CmmComment {}) = return () - lint stmt@(CmmAssign reg expr) = do - erep <- lintCmmExpr platform expr - let reg_ty = cmmRegType reg +lintCmmMiddle :: CmmNode O O -> CmmLint () +lintCmmMiddle node = case node of + CmmComment _ -> return () + + CmmAssign reg expr -> do + erep <- lintCmmExpr expr + let reg_ty = cmmRegType reg if (erep `cmmEqType_ignoring_ptrhood` reg_ty) then return () - else cmmLintAssignErr stmt erep reg_ty - lint (CmmStore l r) = do - _ <- lintCmmExpr platform l - _ <- lintCmmExpr platform r + else cmmLintAssignErr (CmmAssign reg expr) erep reg_ty + + CmmStore l r -> do + _ <- lintCmmExpr l + _ <- lintCmmExpr r return () - lint (CmmCall target _res args _) = - do lintTarget platform labels target - mapM_ (lintCmmExpr platform . 
hintlessCmm) args - lint (CmmCondBranch e id) = checkTarget id >> lintCmmExpr platform e >> checkCond e - lint (CmmSwitch e branches) = do + + CmmUnsafeForeignCall target _formals actuals -> do + lintTarget target + mapM_ lintCmmExpr actuals + + +lintCmmLast :: BlockSet -> CmmNode O C -> CmmLint () +lintCmmLast labels node = case node of + CmmBranch id -> checkTarget id + + CmmCondBranch e t f -> do + mapM_ checkTarget [t,f] + _ <- lintCmmExpr e + checkCond e + + CmmSwitch e branches -> do mapM_ checkTarget $ catMaybes branches - erep <- lintCmmExpr platform e + erep <- lintCmmExpr e if (erep `cmmEqType_ignoring_ptrhood` bWord) then return () - else cmmLintErr (text "switch scrutinee is not a word: " <> ppr e <> - text " :: " <> ppr erep) - lint (CmmJump e _) = lintCmmExpr platform e >> return () - lint (CmmReturn) = return () - lint (CmmBranch id) = checkTarget id - checkTarget id = if setMember id labels then return () - else cmmLintErr (text "Branch to nonexistent id" <+> ppr id) - -lintTarget :: Platform -> BlockSet -> CmmCallTarget -> CmmLint () -lintTarget platform _ (CmmCallee e _) = do _ <- lintCmmExpr platform e - return () -lintTarget _ _ (CmmPrim _ Nothing) = return () -lintTarget platform labels (CmmPrim _ (Just stmts)) - = mapM_ (lintCmmStmt platform labels) stmts + else cmmLintErr (text "switch scrutinee is not a word: " <> + ppr e <> text " :: " <> ppr erep) + + CmmCall { cml_target = target, cml_cont = cont } -> do + _ <- lintCmmExpr target + maybe (return ()) checkTarget cont + + CmmForeignCall tgt _ args succ _ _ -> do + lintTarget tgt + mapM_ lintCmmExpr args + checkTarget succ + where + checkTarget id + | setMember id labels = return () + | otherwise = cmmLintErr (text "Branch to nonexistent id" <+> ppr id) + + +lintTarget :: ForeignTarget -> CmmLint () +lintTarget (ForeignTarget e _) = lintCmmExpr e >> return () +lintTarget (PrimTarget {}) = return () checkCond :: CmmExpr -> CmmLint () @@ -163,7 +184,7 @@ checkCond (CmmMachOp mop _) | 
isComparisonMachOp mop = return () checkCond (CmmLit (CmmInt x t)) | x == 0 || x == 1, t == wordWidth = return () -- constant values checkCond expr = cmmLintErr (hang (text "expression is not a conditional:") 2 - (ppr expr)) + (ppr expr)) -- ----------------------------------------------------------------------------- -- CmmLint monad @@ -173,37 +194,36 @@ checkCond expr newtype CmmLint a = CmmLint { unCL :: Either SDoc a } instance Monad CmmLint where - CmmLint m >>= k = CmmLint $ case m of - Left e -> Left e - Right a -> unCL (k a) + CmmLint m >>= k = CmmLint $ case m of + Left e -> Left e + Right a -> unCL (k a) return a = CmmLint (Right a) cmmLintErr :: SDoc -> CmmLint a cmmLintErr msg = CmmLint (Left msg) addLintInfo :: SDoc -> CmmLint a -> CmmLint a -addLintInfo info thing = CmmLint $ +addLintInfo info thing = CmmLint $ case unCL thing of - Left err -> Left (hang info 2 err) - Right a -> Right a + Left err -> Left (hang info 2 err) + Right a -> Right a cmmLintMachOpErr :: CmmExpr -> [CmmType] -> [Width] -> CmmLint a cmmLintMachOpErr expr argsRep opExpectsRep - = cmmLintErr (text "in MachOp application: " $$ - nest 2 (ppr expr) $$ - (text "op is expecting: " <+> ppr opExpectsRep) $$ - (text "arguments provide: " <+> ppr argsRep)) + = cmmLintErr (text "in MachOp application: " $$ + nest 2 (ppr expr) $$ + (text "op is expecting: " <+> ppr opExpectsRep) $$ + (text "arguments provide: " <+> ppr argsRep)) -cmmLintAssignErr :: CmmStmt -> CmmType -> CmmType -> CmmLint a +cmmLintAssignErr :: CmmNode e x -> CmmType -> CmmType -> CmmLint a cmmLintAssignErr stmt e_ty r_ty - = cmmLintErr (text "in assignment: " $$ - nest 2 (vcat [ppr stmt, - text "Reg ty:" <+> ppr r_ty, - text "Rhs ty:" <+> ppr e_ty])) - - + = cmmLintErr (text "in assignment: " $$ + nest 2 (vcat [ppr stmt, + text "Reg ty:" <+> ppr r_ty, + text "Rhs ty:" <+> ppr e_ty])) + cmmLintDubiousWordOffset :: CmmExpr -> CmmLint a cmmLintDubiousWordOffset expr = cmmLintErr (text "offset is not a multiple of words: " 
$$ - nest 2 (ppr expr)) + nest 2 (ppr expr)) diff --git a/compiler/cmm/CmmLive.hs b/compiler/cmm/CmmLive.hs index 9a5bb2d5ae..f0163fefc4 100644 --- a/compiler/cmm/CmmLive.hs +++ b/compiler/cmm/CmmLive.hs @@ -11,17 +11,15 @@ module CmmLive ) where +import UniqSupply import BlockId import Cmm import CmmUtils -import Control.Monad -import OptimizationFuel import PprCmmExpr () -import Compiler.Hoopl +import Hoopl import Maybes import Outputable -import UniqSet ----------------------------------------------------------------------------- -- Calculating what variables are live on entry to a basic block @@ -33,8 +31,10 @@ type CmmLive = RegSet -- | The dataflow lattice liveLattice :: DataflowLattice CmmLive liveLattice = DataflowLattice "live LocalReg's" emptyRegSet add - where add _ (OldFact old) (NewFact new) = case unionUniqSets old new of - join -> (changeIf $ sizeUniqSet join > sizeUniqSet old, join) + where add _ (OldFact old) (NewFact new) = + (changeIf $ sizeRegSet join > sizeRegSet old, join) + where !join = plusRegSet old new + -- | A mapping from block labels to the variables live on entry type BlockEntryLiveness = BlockEnv CmmLive @@ -43,16 +43,17 @@ type BlockEntryLiveness = BlockEnv CmmLive -- | Calculated liveness info for a CmmGraph ----------------------------------------------------------------------------- -cmmLiveness :: CmmGraph -> FuelUniqSM BlockEntryLiveness +cmmLiveness :: CmmGraph -> BlockEntryLiveness cmmLiveness graph = - liftM check $ liftM snd $ dataflowPassBwd graph [] $ analBwd liveLattice xferLive + check $ dataflowAnalBwd graph [] $ analBwd liveLattice xferLive where entry = g_entry graph - check facts = noLiveOnEntry entry (expectJust "check" $ mapLookup entry facts) facts + check facts = noLiveOnEntry entry + (expectJust "check" $ mapLookup entry facts) facts -- | On entry to the procedure, there had better not be any LocalReg's live-in. 
noLiveOnEntry :: BlockId -> CmmLive -> a -> a noLiveOnEntry bid in_fact x = - if isEmptyUniqSet in_fact then x + if nullRegSet in_fact then x else pprPanic "LocalReg's live-in to graph" (ppr bid <+> ppr in_fact) -- | The transfer equations use the traditional 'gen' and 'kill' @@ -60,42 +61,42 @@ noLiveOnEntry bid in_fact x = gen :: UserOfLocalRegs a => a -> RegSet -> RegSet gen a live = foldRegsUsed extendRegSet live a kill :: DefinerOfLocalRegs a => a -> RegSet -> RegSet -kill a live = foldRegsDefd delOneFromUniqSet live a +kill a live = foldRegsDefd deleteFromRegSet live a -gen_kill :: (DefinerOfLocalRegs a, UserOfLocalRegs a) => a -> CmmLive -> CmmLive +gen_kill :: (DefinerOfLocalRegs a, UserOfLocalRegs a) + => a -> CmmLive -> CmmLive gen_kill a = gen a . kill a -- | The transfer function --- EZY: Bits of this analysis are duplicated in CmmSpillReload, though --- it's not really easy to efficiently reuse all of this. Keep in mind --- if you need to update this analysis. xferLive :: BwdTransfer CmmNode CmmLive xferLive = mkBTransfer3 fst mid lst where fst _ f = f mid :: CmmNode O O -> CmmLive -> CmmLive mid n f = gen_kill n f lst :: CmmNode O C -> FactBase CmmLive -> CmmLive - -- slightly inefficient: kill is unnecessary for emptyRegSet - lst n f = gen_kill n - $ case n of CmmCall{} -> emptyRegSet - CmmForeignCall{} -> emptyRegSet - _ -> joinOutFacts liveLattice n f + lst n f = gen_kill n $ joinOutFacts liveLattice n f ----------------------------------------------------------------------------- -- Removing assignments to dead variables ----------------------------------------------------------------------------- -removeDeadAssignments :: CmmGraph -> FuelUniqSM CmmGraph +removeDeadAssignments :: CmmGraph -> UniqSM (CmmGraph, BlockEnv CmmLive) removeDeadAssignments g = - liftM fst $ dataflowPassBwd g [] $ analRewBwd liveLattice xferLive rewrites - where rewrites = deepBwdRw3 nothing middle nothing - -- Beware: deepBwdRw with one polymorphic function seems more 
reasonable here, - -- but GHC panics while compiling, see bug #4045. + dataflowPassBwd g [] $ analRewBwd liveLattice xferLive rewrites + where rewrites = mkBRewrite3 nothing middle nothing + -- SDM: no need for deepBwdRw here, we only rewrite to empty + -- Beware: deepBwdRw with one polymorphic function seems more + -- reasonable here, but GHC panics while compiling, see bug + -- #4045. middle :: CmmNode O O -> Fact O CmmLive -> CmmReplGraph O O - middle (CmmAssign (CmmLocal reg') _) live | not (reg' `elemRegSet` live) = return $ Just emptyGraph + middle (CmmAssign (CmmLocal reg') _) live + | not (reg' `elemRegSet` live) + = return $ Just emptyGraph -- XXX maybe this should be somewhere else... - middle (CmmAssign lhs (CmmReg rhs)) _ | lhs == rhs = return $ Just emptyGraph - middle (CmmStore lhs (CmmLoad rhs _)) _ | lhs == rhs = return $ Just emptyGraph + middle (CmmAssign lhs (CmmReg rhs)) _ | lhs == rhs + = return $ Just emptyGraph + middle (CmmStore lhs (CmmLoad rhs _)) _ | lhs == rhs + = return $ Just emptyGraph middle _ _ = return Nothing nothing :: CmmNode e x -> Fact x CmmLive -> CmmReplGraph e x diff --git a/compiler/cmm/CmmNode.hs b/compiler/cmm/CmmNode.hs index 4844af9d9a..9e75387436 100644 --- a/compiler/cmm/CmmNode.hs +++ b/compiler/cmm/CmmNode.hs @@ -18,7 +18,7 @@ module CmmNode ( CmmNode(..), ForeignHint(..), CmmFormal, CmmActual, UpdFrameOffset, Convention(..), ForeignConvention(..), ForeignTarget(..), mapExp, mapExpDeep, wrapRecExp, foldExp, foldExpDeep, wrapRecExpf, - mapExpM, mapExpDeepM, wrapRecExpM + mapExpM, mapExpDeepM, wrapRecExpM, mapSuccessors ) where import CmmExpr @@ -35,15 +35,17 @@ import Prelude hiding (succ) ------------------------ -- CmmNode +#define ULabel {-# UNPACK #-} !Label + data CmmNode e x where - CmmEntry :: Label -> CmmNode C O + CmmEntry :: ULabel -> CmmNode C O CmmComment :: FastString -> CmmNode O O - CmmAssign :: CmmReg -> CmmExpr -> CmmNode O O + CmmAssign :: !CmmReg -> !CmmExpr -> CmmNode O O -- Assign to register - 
CmmStore :: CmmExpr -> CmmExpr -> CmmNode O O + CmmStore :: !CmmExpr -> !CmmExpr -> CmmNode O O -- Assign to memory location. Size is -- given by cmmExprType of the rhs. @@ -60,11 +62,12 @@ data CmmNode e x where -- bug for what can be put in arguments, see -- Note [Register Parameter Passing] - CmmBranch :: Label -> CmmNode O C -- Goto another block in the same procedure + CmmBranch :: ULabel -> CmmNode O C + -- Goto another block in the same procedure CmmCondBranch :: { -- conditional branch cml_pred :: CmmExpr, - cml_true, cml_false :: Label + cml_true, cml_false :: ULabel } -> CmmNode O C CmmSwitch :: CmmExpr -> [Maybe Label] -> CmmNode O C -- Table branch @@ -78,6 +81,11 @@ data CmmNode e x where cml_cont :: Maybe Label, -- Label of continuation (Nothing for return or tail call) + -- + -- Note [Continuation BlockId]: these BlockIds are called + -- Continuation BlockIds, and are the only BlockIds that can + -- occur in CmmExprs, namely as (CmmLit (CmmBlock b)) or + -- (CmmStackSlot (Young b) _). -- ToDO: add this: -- cml_args_regs :: [GlobalReg], @@ -117,7 +125,7 @@ data CmmNode e x where tgt :: ForeignTarget, -- call target and convention res :: [CmmFormal], -- zero or more results args :: [CmmActual], -- zero or more arguments; see Note [Register parameter passing] - succ :: Label, -- Label of continuation + succ :: ULabel, -- Label of continuation updfr :: UpdFrameOffset, -- where the update frame is (for building infotable) intrbl:: Bool -- whether or not the call is interruptible } -> CmmNode O C @@ -218,14 +226,6 @@ data Convention | GC -- Entry to the garbage collector: uses the node reg! 
| PrimOpCall -- Calling prim ops | PrimOpReturn -- Returning from prim ops - | Foreign -- Foreign call/return - ForeignConvention - | Private - -- Used for control transfers within a (pre-CPS) procedure All - -- jump sites known, never pushed on the stack (hence no SRT) - -- You can choose whatever calling convention you please - -- (provided you make sure all the call sites agree)! - -- This data type eventually to be extended to record the convention. deriving( Eq ) data ForeignConvention @@ -283,37 +283,6 @@ instance DefinerOfLocalRegs (CmmNode e x) where fold f z n = foldRegsDefd f z n -instance UserOfSlots (CmmNode e x) where - foldSlotsUsed f z n = case n of - CmmAssign _ expr -> fold f z expr - CmmStore addr rval -> fold f (fold f z addr) rval - CmmUnsafeForeignCall _ _ args -> fold f z args - CmmCondBranch expr _ _ -> fold f z expr - CmmSwitch expr _ -> fold f z expr - CmmCall {cml_target=tgt} -> fold f z tgt - CmmForeignCall {tgt=tgt, args=args} -> fold f (fold f z tgt) args - _ -> z - where fold :: forall a b. - UserOfSlots a => - (b -> SubArea -> b) -> b -> a -> b - fold f z n = foldSlotsUsed f z n - -instance UserOfSlots ForeignTarget where - foldSlotsUsed f z (ForeignTarget e _) = foldSlotsUsed f z e - foldSlotsUsed _f z (PrimTarget _) = z - -instance DefinerOfSlots (CmmNode e x) where - foldSlotsDefd f z n = case n of - CmmStore (CmmStackSlot a i) expr -> f z (a, i, widthInBytes $ typeWidth $ cmmExprType expr) - CmmForeignCall {res=res} -> fold f z $ map foreign_call_slot res - _ -> z - where - fold :: forall a b. 
- DefinerOfSlots a => - (b -> SubArea -> b) -> b -> a -> b - fold f z n = foldSlotsDefd f z n - foreign_call_slot r = case widthInBytes $ typeWidth $ localRegType r of w -> (RegSlot r, w, w) - ----------------------------------- -- mapping Expr in CmmNode @@ -416,4 +385,20 @@ foldExp f (CmmCall {cml_target=tgt}) z = f tgt z foldExp f (CmmForeignCall {tgt=tgt, args=args}) z = foldr f (foldExpForeignTarget f tgt z) args foldExpDeep :: (CmmExpr -> z -> z) -> CmmNode e x -> z -> z -foldExpDeep f = foldExp $ wrapRecExpf f +foldExpDeep f = foldExp go + where -- go :: CmmExpr -> z -> z + go e@(CmmMachOp _ es) z = gos es $! f e z + go e@(CmmLoad addr _) z = go addr $! f e z + go e z = f e z + + gos [] z = z + gos (e:es) z = gos es $! f e z + +-- ----------------------------------------------------------------------------- + +mapSuccessors :: (Label -> Label) -> CmmNode O C -> CmmNode O C +mapSuccessors f (CmmBranch bid) = CmmBranch (f bid) +mapSuccessors f (CmmCondBranch p y n) = CmmCondBranch p (f y) (f n) +mapSuccessors f (CmmSwitch e arms) = CmmSwitch e (map (fmap f) arms) +mapSuccessors _ n = n + diff --git a/compiler/cmm/CmmOpt.hs b/compiler/cmm/CmmOpt.hs index 8cc18fc1ca..8ff04cfa7b 100644 --- a/compiler/cmm/CmmOpt.hs +++ b/compiler/cmm/CmmOpt.hs @@ -145,8 +145,7 @@ To inline _smi: -} countUses :: UserOfLocalRegs a => a -> UniqFM Int -countUses a = foldRegsUsed (\m r -> addToUFM m r (count m r + 1)) emptyUFM a - where count m r = lookupWithDefaultUFM m (0::Int) r +countUses a = foldRegsUsed (\m r -> addToUFM_C (+) m r 1) emptyUFM a cmmMiniInline :: DynFlags -> [CmmBasicBlock] -> [CmmBasicBlock] cmmMiniInline dflags blocks = map do_inline blocks @@ -157,25 +156,16 @@ cmmMiniInlineStmts :: DynFlags -> UniqFM Int -> [CmmStmt] -> [CmmStmt] cmmMiniInlineStmts _ _ [] = [] cmmMiniInlineStmts dflags uses (stmt@(CmmAssign (CmmLocal (LocalReg u _)) expr) : stmts) -- not used: just discard this assignment - | Nothing <- lookupUFM uses u + | 0 <- lookupWithDefaultUFM uses 0 u = 
cmmMiniInlineStmts dflags uses stmts - -- used (literal): try to inline at all the use sites - | Just n <- lookupUFM uses u, isLit expr - = - ncgDebugTrace ("nativeGen: inlining " ++ showSDoc dflags (pprStmt stmt)) $ - case lookForInlineLit u expr stmts of - (m, stmts') - | n == m -> cmmMiniInlineStmts dflags (delFromUFM uses u) stmts' - | otherwise -> - stmt : cmmMiniInlineStmts dflags (adjustUFM (\x -> x - m) uses u) stmts' - - -- used (foldable to literal): try to inline at all the use sites + -- used (foldable to small thing): try to inline at all the use sites | Just n <- lookupUFM uses u, - e@(CmmLit _) <- wrapRecExp foldExp expr + e <- wrapRecExp foldExp expr, + isTiny e = ncgDebugTrace ("nativeGen: inlining " ++ showSDoc dflags (pprStmt stmt)) $ - case lookForInlineLit u e stmts of + case lookForInlineMany u e stmts of (m, stmts') | n == m -> cmmMiniInlineStmts dflags (delFromUFM uses u) stmts' | otherwise -> @@ -188,6 +178,11 @@ cmmMiniInlineStmts dflags uses (stmt@(CmmAssign (CmmLocal (LocalReg u _)) expr) ncgDebugTrace ("nativeGen: inlining " ++ showSDoc dflags (pprStmt stmt)) $ cmmMiniInlineStmts dflags uses stmts' where + isTiny (CmmLit _) = True + isTiny (CmmReg (CmmGlobal _)) = True + -- not CmmLocal: that might invalidate the usage analysis results + isTiny _ = False + platform = targetPlatform dflags foldExp (CmmMachOp op args) = cmmMachOpFold platform op args foldExp e = e @@ -201,26 +196,28 @@ cmmMiniInlineStmts platform uses (stmt:stmts) -- register, and a list of statements. Inlines the expression at all -- use sites of the register. Returns the number of substituations -- made and the, possibly modified, list of statements. 
-lookForInlineLit :: Unique -> CmmExpr -> [CmmStmt] -> (Int, [CmmStmt]) -lookForInlineLit _ _ [] = (0, []) -lookForInlineLit u expr stmts@(stmt : rest) - | Just n <- lookupUFM (countUses stmt) u - = case lookForInlineLit u expr rest of - (m, stmts) -> let z = n + m - in z `seq` (z, inlineStmt u expr stmt : stmts) - - | ok_to_skip - = case lookForInlineLit u expr rest of +lookForInlineMany :: Unique -> CmmExpr -> [CmmStmt] -> (Int, [CmmStmt]) +lookForInlineMany u expr stmts = lookForInlineMany' u expr regset stmts + where regset = foldRegsUsed extendRegSet emptyRegSet expr + +lookForInlineMany' :: Unique -> CmmExpr -> RegSet -> [CmmStmt] -> (Int, [CmmStmt]) +lookForInlineMany' _ _ _ [] = (0, []) +lookForInlineMany' u expr regset stmts@(stmt : rest) + | Just n <- lookupUFM (countUses stmt) u, okToInline expr stmt + = let stmt' = inlineStmt u expr stmt in + if okToSkip stmt' u expr regset + then case lookForInlineMany' u expr regset rest of + (m, stmts) -> let z = n + m + in z `seq` (z, stmt' : stmts) + else (n, stmt' : rest) + + | okToSkip stmt u expr regset + = case lookForInlineMany' u expr regset rest of (n, stmts) -> (n, stmt : stmts) | otherwise = (0, stmts) - where - -- We skip over assignments to registers, unless the register - -- being assigned to is the one we're inlining. 
- ok_to_skip = case stmt of - CmmAssign (CmmLocal (LocalReg u' _)) _ | u' == u -> False - _other -> True + lookForInline :: Unique -> CmmExpr -> [CmmStmt] -> Maybe [CmmStmt] lookForInline u expr stmts = lookForInline' u expr regset stmts @@ -229,10 +226,10 @@ lookForInline u expr stmts = lookForInline' u expr regset stmts lookForInline' :: Unique -> CmmExpr -> RegSet -> [CmmStmt] -> Maybe [CmmStmt] lookForInline' _ _ _ [] = panic "lookForInline' []" lookForInline' u expr regset (stmt : rest) - | Just 1 <- lookupUFM (countUses stmt) u, ok_to_inline + | Just 1 <- lookupUFM (countUses stmt) u, okToInline expr stmt = Just (inlineStmt u expr stmt : rest) - | ok_to_skip + | okToSkip stmt u expr regset = case lookForInline' u expr regset rest of Nothing -> Nothing Just stmts -> Just (stmt:stmts) @@ -240,31 +237,37 @@ lookForInline' u expr regset (stmt : rest) | otherwise = Nothing - where - -- we don't inline into CmmCall if the expression refers to global - -- registers. This is a HACK to avoid global registers clashing with - -- C argument-passing registers, really the back-end ought to be able - -- to handle it properly, but currently neither PprC nor the NCG can - -- do it. See also CgForeignCall:load_args_into_temps. - ok_to_inline = case stmt of - CmmCall{} -> hasNoGlobalRegs expr - _ -> True - - -- Expressions aren't side-effecting. Temporaries may or may not - -- be single-assignment depending on the source (the old code - -- generator creates single-assignment code, but hand-written Cmm - -- and Cmm from the new code generator is not single-assignment.) - -- So we do an extra check to make sure that the register being - -- changed is not one we were relying on. I don't know how much of a - -- performance hit this is (we have to create a regset for every - -- instruction.) 
-- EZY - ok_to_skip = case stmt of - CmmNop -> True - CmmComment{} -> True - CmmAssign (CmmLocal r@(LocalReg u' _)) _rhs | u' /= u && not (r `elemRegSet` regset) -> True - CmmAssign g@(CmmGlobal _) _rhs -> not (g `regUsedIn` expr) - _other -> False +-- we don't inline into CmmCall if the expression refers to global +-- registers. This is a HACK to avoid global registers clashing with +-- C argument-passing registers, really the back-end ought to be able +-- to handle it properly, but currently neither PprC nor the NCG can +-- do it. See also CgForeignCall:load_args_into_temps. +okToInline :: CmmExpr -> CmmStmt -> Bool +okToInline expr CmmCall{} = hasNoGlobalRegs expr +okToInline _ _ = True + +-- Expressions aren't side-effecting. Temporaries may or may not +-- be single-assignment depending on the source (the old code +-- generator creates single-assignment code, but hand-written Cmm +-- and Cmm from the new code generator is not single-assignment.) +-- So we do an extra check to make sure that the register being +-- changed is not one we were relying on. I don't know how much of a +-- performance hit this is (we have to create a regset for every +-- instruction.) 
-- EZY +okToSkip :: CmmStmt -> Unique -> CmmExpr -> RegSet -> Bool +okToSkip stmt u expr regset + = case stmt of + CmmNop -> True + CmmComment{} -> True + CmmAssign (CmmLocal r@(LocalReg u' _)) _rhs | u' /= u && not (r `elemRegSet` regset) -> True + CmmAssign g@(CmmGlobal _) _rhs -> not (g `regUsedIn` expr) + CmmStore _ _ -> not_a_load expr + _other -> False + where + not_a_load (CmmMachOp _ args) = all not_a_load args + not_a_load (CmmLoad _ _) = False + not_a_load _ = True inlineStmt :: Unique -> CmmExpr -> CmmStmt -> CmmStmt inlineStmt u a (CmmAssign r e) = CmmAssign r (inlineExpr u a e) diff --git a/compiler/cmm/CmmParse.y b/compiler/cmm/CmmParse.y index 075ed22ea9..f46d49e022 100644 --- a/compiler/cmm/CmmParse.y +++ b/compiler/cmm/CmmParse.y @@ -230,35 +230,31 @@ lits :: { [ExtFCode CmmExpr] } cmmproc :: { ExtCode } -- TODO: add real SRT/info tables to parsed Cmm - : info maybe_formals_without_hints maybe_gc_block maybe_frame '{' body '}' - { do ((entry_ret_label, info, live, formals, gc_block, frame), stmts) <- + : info maybe_formals_without_hints '{' body '}' + { do ((entry_ret_label, info, live, formals), stmts) <- getCgStmtsEC' $ loopDecls $ do { (entry_ret_label, info, live) <- $1; formals <- sequence $2; - gc_block <- $3; - frame <- $4; - $6; - return (entry_ret_label, info, live, formals, gc_block, frame) } + $4; + return (entry_ret_label, info, live, formals) } blks <- code (cgStmtsToBlocks stmts) - code (emitInfoTableAndCode entry_ret_label (CmmInfo gc_block frame info) formals blks) } + code (emitInfoTableAndCode entry_ret_label info formals blks) } | info maybe_formals_without_hints ';' { do (entry_ret_label, info, live) <- $1; formals <- sequence $2; - code (emitInfoTableAndCode entry_ret_label (CmmInfo Nothing Nothing info) formals []) } + code (emitInfoTableAndCode entry_ret_label info formals []) } - | NAME maybe_formals_without_hints maybe_gc_block maybe_frame '{' body '}' + | NAME maybe_formals_without_hints '{' body '}' {% withThisPackage $ 
\pkg -> do newFunctionName $1 pkg - ((formals, gc_block, frame), stmts) <- + (formals, stmts) <- getCgStmtsEC' $ loopDecls $ do { formals <- sequence $2; - gc_block <- $3; - frame <- $4; - $6; - return (formals, gc_block, frame) } + $4; + return formals } blks <- code (cgStmtsToBlocks stmts) - code (emitProc (CmmInfo gc_block frame CmmNonInfoTable) (mkCmmCodeLabel pkg $1) formals blks) } + code (emitProc CmmNonInfoTable (mkCmmCodeLabel pkg $1) formals blks) } info :: { ExtFCode (CLabel, CmmInfoTable, [Maybe LocalReg]) } : 'INFO_TABLE' '(' NAME ',' INT ',' INT ',' INT ',' STRING ',' STRING ')' @@ -599,18 +595,7 @@ formals_without_hints :: { [ExtFCode LocalReg] } formal_without_hint :: { ExtFCode LocalReg } : type NAME { newLocal $1 $2 } -maybe_frame :: { ExtFCode (Maybe UpdateFrame) } - : {- empty -} { return Nothing } - | 'jump' expr '(' exprs0 ')' { do { target <- $2; - args <- sequence $4; - return $ Just (UpdateFrame target args) } } - -maybe_gc_block :: { ExtFCode (Maybe BlockId) } - : {- empty -} { return Nothing } - | 'goto' NAME - { do l <- lookupLabel $2; return (Just l) } - -type :: { CmmType } +type :: { CmmType } : 'bits8' { b8 } | typenot8 { $1 } @@ -1073,7 +1058,8 @@ parseCmmFile dflags filename = do let msg = mkPlainErrMsg dflags span err return ((emptyBag, unitBag msg), Nothing) POk pst code -> do - cmm <- initC dflags no_module (getCmm (unEC code initEnv [] >> return ())) + st <- initC + let (cmm,_) = runC dflags no_module st (getCmm (unEC code initEnv [] >> return ())) let ms = getMessages pst if (errorsFound dflags ms) then return (ms, Nothing) diff --git a/compiler/cmm/CmmPipeline.hs b/compiler/cmm/CmmPipeline.hs index 7361bbf385..bb8d5b2f22 100644 --- a/compiler/cmm/CmmPipeline.hs +++ b/compiler/cmm/CmmPipeline.hs @@ -11,25 +11,23 @@ module CmmPipeline ( import CLabel import Cmm -import CmmLive +import CmmLint import CmmBuildInfoTables import CmmCommonBlockElim import CmmProcPoint -import CmmSpillReload -import CmmRewriteAssignments -import 
CmmStackLayout import CmmContFlowOpt -import OptimizationFuel +import CmmLayoutStack +import UniqSupply import DynFlags import ErrUtils import HscTypes import Data.Maybe import Control.Monad -import Data.Map (Map) -import qualified Data.Map as Map import Outputable -import StaticFlags + +import qualified Data.Set as Set +import Data.Map (Map) ----------------------------------------------------------------------------- -- | Top level driver for C-- pipeline @@ -53,32 +51,28 @@ import StaticFlags -- we actually need to do the initial pass. cmmPipeline :: HscEnv -- Compilation env including -- dynamic flags: -dcmm-lint -ddump-cps-cmm - -> (TopSRT, [CmmGroup]) -- SRT table and accumulating list of compiled procs + -> TopSRT -- SRT table and accumulating list of compiled procs -> CmmGroup -- Input C-- with Procedures - -> IO (TopSRT, [CmmGroup]) -- Output CPS transformed C-- -cmmPipeline hsc_env (topSRT, rst) prog = + -> IO (TopSRT, CmmGroup) -- Output CPS transformed C-- +cmmPipeline hsc_env topSRT prog = do let dflags = hsc_dflags hsc_env -- showPass dflags "CPSZ" - let tops = runCmmContFlowOpts prog - (cafEnvs, tops) <- liftM unzip $ mapM (cpsTop hsc_env) tops + (cafEnvs, tops) <- {-# SCC "tops" #-} liftM unzip $ mapM (cpsTop hsc_env) prog -- tops :: [[(CmmDecl,CAFSet]] (one list per group) - let topCAFEnv = mkTopCAFInfo (concat cafEnvs) + let topCAFEnv = {-# SCC "topCAFEnv" #-} mkTopCAFInfo (concat cafEnvs) -- folding over the groups - (topSRT, tops) <- foldM (toTops hsc_env topCAFEnv) (topSRT, []) tops + (topSRT, tops) <- {-# SCC "toTops" #-} foldM (toTops topCAFEnv) (topSRT, []) tops let cmms :: CmmGroup cmms = reverse (concat tops) dumpIfSet_dyn dflags Opt_D_dump_cps_cmm "Post CPS Cmm" (ppr cmms) - -- SRT is not affected by control flow optimization pass - let prog' = runCmmContFlowOpts cmms - - return (topSRT, prog' : rst) + return (topSRT, cmms) {- [Note global fuel] ~~~~~~~~~~~~~~~~~~~~~ @@ -92,105 +86,110 @@ global to one compiler session. 
-- -ddump-cmmz cpsTop :: HscEnv -> CmmDecl -> IO ([(CLabel, CAFSet)], [(CAFSet, CmmDecl)]) -cpsTop _ p@(CmmData {}) = return ([], [(Map.empty, p)]) +cpsTop _ p@(CmmData {}) = return ([], [(Set.empty, p)]) cpsTop hsc_env (CmmProc h@(TopInfo {stack_info=StackInfo {arg_space=entry_off}}) l g) = do - -- Why bother doing these early: dualLivenessWithInsertion, - -- insertLateReloads, rewriteAssignments? + ----------- Control-flow optimisations --------------- + g <- {-# SCC "cmmCfgOpts(1)" #-} return $ cmmCfgOpts g + dump Opt_D_dump_cmmz_cfg "Post control-flow optimsations" g ----------- Eliminate common blocks ------------------- - g <- return $ elimCommonBlocks g + g <- {-# SCC "elimCommonBlocks" #-} return $ elimCommonBlocks g dump Opt_D_dump_cmmz_cbe "Post common block elimination" g - -- Any work storing block Labels must be performed _after_ elimCommonBlocks + -- Any work storing block Labels must be performed _after_ + -- elimCommonBlocks ----------- Proc points ------------------- - let callPPs = callProcPoints g - procPoints <- run $ minimalProcPointSet (targetPlatform dflags) callPPs g - g <- run $ addProcPointProtocols callPPs procPoints g - dump Opt_D_dump_cmmz_proc "Post Proc Points Added" g - - ----------- Spills and reloads ------------------- - g <- run $ dualLivenessWithInsertion procPoints g - dump Opt_D_dump_cmmz_spills "Post spills and reloads" g - - ----------- Sink and inline assignments ------------------- - g <- runOptimization $ rewriteAssignments platform g - dump Opt_D_dump_cmmz_rewrite "Post rewrite assignments" g - - ----------- Eliminate dead assignments ------------------- - g <- runOptimization $ removeDeadAssignments g - dump Opt_D_dump_cmmz_dead "Post remove dead assignments" g - - ----------- Zero dead stack slots (Debug only) --------------- - -- Debugging: stubbing slots on death can cause crashes early - g <- if opt_StubDeadValues - then run $ stubSlotsOnDeath g - else return g - dump Opt_D_dump_cmmz_stub "Post stub dead stack 
slots" g - - --------------- Stack layout ---------------- - slotEnv <- run $ liveSlotAnal g - let spEntryMap = getSpEntryMap entry_off g - mbpprTrace "live slot analysis results: " (ppr slotEnv) $ return () - let areaMap = layout procPoints spEntryMap slotEnv entry_off g - mbpprTrace "areaMap" (ppr areaMap) $ return () - - ------------ Manifest the stack pointer -------- - g <- run $ manifestSP spEntryMap areaMap entry_off g - dump Opt_D_dump_cmmz_sp "Post manifestSP" g - -- UGH... manifestSP can require updates to the procPointMap. - -- We can probably do something quicker here for the update... + let callPPs = {-# SCC "callProcPoints" #-} callProcPoints g + procPoints <- {-# SCC "minimalProcPointSet" #-} runUniqSM $ + minimalProcPointSet (targetPlatform dflags) callPPs g + + ----------- Layout the stack and manifest Sp --------------- + -- (also does: removeDeadAssignments, and lowerSafeForeignCalls) + (g, stackmaps) <- {-# SCC "layoutStack" #-} + runUniqSM $ cmmLayoutStack procPoints entry_off g + dump Opt_D_dump_cmmz_sp "Layout Stack" g + +-- g <- {-# SCC "sink" #-} runUniqSM $ cmmSink g +-- dump Opt_D_dump_cmmz_rewrite "Sink assignments" g + +-- ----------- Sink and inline assignments ------------------- +-- g <- {-# SCC "rewriteAssignments" #-} runOptimization $ +-- rewriteAssignments platform g +-- dump Opt_D_dump_cmmz_rewrite "Post rewrite assignments" g ------------- Split into separate procedures ------------ - procPointMap <- run $ procPointAnalysis procPoints g - dump Opt_D_dump_cmmz_procmap "procpoint map" procPointMap - gs <- run $ splitAtProcPoints l callPPs procPoints procPointMap - (CmmProc h l g) - mapM_ (dump Opt_D_dump_cmmz_split "Post splitting") gs - - ------------- More CAFs and foreign calls ------------ - cafEnv <- run $ cafAnal g - let localCAFs = catMaybes $ map (localCAFInfo cafEnv) gs + procPointMap <- {-# SCC "procPointAnalysis" #-} runUniqSM $ + procPointAnalysis procPoints g + dumpWith dflags Opt_D_dump_cmmz_procmap "procpoint map" 
procPointMap + gs <- {-# SCC "splitAtProcPoints" #-} runUniqSM $ + splitAtProcPoints l callPPs procPoints procPointMap (CmmProc h l g) + dumps Opt_D_dump_cmmz_split "Post splitting" gs + + ------------- More CAFs ------------------------------ + let cafEnv = {-# SCC "cafAnal" #-} cafAnal g + let localCAFs = {-# SCC "localCAFs" #-} catMaybes $ map (localCAFInfo cafEnv) gs mbpprTrace "localCAFs" (ppr localCAFs) $ return () - gs <- run $ mapM (lowerSafeForeignCalls areaMap) gs - mapM_ (dump Opt_D_dump_cmmz_lower "Post lowerSafeForeignCalls") gs - -- NO MORE GRAPH TRANSFORMATION AFTER HERE -- JUST MAKING INFOTABLES - gs <- return $ map (setInfoTableStackMap slotEnv areaMap) gs - mapM_ (dump Opt_D_dump_cmmz_info "after setInfoTableStackMap") gs - gs <- return $ map (bundleCAFs cafEnv) gs - mapM_ (dump Opt_D_dump_cmmz_cafs "after bundleCAFs") gs + gs <- {-# SCC "setInfoTableStackMap" #-} + return $ map (setInfoTableStackMap stackmaps) gs + dumps Opt_D_dump_cmmz_info "after setInfoTableStackMap" gs + + ----------- Control-flow optimisations --------------- + gs <- {-# SCC "cmmCfgOpts(2)" #-} return $ map cmmCfgOptsProc gs + dumps Opt_D_dump_cmmz_cfg "Post control-flow optimsations" gs + + gs <- {-# SCC "bundleCAFs" #-} return $ map (bundleCAFs cafEnv) gs + dumps Opt_D_dump_cmmz_cafs "after bundleCAFs" gs + return (localCAFs, gs) -- gs :: [ (CAFSet, CmmDecl) ] -- localCAFs :: [ (CLabel, CAFSet) ] -- statics filtered out(?) where dflags = hsc_dflags hsc_env - platform = targetPlatform dflags - mbpprTrace x y z = if dopt Opt_D_dump_cmmz dflags then pprTrace x y z else z - dump f = dumpWith ppr f - dumpWith pprFun f txt g = do - -- ToDo: No easy way of say "dump all the cmmz, *and* split - -- them into files." Also, -ddump-cmmz doesn't play nicely - -- with -ddump-to-file, since the headers get omitted. 
- dumpIfSet_dyn dflags f txt (pprFun g) - when (not (dopt f dflags)) $ - dumpIfSet_dyn dflags Opt_D_dump_cmmz txt (pprFun g) - -- Runs a required transformation/analysis - run = runInfiniteFuelIO (hsc_OptFuel hsc_env) - -- Runs an optional transformation/analysis (and should - -- thus be subject to optimization fuel) - runOptimization = runFuelIO (hsc_OptFuel hsc_env) + mbpprTrace x y z | dopt Opt_D_dump_cmmz dflags = pprTrace x y z + | otherwise = z + dump = dumpGraph dflags + + dumps flag name + = mapM_ (dumpWith dflags flag name) + +runUniqSM :: UniqSM a -> IO a +runUniqSM m = do + us <- mkSplitUniqSupply 'u' + return (initUs_ us m) + + +dumpGraph :: DynFlags -> DynFlag -> String -> CmmGraph -> IO () +dumpGraph dflags flag name g = do + when (dopt Opt_DoCmmLinting dflags) $ do_lint g + dumpWith dflags flag name g + where + do_lint g = case cmmLintGraph g of + Just err -> do { fatalErrorMsg dflags err + ; ghcExit dflags 1 + } + Nothing -> return () + +dumpWith :: Outputable a => DynFlags -> DynFlag -> String -> a -> IO () +dumpWith dflags flag txt g = do + -- ToDo: No easy way of say "dump all the cmmz, *and* split + -- them into files." Also, -ddump-cmmz doesn't play nicely + -- with -ddump-to-file, since the headers get omitted. + dumpIfSet_dyn dflags flag txt (ppr g) + when (not (dopt flag dflags)) $ + dumpIfSet_dyn dflags Opt_D_dump_cmmz txt (ppr g) -- This probably belongs in CmmBuildInfoTables? -- We're just finishing the job here: once we know what CAFs are defined -- in non-static closures, we can build the SRTs. 
-toTops :: HscEnv -> Map CLabel CAFSet -> (TopSRT, [[CmmDecl]]) - -> [(CAFSet, CmmDecl)] -> IO (TopSRT, [[CmmDecl]]) -toTops hsc_env topCAFEnv (topSRT, tops) gs = +toTops :: Map CLabel CAFSet -> (TopSRT, [[CmmDecl]]) + -> [(CAFSet, CmmDecl)] -> IO (TopSRT, [[CmmDecl]]) +toTops topCAFEnv (topSRT, tops) gs = do let setSRT (topSRT, rst) g = do (topSRT, gs) <- setInfoTableSRT topCAFEnv topSRT g return (topSRT, gs : rst) - (topSRT, gs') <- runFuelIO (hsc_OptFuel hsc_env) $ foldM setSRT (topSRT, []) gs + (topSRT, gs') <- runUniqSM $ foldM setSRT (topSRT, []) gs return (topSRT, concat gs' : tops) diff --git a/compiler/cmm/CmmProcPoint.hs b/compiler/cmm/CmmProcPoint.hs index f50d850b3a..6eb92666af 100644 --- a/compiler/cmm/CmmProcPoint.hs +++ b/compiler/cmm/CmmProcPoint.hs @@ -4,7 +4,7 @@ module CmmProcPoint ( ProcPointSet, Status(..) , callProcPoints, minimalProcPointSet - , addProcPointProtocols, splitAtProcPoints, procPointAnalysis + , splitAtProcPoints, procPointAnalysis ) where @@ -13,22 +13,17 @@ import Prelude hiding (last, unzip, succ, zip) import BlockId import CLabel import Cmm +import PprCmm () import CmmUtils -import CmmContFlowOpt import CmmInfo -import CmmLive -import Constants import Data.List (sortBy) import Maybes -import MkGraph import Control.Monad -import OptimizationFuel import Outputable import Platform -import UniqSet import UniqSupply -import Compiler.Hoopl +import Hoopl import qualified Data.Map as Map @@ -103,34 +98,50 @@ instance Outputable Status where (hsep $ punctuate comma $ map ppr $ setElems ps) ppr ProcPoint = text "<procpt>" -lattice :: DataflowLattice Status -lattice = DataflowLattice "direct proc-point reachability" unreached add_to - where unreached = ReachedBy setEmpty - add_to _ (OldFact ProcPoint) _ = (NoChange, ProcPoint) - add_to _ _ (NewFact ProcPoint) = (SomeChange, ProcPoint) -- because of previous case - add_to _ (OldFact (ReachedBy p)) (NewFact (ReachedBy p')) = - let union = setUnion p' p - in if setSize union > setSize p 
then (SomeChange, ReachedBy union) - else (NoChange, ReachedBy p) -------------------------------------------------- +-- Proc point analysis + +procPointAnalysis :: ProcPointSet -> CmmGraph -> UniqSM (BlockEnv Status) +-- Once you know what the proc-points are, figure out +-- what proc-points each block is reachable from +procPointAnalysis procPoints g = + -- pprTrace "procPointAnalysis" (ppr procPoints) $ + dataflowAnalFwdBlocks g initProcPoints $ analFwd lattice forward + where initProcPoints = [(id, ProcPoint) | id <- setElems procPoints] + -- transfer equations forward :: FwdTransfer CmmNode Status -forward = mkFTransfer3 first middle ((mkFactBase lattice . ) . last) - where first :: CmmNode C O -> Status -> Status - first (CmmEntry id) ProcPoint = ReachedBy $ setSingleton id - first _ x = x +forward = mkFTransfer3 first middle last + where + first :: CmmNode C O -> Status -> Status + first (CmmEntry id) ProcPoint = ReachedBy $ setSingleton id + first _ x = x - middle _ x = x + middle _ x = x - last :: CmmNode O C -> Status -> [(Label, Status)] - last (CmmCall {cml_cont = Just k}) _ = [(k, ProcPoint)] - last (CmmForeignCall {succ = k}) _ = [(k, ProcPoint)] - last l x = map (\id -> (id, x)) (successors l) + last :: CmmNode O C -> Status -> FactBase Status + last l x = mkFactBase lattice $ map (\id -> (id, x)) (successors l) --- It is worth distinguishing two sets of proc points: --- those that are induced by calls in the original graph --- and those that are introduced because they're reachable from multiple proc points. 
+lattice :: DataflowLattice Status +lattice = DataflowLattice "direct proc-point reachability" unreached add_to + where unreached = ReachedBy setEmpty + add_to _ (OldFact ProcPoint) _ = (NoChange, ProcPoint) + add_to _ _ (NewFact ProcPoint) = (SomeChange, ProcPoint) + -- because of previous case + add_to _ (OldFact (ReachedBy p)) (NewFact (ReachedBy p')) + | setSize union > setSize p = (SomeChange, ReachedBy union) + | otherwise = (NoChange, ReachedBy p) + where + union = setUnion p' p + +---------------------------------------------------------------------- + +-- It is worth distinguishing two sets of proc points: those that are +-- induced by calls in the original graph and those that are +-- introduced because they're reachable from multiple proc points. +-- +-- Extract the set of Continuation BlockIds, see Note [Continuation BlockIds]. callProcPoints :: CmmGraph -> ProcPointSet callProcPoints g = foldGraphBlocks add (setSingleton (g_entry g)) g where add :: CmmBlock -> BlockSet -> BlockSet @@ -139,21 +150,17 @@ callProcPoints g = foldGraphBlocks add (setSingleton (g_entry g)) g CmmForeignCall {succ=k} -> setInsert k set _ -> set -minimalProcPointSet :: Platform -> ProcPointSet -> CmmGraph -> FuelUniqSM ProcPointSet +minimalProcPointSet :: Platform -> ProcPointSet -> CmmGraph + -> UniqSM ProcPointSet -- Given the set of successors of calls (which must be proc-points) -- figure out the minimal set of necessary proc-points -minimalProcPointSet platform callProcPoints g = extendPPSet platform g (postorderDfs g) callProcPoints +minimalProcPointSet platform callProcPoints g + = extendPPSet platform g (postorderDfs g) callProcPoints -procPointAnalysis :: ProcPointSet -> CmmGraph -> FuelUniqSM (BlockEnv Status) --- Once you know what the proc-points are, figure out --- what proc-points each block is reachable from -procPointAnalysis procPoints g = - liftM snd $ dataflowPassFwd g initProcPoints $ analFwd lattice forward - where initProcPoints = [(id, ProcPoint) | id <- 
setElems procPoints] - -extendPPSet :: Platform -> CmmGraph -> [CmmBlock] -> ProcPointSet -> FuelUniqSM ProcPointSet +extendPPSet :: Platform -> CmmGraph -> [CmmBlock] -> ProcPointSet -> UniqSM ProcPointSet extendPPSet platform g blocks procPoints = do env <- procPointAnalysis procPoints g + -- pprTrace "extensPPSet" (ppr env) $ return () let add block pps = let id = entryLabel block in case mapLookup id env of Just ProcPoint -> setInsert id pps @@ -179,183 +186,13 @@ extendPPSet platform g blocks procPoints = pps -> extendPPSet g blocks (foldl extendBlockSet procPoints' pps) -} - case newPoint of Just id -> - if setMember id procPoints' then panic "added old proc pt" - else extendPPSet platform g blocks (setInsert id procPoints') - Nothing -> return procPoints' - - ------------------------------------------------------------------------- --- Computing Proc-Point Protocols -- ------------------------------------------------------------------------- - -{- - -There is one major trick, discovered by Michael Adams, which is that -we want to choose protocols in a way that enables us to optimize away -some continuations. The optimization is very much like branch-chain -elimination, except that it involves passing results as well as -control. The idea is that if a call's continuation k does nothing but -CopyIn its results and then goto proc point P, the call's continuation -may be changed to P, *provided* P's protocol is identical to the -protocol for the CopyIn. We choose protocols to make this so. 
- -Here's an explanatory example; we begin with the source code (lines -separate basic blocks): - - ..1..; - x, y = g(); - goto P; - ------- - P: ..2..; - -Zipperization converts this code as follows: - - ..1..; - call g() returns to k; - ------- - k: CopyIn(x, y); - goto P; - ------- - P: ..2..; - -What we'd like to do is assign P the same CopyIn protocol as k, so we -can eliminate k: - - ..1..; - call g() returns to P; - ------- - P: CopyIn(x, y); ..2..; - -Of course, P may be the target of more than one continuation, and -different continuations may have different protocols. Michael Adams -implemented a voting mechanism, but he thinks a simple greedy -algorithm would be just as good, so that's what we do. - --} + case newPoint of + Just id -> + if setMember id procPoints' + then panic "added old proc pt" + else extendPPSet platform g blocks (setInsert id procPoints') + Nothing -> return procPoints' -data Protocol = Protocol Convention [CmmFormal] Area - deriving Eq -instance Outputable Protocol where - ppr (Protocol c fs a) = text "Protocol" <+> ppr c <+> ppr fs <+> ppr a - --- | Function 'optimize_calls' chooses protocols only for those proc --- points that are relevant to the optimization explained above. --- The others are assigned by 'add_unassigned', which is not yet clever. 
- -addProcPointProtocols :: ProcPointSet -> ProcPointSet -> CmmGraph -> FuelUniqSM CmmGraph -addProcPointProtocols callPPs procPoints g = - do liveness <- cmmLiveness g - (protos, g') <- optimize_calls liveness g - blocks'' <- add_CopyOuts protos procPoints g' - return $ ofBlockMap (g_entry g) blocks'' - where optimize_calls liveness g = -- see Note [Separate Adams optimization] - do let (protos, blocks') = - foldGraphBlocks maybe_add_call (mapEmpty, mapEmpty) g - protos' = add_unassigned liveness procPoints protos - let g' = ofBlockMap (g_entry g) (add_CopyIns callPPs protos' blocks') - return (protos', removeUnreachableBlocks g') - maybe_add_call :: CmmBlock -> (BlockEnv Protocol, BlockEnv CmmBlock) - -> (BlockEnv Protocol, BlockEnv CmmBlock) - -- ^ If the block is a call whose continuation goes to a proc point - -- whose protocol either matches the continuation's or is not yet set, - -- redirect the call (cf 'newblock') and set the protocol if necessary - maybe_add_call block (protos, blocks) = - case lastNode block of - CmmCall tgt (Just k) args res s - | Just proto <- mapLookup k protos, - Just pee <- branchesToProcPoint k - -> let newblock = replaceLastNode block (CmmCall tgt (Just pee) - args res s) - changed_blocks = insertBlock newblock blocks - unchanged_blocks = insertBlock block blocks - in case mapLookup pee protos of - Nothing -> (mapInsert pee proto protos, changed_blocks) - Just proto' -> - if proto == proto' then (protos, changed_blocks) - else (protos, unchanged_blocks) - _ -> (protos, insertBlock block blocks) - - branchesToProcPoint :: BlockId -> Maybe BlockId - -- ^ Tells whether the named block is just a branch to a proc point - branchesToProcPoint id = - let block = mapLookup id (toBlockMap g) `orElse` - panic "branch out of graph" - in case blockToNodeList block of - (_, [], JustC (CmmBranch pee)) | setMember pee procPoints -> Just pee - _ -> Nothing - --- | For now, following a suggestion by Ben Lippmeier, we pass all --- live variables as 
arguments, hoping that a clever register --- allocator might help. - -add_unassigned :: BlockEnv CmmLive -> ProcPointSet -> BlockEnv Protocol -> - BlockEnv Protocol -add_unassigned = pass_live_vars_as_args - -pass_live_vars_as_args :: BlockEnv CmmLive -> ProcPointSet -> - BlockEnv Protocol -> BlockEnv Protocol -pass_live_vars_as_args _liveness procPoints protos = protos' - where protos' = setFold addLiveVars protos procPoints - addLiveVars :: BlockId -> BlockEnv Protocol -> BlockEnv Protocol - addLiveVars id protos = - case mapLookup id protos of - Just _ -> protos - Nothing -> let live = emptyRegSet - --lookupBlockEnv _liveness id `orElse` - --panic ("no liveness at block " ++ show id) - formals = uniqSetToList live - prot = Protocol Private formals $ CallArea $ Young id - in mapInsert id prot protos - - --- | Add copy-in instructions to each proc point that did not arise from a call --- instruction. (Proc-points that arise from calls already have their copy-in instructions.) - -add_CopyIns :: ProcPointSet -> BlockEnv Protocol -> BlockEnv CmmBlock -> BlockEnv CmmBlock -add_CopyIns callPPs protos blocks = mapFold maybe_insert_CopyIns mapEmpty blocks - where maybe_insert_CopyIns block blocks - | not $ setMember bid callPPs - , Just (Protocol c fs _area) <- mapLookup bid protos - = let nodes = copyInSlot c fs - (h, m, l) = blockToNodeList block - in insertBlock (blockOfNodeList (h, nodes ++ m, l)) blocks - | otherwise = insertBlock block blocks - where bid = entryLabel block - - --- | Add a CopyOut node before each procpoint. --- If the predecessor is a call, then the copy outs should already be done by the callee. --- Note: If we need to add copy-out instructions, they may require stack space, --- so we accumulate a map from the successors to the necessary stack space, --- then update the successors after we have finished inserting the copy-outs. 
- -add_CopyOuts :: BlockEnv Protocol -> ProcPointSet -> CmmGraph -> - FuelUniqSM (BlockEnv CmmBlock) -add_CopyOuts protos procPoints g = foldGraphBlocks mb_copy_out (return mapEmpty) g - where mb_copy_out :: CmmBlock -> FuelUniqSM (BlockEnv CmmBlock) -> - FuelUniqSM (BlockEnv CmmBlock) - mb_copy_out b z | entryLabel b == g_entry g = skip b z - mb_copy_out b z = - case lastNode b of - CmmCall {} -> skip b z -- copy out done by callee - CmmForeignCall {} -> skip b z -- copy out done by callee - _ -> copy_out b z - copy_out b z = foldr trySucc init (successors b) >>= finish - where init = (\bmap -> (b, bmap)) `liftM` z - trySucc succId z = - if setMember succId procPoints then - case mapLookup succId protos of - Nothing -> z - Just (Protocol c fs _area) -> insert z succId $ copyOutSlot c fs - else z - insert z succId m = - do (b, bmap) <- z - (b, bs) <- insertBetween b m succId - -- pprTrace "insert for succ" (ppr succId <> ppr m) $ do - return $ (b, foldl (flip insertBlock) bmap bs) - finish (b, bmap) = return $ insertBlock b bmap - skip b bs = insertBlock b `liftM` bs -- At this point, we have found a set of procpoints, each of which should be -- the entry point of a procedure. @@ -370,10 +207,9 @@ add_CopyOuts protos procPoints g = foldGraphBlocks mb_copy_out (return mapEmpty) -- ToDo: use the _ret naming convention that the old code generator -- used. 
-- EZY splitAtProcPoints :: CLabel -> ProcPointSet-> ProcPointSet -> BlockEnv Status -> - CmmDecl -> FuelUniqSM [CmmDecl] + CmmDecl -> UniqSM [CmmDecl] splitAtProcPoints entry_label callPPs procPoints procMap - (CmmProc (TopInfo {info_tbl=info_tbl, - stack_info=stack_info}) + (CmmProc (TopInfo {info_tbl=info_tbl}) top_l g@(CmmGraph {g_entry=entry})) = do -- Build a map from procpoints to the blocks they reach let addBlock b graphEnv = @@ -384,15 +220,18 @@ splitAtProcPoints entry_label callPPs procPoints procMap [] -> graphEnv [id] -> add graphEnv id bid b _ -> panic "Each block should be reachable from only one ProcPoint" - Nothing -> pprPanic "block not reached by a proc point?" (ppr bid) + Nothing -> graphEnv where bid = entryLabel b add graphEnv procId bid b = mapInsert procId graph' graphEnv where graph = mapLookup procId graphEnv `orElse` mapEmpty graph' = mapInsert bid b graph + graphEnv <- return $ foldGraphBlocks addBlock emptyBlockMap g + -- Build a map from proc point BlockId to pairs of: -- * Labels for their new procedures - -- * Labels for the info tables of their new procedures (only if the proc point is a callPP) + -- * Labels for the info tables of their new procedures (only if + -- the proc point is a callPP) -- Due to common blockification, we may overestimate the set of procpoints. let add_label map pp = Map.insert pp lbls map where lbls | pp == entry = (entry_label, Just entry_info_lbl) @@ -401,30 +240,15 @@ splitAtProcPoints entry_label callPPs procPoints procMap entry_info_lbl = cit_lbl info_tbl procLabels = foldl add_label Map.empty (filter (flip mapMember (toBlockMap g)) (setElems procPoints)) - -- For each procpoint, we need to know the SP offset on entry. 
- -- If the procpoint is: - -- - continuation of a call, the SP offset is in the call - -- - otherwise, 0 (and left out of the spEntryMap) - let add_sp_off :: CmmBlock -> BlockEnv CmmStackInfo -> BlockEnv CmmStackInfo - add_sp_off b env = - case lastNode b of - CmmCall {cml_cont = Just succ, cml_ret_args = off, cml_ret_off = updfr_off} -> - mapInsert succ (StackInfo { arg_space = off, updfr_space = Just updfr_off}) env - CmmForeignCall {succ = succ, updfr = updfr_off} -> - mapInsert succ (StackInfo { arg_space = wORD_SIZE, updfr_space = Just updfr_off}) env - _ -> env - spEntryMap = foldGraphBlocks add_sp_off (mapInsert entry stack_info emptyBlockMap) g - getStackInfo id = mapLookup id spEntryMap `orElse` StackInfo {arg_space = 0, updfr_space = Nothing} -- In each new graph, add blocks jumping off to the new procedures, -- and replace branches to procpoints with branches to the jump-off blocks let add_jump_block (env, bs) (pp, l) = do bid <- liftM mkBlockId getUniqueM - let b = blockOfNodeList (JustC (CmmEntry bid), [], JustC jump) - StackInfo {arg_space = argSpace, updfr_space = off} = getStackInfo pp - jump = CmmCall (CmmLit (CmmLabel l)) Nothing argSpace 0 - (off `orElse` 0) -- Jump's shouldn't need the offset... 
+ let b = blockJoin (CmmEntry bid) emptyBlock jump + jump = CmmCall (CmmLit (CmmLabel l)) Nothing 0 0 0 return (mapInsert pp bid env, b : bs) - add_jumps (newGraphEnv) (ppId, blockEnv) = + + add_jumps newGraphEnv (ppId, blockEnv) = do let needed_jumps = -- find which procpoints we currently branch to mapFold add_if_branch_to_pp [] blockEnv add_if_branch_to_pp :: CmmBlock -> [(BlockId, CLabel)] -> [(BlockId, CLabel)] @@ -441,17 +265,16 @@ splitAtProcPoints entry_label callPPs procPoints procMap foldM add_jump_block (mapEmpty, []) needed_jumps -- update the entry block let b = expectJust "block in env" $ mapLookup ppId blockEnv - off = getStackInfo ppId blockEnv' = mapInsert ppId b blockEnv -- replace branches to procpoints with branches to jumps blockEnv'' = toBlockMap $ replaceBranches jumpEnv $ ofBlockMap ppId blockEnv' -- add the jump blocks to the graph blockEnv''' = foldl (flip insertBlock) blockEnv'' jumpBlocks - let g' = (off, ofBlockMap ppId blockEnv''') + let g' = ofBlockMap ppId blockEnv''' -- pprTrace "g' pre jumps" (ppr g') $ do return (mapInsert ppId g' newGraphEnv) graphEnv <- foldM add_jumps emptyBlockMap $ mapToList graphEnv - let to_proc (bid, (stack_info, g)) = case expectJust "pp label" $ Map.lookup bid procLabels of + let to_proc (bid, g) = case expectJust "pp label" $ Map.lookup bid procLabels of (lbl, Just info_lbl) | bid == entry -> CmmProc (TopInfo {info_tbl=info_tbl, stack_info=stack_info}) @@ -462,15 +285,22 @@ splitAtProcPoints entry_label callPPs procPoints procMap (lbl, Nothing) -> CmmProc (TopInfo {info_tbl=CmmNonInfoTable, stack_info=stack_info}) lbl (replacePPIds g) - -- References to procpoint IDs can now be replaced with the infotable's label - replacePPIds g = mapGraphNodes (id, mapExp repl, mapExp repl) g + where + stack_info = panic "No StackInfo" + + -- References to procpoint IDs can now be replaced with the + -- infotable's label + replacePPIds g = {-# SCC "replacePPIds" #-} + mapGraphNodes (id, mapExp repl, mapExp repl) g 
where repl e@(CmmLit (CmmBlock bid)) = case Map.lookup bid procLabels of Just (_, Just info_lbl) -> CmmLit (CmmLabel info_lbl) _ -> e repl e = e - -- The C back end expects to see return continuations before the call sites. - -- Here, we sort them in reverse order -- it gets reversed later. + + -- The C back end expects to see return continuations before the + -- call sites. Here, we sort them in reverse order -- it gets + -- reversed later. let (_, block_order) = foldl add_block_num (0::Int, emptyBlockMap) (postorderDfs g) add_block_num (i, map) block = (i+1, mapInsert (entryLabel block) i map) sort_fn (bid, _) (bid', _) = @@ -482,6 +312,27 @@ splitAtProcPoints entry_label callPPs procPoints procMap procs splitAtProcPoints _ _ _ _ t@(CmmData _ _) = return [t] + +-- Only called from CmmProcPoint.splitAtProcPoints. NB. does a +-- recursive lookup, see comment below. +replaceBranches :: BlockEnv BlockId -> CmmGraph -> CmmGraph +replaceBranches env cmmg + = {-# SCC "replaceBranches" #-} + ofBlockMap (g_entry cmmg) $ mapMap f $ toBlockMap cmmg + where + f block = replaceLastNode block $ last (lastNode block) + + last :: CmmNode O C -> CmmNode O C + last (CmmBranch id) = CmmBranch (lookup id) + last (CmmCondBranch e ti fi) = CmmCondBranch e (lookup ti) (lookup fi) + last (CmmSwitch e tbl) = CmmSwitch e (map (fmap lookup) tbl) + last l@(CmmCall {}) = l + last l@(CmmForeignCall {}) = l + lookup id = fmap lookup (mapLookup id env) `orElse` id + -- XXX: this is a recursive lookup, it follows chains + -- until the lookup returns Nothing, at which point we + -- return the last BlockId + ---------------------------------------------------------------- {- diff --git a/compiler/cmm/CmmRewriteAssignments.hs b/compiler/cmm/CmmRewriteAssignments.hs index ecf3f7e0c3..cf349a0334 100644 --- a/compiler/cmm/CmmRewriteAssignments.hs +++ b/compiler/cmm/CmmRewriteAssignments.hs @@ -18,23 +18,23 @@ module CmmRewriteAssignments import Cmm import CmmUtils import CmmOpt -import 
OptimizationFuel import StgCmmUtils -import Control.Monad +import UniqSupply import Platform import UniqFM import Unique import BlockId -import Compiler.Hoopl hiding (Unique) +import Hoopl import Data.Maybe +import Control.Monad import Prelude hiding (succ, zip) ---------------------------------------------------------------- --- Main function -rewriteAssignments :: Platform -> CmmGraph -> FuelUniqSM CmmGraph +rewriteAssignments :: Platform -> CmmGraph -> UniqSM CmmGraph rewriteAssignments platform g = do -- Because we need to act on forwards and backwards information, we -- first perform usage analysis and bake this information into the @@ -213,7 +213,7 @@ usageTransfer = mkBTransfer3 first middle last increaseUsage f r = addToUFM_C combine f r SingleUse where combine _ _ = ManyUse -usageRewrite :: BwdRewrite FuelUniqSM (WithRegUsage CmmNode) UsageMap +usageRewrite :: BwdRewrite UniqSM (WithRegUsage CmmNode) UsageMap usageRewrite = mkBRewrite3 first middle last where first _ _ = return Nothing middle :: Monad m => WithRegUsage CmmNode O O -> UsageMap -> m (Maybe (Graph (WithRegUsage CmmNode) O O)) @@ -226,7 +226,7 @@ usageRewrite = mkBRewrite3 first middle last last _ _ = return Nothing type CmmGraphWithRegUsage = GenCmmGraph (WithRegUsage CmmNode) -annotateUsage :: CmmGraph -> FuelUniqSM (CmmGraphWithRegUsage) +annotateUsage :: CmmGraph -> UniqSM (CmmGraphWithRegUsage) annotateUsage vanilla_g = let g = modifyGraph liftRegUsage vanilla_g in liftM fst $ dataflowPassBwd g [(g_entry g, fact_bot usageLattice)] $ @@ -404,8 +404,8 @@ clobbers (CmmReg (CmmGlobal Hp), _) (_, _) = False -- ToDo: Also catch MachOp case clobbers (ss@CmmStackSlot{}, CmmReg (CmmLocal r)) (u, CmmLoad (ss'@CmmStackSlot{}) _) | getUnique r == u, ss == ss' = False -- No-op on the stack slot (XXX: Do we need this special case?) 
-clobbers (CmmStackSlot (CallArea a) o, rhs) (_, expr) = f expr - where f (CmmLoad (CmmStackSlot (CallArea a') o') t) +clobbers (CmmStackSlot a o, rhs) (_, expr) = f expr + where f (CmmLoad (CmmStackSlot a' o') t) = (a, o, widthInBytes (cmmExprWidth rhs)) `overlaps` (a', o', widthInBytes (typeWidth t)) f (CmmLoad e _) = containsStackSlot e f (CmmMachOp _ es) = or (map f es) @@ -416,9 +416,6 @@ clobbers (CmmStackSlot (CallArea a) o, rhs) (_, expr) = f expr containsStackSlot (CmmMachOp _ es) = or (map containsStackSlot es) containsStackSlot (CmmStackSlot{}) = True containsStackSlot _ = False -clobbers (CmmStackSlot (RegSlot l) _, _) (_, expr) = f expr - where f (CmmLoad (CmmStackSlot (RegSlot l') _) _) = l == l' - f _ = False clobbers _ (_, e) = f e where f (CmmLoad (CmmStackSlot _ _) _) = False f (CmmLoad{}) = True -- conservative @@ -432,7 +429,7 @@ clobbers _ (_, e) = f e -- [ I32 ] -- [ F64 ] -- s' -w'- o' -type CallSubArea = (AreaId, Int, Int) -- area, offset, width +type CallSubArea = (Area, Int, Int) -- area, offset, width overlaps :: CallSubArea -> CallSubArea -> Bool overlaps (a, _, _) (a', _, _) | a /= a' = False overlaps (_, o, w) (_, o', w') = @@ -457,7 +454,7 @@ invalidateVolatile :: BlockId -> AssignmentMap -> AssignmentMap invalidateVolatile k m = mapUFM p m where p (AlwaysInline e) = if exp e then AlwaysInline e else NeverOptimize where exp CmmLit{} = True - exp (CmmLoad (CmmStackSlot (CallArea (Young k')) _) _) + exp (CmmLoad (CmmStackSlot (Young k') _) _) | k' == k = False exp (CmmLoad (CmmStackSlot _ _) _) = True exp (CmmMachOp _ es) = and (map exp es) @@ -527,7 +524,7 @@ assignmentTransfer = mkFTransfer3 (flip const) middleAssignment ((mkFactBase ass -- values from the assignment map, due to reassignment of the local -- register.) This is probably not locally sound. 
-assignmentRewrite :: FwdRewrite FuelUniqSM (WithRegUsage CmmNode) AssignmentMap +assignmentRewrite :: FwdRewrite UniqSM (WithRegUsage CmmNode) AssignmentMap assignmentRewrite = mkFRewrite3 first middle last where first _ _ = return Nothing @@ -596,10 +593,6 @@ assignmentRewrite = mkFRewrite3 first middle last where rep = typeWidth (localRegType r) _ -> old -- See Note [Soundness of store rewriting] - inlineExp assign old@(CmmLoad (CmmStackSlot (RegSlot r) _) _) - = case lookupUFM assign r of - Just (AlwaysInline x) -> x - _ -> old inlineExp _ old = old inlinable :: CmmNode e x -> Bool @@ -612,7 +605,7 @@ assignmentRewrite = mkFRewrite3 first middle last -- in literals, which we can inline more aggressively, and inlining -- gives us opportunities for more folding. However, we don't need any -- facts to do MachOp folding. -machOpFoldRewrite :: Platform -> FwdRewrite FuelUniqSM (WithRegUsage CmmNode) a +machOpFoldRewrite :: Platform -> FwdRewrite UniqSM (WithRegUsage CmmNode) a machOpFoldRewrite platform = mkFRewrite3 first middle last where first _ _ = return Nothing middle :: WithRegUsage CmmNode O O -> a -> GenCmmReplGraph (WithRegUsage CmmNode) O O diff --git a/compiler/cmm/CmmSpillReload.hs b/compiler/cmm/CmmSpillReload.hs deleted file mode 100644 index 9e762fe48a..0000000000 --- a/compiler/cmm/CmmSpillReload.hs +++ /dev/null @@ -1,166 +0,0 @@ -{-# LANGUAGE GADTs, NoMonoLocalBinds, FlexibleContexts #-} --- Norman likes local bindings --- If this module lives on I'd like to get rid of this flag in due course - -{-# OPTIONS_GHC -fno-warn-warnings-deprecations #-} - --- TODO: Get rid of this flag: -{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-} - -module CmmSpillReload - ( dualLivenessWithInsertion - ) -where - -import BlockId -import Cmm -import CmmUtils -import CmmLive -import OptimizationFuel - -import Control.Monad -import Outputable hiding (empty) -import qualified Outputable as PP -import UniqSet - -import Compiler.Hoopl hiding (Unique) -import Data.Maybe 
-import Prelude hiding (succ, zip) - -{- Note [Overview of spill/reload] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The point of this module is to insert spills and reloads to establish -the invariant that at a call or any proc point with an established -protocol all live variables not expected in registers are sitting on the -stack. We use a backward dual liveness analysis (both traditional -register liveness as well as register slot liveness on the stack) to -insert spills and reloads. It should be followed by a forward -transformation to sink reloads as deeply as possible, so as to reduce -register pressure: this transformation is performed by -CmmRewriteAssignments. - -A variable can be expected to be live in a register, live on the -stack, or both. This analysis ensures that spills and reloads are -inserted as needed to make sure that every live variable needed -after a call is available on the stack. Spills are placed immediately -after their reaching definitions, but reloads are placed immediately -after a return from a call (the entry point.) - -Note that we offer no guarantees about the consistency of the value -in memory and the value in the register, except that they are -equal across calls/procpoints. If the variable is changed, this -mapping breaks: but as the original value of the register may still -be useful in a different context, the memory location is not updated. 
--} - -data DualLive = DualLive { on_stack :: RegSet, in_regs :: RegSet } - -changeStack, changeRegs :: (RegSet -> RegSet) -> DualLive -> DualLive -changeStack f live = live { on_stack = f (on_stack live) } -changeRegs f live = live { in_regs = f (in_regs live) } - -dualLiveLattice :: DataflowLattice DualLive -dualLiveLattice = DataflowLattice "variables live in registers and on stack" empty add - where empty = DualLive emptyRegSet emptyRegSet - add _ (OldFact old) (NewFact new) = (changeIf $ change1 || change2, DualLive stack regs) - where (change1, stack) = add1 (on_stack old) (on_stack new) - (change2, regs) = add1 (in_regs old) (in_regs new) - add1 old new = if sizeUniqSet join > sizeUniqSet old then (True, join) else (False, old) - where join = unionUniqSets old new - -dualLivenessWithInsertion :: BlockSet -> CmmGraph -> FuelUniqSM CmmGraph -dualLivenessWithInsertion procPoints g = - liftM fst $ dataflowPassBwd g [] $ analRewBwd dualLiveLattice - (dualLiveTransfers (g_entry g) procPoints) - (insertSpillsAndReloads g procPoints) - --- Note [Live registers on entry to procpoints] --- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- Remember that the transfer function is only ever run on the rewritten --- version of a graph, and the rewrite function for spills and reloads --- enforces the invariant that no local registers are live on entry to --- a procpoint. Accordingly, we check for this invariant here. An old --- version of this code incorrectly claimed that any live registers were --- live on the stack before entering the function: this is wrong, but --- didn't cause bugs because it never actually was invoked. 
- -dualLiveTransfers :: BlockId -> BlockSet -> (BwdTransfer CmmNode DualLive) -dualLiveTransfers entry procPoints = mkBTransfer3 first middle last - where first :: CmmNode C O -> DualLive -> DualLive - first (CmmEntry id) live -- See Note [Live registers on entry to procpoints] - | id == entry || setMember id procPoints = noLiveOnEntry id (in_regs live) live - | otherwise = live - - middle :: CmmNode O O -> DualLive -> DualLive - middle m = changeStack updSlots - . changeRegs updRegs - where -- Reuse middle of liveness analysis from CmmLive - updRegs = case getBTransfer3 xferLive of (_, middle, _) -> middle m - - updSlots live = foldSlotsUsed reload (foldSlotsDefd spill live m) m - spill live s@(RegSlot r, _, _) = check s $ deleteFromRegSet live r - spill live _ = live - reload live s@(RegSlot r, _, _) = check s $ extendRegSet live r - reload live _ = live - -- Ensure the assignment refers to the entirety of the - -- register slot (and not just a slice). - check (RegSlot (LocalReg _ ty), o, w) x - | o == w && w == widthInBytes (typeWidth ty) = x - check _ _ = panic "dualLiveTransfers: slices unsupported" - - -- Register analysis is identical to liveness analysis from CmmLive. - last :: CmmNode O C -> FactBase DualLive -> DualLive - last l fb = changeRegs (gen_kill l) $ case l of - CmmCall {cml_cont=Nothing} -> empty - CmmCall {cml_cont=Just k} -> keep_stack_only k - CmmForeignCall {succ=k} -> keep_stack_only k - _ -> joinOutFacts dualLiveLattice l fb - where empty = fact_bot dualLiveLattice - lkp k = fromMaybe empty (lookupFact k fb) - keep_stack_only k = DualLive (on_stack (lkp k)) emptyRegSet - -insertSpillsAndReloads :: CmmGraph -> BlockSet -> CmmBwdRewrite DualLive -insertSpillsAndReloads graph procPoints = deepBwdRw3 first middle nothing - -- Beware: deepBwdRw with one polymorphic function seems more reasonable here, - -- but GHC miscompiles it, see bug #4044. 
- where first :: CmmNode C O -> Fact O DualLive -> CmmReplGraph C O - first e@(CmmEntry id) live = return $ - if id /= (g_entry graph) && setMember id procPoints then - case map reload (uniqSetToList (in_regs live)) of - [] -> Nothing - is -> Just $ mkFirst e <*> mkMiddles is - else Nothing - -- EZY: There was some dead code for handling the case where - -- we were not splitting procedures. Check Git history if - -- you're interested (circa e26ea0f41). - - middle :: CmmNode O O -> Fact O DualLive -> CmmReplGraph O O - -- Don't add spills next to reloads. - middle (CmmAssign (CmmLocal reg) (CmmLoad (CmmStackSlot (RegSlot reg') _) _)) _ | reg == reg' = return Nothing - -- Spill if register is live on stack. - middle m@(CmmAssign (CmmLocal reg) _) live - | reg `elemRegSet` on_stack live = return (Just (mkMiddles [m, spill reg])) - middle _ _ = return Nothing - - nothing _ _ = return Nothing - -spill, reload :: LocalReg -> CmmNode O O -spill r = CmmStore (regSlot r) (CmmReg $ CmmLocal r) -reload r = CmmAssign (CmmLocal r) (CmmLoad (regSlot r) $ localRegType r) - ---------------------- --- prettyprinting - -ppr_regs :: String -> RegSet -> SDoc -ppr_regs s regs = text s <+> commafy (map ppr $ uniqSetToList regs) - where commafy xs = hsep $ punctuate comma xs - -instance Outputable DualLive where - ppr (DualLive {in_regs = regs, on_stack = stack}) = - if isEmptyUniqSet regs && isEmptyUniqSet stack then - text "<nothing-live>" - else - nest 2 $ fsep [if isEmptyUniqSet regs then PP.empty - else (ppr_regs "live in regs =" regs), - if isEmptyUniqSet stack then PP.empty - else (ppr_regs "live on stack =" stack)] diff --git a/compiler/cmm/CmmStackLayout.hs b/compiler/cmm/CmmStackLayout.hs index c7fedad05b..726f98e8a3 100644 --- a/compiler/cmm/CmmStackLayout.hs +++ b/compiler/cmm/CmmStackLayout.hs @@ -35,7 +35,6 @@ import CmmProcPoint import Maybes import MkGraph (stackStubExpr) import Control.Monad -import OptimizationFuel import Outputable import SMRep (ByteOff) diff --git 
a/compiler/cmm/CmmUtils.hs b/compiler/cmm/CmmUtils.hs index 0756c87583..f2e4d8e183 100644 --- a/compiler/cmm/CmmUtils.hs +++ b/compiler/cmm/CmmUtils.hs @@ -60,13 +60,14 @@ module CmmUtils( -- * Operations that probably don't belong here modifyGraph, - lastNode, replaceLastNode, insertBetween, + lastNode, replaceLastNode, ofBlockMap, toBlockMap, insertBlock, ofBlockList, toBlockList, bodyToBlockList, foldGraphBlocks, mapGraphNodes, postorderDfs, mapGraphNodes1, analFwd, analBwd, analRewFwd, analRewBwd, - dataflowPassFwd, dataflowPassBwd + dataflowPassFwd, dataflowPassBwd, dataflowAnalFwd, dataflowAnalBwd, + dataflowAnalFwdBlocks ) where #include "HsVersions.h" @@ -79,7 +80,6 @@ import Cmm import BlockId import CLabel import Outputable -import OptimizationFuel as F import Unique import UniqSupply import Constants( wORD_SIZE, tAG_MASK ) @@ -88,8 +88,7 @@ import Util import Data.Word import Data.Maybe import Data.Bits -import Control.Monad -import Compiler.Hoopl hiding ( Unique ) +import Hoopl --------------------------------------------------- -- @@ -402,13 +401,13 @@ mkLiveness (reg:regs) modifyGraph :: (Graph n C C -> Graph n' C C) -> GenCmmGraph n -> GenCmmGraph n' modifyGraph f g = CmmGraph {g_entry=g_entry g, g_graph=f (g_graph g)} -toBlockMap :: CmmGraph -> LabelMap CmmBlock +toBlockMap :: CmmGraph -> BlockEnv CmmBlock toBlockMap (CmmGraph {g_graph=GMany NothingO body NothingO}) = body -ofBlockMap :: BlockId -> LabelMap CmmBlock -> CmmGraph +ofBlockMap :: BlockId -> BlockEnv CmmBlock -> CmmGraph ofBlockMap entry bodyMap = CmmGraph {g_entry=entry, g_graph=GMany NothingO bodyMap NothingO} -insertBlock :: CmmBlock -> LabelMap CmmBlock -> LabelMap CmmBlock +insertBlock :: CmmBlock -> BlockEnv CmmBlock -> BlockEnv CmmBlock insertBlock block map = ASSERT (isNothing $ mapLookup id map) mapInsert id block map @@ -418,7 +417,8 @@ toBlockList :: CmmGraph -> [CmmBlock] toBlockList g = mapElems $ toBlockMap g ofBlockList :: BlockId -> [CmmBlock] -> CmmGraph -ofBlockList 
entry blocks = CmmGraph {g_entry=entry, g_graph=GMany NothingO body NothingO} +ofBlockList entry blocks = CmmGraph { g_entry = entry + , g_graph = GMany NothingO body NothingO } where body = foldr addBlock emptyBody blocks bodyToBlockList :: Body CmmNode -> [CmmBlock] @@ -429,97 +429,77 @@ mapGraphNodes :: ( CmmNode C O -> CmmNode C O , CmmNode O C -> CmmNode O C) -> CmmGraph -> CmmGraph mapGraphNodes funs@(mf,_,_) g = - ofBlockMap (entryLabel $ mf $ CmmEntry $ g_entry g) $ mapMap (blockMapNodes3 funs) $ toBlockMap g + ofBlockMap (entryLabel $ mf $ CmmEntry $ g_entry g) $ mapMap (mapBlock3' funs) $ toBlockMap g mapGraphNodes1 :: (forall e x. CmmNode e x -> CmmNode e x) -> CmmGraph -> CmmGraph -mapGraphNodes1 f g = modifyGraph (graphMapBlocks (blockMapNodes f)) g +mapGraphNodes1 f = modifyGraph (mapGraph f) foldGraphBlocks :: (CmmBlock -> a -> a) -> a -> CmmGraph -> a foldGraphBlocks k z g = mapFold k z $ toBlockMap g postorderDfs :: CmmGraph -> [CmmBlock] -postorderDfs g = postorder_dfs_from (toBlockMap g) (g_entry g) - -------------------------------------------------- --- Manipulating CmmBlocks - -lastNode :: CmmBlock -> CmmNode O C -lastNode block = foldBlockNodesF3 (nothing, nothing, const) block () - where nothing :: a -> b -> () - nothing _ _ = () - -replaceLastNode :: Block CmmNode e C -> CmmNode O C -> Block CmmNode e C -replaceLastNode block last = blockOfNodeList (first, middle, JustC last) - where (first, middle, _) = blockToNodeList block - ----------------------------------------------------------------------- ------ Splicing between blocks --- Given a middle node, a block, and a successor BlockId, --- we can insert the middle node between the block and the successor. --- We return the updated block and a list of new blocks that must be added --- to the graph. --- The semantics is a bit tricky. 
We consider cases on the last node: --- o For a branch, we can just insert before the branch, --- but sometimes the optimizer does better if we actually insert --- a fresh basic block, enabling some common blockification. --- o For a conditional branch, switch statement, or call, we must insert --- a new basic block. --- o For a jump or return, this operation is impossible. - -insertBetween :: MonadUnique m => CmmBlock -> [CmmNode O O] -> BlockId -> m (CmmBlock, [CmmBlock]) -insertBetween b ms succId = insert $ lastNode b - where insert :: MonadUnique m => CmmNode O C -> m (CmmBlock, [CmmBlock]) - insert (CmmBranch bid) = - if bid == succId then - do (bid', bs) <- newBlocks - return (replaceLastNode b (CmmBranch bid'), bs) - else panic "tried invalid block insertBetween" - insert (CmmCondBranch c t f) = - do (t', tbs) <- if t == succId then newBlocks else return $ (t, []) - (f', fbs) <- if f == succId then newBlocks else return $ (f, []) - return (replaceLastNode b (CmmCondBranch c t' f'), tbs ++ fbs) - insert (CmmSwitch e ks) = - do (ids, bs) <- mapAndUnzipM mbNewBlocks ks - return (replaceLastNode b (CmmSwitch e ids), join bs) - insert (CmmCall {}) = - panic "unimp: insertBetween after a call -- probably not a good idea" - insert (CmmForeignCall {}) = - panic "unimp: insertBetween after a foreign call -- probably not a good idea" - - newBlocks :: MonadUnique m => m (BlockId, [CmmBlock]) - newBlocks = do id <- liftM mkBlockId $ getUniqueM - return $ (id, [blockOfNodeList (JustC (CmmEntry id), ms, JustC (CmmBranch succId))]) - mbNewBlocks :: MonadUnique m => Maybe BlockId -> m (Maybe BlockId, [CmmBlock]) - mbNewBlocks (Just k) = if k == succId then liftM fstJust newBlocks - else return (Just k, []) - mbNewBlocks Nothing = return (Nothing, []) - fstJust (id, bs) = (Just id, bs) +postorderDfs g = {-# SCC "postorderDfs" #-} postorder_dfs_from (toBlockMap g) (g_entry g) ------------------------------------------------- -- Running dataflow analysis and/or rewrites -- 
Constructing forward and backward analysis-only pass -analFwd :: Monad m => DataflowLattice f -> FwdTransfer n f -> FwdPass m n f -analBwd :: Monad m => DataflowLattice f -> BwdTransfer n f -> BwdPass m n f +analFwd :: DataflowLattice f -> FwdTransfer n f -> FwdPass UniqSM n f +analBwd :: DataflowLattice f -> BwdTransfer n f -> BwdPass UniqSM n f analFwd lat xfer = analRewFwd lat xfer noFwdRewrite analBwd lat xfer = analRewBwd lat xfer noBwdRewrite -- Constructing forward and backward analysis + rewrite pass -analRewFwd :: Monad m => DataflowLattice f -> FwdTransfer n f -> FwdRewrite m n f -> FwdPass m n f -analRewBwd :: Monad m => DataflowLattice f -> BwdTransfer n f -> BwdRewrite m n f -> BwdPass m n f +analRewFwd :: DataflowLattice f -> FwdTransfer n f + -> FwdRewrite UniqSM n f + -> FwdPass UniqSM n f + +analRewBwd :: DataflowLattice f + -> BwdTransfer n f + -> BwdRewrite UniqSM n f + -> BwdPass UniqSM n f analRewFwd lat xfer rew = FwdPass {fp_lattice = lat, fp_transfer = xfer, fp_rewrite = rew} analRewBwd lat xfer rew = BwdPass {bp_lattice = lat, bp_transfer = xfer, bp_rewrite = rew} -- Running forward and backward dataflow analysis + optional rewrite -dataflowPassFwd :: NonLocal n => GenCmmGraph n -> [(BlockId, f)] -> FwdPass FuelUniqSM n f -> FuelUniqSM (GenCmmGraph n, BlockEnv f) +dataflowPassFwd :: NonLocal n => + GenCmmGraph n -> [(BlockId, f)] + -> FwdPass UniqSM n f + -> UniqSM (GenCmmGraph n, BlockEnv f) dataflowPassFwd (CmmGraph {g_entry=entry, g_graph=graph}) facts fwd = do (graph, facts, NothingO) <- analyzeAndRewriteFwd fwd (JustC [entry]) graph (mkFactBase (fp_lattice fwd) facts) return (CmmGraph {g_entry=entry, g_graph=graph}, facts) -dataflowPassBwd :: NonLocal n => GenCmmGraph n -> [(BlockId, f)] -> BwdPass FuelUniqSM n f -> FuelUniqSM (GenCmmGraph n, BlockEnv f) +dataflowAnalFwd :: NonLocal n => + GenCmmGraph n -> [(BlockId, f)] + -> FwdPass UniqSM n f + -> BlockEnv f +dataflowAnalFwd (CmmGraph {g_entry=entry, g_graph=graph}) facts fwd = + 
analyzeFwd fwd (JustC [entry]) graph (mkFactBase (fp_lattice fwd) facts) + +dataflowAnalFwdBlocks :: NonLocal n => + GenCmmGraph n -> [(BlockId, f)] + -> FwdPass UniqSM n f + -> UniqSM (BlockEnv f) +dataflowAnalFwdBlocks (CmmGraph {g_entry=entry, g_graph=graph}) facts fwd = do +-- (graph, facts, NothingO) <- analyzeAndRewriteFwd fwd (JustC [entry]) graph (mkFactBase (fp_lattice fwd) facts) +-- return facts + return (analyzeFwdBlocks fwd (JustC [entry]) graph (mkFactBase (fp_lattice fwd) facts)) + +dataflowAnalBwd :: NonLocal n => + GenCmmGraph n -> [(BlockId, f)] + -> BwdPass UniqSM n f + -> BlockEnv f +dataflowAnalBwd (CmmGraph {g_entry=entry, g_graph=graph}) facts bwd = + analyzeBwd bwd (JustC [entry]) graph (mkFactBase (bp_lattice bwd) facts) + +dataflowPassBwd :: NonLocal n => + GenCmmGraph n -> [(BlockId, f)] + -> BwdPass UniqSM n f + -> UniqSM (GenCmmGraph n, BlockEnv f) dataflowPassBwd (CmmGraph {g_entry=entry, g_graph=graph}) facts bwd = do (graph, facts, NothingO) <- analyzeAndRewriteBwd bwd (JustC [entry]) graph (mkFactBase (bp_lattice bwd) facts) return (CmmGraph {g_entry=entry, g_graph=graph}, facts) diff --git a/compiler/cmm/Hoopl.hs b/compiler/cmm/Hoopl.hs new file mode 100644 index 0000000000..0eca85cb8a --- /dev/null +++ b/compiler/cmm/Hoopl.hs @@ -0,0 +1,125 @@ +module Hoopl ( + module Compiler.Hoopl, + module Hoopl.Dataflow, + deepFwdRw, deepFwdRw3, + deepBwdRw, deepBwdRw3, + thenFwdRw + ) where + +import Compiler.Hoopl hiding + ( Unique, + FwdTransfer(..), FwdRewrite(..), FwdPass(..), + BwdTransfer(..), BwdRewrite(..), BwdPass(..), + noFwdRewrite, noBwdRewrite, + analyzeAndRewriteFwd, analyzeAndRewriteBwd, + mkFactBase, Fact, + mkBRewrite, mkBRewrite3, mkBTransfer, mkBTransfer3, + mkFRewrite, mkFRewrite3, mkFTransfer, mkFTransfer3, + deepFwdRw, deepFwdRw3, thenFwdRw, iterFwdRw, + deepBwdRw, deepBwdRw3, thenBwdRw, iterBwdRw + ) + +import Hoopl.Dataflow +import Control.Monad +import UniqSupply + +deepFwdRw3 :: (n C O -> f -> UniqSM (Maybe (Graph n 
C O))) + -> (n O O -> f -> UniqSM (Maybe (Graph n O O))) + -> (n O C -> f -> UniqSM (Maybe (Graph n O C))) + -> (FwdRewrite UniqSM n f) +deepFwdRw :: (forall e x . n e x -> f -> UniqSM (Maybe (Graph n e x))) -> FwdRewrite UniqSM n f +deepFwdRw3 f m l = iterFwdRw $ mkFRewrite3 f m l +deepFwdRw f = deepFwdRw3 f f f + +-- N.B. rw3, rw3', and rw3a are triples of functions. +-- But rw and rw' are single functions. +thenFwdRw :: forall n f. + FwdRewrite UniqSM n f + -> FwdRewrite UniqSM n f + -> FwdRewrite UniqSM n f +thenFwdRw rw3 rw3' = wrapFR2 thenrw rw3 rw3' + where + thenrw :: forall e x t t1. + (t -> t1 -> UniqSM (Maybe (Graph n e x, FwdRewrite UniqSM n f))) + -> (t -> t1 -> UniqSM (Maybe (Graph n e x, FwdRewrite UniqSM n f))) + -> t + -> t1 + -> UniqSM (Maybe (Graph n e x, FwdRewrite UniqSM n f)) + thenrw rw rw' n f = rw n f >>= fwdRes + where fwdRes Nothing = rw' n f + fwdRes (Just gr) = return $ Just $ fadd_rw rw3' gr + +iterFwdRw :: forall n f. + FwdRewrite UniqSM n f + -> FwdRewrite UniqSM n f +iterFwdRw rw3 = wrapFR iter rw3 + where iter :: forall a e x t. + (t -> a -> UniqSM (Maybe (Graph n e x, FwdRewrite UniqSM n f))) + -> t + -> a + -> UniqSM (Maybe (Graph n e x, FwdRewrite UniqSM n f)) + iter rw n = (liftM $ liftM $ fadd_rw (iterFwdRw rw3)) . rw n + +-- | Function inspired by 'rew' in the paper +_frewrite_cps :: ((Graph n e x, FwdRewrite UniqSM n f) -> UniqSM a) + -> UniqSM a + -> (forall e x . 
n e x -> f -> UniqSM (Maybe (Graph n e x, FwdRewrite UniqSM n f))) + -> n e x + -> f + -> UniqSM a +_frewrite_cps j n rw node f = + do mg <- rw node f + case mg of Nothing -> n + Just gr -> j gr + + + +-- | Function inspired by 'add' in the paper +fadd_rw :: FwdRewrite UniqSM n f + -> (Graph n e x, FwdRewrite UniqSM n f) + -> (Graph n e x, FwdRewrite UniqSM n f) +fadd_rw rw2 (g, rw1) = (g, rw1 `thenFwdRw` rw2) + + + +deepBwdRw3 :: + (n C O -> f -> UniqSM (Maybe (Graph n C O))) + -> (n O O -> f -> UniqSM (Maybe (Graph n O O))) + -> (n O C -> FactBase f -> UniqSM (Maybe (Graph n O C))) + -> (BwdRewrite UniqSM n f) +deepBwdRw :: (forall e x . n e x -> Fact x f -> UniqSM (Maybe (Graph n e x))) + -> BwdRewrite UniqSM n f +deepBwdRw3 f m l = iterBwdRw $ mkBRewrite3 f m l +deepBwdRw f = deepBwdRw3 f f f + + +thenBwdRw :: forall n f. BwdRewrite UniqSM n f -> BwdRewrite UniqSM n f -> BwdRewrite UniqSM n f +thenBwdRw rw1 rw2 = wrapBR2 f rw1 rw2 + where f :: forall t t1 t2 e x. + t + -> (t1 -> t2 -> UniqSM (Maybe (Graph n e x, BwdRewrite UniqSM n f))) + -> (t1 -> t2 -> UniqSM (Maybe (Graph n e x, BwdRewrite UniqSM n f))) + -> t1 + -> t2 + -> UniqSM (Maybe (Graph n e x, BwdRewrite UniqSM n f)) + f _ rw1 rw2' n f = do + res1 <- rw1 n f + case res1 of + Nothing -> rw2' n f + Just gr -> return $ Just $ badd_rw rw2 gr + +iterBwdRw :: forall n f. BwdRewrite UniqSM n f -> BwdRewrite UniqSM n f +iterBwdRw rw = wrapBR f rw + where f :: forall t e x t1 t2. 
+ t + -> (t1 -> t2 -> UniqSM (Maybe (Graph n e x, BwdRewrite UniqSM n f))) + -> t1 + -> t2 + -> UniqSM (Maybe (Graph n e x, BwdRewrite UniqSM n f)) + f _ rw' n f = liftM (liftM (badd_rw (iterBwdRw rw))) (rw' n f) + +-- | Function inspired by 'add' in the paper +badd_rw :: BwdRewrite UniqSM n f + -> (Graph n e x, BwdRewrite UniqSM n f) + -> (Graph n e x, BwdRewrite UniqSM n f) +badd_rw rw2 (g, rw1) = (g, rw1 `thenBwdRw` rw2) diff --git a/compiler/cmm/Hoopl/Dataflow.hs b/compiler/cmm/Hoopl/Dataflow.hs new file mode 100644 index 0000000000..9745eac9d8 --- /dev/null +++ b/compiler/cmm/Hoopl/Dataflow.hs @@ -0,0 +1,887 @@ +-- +-- Copyright (c) 2010, João Dias, Simon Marlow, Simon Peyton Jones, +-- and Norman Ramsey +-- +-- Modifications copyright (c) The University of Glasgow 2012 +-- +-- This module is a specialised and optimised version of +-- Compiler.Hoopl.Dataflow in the hoopl package. In particular it is +-- specialised to the UniqSM monad. +-- + +{-# LANGUAGE RankNTypes, ScopedTypeVariables, GADTs, EmptyDataDecls, PatternGuards, TypeFamilies, MultiParamTypeClasses #-} +#if __GLASGOW_HASKELL__ >= 703 +{-# OPTIONS_GHC -fprof-auto-top #-} +#endif +#if __GLASGOW_HASKELL__ >= 701 +{-# LANGUAGE Trustworthy #-} +#endif + +module Hoopl.Dataflow + ( DataflowLattice(..), OldFact(..), NewFact(..), Fact, mkFactBase + , ChangeFlag(..) 
+ , FwdPass(..), FwdTransfer, mkFTransfer, mkFTransfer3, getFTransfer3 + -- * Respecting Fuel + + -- $fuel + , FwdRewrite, mkFRewrite, mkFRewrite3, getFRewrite3, noFwdRewrite + , wrapFR, wrapFR2 + , BwdPass(..), BwdTransfer, mkBTransfer, mkBTransfer3, getBTransfer3 + , wrapBR, wrapBR2 + , BwdRewrite, mkBRewrite, mkBRewrite3, getBRewrite3, noBwdRewrite + , analyzeAndRewriteFwd, analyzeAndRewriteBwd + , analyzeFwd, analyzeFwdBlocks, analyzeBwd + ) +where + +import UniqSupply + +import Data.Maybe +import Data.Array + +import Compiler.Hoopl hiding + ( mkBRewrite3, mkFRewrite3, noFwdRewrite, noBwdRewrite + , analyzeAndRewriteBwd, analyzeAndRewriteFwd + ) +import Compiler.Hoopl.Internals + ( wrapFR, wrapFR2 + , wrapBR, wrapBR2 + , splice + ) + + +-- ----------------------------------------------------------------------------- + +noRewrite :: a -> b -> UniqSM (Maybe c) +noRewrite _ _ = return Nothing + +noFwdRewrite :: FwdRewrite UniqSM n f +noFwdRewrite = FwdRewrite3 (noRewrite, noRewrite, noRewrite) + +-- | Functions passed to 'mkFRewrite3' should not be aware of the fuel supply. +-- The result returned by 'mkFRewrite3' respects fuel. +mkFRewrite3 :: forall n f. + (n C O -> f -> UniqSM (Maybe (Graph n C O))) + -> (n O O -> f -> UniqSM (Maybe (Graph n O O))) + -> (n O C -> f -> UniqSM (Maybe (Graph n O C))) + -> FwdRewrite UniqSM n f +mkFRewrite3 f m l = FwdRewrite3 (lift f, lift m, lift l) + where lift :: forall t t1 a. (t -> t1 -> UniqSM (Maybe a)) + -> t -> t1 -> UniqSM (Maybe (a, FwdRewrite UniqSM n f)) + {-# INLINE lift #-} + lift rw node fact = do + a <- rw node fact + case a of + Nothing -> return Nothing + Just a -> return (Just (a,noFwdRewrite)) + +noBwdRewrite :: BwdRewrite UniqSM n f +noBwdRewrite = BwdRewrite3 (noRewrite, noRewrite, noRewrite) + +mkBRewrite3 :: forall n f. 
+ (n C O -> f -> UniqSM (Maybe (Graph n C O))) + -> (n O O -> f -> UniqSM (Maybe (Graph n O O))) + -> (n O C -> FactBase f -> UniqSM (Maybe (Graph n O C))) + -> BwdRewrite UniqSM n f +mkBRewrite3 f m l = BwdRewrite3 (lift f, lift m, lift l) + where lift :: forall t t1 a. (t -> t1 -> UniqSM (Maybe a)) + -> t -> t1 -> UniqSM (Maybe (a, BwdRewrite UniqSM n f)) + {-# INLINE lift #-} + lift rw node fact = do + a <- rw node fact + case a of + Nothing -> return Nothing + Just a -> return (Just (a,noBwdRewrite)) + +----------------------------------------------------------------------------- +-- Analyze and rewrite forward: the interface +----------------------------------------------------------------------------- + +-- | if the graph being analyzed is open at the entry, there must +-- be no other entry point, or all goes horribly wrong... +analyzeAndRewriteFwd + :: forall n f e x . NonLocal n => + FwdPass UniqSM n f + -> MaybeC e [Label] + -> Graph n e x -> Fact e f + -> UniqSM (Graph n e x, FactBase f, MaybeO x f) +analyzeAndRewriteFwd pass entries g f = + do (rg, fout) <- arfGraph pass (fmap targetLabels entries) g f + let (g', fb) = normalizeGraph rg + return (g', fb, distinguishedExitFact g' fout) + +distinguishedExitFact :: forall n e x f . Graph n e x -> Fact x f -> MaybeO x f +distinguishedExitFact g f = maybe g + where maybe :: Graph n e x -> MaybeO x f + maybe GNil = JustO f + maybe (GUnit {}) = JustO f + maybe (GMany _ _ x) = case x of NothingO -> NothingO + JustO _ -> JustO f + +---------------------------------------------------------------- +-- Forward Implementation +---------------------------------------------------------------- + +type Entries e = MaybeC e [Label] + +arfGraph :: forall n f e x . 
NonLocal n => + FwdPass UniqSM n f -> + Entries e -> Graph n e x -> Fact e f -> UniqSM (DG f n e x, Fact x f) +arfGraph pass@FwdPass { fp_lattice = lattice, + fp_transfer = transfer, + fp_rewrite = rewrite } entries g in_fact = graph g in_fact + where + {- nested type synonyms would be so lovely here + type ARF thing = forall e x . thing e x -> f -> m (DG f n e x, Fact x f) + type ARFX thing = forall e x . thing e x -> Fact e f -> m (DG f n e x, Fact x f) + -} + graph :: Graph n e x -> Fact e f -> UniqSM (DG f n e x, Fact x f) + block :: forall e x . + Block n e x -> f -> UniqSM (DG f n e x, Fact x f) + + body :: [Label] -> LabelMap (Block n C C) + -> Fact C f -> UniqSM (DG f n C C, Fact C f) + -- Outgoing factbase is restricted to Labels *not* in + -- in the Body; the facts for Labels *in* + -- the Body are in the 'DG f n C C' + + cat :: forall e a x f1 f2 f3. + (f1 -> UniqSM (DG f n e a, f2)) + -> (f2 -> UniqSM (DG f n a x, f3)) + -> (f1 -> UniqSM (DG f n e x, f3)) + + graph GNil f = return (dgnil, f) + graph (GUnit blk) f = block blk f + graph (GMany e bdy x) f = ((e `ebcat` bdy) `cat` exit x) f + where + ebcat :: MaybeO e (Block n O C) -> Body n -> Fact e f -> UniqSM (DG f n e C, Fact C f) + exit :: MaybeO x (Block n C O) -> Fact C f -> UniqSM (DG f n C x, Fact x f) + exit (JustO blk) f = arfx block blk f + exit NothingO f = return (dgnilC, f) + ebcat entry bdy f = c entries entry f + where c :: MaybeC e [Label] -> MaybeO e (Block n O C) + -> Fact e f -> UniqSM (DG f n e C, Fact C f) + c NothingC (JustO entry) f = (block entry `cat` body (successors entry) bdy) f + c (JustC entries) NothingO f = body entries bdy f + c _ _ _ = error "bogus GADT pattern match failure" + + -- Lift from nodes to blocks + block BNil f = return (dgnil, f) + block (BlockCO n b) f = (node n `cat` block b) f + block (BlockCC l b n) f = (node l `cat` block b `cat` node n) f + block (BlockOC b n) f = (block b `cat` node n) f + + block (BMiddle n) f = node n f + block (BCat b1 b2) f = 
(block b1 `cat` block b2) f + block (BHead h n) f = (block h `cat` node n) f + block (BTail n t) f = (node n `cat` block t) f + + {-# INLINE node #-} + node :: forall e x . (ShapeLifter e x) + => n e x -> f -> UniqSM (DG f n e x, Fact x f) + node n f + = do { grw <- frewrite rewrite n f + ; case grw of + Nothing -> return ( singletonDG f n + , ftransfer transfer n f ) + Just (g, rw) -> + let pass' = pass { fp_rewrite = rw } + f' = fwdEntryFact n f + in arfGraph pass' (fwdEntryLabel n) g f' } + + -- | Compose fact transformers and concatenate the resulting + -- rewritten graphs. + {-# INLINE cat #-} + cat ft1 ft2 f = do { (g1,f1) <- ft1 f + ; (g2,f2) <- ft2 f1 + ; let !g = g1 `dgSplice` g2 + ; return (g, f2) } + + arfx :: forall x . + (Block n C x -> f -> UniqSM (DG f n C x, Fact x f)) + -> (Block n C x -> Fact C f -> UniqSM (DG f n C x, Fact x f)) + arfx arf thing fb = + arf thing $ fromJust $ lookupFact (entryLabel thing) $ joinInFacts lattice fb + -- joinInFacts adds debugging information + + + -- Outgoing factbase is restricted to Labels *not* in + -- in the Body; the facts for Labels *in* + -- the Body are in the 'DG f n C C' + body entries blockmap init_fbase + = fixpoint Fwd lattice do_block entries blockmap init_fbase + where + lattice = fp_lattice pass + do_block :: forall x . Block n C x -> FactBase f + -> UniqSM (DG f n C x, Fact x f) + do_block b fb = block b entryFact + where entryFact = getFact lattice (entryLabel b) fb + + +-- Join all the incoming facts with bottom. +-- We know the results _shouldn't change_, but the transfer +-- functions might, for example, generate some debugging traces. 
+joinInFacts :: DataflowLattice f -> FactBase f -> FactBase f +joinInFacts (lattice @ DataflowLattice {fact_bot = bot, fact_join = fj}) fb = + mkFactBase lattice $ map botJoin $ mapToList fb + where botJoin (l, f) = (l, snd $ fj l (OldFact bot) (NewFact f)) + +forwardBlockList :: (NonLocal n) + => [Label] -> Body n -> [Block n C C] +-- This produces a list of blocks in order suitable for forward analysis, +-- along with the list of Labels it may depend on for facts. +forwardBlockList entries blks = postorder_dfs_from blks entries + +---------------------------------------------------------------- +-- Forward Analysis only +---------------------------------------------------------------- + +-- | if the graph being analyzed is open at the entry, there must +-- be no other entry point, or all goes horribly wrong... +analyzeFwd + :: forall n f e . NonLocal n => + FwdPass UniqSM n f + -> MaybeC e [Label] + -> Graph n e C -> Fact e f + -> FactBase f +analyzeFwd FwdPass { fp_lattice = lattice, + fp_transfer = FwdTransfer3 (ftr, mtr, ltr) } + entries g in_fact = graph g in_fact + where + graph :: Graph n e C -> Fact e f -> FactBase f + graph (GMany entry blockmap NothingO) + = case (entries, entry) of + (NothingC, JustO entry) -> block entry `cat` body (successors entry) + (JustC entries, NothingO) -> body entries + _ -> error "bogus GADT pattern match failure" + where + body :: [Label] -> Fact C f -> Fact C f + body entries f + = fixpointAnal Fwd lattice do_block entries blockmap f + where + do_block :: forall x . Block n C x -> FactBase f -> Fact x f + do_block b fb = block b entryFact + where entryFact = getFact lattice (entryLabel b) fb + + -- NB. eta-expand block, GHC can't do this by itself. See #5809. + block :: forall e x . 
Block n e x -> f -> Fact x f + block BNil f = f + block (BlockCO n b) f = (ftr n `cat` block b) f + block (BlockCC l b n) f = (ftr l `cat` (block b `cat` ltr n)) f + block (BlockOC b n) f = (block b `cat` ltr n) f + + block (BMiddle n) f = mtr n f + block (BCat b1 b2) f = (block b1 `cat` block b2) f + block (BHead h n) f = (block h `cat` mtr n) f + block (BTail n t) f = (mtr n `cat` block t) f + + {-# INLINE cat #-} + cat :: forall f1 f2 f3 . (f1 -> f2) -> (f2 -> f3) -> (f1 -> f3) + cat ft1 ft2 = \f -> ft2 $! ft1 f + +-- | if the graph being analyzed is open at the entry, there must +-- be no other entry point, or all goes horribly wrong... +analyzeFwdBlocks + :: forall n f e . NonLocal n => + FwdPass UniqSM n f + -> MaybeC e [Label] + -> Graph n e C -> Fact e f + -> FactBase f +analyzeFwdBlocks FwdPass { fp_lattice = lattice, + fp_transfer = FwdTransfer3 (ftr, _, ltr) } + entries g in_fact = graph g in_fact + where + graph :: Graph n e C -> Fact e f -> FactBase f + graph (GMany entry blockmap NothingO) + = case (entries, entry) of + (NothingC, JustO entry) -> block entry `cat` body (successors entry) + (JustC entries, NothingO) -> body entries + _ -> error "bogus GADT pattern match failure" + where + body :: [Label] -> Fact C f -> Fact C f + body entries f + = fixpointAnal Fwd lattice do_block entries blockmap f + where + do_block :: forall x . Block n C x -> FactBase f -> Fact x f + do_block b fb = block b entryFact + where entryFact = getFact lattice (entryLabel b) fb + + -- NB. eta-expand block, GHC can't do this by itself. See #5809. + block :: forall e x . Block n e x -> f -> Fact x f + block BNil f = f + block (BlockCO n _) f = ftr n f + block (BlockCC l _ n) f = (ftr l `cat` ltr n) f + block (BlockOC _ n) f = ltr n f + block _ _ = error "analyzeFwdBlocks" + + {-# INLINE cat #-} + cat :: forall f1 f2 f3 . (f1 -> f2) -> (f2 -> f3) -> (f1 -> f3) + cat ft1 ft2 = \f -> ft2 $! 
ft1 f + +---------------------------------------------------------------- +-- Backward Analysis only +---------------------------------------------------------------- + +-- | if the graph being analyzed is open at the entry, there must +-- be no other entry point, or all goes horribly wrong... +analyzeBwd + :: forall n f e . NonLocal n => + BwdPass UniqSM n f + -> MaybeC e [Label] + -> Graph n e C -> Fact C f + -> FactBase f +analyzeBwd BwdPass { bp_lattice = lattice, + bp_transfer = BwdTransfer3 (ftr, mtr, ltr) } + entries g in_fact = graph g in_fact + where + graph :: Graph n e C -> Fact C f -> FactBase f + graph (GMany entry blockmap NothingO) + = case (entries, entry) of + (NothingC, JustO entry) -> body (successors entry) + (JustC entries, NothingO) -> body entries + _ -> error "bogus GADT pattern match failure" + where + body :: [Label] -> Fact C f -> Fact C f + body entries f + = fixpointAnal Bwd lattice do_block entries blockmap f + where + do_block :: forall x . Block n C x -> Fact x f -> FactBase f + do_block b fb = mapSingleton (entryLabel b) (block b fb) + + -- NB. eta-expand block, GHC can't do this by itself. See #5809. + block :: forall e x . Block n e x -> Fact x f -> f + block BNil f = f + block (BlockCO n b) f = (ftr n `cat` block b) f + block (BlockCC l b n) f = ((ftr l `cat` block b) `cat` ltr n) f + block (BlockOC b n) f = (block b `cat` ltr n) f + + block (BMiddle n) f = mtr n f + block (BCat b1 b2) f = (block b1 `cat` block b2) f + block (BHead h n) f = (block h `cat` mtr n) f + block (BTail n t) f = (mtr n `cat` block t) f + + {-# INLINE cat #-} + cat :: forall f1 f2 f3 . (f2 -> f3) -> (f1 -> f2) -> (f1 -> f3) + cat ft1 ft2 = \f -> ft1 $! 
ft2 f + +----------------------------------------------------------------------------- +-- Backward analysis and rewriting: the interface +----------------------------------------------------------------------------- + + +-- | if the graph being analyzed is open at the exit, I don't +-- quite understand the implications of possible other exits +analyzeAndRewriteBwd + :: NonLocal n + => BwdPass UniqSM n f + -> MaybeC e [Label] -> Graph n e x -> Fact x f + -> UniqSM (Graph n e x, FactBase f, MaybeO e f) +analyzeAndRewriteBwd pass entries g f = + do (rg, fout) <- arbGraph pass (fmap targetLabels entries) g f + let (g', fb) = normalizeGraph rg + return (g', fb, distinguishedEntryFact g' fout) + +distinguishedEntryFact :: forall n e x f . Graph n e x -> Fact e f -> MaybeO e f +distinguishedEntryFact g f = maybe g + where maybe :: Graph n e x -> MaybeO e f + maybe GNil = JustO f + maybe (GUnit {}) = JustO f + maybe (GMany e _ _) = case e of NothingO -> NothingO + JustO _ -> JustO f + + +----------------------------------------------------------------------------- +-- Backward implementation +----------------------------------------------------------------------------- + +arbGraph :: forall n f e x . + NonLocal n => + BwdPass UniqSM n f -> + Entries e -> Graph n e x -> Fact x f -> UniqSM (DG f n e x, Fact e f) +arbGraph pass@BwdPass { bp_lattice = lattice, + bp_transfer = transfer, + bp_rewrite = rewrite } entries g in_fact = graph g in_fact + where + {- nested type synonyms would be so lovely here + type ARB thing = forall e x . thing e x -> Fact x f -> m (DG f n e x, f) + type ARBX thing = forall e x . thing e x -> Fact x f -> m (DG f n e x, Fact e f) + -} + graph :: Graph n e x -> Fact x f -> UniqSM (DG f n e x, Fact e f) + block :: forall e x . Block n e x -> Fact x f -> UniqSM (DG f n e x, f) + body :: [Label] -> Body n -> Fact C f -> UniqSM (DG f n C C, Fact C f) + node :: forall e x . 
(ShapeLifter e x) + => n e x -> Fact x f -> UniqSM (DG f n e x, f) + cat :: forall e a x info info' info''. + (info' -> UniqSM (DG f n e a, info'')) + -> (info -> UniqSM (DG f n a x, info')) + -> (info -> UniqSM (DG f n e x, info'')) + + graph GNil f = return (dgnil, f) + graph (GUnit blk) f = block blk f + graph (GMany e bdy x) f = ((e `ebcat` bdy) `cat` exit x) f + where + ebcat :: MaybeO e (Block n O C) -> Body n -> Fact C f -> UniqSM (DG f n e C, Fact e f) + exit :: MaybeO x (Block n C O) -> Fact x f -> UniqSM (DG f n C x, Fact C f) + exit (JustO blk) f = arbx block blk f + exit NothingO f = return (dgnilC, f) + ebcat entry bdy f = c entries entry f + where c :: MaybeC e [Label] -> MaybeO e (Block n O C) + -> Fact C f -> UniqSM (DG f n e C, Fact e f) + c NothingC (JustO entry) f = (block entry `cat` body (successors entry) bdy) f + c (JustC entries) NothingO f = body entries bdy f + c _ _ _ = error "bogus GADT pattern match failure" + + -- Lift from nodes to blocks + block BNil f = return (dgnil, f) + block (BlockCO n b) f = (node n `cat` block b) f + block (BlockCC l b n) f = (node l `cat` block b `cat` node n) f + block (BlockOC b n) f = (block b `cat` node n) f + + block (BMiddle n) f = node n f + block (BCat b1 b2) f = (block b1 `cat` block b2) f + block (BHead h n) f = (block h `cat` node n) f + block (BTail n t) f = (node n `cat` block t) f + + {-# INLINE node #-} + node n f + = do { bwdres <- brewrite rewrite n f + ; case bwdres of + Nothing -> return (singletonDG entry_f n, entry_f) + where entry_f = btransfer transfer n f + Just (g, rw) -> + do { let pass' = pass { bp_rewrite = rw } + ; (g, f) <- arbGraph pass' (fwdEntryLabel n) g f + ; return (g, bwdEntryFact lattice n f)} } + + -- | Compose fact transformers and concatenate the resulting + -- rewritten graphs. + {-# INLINE cat #-} + cat ft1 ft2 f = do { (g2,f2) <- ft2 f + ; (g1,f1) <- ft1 f2 + ; let !g = g1 `dgSplice` g2 + ; return (g, f1) } + + arbx :: forall x . 
+ (Block n C x -> Fact x f -> UniqSM (DG f n C x, f)) + -> (Block n C x -> Fact x f -> UniqSM (DG f n C x, Fact C f)) + + arbx arb thing f = do { (rg, f) <- arb thing f + ; let fb = joinInFacts (bp_lattice pass) $ + mapSingleton (entryLabel thing) f + ; return (rg, fb) } + -- joinInFacts adds debugging information + + -- Outgoing factbase is restricted to Labels *not* in + -- in the Body; the facts for Labels *in* + -- the Body are in the 'DG f n C C' + body entries blockmap init_fbase + = fixpoint Bwd (bp_lattice pass) do_block entries blockmap init_fbase + where + do_block :: forall x. Block n C x -> Fact x f -> UniqSM (DG f n C x, LabelMap f) + do_block b f = do (g, f) <- block b f + return (g, mapSingleton (entryLabel b) f) + + +{- + +The forward and backward cases are not dual. In the forward case, the +entry points are known, and one simply traverses the body blocks from +those points. In the backward case, something is known about the exit +points, but this information is essentially useless, because we don't +actually have a dual graph (that is, one with edges reversed) to +compute with. (Even if we did have a dual graph, it would not avail +us---a backward analysis must include reachable blocks that don't +reach the exit, as in a procedure that loops forever and has side +effects.) + +-} + +----------------------------------------------------------------------------- +-- fixpoint +----------------------------------------------------------------------------- + +data Direction = Fwd | Bwd + +-- | fixpointing for analysis-only +-- +fixpointAnal :: forall n f. 
NonLocal n + => Direction + -> DataflowLattice f + -> (Block n C C -> Fact C f -> Fact C f) + -> [Label] + -> LabelMap (Block n C C) + -> Fact C f -> FactBase f + +fixpointAnal direction DataflowLattice{ fact_bot = _, fact_join = join } + do_block entries blockmap init_fbase + = loop start init_fbase + where + blocks = sortBlocks direction entries blockmap + n = length blocks + block_arr = {-# SCC "block_arr" #-} listArray (0,n-1) blocks + start = {-# SCC "start" #-} [0..n-1] + dep_blocks = {-# SCC "dep_blocks" #-} mkDepBlocks direction blocks + + loop + :: IntHeap -- blocks still to analyse + -> FactBase f -- current factbase (increases monotonically) + -> FactBase f + + loop [] fbase = fbase + loop (ix:todo) fbase = + let + blk = block_arr ! ix + + out_facts = {-# SCC "do_block" #-} do_block blk fbase + + !(todo', fbase') = {-# SCC "mapFoldWithKey" #-} + mapFoldWithKey (updateFact join dep_blocks) + (todo,fbase) out_facts + in + -- trace ("analysing: " ++ show (entryLabel blk)) $ + -- trace ("fbase': " ++ show (mapKeys fbase')) $ return () + -- trace ("changed: " ++ show changed) $ return () + -- trace ("to analyse: " ++ show to_analyse) $ return () + + loop todo' fbase' + + +-- | fixpointing for combined analysis/rewriting +-- +fixpoint :: forall n f. 
NonLocal n + => Direction + -> DataflowLattice f + -> (Block n C C -> Fact C f -> UniqSM (DG f n C C, Fact C f)) + -> [Label] + -> LabelMap (Block n C C) + -> (Fact C f -> UniqSM (DG f n C C, Fact C f)) + +fixpoint direction DataflowLattice{ fact_bot = _, fact_join = join } + do_block entries blockmap init_fbase + = do + -- trace ("fixpoint: " ++ show (case direction of Fwd -> True; Bwd -> False) ++ " " ++ show (mapKeys blockmap) ++ show entries ++ " " ++ show (mapKeys init_fbase)) $ return() + (fbase, newblocks) <- loop start init_fbase mapEmpty + -- trace ("fixpoint DONE: " ++ show (mapKeys fbase) ++ show (mapKeys newblocks)) $ return() + return (GMany NothingO newblocks NothingO, + mapDeleteList (mapKeys blockmap) fbase) + -- The successors of the Graph are the the Labels + -- for which we have facts and which are *not* in + -- the blocks of the graph + where + blocks = sortBlocks direction entries blockmap + n = length blocks + block_arr = {-# SCC "block_arr" #-} listArray (0,n-1) blocks + start = {-# SCC "start" #-} [0..n-1] + dep_blocks = {-# SCC "dep_blocks" #-} mkDepBlocks direction blocks + + loop + :: IntHeap + -> FactBase f -- current factbase (increases monotonically) + -> LabelMap (DBlock f n C C) -- transformed graph + -> UniqSM (FactBase f, LabelMap (DBlock f n C C)) + + loop [] fbase newblocks = return (fbase, newblocks) + loop (ix:todo) fbase !newblocks = do + let blk = block_arr ! 
ix + + -- trace ("analysing: " ++ show (entryLabel blk)) $ return () + (rg, out_facts) <- do_block blk fbase + let !(todo', fbase') = + mapFoldWithKey (updateFact join dep_blocks) + (todo,fbase) out_facts + + -- trace ("fbase': " ++ show (mapKeys fbase')) $ return () + -- trace ("changed: " ++ show changed) $ return () + -- trace ("to analyse: " ++ show to_analyse) $ return () + + let newblocks' = case rg of + GMany _ blks _ -> mapUnion blks newblocks + + loop todo' fbase' newblocks' + + +{- Note [TxFactBase invariants] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The TxFactBase is used only during a fixpoint iteration (or "sweep"), +and accumulates facts (and the transformed code) during the fixpoint +iteration. + +* tfb_fbase increases monotonically, across all sweeps + +* At the beginning of each sweep + tfb_cha = NoChange + tfb_lbls = {} + +* During each sweep we process each block in turn. Processing a block + is done thus: + 1. Read from tfb_fbase the facts for its entry label (forward) + or successors labels (backward) + 2. Transform those facts into new facts for its successors (forward) + or entry label (backward) + 3. Augment tfb_fbase with that info + We call the labels read in step (1) the "in-labels" of the sweep + +* The field tfb_lbls is the set of in-labels of all blocks that have + been processed so far this sweep, including the block that is + currently being processed. tfb_lbls is initialised to {}. It is a + subset of the Labels of the *original* (not transformed) blocks. + +* The tfb_cha field is set to SomeChange iff we decide we need to + perform another iteration of the fixpoint loop. It is initialsed to NoChange. 
+ + Specifically, we set tfb_cha to SomeChange in step (3) iff + (a) The fact in tfb_fbase for a block L changes + (b) L is in tfb_lbls + Reason: until a label enters the in-labels its accumuated fact in tfb_fbase + has not been read, hence cannot affect the outcome + +Note [Unreachable blocks] +~~~~~~~~~~~~~~~~~~~~~~~~~ +A block that is not in the domain of tfb_fbase is "currently unreachable". +A currently-unreachable block is not even analyzed. Reason: consider +constant prop and this graph, with entry point L1: + L1: x:=3; goto L4 + L2: x:=4; goto L4 + L4: if x>3 goto L2 else goto L5 +Here L2 is actually unreachable, but if we process it with bottom input fact, +we'll propagate (x=4) to L4, and nuke the otherwise-good rewriting of L4. + +* If a currently-unreachable block is not analyzed, then its rewritten + graph will not be accumulated in tfb_rg. And that is good: + unreachable blocks simply do not appear in the output. + +* Note that clients must be careful to provide a fact (even if bottom) + for each entry point. Otherwise useful blocks may be garbage collected. + +* Note that updateFact must set the change-flag if a label goes from + not-in-fbase to in-fbase, even if its fact is bottom. In effect the + real fact lattice is + UNR + bottom + the points above bottom + +* Even if the fact is going from UNR to bottom, we still call the + client's fact_join function because it might give the client + some useful debugging information. + +* All of this only applies for *forward* ixpoints. For the backward + case we must treat every block as reachable; it might finish with a + 'return', and therefore have no successors, for example. +-} + + +----------------------------------------------------------------------------- +-- Pieces that are shared by fixpoint and fixpoint_anal +----------------------------------------------------------------------------- + +-- | Sort the blocks into the right order for analysis. 
+sortBlocks :: NonLocal n => Direction -> [Label] -> LabelMap (Block n C C) + -> [Block n C C] +sortBlocks direction entries blockmap + = case direction of Fwd -> fwd + Bwd -> reverse fwd + where fwd = forwardBlockList entries blockmap + +-- | construct a mapping from L -> block indices. If the fact for L +-- changes, re-analyse the given blocks. +mkDepBlocks :: NonLocal n => Direction -> [Block n C C] -> LabelMap [Int] +mkDepBlocks Fwd blocks = go blocks 0 mapEmpty + where go [] !_ m = m + go (b:bs) !n m = go bs (n+1) $! mapInsert (entryLabel b) [n] m +mkDepBlocks Bwd blocks = go blocks 0 mapEmpty + where go [] !_ m = m + go (b:bs) !n m = go bs (n+1) $! go' (successors b) m + where go' [] m = m + go' (l:ls) m = go' ls (mapInsertWith (++) l [n] m) + + +-- | After some new facts have been generated by analysing a block, we +-- fold this function over them to generate (a) a list of block +-- indices to (re-)analyse, and (b) the new FactBase. +-- +updateFact :: JoinFun f -> LabelMap [Int] + -> Label -> f -- out fact + -> (IntHeap, FactBase f) + -> (IntHeap, FactBase f) + +updateFact fact_join dep_blocks lbl new_fact (todo, fbase) + = case lookupFact lbl fbase of + Nothing -> let !z = mapInsert lbl new_fact fbase in (changed, z) + -- Note [no old fact] + Just old_fact -> + case fact_join lbl (OldFact old_fact) (NewFact new_fact) of + (NoChange, _) -> (todo, fbase) + (_, f) -> let !z = mapInsert lbl f fbase in (changed, z) + where + changed = foldr insertIntHeap todo $ + mapFindWithDefault [] lbl dep_blocks + +{- +Note [no old fact] + +We know that the new_fact is >= _|_, so we don't need to join. However, +if the new fact is also _|_, and we have already analysed its block, +we don't need to record a change. So there's a tradeoff here. It turns +out that always recording a change is faster. 
+-} + +----------------------------------------------------------------------------- +-- DG: an internal data type for 'decorated graphs' +-- TOTALLY internal to Hoopl; each block is decorated with a fact +----------------------------------------------------------------------------- + +type DG f = Graph' (DBlock f) +data DBlock f n e x = DBlock f (Block n e x) -- ^ block decorated with fact + +instance NonLocal n => NonLocal (DBlock f n) where + entryLabel (DBlock _ b) = entryLabel b + successors (DBlock _ b) = successors b + +--- constructors + +dgnil :: DG f n O O +dgnilC :: DG f n C C +dgSplice :: NonLocal n => DG f n e a -> DG f n a x -> DG f n e x + +---- observers + +normalizeGraph :: forall n f e x . + NonLocal n => DG f n e x + -> (Graph n e x, FactBase f) + -- A Graph together with the facts for that graph + -- The domains of the two maps should be identical + +normalizeGraph g = (mapGraphBlocks dropFact g, facts g) + where dropFact :: DBlock t t1 t2 t3 -> Block t1 t2 t3 + dropFact (DBlock _ b) = b + facts :: DG f n e x -> FactBase f + facts GNil = noFacts + facts (GUnit _) = noFacts + facts (GMany _ body exit) = bodyFacts body `mapUnion` exitFacts exit + exitFacts :: MaybeO x (DBlock f n C O) -> FactBase f + exitFacts NothingO = noFacts + exitFacts (JustO (DBlock f b)) = mapSingleton (entryLabel b) f + bodyFacts :: LabelMap (DBlock f n C C) -> FactBase f + bodyFacts body = mapFoldWithKey f noFacts body + where f :: forall t a x. (NonLocal t) => Label -> DBlock a t C x -> LabelMap a -> LabelMap a + f lbl (DBlock f _) fb = mapInsert lbl f fb + +--- implementation of the constructors (boring) + +dgnil = GNil +dgnilC = GMany NothingO emptyBody NothingO + +dgSplice = splice fzCat + where fzCat :: DBlock f n e O -> DBlock t n O x -> DBlock f n e x + fzCat (DBlock f b1) (DBlock _ b2) = DBlock f $! b1 `blockAppend` b2 + -- NB. strictness, this function is hammered. 
+ +---------------------------------------------------------------- +-- Utilities +---------------------------------------------------------------- + +-- Lifting based on shape: +-- - from nodes to blocks +-- - from facts to fact-like things +-- Lowering back: +-- - from fact-like things to facts +-- Note that the latter two functions depend only on the entry shape. +class ShapeLifter e x where + singletonDG :: f -> n e x -> DG f n e x + fwdEntryFact :: NonLocal n => n e x -> f -> Fact e f + fwdEntryLabel :: NonLocal n => n e x -> MaybeC e [Label] + ftransfer :: FwdTransfer n f -> n e x -> f -> Fact x f + frewrite :: FwdRewrite m n f -> n e x + -> f -> m (Maybe (Graph n e x, FwdRewrite m n f)) +-- @ end node.tex + bwdEntryFact :: NonLocal n => DataflowLattice f -> n e x -> Fact e f -> f + btransfer :: BwdTransfer n f -> n e x -> Fact x f -> f + brewrite :: BwdRewrite m n f -> n e x + -> Fact x f -> m (Maybe (Graph n e x, BwdRewrite m n f)) + +instance ShapeLifter C O where + singletonDG f n = gUnitCO (DBlock f (BlockCO n BNil)) + fwdEntryFact n f = mapSingleton (entryLabel n) f + bwdEntryFact lat n fb = getFact lat (entryLabel n) fb + ftransfer (FwdTransfer3 (ft, _, _)) n f = ft n f + btransfer (BwdTransfer3 (bt, _, _)) n f = bt n f + frewrite (FwdRewrite3 (fr, _, _)) n f = fr n f + brewrite (BwdRewrite3 (br, _, _)) n f = br n f + fwdEntryLabel n = JustC [entryLabel n] + +instance ShapeLifter O O where + singletonDG f = gUnitOO . DBlock f . 
BMiddle + fwdEntryFact _ f = f + bwdEntryFact _ _ f = f + ftransfer (FwdTransfer3 (_, ft, _)) n f = ft n f + btransfer (BwdTransfer3 (_, bt, _)) n f = bt n f + frewrite (FwdRewrite3 (_, fr, _)) n f = fr n f + brewrite (BwdRewrite3 (_, br, _)) n f = br n f + fwdEntryLabel _ = NothingC + +instance ShapeLifter O C where + singletonDG f n = gUnitOC (DBlock f (BlockOC BNil n)) + fwdEntryFact _ f = f + bwdEntryFact _ _ f = f + ftransfer (FwdTransfer3 (_, _, ft)) n f = ft n f + btransfer (BwdTransfer3 (_, _, bt)) n f = bt n f + frewrite (FwdRewrite3 (_, _, fr)) n f = fr n f + brewrite (BwdRewrite3 (_, _, br)) n f = br n f + fwdEntryLabel _ = NothingC + +{- +class ShapeLifter e x where + singletonDG :: f -> n e x -> DG f n e x + +instance ShapeLifter C O where + singletonDG f n = gUnitCO (DBlock f (BlockCO n BNil)) + +instance ShapeLifter O O where + singletonDG f = gUnitOO . DBlock f . BMiddle + +instance ShapeLifter O C where + singletonDG f n = gUnitOC (DBlock f (BlockOC BNil n)) +-} + +-- Fact lookup: the fact `orelse` bottom +getFact :: DataflowLattice f -> Label -> FactBase f -> f +getFact lat l fb = case lookupFact l fb of Just f -> f + Nothing -> fact_bot lat + + + +{- Note [Respects fuel] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-} +-- $fuel +-- A value of type 'FwdRewrite' or 'BwdRewrite' /respects fuel/ if +-- any function contained within the value satisfies the following properties: +-- +-- * When fuel is exhausted, it always returns 'Nothing'. +-- +-- * When it returns @Just g rw@, it consumes /exactly/ one unit +-- of fuel, and new rewrite 'rw' also respects fuel. +-- +-- Provided that functions passed to 'mkFRewrite', 'mkFRewrite3', +-- 'mkBRewrite', and 'mkBRewrite3' are not aware of the fuel supply, +-- the results respect fuel. +-- +-- It is an /unchecked/ run-time error for the argument passed to 'wrapFR', +-- 'wrapFR2', 'wrapBR', or 'warpBR2' to return a function that does not respect fuel. 
+ +-- ----------------------------------------------------------------------------- +-- a Heap of Int + +-- We should really use a proper Heap here, but my attempts to make +-- one have not succeeded in beating the simple ordered list. Another +-- alternative is IntSet (using deleteFindMin), but that was also +-- slower than the ordered list in my experiments --SDM 25/1/2012 + +type IntHeap = [Int] -- ordered + +insertIntHeap :: Int -> [Int] -> [Int] +insertIntHeap x [] = [x] +insertIntHeap x (y:ys) + | x < y = x : y : ys + | x == y = x : ys + | otherwise = y : insertIntHeap x ys diff --git a/compiler/cmm/MkGraph.hs b/compiler/cmm/MkGraph.hs index 04586b1029..ecd4d4f985 100644 --- a/compiler/cmm/MkGraph.hs +++ b/compiler/cmm/MkGraph.hs @@ -1,29 +1,19 @@ {-# LANGUAGE GADTs #-} --- ToDo: remove -fno-warn-warnings-deprecations -{-# OPTIONS_GHC -fno-warn-warnings-deprecations #-} --- ToDo: remove -fno-warn-incomplete-patterns -{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-} - --- Module for building CmmAGraphs. - --- As the CmmAGraph is a wrapper over Graph CmmNode O x, it is different --- from Hoopl's AGraph. The current clients expect functions with the --- same names Hoopl uses, so this module cannot be in the same namespace --- as Compiler.Hoopl. - module MkGraph - ( CmmAGraph - , emptyAGraph, (<*>), catAGraphs, outOfLine - , mkLabel, mkMiddle, mkLast - , withFreshLabel, withUnique, lgraphOfAGraph, labelAGraph + ( CmmAGraph, CgStmt(..) 
+ , (<*>), catAGraphs + , mkLabel, mkMiddle, mkLast, outOfLine + , lgraphOfAGraph, labelAGraph , stackStubExpr - , mkNop, mkAssign, mkStore, mkCall, mkCmmCall, mkSafeCall, mkUnsafeCall, mkFinalCall - , mkJump, mkDirectJump, mkForeignJump, mkJumpGC, mkCbranch, mkSwitch - , mkReturn, mkReturnSimple, mkComment, mkCallEntry - , mkBranch, mkCmmIfThenElse, mkCmmIfThen, mkCmmWhileDo - , copyInOflow, copyInSlot, copyOutOflow, copyOutSlot + , mkNop, mkAssign, mkStore, mkUnsafeCall, mkFinalCall, mkCallReturnsTo + , mkJump, mkDirectJump, mkForeignJump, mkForeignJumpExtra, mkJumpGC + , mkCbranch, mkSwitch + , mkReturn, mkReturnSimple, mkComment, mkCallEntry, mkBranch + , copyInOflow, copyOutOflow + , noExtraStack + , toCall, Transfer(..) ) where @@ -31,250 +21,232 @@ import BlockId import Cmm import CmmCallConv (assignArgumentsPos, ParamLocation(..)) + import Compiler.Hoopl hiding (Unique, (<*>), mkFirst, mkMiddle, mkLast, mkLabel, mkBranch, Shape(..)) -import qualified Compiler.Hoopl as H -import Compiler.Hoopl.GHC (uniqueToLbl) import FastString import ForeignCall import Outputable import Prelude hiding (succ) import SMRep (ByteOff) -import StaticFlags -import Unique import UniqSupply -import Util +import OrdList #include "HsVersions.h" -{- -A 'CmmAGraph' is an abstract version of a 'Graph CmmNode O x' from module -'Cmm'. The difference is that the 'CmmAGraph' can be eigher open of closed at -exit and it can supply fresh Labels and Uniques. - -It also supports a splicing operation <*>, which is different from the Hoopl's -<*>, because it splices two CmmAGraphs. Specifically, it can splice Graph -O C and Graph O x. In this case, the open beginning of the second graph is -thrown away. In the debug mode this sequence is checked to be empty or -containing a branch (see note [Branch follows branch]). - -When an CmmAGraph open at exit is being converted to a CmmGraph, the output -exit sequence is considered unreachable. 
If the graph consist of one block -only, if it not the case and we crash. Otherwise we just throw the exit -sequence away (and in debug mode we test that it really was unreachable). --} - -{- -Node [Branch follows branch] -============================ -Why do we say it's ok for a Branch to follow a Branch? -Because the standard constructor mkLabel has fall-through -semantics. So if you do a mkLabel, you finish the current block, -giving it a label, and start a new one that branches to that label. -Emitting a Branch at this point is fine: - goto L1; L2: ...stuff... --} - -data CmmGraphOC = Opened (Graph CmmNode O O) - | Closed (Graph CmmNode O C) -type CmmAGraph = UniqSM CmmGraphOC -- Graph open at entry - -{- -MS: I began with - newtype CmmAGraph = forall x. AG (UniqSM (Graph CmmNode O x)) -but that does not work well, because we cannot take the graph -out of the monad -- we do not know the type of what we would take -out and pattern matching does not help, as we cannot pattern match -on a graph inside the monad. --} -data Transfer = Call | Jump | Ret deriving Eq +----------------------------------------------------------------------------- +-- Building Graphs + + +-- | CmmAGraph is a chunk of code consisting of: +-- +-- * ordinary statements (assignments, stores etc.) +-- * jumps +-- * labels +-- * out-of-line labelled blocks +-- +-- The semantics is that control falls through labels and out-of-line +-- blocks. Everything after a jump up to the next label is by +-- definition unreachable code, and will be discarded. +-- +-- Two CmmAGraphs can be stuck together with <*>, with the meaning that +-- control flows from the first to the second. +-- +-- A 'CmmAGraph' can be turned into a 'CmmGraph' (closed at both ends) +-- by providing a label for the entry point; see 'labelAGraph'. 
+-- +type CmmAGraph = OrdList CgStmt + +data CgStmt + = CgLabel BlockId + | CgStmt (CmmNode O O) + | CgLast (CmmNode O C) + | CgFork BlockId CmmAGraph + +flattenCmmAGraph :: BlockId -> CmmAGraph -> CmmGraph +flattenCmmAGraph id stmts = + CmmGraph { g_entry = id, + g_graph = GMany NothingO body NothingO } + where + (block, blocks) = flatten (fromOL stmts) + entry = blockJoinHead (CmmEntry id) block + body = foldr addBlock emptyBody (entry:blocks) + + flatten :: [CgStmt] -> (Block CmmNode O C, [Block CmmNode C C]) + flatten [] = panic "flatten []" + + -- A label at the end of a function or fork: this label must not be reachable, + -- but it might be referred to from another BB that also isn't reachable. + -- Eliminating these has to be done with a dead-code analysis. For now, + -- we just make it into a well-formed block by adding a recursive jump. + flatten [CgLabel id] + = (goto_id, [blockJoinHead (CmmEntry id) goto_id] ) + where goto_id = blockJoinTail emptyBlock (CmmBranch id) + + -- A jump/branch: throw away all the code up to the next label, because + -- it is unreachable. Be careful to keep forks that we find on the way. 
+ flatten (CgLast stmt : stmts) + = case dropWhile isOrdinaryStmt stmts of + [] -> + ( sing, [] ) + [CgLabel id] -> + ( sing, [blockJoin (CmmEntry id) emptyBlock (CmmBranch id)] ) + (CgLabel id : stmts) -> + ( sing, blockJoinHead (CmmEntry id) block : blocks ) + where (block,blocks) = flatten stmts + (CgFork fork_id stmts : ss) -> + flatten (CgFork fork_id stmts : CgLast stmt : ss) + _ -> panic "MkGraph.flatten" + where + sing = blockJoinTail emptyBlock stmt + + flatten (s:ss) = + case s of + CgStmt stmt -> (blockCons stmt block, blocks) + CgLabel id -> (blockJoinTail emptyBlock (CmmBranch id), + blockJoinHead (CmmEntry id) block : blocks) + CgFork fork_id stmts -> + (block, blockJoinHead (CmmEntry fork_id) fork_block : fork_blocks ++ blocks) + where (fork_block, fork_blocks) = flatten (fromOL stmts) + _ -> panic "MkGraph.flatten" + where (block,blocks) = flatten ss + +isOrdinaryStmt :: CgStmt -> Bool +isOrdinaryStmt (CgStmt _) = True +isOrdinaryStmt (CgLast _) = True +isOrdinaryStmt _ = False + + ---------- AGraph manipulation -emptyAGraph :: CmmAGraph (<*>) :: CmmAGraph -> CmmAGraph -> CmmAGraph +(<*>) = appOL + catAGraphs :: [CmmAGraph] -> CmmAGraph +catAGraphs = concatOL + +-- | created a sequence "goto id; id:" as an AGraph +mkLabel :: BlockId -> CmmAGraph +mkLabel bid = unitOL (CgLabel bid) + +-- | creates an open AGraph from a given node +mkMiddle :: CmmNode O O -> CmmAGraph +mkMiddle middle = unitOL (CgStmt middle) -mkLabel :: BlockId -> CmmAGraph -- created a sequence "goto id; id:" as an AGraph -mkMiddle :: CmmNode O O -> CmmAGraph -- creates an open AGraph from a given node -mkLast :: CmmNode O C -> CmmAGraph -- created a closed AGraph from a given node +-- | created a closed AGraph from a given node +mkLast :: CmmNode O C -> CmmAGraph +mkLast last = unitOL (CgLast last) -withFreshLabel :: String -> (BlockId -> CmmAGraph) -> CmmAGraph -withUnique :: (Unique -> CmmAGraph) -> CmmAGraph +-- | A labelled code block; should end in a last node +outOfLine :: 
BlockId -> CmmAGraph -> CmmAGraph +outOfLine l g = unitOL (CgFork l g) +-- | allocate a fresh label for the entry point lgraphOfAGraph :: CmmAGraph -> UniqSM CmmGraph - -- ^ allocate a fresh label for the entry point +lgraphOfAGraph g = do u <- getUniqueM + return (flattenCmmAGraph (mkBlockId u) g) + +-- | use the given BlockId as the label of the entry point labelAGraph :: BlockId -> CmmAGraph -> UniqSM CmmGraph - -- ^ use the given BlockId as the label of the entry point +labelAGraph lbl ag = return (flattenCmmAGraph lbl ag) ---------- No-ops mkNop :: CmmAGraph +mkNop = nilOL + mkComment :: FastString -> CmmAGraph +#ifdef DEBUG +-- SDM: generating all those comments takes time, this saved about 4% for me +mkComment fs = mkMiddle $ CmmComment fs +#else +mkComment _ = nilOL +#endif ---------- Assignment and store mkAssign :: CmmReg -> CmmExpr -> CmmAGraph -mkStore :: CmmExpr -> CmmExpr -> CmmAGraph +mkAssign l r = mkMiddle $ CmmAssign l r ----------- Calls -mkCall :: CmmExpr -> (Convention, Convention) -> [CmmFormal] -> [CmmActual] -> - UpdFrameOffset -> CmmAGraph -mkCmmCall :: CmmExpr -> [CmmFormal] -> [CmmActual] -> - UpdFrameOffset -> CmmAGraph - -- Native C-- calling convention -mkSafeCall :: ForeignTarget -> [CmmFormal] -> [CmmActual] -> UpdFrameOffset -> Bool -> CmmAGraph -mkUnsafeCall :: ForeignTarget -> [CmmFormal] -> [CmmActual] -> CmmAGraph -mkFinalCall :: CmmExpr -> CCallConv -> [CmmActual] -> UpdFrameOffset -> CmmAGraph - -- Never returns; like exit() or barf() +mkStore :: CmmExpr -> CmmExpr -> CmmAGraph +mkStore l r = mkMiddle $ CmmStore l r ---------- Control transfer -mkJump :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph -mkDirectJump :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph -mkJumpGC :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph -mkForeignJump :: Convention -> CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph -mkCbranch :: CmmExpr -> BlockId -> BlockId -> CmmAGraph -mkSwitch :: CmmExpr -> [Maybe BlockId] -> 
CmmAGraph +mkJump :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph +mkJump e actuals updfr_off = + lastWithArgs Jump Old NativeNodeCall actuals updfr_off $ + toCall e Nothing updfr_off 0 + +mkDirectJump :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph +mkDirectJump e actuals updfr_off = + lastWithArgs Jump Old NativeDirectCall actuals updfr_off $ + toCall e Nothing updfr_off 0 + +mkJumpGC :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph +mkJumpGC e actuals updfr_off = + lastWithArgs Jump Old GC actuals updfr_off $ + toCall e Nothing updfr_off 0 + +mkForeignJump :: Convention -> CmmExpr -> [CmmActual] -> UpdFrameOffset + -> CmmAGraph +mkForeignJump conv e actuals updfr_off = + mkForeignJumpExtra conv e actuals updfr_off noExtraStack + +mkForeignJumpExtra :: Convention -> CmmExpr -> [CmmActual] + -> UpdFrameOffset -> (ByteOff, [(CmmExpr, ByteOff)]) + -> CmmAGraph +mkForeignJumpExtra conv e actuals updfr_off extra_stack = + lastWithArgsAndExtraStack Jump Old conv actuals updfr_off extra_stack $ + toCall e Nothing updfr_off 0 + +mkCbranch :: CmmExpr -> BlockId -> BlockId -> CmmAGraph +mkCbranch pred ifso ifnot = mkLast (CmmCondBranch pred ifso ifnot) + +mkSwitch :: CmmExpr -> [Maybe BlockId] -> CmmAGraph +mkSwitch e tbl = mkLast $ CmmSwitch e tbl + mkReturn :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph +mkReturn e actuals updfr_off = + lastWithArgs Ret Old NativeReturn actuals updfr_off $ + toCall e Nothing updfr_off 0 + mkReturnSimple :: [CmmActual] -> UpdFrameOffset -> CmmAGraph +mkReturnSimple actuals updfr_off = + mkReturn e actuals updfr_off + where e = CmmLoad (CmmStackSlot Old updfr_off) gcWord mkBranch :: BlockId -> CmmAGraph -mkCmmIfThenElse :: CmmExpr -> CmmAGraph -> CmmAGraph -> CmmAGraph -mkCmmIfThen :: CmmExpr -> CmmAGraph -> CmmAGraph -mkCmmWhileDo :: CmmExpr -> CmmAGraph -> CmmAGraph - -outOfLine :: CmmAGraph -> CmmAGraph --- ^ The argument is an CmmAGraph that must have an --- empty entry sequence and be closed at the 
end. --- The result is a new CmmAGraph that is open at the --- end and goes directly from entry to exit, with the --- original graph sitting to the side out-of-line. --- --- Example: mkMiddle (x = 3) --- <*> outOfLine (mkLabel L <*> ...stuff...) --- <*> mkMiddle (y = x) --- Control will flow directly from x=3 to y=x; --- the block starting with L is "on the side". --- --- N.B. algebraically forall g g' : g <*> outOfLine g' == outOfLine g' <*> g +mkBranch bid = mkLast (CmmBranch bid) + +mkFinalCall :: CmmExpr -> CCallConv -> [CmmActual] -> UpdFrameOffset + -> CmmAGraph +mkFinalCall f _ actuals updfr_off = + lastWithArgs Call Old NativeDirectCall actuals updfr_off $ + toCall f Nothing updfr_off 0 + +mkCallReturnsTo :: CmmExpr -> Convention -> [CmmActual] + -> BlockId + -> ByteOff + -> UpdFrameOffset + -> (ByteOff, [(CmmExpr,ByteOff)]) + -> CmmAGraph +mkCallReturnsTo f callConv actuals ret_lbl ret_off updfr_off extra_stack = do + lastWithArgsAndExtraStack Call (Young ret_lbl) callConv actuals + updfr_off extra_stack $ + toCall f (Just ret_lbl) updfr_off ret_off + +mkUnsafeCall :: ForeignTarget -> [CmmFormal] -> [CmmActual] -> CmmAGraph +mkUnsafeCall t fs as = mkMiddle $ CmmUnsafeForeignCall t fs as + -------------------------------------------------------------------------- --- ================ IMPLEMENTATION ================-- - --------------------------------------------------- --- Raw CmmAGraph handling - -emptyAGraph = return $ Opened emptyGraph -ag <*> ah = do g <- ag - h <- ah - return (case (g, h) of - (Opened g, Opened h) -> Opened $ g H.<*> h - (Opened g, Closed h) -> Closed $ g H.<*> h - (Closed g, Opened GNil) -> Closed g - (Closed g, Opened (GUnit e)) -> note_unreachable e $ Closed g - (Closed g, Opened (GMany (JustO e) b x)) -> note_unreachable e $ Opened $ g H.|*><*| GMany NothingO b x - (Closed g, Closed (GMany (JustO e) b x)) -> note_unreachable e $ Closed $ g H.|*><*| GMany NothingO b x - :: CmmGraphOC) -catAGraphs = foldl (<*>) emptyAGraph - 
-outOfLine ag = withFreshLabel "outOfLine" $ \l -> - do g <- ag - return (case g of - Closed (GMany (JustO e) b _) -> note_unreachable e $ Opened $ - GMany (JustO $ BLast $ CmmBranch l) b (JustO $ BFirst $ CmmEntry l) - _ -> panic "outOfLine" - :: CmmGraphOC) - -note_unreachable :: Block CmmNode O x -> a -> a -note_unreachable block graph = - ASSERT (block_is_empty_or_label) -- Note [Branch follows branch] - graph - where block_is_empty_or_label :: Bool - block_is_empty_or_label = case blockToNodeList block of - (NothingC, [], NothingC) -> True - (NothingC, [], JustC (CmmBranch _)) -> True - _ -> False - -mkLabel bid = return $ Opened $ H.mkLast (CmmBranch bid) |*><*| H.mkFirst (CmmEntry bid) -mkMiddle middle = return $ Opened $ H.mkMiddle middle -mkLast last = return $ Closed $ H.mkLast last - -withUnique f = getUniqueM >>= f -withFreshLabel _name f = getUniqueM >>= f . uniqueToLbl . intToUnique . getKey -lgraphOfAGraph g = do u <- getUniqueM - labelAGraph (mkBlockId u) g - -labelAGraph lbl ag = do g <- ag - return $ CmmGraph {g_entry=lbl, g_graph=H.mkFirst (CmmEntry lbl) H.<*> closed g} - where closed :: CmmGraphOC -> Graph CmmNode O C - closed (Closed g) = g - closed (Opened g@(GMany entry body (JustO exit))) = - ASSERT (entryLabel exit `notElem` map entryLabel (postorder_dfs g)) - GMany entry body NothingO - closed (Opened _) = panic "labelAGraph" - --------------------------------------------------- --- CmmAGraph constructions - -mkNop = emptyAGraph -mkComment fs = mkMiddle $ CmmComment fs -mkStore l r = mkMiddle $ CmmStore l r - --- NEED A COMPILER-DEBUGGING FLAG HERE --- Sanity check: any value assigned to a pointer must be non-zero. --- If it's 0, cause a crash immediately. -mkAssign l r = if opt_StubDeadValues then assign l r <*> check l else assign l r - where assign l r = mkMiddle (CmmAssign l r) - check (CmmGlobal _) = mkNop - check l@(CmmLocal reg) = -- if a ptr arg is NULL, cause a crash! 
- if isGcPtrType ty then - mkCmmIfThen (CmmMachOp (MO_Eq w) [r, stackStubExpr w]) - (assign l (CmmLoad (CmmLit (CmmInt 0 w)) ty)) - else mkNop - where ty = localRegType reg - w = typeWidth ty - r = CmmReg l -- Why are we inserting extra blocks that simply branch to the successors? -- Because in addition to the branch instruction, @mkBranch@ will insert -- a necessary adjustment to the stack pointer. -mkCbranch pred ifso ifnot = mkLast (CmmCondBranch pred ifso ifnot) -mkSwitch e tbl = mkLast $ CmmSwitch e tbl - -mkSafeCall t fs as upd i = withFreshLabel "safe call" $ body - where - body k = - ( mkStore (CmmStackSlot (CallArea (Young k)) (widthInBytes wordWidth)) - (CmmLit (CmmBlock k)) - <*> mkLast (CmmForeignCall {tgt=t, res=fs, args=as, succ=k, updfr=upd, intrbl=i}) - <*> mkLabel k) -mkUnsafeCall t fs as = mkMiddle $ CmmUnsafeForeignCall t fs as -mkBranch bid = mkLast (CmmBranch bid) - -mkCmmIfThenElse e tbranch fbranch = - withFreshLabel "end of if" $ \endif -> - withFreshLabel "start of then" $ \tid -> - withFreshLabel "start of else" $ \fid -> - mkCbranch e tid fid <*> - mkLabel tid <*> tbranch <*> mkBranch endif <*> - mkLabel fid <*> fbranch <*> mkLabel endif - -mkCmmIfThen e tbranch - = withFreshLabel "end of if" $ \endif -> - withFreshLabel "start of then" $ \tid -> - mkCbranch e tid endif <*> - mkLabel tid <*> tbranch <*> mkLabel endif - -mkCmmWhileDo e body = - withFreshLabel "loop test" $ \test -> - withFreshLabel "loop head" $ \head -> - withFreshLabel "end while" $ \endwhile -> - -- Forrest Baskett's while-loop layout - mkBranch test <*> mkLabel head <*> body - <*> mkLabel test <*> mkCbranch e head endwhile - <*> mkLabel endwhile -- For debugging purposes, we can stub out dead stack slots: stackStubExpr :: Width -> CmmExpr @@ -286,12 +258,9 @@ stackStubExpr w = CmmLit (CmmInt 0 w) -- Therefore, for copying arguments and results, we provide different -- functions to pass the arguments in an overflow area and to pass them in spill slots. 
copyInOflow :: Convention -> Area -> [CmmFormal] -> (Int, CmmAGraph) -copyInSlot :: Convention -> [CmmFormal] -> [CmmNode O O] -copyOutSlot :: Convention -> [LocalReg] -> [CmmNode O O] copyInOflow conv area formals = (offset, catAGraphs $ map mkMiddle nodes) where (offset, nodes) = copyIn oneCopyOflowI conv area formals -copyInSlot c f = snd $ copyIn oneCopySlotI c (panic "no area for copying to slots") f type SlotCopier = Area -> (LocalReg, ByteOff) -> (ByteOff, [CmmNode O O]) -> (ByteOff, [CmmNode O O]) @@ -312,104 +281,86 @@ copyIn oflow conv area formals = adjust rst x@(_, RegisterParam _) = x : rst -- Copy-in one arg, using overflow space if needed. -oneCopyOflowI, oneCopySlotI :: SlotCopier +oneCopyOflowI :: SlotCopier oneCopyOflowI area (reg, off) (n, ms) = (max n off, CmmAssign (CmmLocal reg) (CmmLoad (CmmStackSlot area off) ty) : ms) where ty = localRegType reg --- Copy-in one arg, using spill slots if needed -- used for calling conventions at --- a procpoint that is not a return point. The offset is irrelevant here... -oneCopySlotI _ (reg, _) (n, ms) = - (n, CmmAssign (CmmLocal reg) (CmmLoad (CmmStackSlot (RegSlot reg) w) ty) : ms) - where ty = localRegType reg - w = widthInBytes (typeWidth ty) - - -- Factoring out the common parts of the copyout functions yielded something -- more complicated: -copyOutOflow :: Convention -> Transfer -> Area -> [CmmActual] -> UpdFrameOffset -> - (Int, CmmAGraph) +data Transfer = Call | Jump | Ret deriving Eq + +copyOutOflow :: Convention -> Transfer -> Area -> [CmmActual] + -> UpdFrameOffset + -> (ByteOff, [(CmmExpr,ByteOff)]) -- extra stack stuff + -> (Int, CmmAGraph) + -- Generate code to move the actual parameters into the locations --- required by the calling convention. This includes a store for the return address. +-- required by the calling convention. This includes a store for the +-- return address. -- --- The argument layout function ignores the pointer to the info table, so we slot that --- in here. 
When copying-out to a young area, we set the info table for return --- and adjust the offsets of the other parameters. --- If this is a call instruction, we adjust the offsets of the other parameters. -copyOutOflow conv transfer area@(CallArea a) actuals updfr_off - = foldr co (init_offset, emptyAGraph) args' +-- The argument layout function ignores the pointer to the info table, +-- so we slot that in here. When copying-out to a young area, we set +-- the info table for return and adjust the offsets of the other +-- parameters. If this is a call instruction, we adjust the offsets +-- of the other parameters. +copyOutOflow conv transfer area actuals updfr_off + (extra_stack_off, extra_stack_stuff) + = foldr co (init_offset, mkNop) (args' ++ stack_params) where co (v, RegisterParam r) (n, ms) = (n, mkAssign (CmmGlobal r) v <*> ms) co (v, StackParam off) (n, ms) = (max n off, mkStore (CmmStackSlot area off) v <*> ms) + stack_params = [ (e, StackParam (off + init_offset)) + | (e,off) <- extra_stack_stuff ] + (setRA, init_offset) = - case a of Young id -> id `seq` -- Generate a store instruction for - -- the return address if making a call + case area of + Young id -> id `seq` -- Generate a store instruction for + -- the return address if making a call if transfer == Call then ([(CmmLit (CmmBlock id), StackParam init_offset)], widthInBytes wordWidth) else ([], 0) - Old -> ([], updfr_off) + Old -> ([], updfr_off) + + arg_offset = init_offset + extra_stack_off args :: [(CmmExpr, ParamLocation)] -- The argument and where to put it args = assignArgumentsPos conv cmmExprType actuals args' = foldl adjust setRA args - where adjust rst (v, StackParam off) = (v, StackParam (off + init_offset)) : rst + where adjust rst (v, StackParam off) = (v, StackParam (off + arg_offset)) : rst adjust rst x@(_, RegisterParam _) = x : rst -copyOutOflow _ _ (RegSlot _) _ _ = panic "cannot copy arguments into a register slot" --- Args passed only in registers and stack slots; no overflow space. 
--- No return address may apply! -copyOutSlot conv actuals = foldr co [] args - where co (v, RegisterParam r) ms = CmmAssign (CmmGlobal r) (toExp v) : ms - co (v, StackParam off) ms = CmmStore (CmmStackSlot (RegSlot v) off) (toExp v) : ms - toExp r = CmmReg (CmmLocal r) - args = assignArgumentsPos conv localRegType actuals mkCallEntry :: Convention -> [CmmFormal] -> (Int, CmmAGraph) -mkCallEntry conv formals = copyInOflow conv (CallArea Old) formals +mkCallEntry conv formals = copyInOflow conv Old formals -lastWithArgs :: Transfer -> Area -> Convention -> [CmmActual] -> UpdFrameOffset -> - (ByteOff -> CmmAGraph) -> CmmAGraph +lastWithArgs :: Transfer -> Area -> Convention -> [CmmActual] + -> UpdFrameOffset + -> (ByteOff -> CmmAGraph) + -> CmmAGraph lastWithArgs transfer area conv actuals updfr_off last = - let (outArgs, copies) = copyOutOflow conv transfer area actuals updfr_off in + lastWithArgsAndExtraStack transfer area conv actuals + updfr_off noExtraStack last + +lastWithArgsAndExtraStack :: Transfer -> Area -> Convention -> [CmmActual] + -> UpdFrameOffset -> (ByteOff, [(CmmExpr,ByteOff)]) + -> (ByteOff -> CmmAGraph) + -> CmmAGraph +lastWithArgsAndExtraStack transfer area conv actuals updfr_off + extra_stack last = + let (outArgs, copies) = copyOutOflow conv transfer area actuals + updfr_off extra_stack in copies <*> last outArgs --- The area created for the jump and return arguments is the same area as the --- procedure entry. 
-old :: Area -old = CallArea Old -toCall :: CmmExpr -> Maybe BlockId -> UpdFrameOffset -> ByteOff -> ByteOff -> CmmAGraph +noExtraStack :: (ByteOff, [(CmmExpr,ByteOff)]) +noExtraStack = (0,[]) + +toCall :: CmmExpr -> Maybe BlockId -> UpdFrameOffset -> ByteOff -> ByteOff + -> CmmAGraph toCall e cont updfr_off res_space arg_space = mkLast $ CmmCall e cont arg_space res_space updfr_off -mkJump e actuals updfr_off = - lastWithArgs Jump old NativeNodeCall actuals updfr_off $ toCall e Nothing updfr_off 0 -mkDirectJump e actuals updfr_off = - lastWithArgs Jump old NativeDirectCall actuals updfr_off $ toCall e Nothing updfr_off 0 -mkJumpGC e actuals updfr_off = - lastWithArgs Jump old GC actuals updfr_off $ toCall e Nothing updfr_off 0 -mkForeignJump conv e actuals updfr_off = - lastWithArgs Jump old conv actuals updfr_off $ toCall e Nothing updfr_off 0 -mkReturn e actuals updfr_off = - lastWithArgs Ret old NativeReturn actuals updfr_off $ toCall e Nothing updfr_off 0 - -- where e = CmmLoad (CmmStackSlot (CallArea Old) updfr_off) gcWord -mkReturnSimple actuals updfr_off = - lastWithArgs Ret old NativeReturn actuals updfr_off $ toCall e Nothing updfr_off 0 - where e = CmmLoad (CmmStackSlot (CallArea Old) updfr_off) gcWord - -mkFinalCall f _ actuals updfr_off = - lastWithArgs Call old NativeDirectCall actuals updfr_off $ toCall f Nothing updfr_off 0 - -mkCmmCall f results actuals = mkCall f (NativeDirectCall, NativeReturn) results actuals - --- I'm dropping the SRT, but that should be okay: we plan to reconstruct it later. 
-mkCall f (callConv, retConv) results actuals updfr_off = - withFreshLabel "call successor" $ \k -> - let area = CallArea $ Young k - (off, copyin) = copyInOflow retConv area results - copyout = lastWithArgs Call area callConv actuals updfr_off - (toCall f (Just k) updfr_off off) - in (copyout <*> mkLabel k <*> copyin) diff --git a/compiler/cmm/OldCmm.hs b/compiler/cmm/OldCmm.hs index fc4706c8c4..aa83afbf8d 100644 --- a/compiler/cmm/OldCmm.hs +++ b/compiler/cmm/OldCmm.hs @@ -9,9 +9,7 @@ module OldCmm ( CmmGroup, GenCmmGroup, RawCmmGroup, CmmDecl, RawCmmDecl, ListGraph(..), - - CmmInfo(..), CmmInfoTable(..), ClosureTypeInfo(..), UpdateFrame(..), - + CmmInfoTable(..), ClosureTypeInfo(..), CmmStatic(..), CmmStatics(..), CmmFormal, CmmActual, cmmMapGraph, cmmTopMapGraph, @@ -49,24 +47,6 @@ import ForeignCall -- with assembly-language labels. ----------------------------------------------------------------------------- --- Info Tables ------------------------------------------------------------------------------ - -data CmmInfo - = CmmInfo - (Maybe BlockId) -- GC target. Nothing <=> CPS won't do stack check - -- JD: NOT USED BY NEW CODE GEN - (Maybe UpdateFrame) -- Update frame - CmmInfoTable -- Info table - --- | A frame that is to be pushed before entry to the function. --- Used to handle 'update' frames. -data UpdateFrame - = UpdateFrame - CmmExpr -- Frame header. Behaves like the target of a 'jump'. - [CmmExpr] -- Frame remainder. Behaves like the arguments of a 'jump'. 
- ------------------------------------------------------------------------------ -- Cmm, CmmDecl, CmmBasicBlock ----------------------------------------------------------------------------- @@ -85,8 +65,8 @@ data UpdateFrame newtype ListGraph i = ListGraph [GenBasicBlock i] -- | Cmm with the info table as a data type -type CmmGroup = GenCmmGroup CmmStatics CmmInfo (ListGraph CmmStmt) -type CmmDecl = GenCmmDecl CmmStatics CmmInfo (ListGraph CmmStmt) +type CmmGroup = GenCmmGroup CmmStatics CmmInfoTable (ListGraph CmmStmt) +type CmmDecl = GenCmmDecl CmmStatics CmmInfoTable (ListGraph CmmStmt) -- | Cmm with the info tables converted to a list of 'CmmStatic' along with the info -- table label. If we are building without tables-next-to-code there will be no statics @@ -225,16 +205,9 @@ instance UserOfLocalRegs CmmCallTarget where foldRegsUsed f set (CmmCallee e _) = foldRegsUsed f set e foldRegsUsed f set (CmmPrim _ mStmts) = foldRegsUsed f set mStmts -instance UserOfSlots CmmCallTarget where - foldSlotsUsed f set (CmmCallee e _) = foldSlotsUsed f set e - foldSlotsUsed _ set (CmmPrim {}) = set - instance UserOfLocalRegs a => UserOfLocalRegs (CmmHinted a) where foldRegsUsed f set a = foldRegsUsed f set (hintlessCmm a) -instance UserOfSlots a => UserOfSlots (CmmHinted a) where - foldSlotsUsed f set a = foldSlotsUsed f set (hintlessCmm a) - instance DefinerOfLocalRegs a => DefinerOfLocalRegs (CmmHinted a) where foldRegsDefd f set a = foldRegsDefd f set (hintlessCmm a) diff --git a/compiler/cmm/OldCmmLint.hs b/compiler/cmm/OldCmmLint.hs new file mode 100644 index 0000000000..72e40ce4f8 --- /dev/null +++ b/compiler/cmm/OldCmmLint.hs @@ -0,0 +1,209 @@ +----------------------------------------------------------------------------- +-- +-- (c) The University of Glasgow 2004-2006 +-- +-- CmmLint: checking the correctness of Cmm statements and expressions +-- +----------------------------------------------------------------------------- + +{-# OPTIONS -fno-warn-tabs #-} +-- The 
above warning supression flag is a temporary kludge. +-- While working on this module you are encouraged to remove it and +-- detab the module (please do the detabbing in a separate patch). See +-- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#TabsvsSpaces +-- for details + +module OldCmmLint ( + cmmLint, cmmLintTop + ) where + +import BlockId +import OldCmm +import CLabel +import Outputable +import OldPprCmm() +import Constants +import FastString +import Platform + +import Data.Maybe + +-- ----------------------------------------------------------------------------- +-- Exported entry points: + +cmmLint :: (Outputable d, Outputable h) + => Platform -> GenCmmGroup d h (ListGraph CmmStmt) -> Maybe SDoc +cmmLint platform tops = runCmmLint platform (mapM_ (lintCmmDecl platform)) tops + +cmmLintTop :: (Outputable d, Outputable h) + => Platform -> GenCmmDecl d h (ListGraph CmmStmt) -> Maybe SDoc +cmmLintTop platform top = runCmmLint platform (lintCmmDecl platform) top + +runCmmLint :: Outputable a + => Platform -> (a -> CmmLint b) -> a -> Maybe SDoc +runCmmLint _ l p = + case unCL (l p) of + Left err -> Just (vcat [ptext $ sLit ("Cmm lint error:"), + nest 2 err, + ptext $ sLit ("Program was:"), + nest 2 (ppr p)]) + Right _ -> Nothing + +lintCmmDecl :: Platform -> (GenCmmDecl h i (ListGraph CmmStmt)) -> CmmLint () +lintCmmDecl platform (CmmProc _ lbl (ListGraph blocks)) + = addLintInfo (text "in proc " <> pprCLabel platform lbl) $ + let labels = foldl (\s b -> setInsert (blockId b) s) setEmpty blocks + in mapM_ (lintCmmBlock platform labels) blocks + +lintCmmDecl _ (CmmData {}) + = return () + +lintCmmBlock :: Platform -> BlockSet -> GenBasicBlock CmmStmt -> CmmLint () +lintCmmBlock platform labels (BasicBlock id stmts) + = addLintInfo (text "in basic block " <> ppr id) $ + mapM_ (lintCmmStmt platform labels) stmts + +-- ----------------------------------------------------------------------------- +-- lintCmmExpr + +-- Checks whether a CmmExpr is 
"type-correct", and check for obvious-looking +-- byte/word mismatches. + +lintCmmExpr :: Platform -> CmmExpr -> CmmLint CmmType +lintCmmExpr platform (CmmLoad expr rep) = do + _ <- lintCmmExpr platform expr + -- Disabled, if we have the inlining phase before the lint phase, + -- we can have funny offsets due to pointer tagging. -- EZY + -- when (widthInBytes (typeWidth rep) >= wORD_SIZE) $ + -- cmmCheckWordAddress expr + return rep +lintCmmExpr platform expr@(CmmMachOp op args) = do + tys <- mapM (lintCmmExpr platform) args + if map (typeWidth . cmmExprType) args == machOpArgReps op + then cmmCheckMachOp op args tys + else cmmLintMachOpErr expr (map cmmExprType args) (machOpArgReps op) +lintCmmExpr platform (CmmRegOff reg offset) + = lintCmmExpr platform (CmmMachOp (MO_Add rep) + [CmmReg reg, CmmLit (CmmInt (fromIntegral offset) rep)]) + where rep = typeWidth (cmmRegType reg) +lintCmmExpr _ expr = + return (cmmExprType expr) + +-- Check for some common byte/word mismatches (eg. Sp + 1) +cmmCheckMachOp :: MachOp -> [CmmExpr] -> [CmmType] -> CmmLint CmmType +cmmCheckMachOp op [lit@(CmmLit (CmmInt { })), reg@(CmmReg _)] tys + = cmmCheckMachOp op [reg, lit] tys +cmmCheckMachOp op _ tys + = return (machOpResultType op tys) + +isOffsetOp :: MachOp -> Bool +isOffsetOp (MO_Add _) = True +isOffsetOp (MO_Sub _) = True +isOffsetOp _ = False + +-- This expression should be an address from which a word can be loaded: +-- check for funny-looking sub-word offsets. 
+_cmmCheckWordAddress :: CmmExpr -> CmmLint () +_cmmCheckWordAddress e@(CmmMachOp op [arg, CmmLit (CmmInt i _)]) + | isOffsetOp op && notNodeReg arg && i `rem` fromIntegral wORD_SIZE /= 0 + = cmmLintDubiousWordOffset e +_cmmCheckWordAddress e@(CmmMachOp op [CmmLit (CmmInt i _), arg]) + | isOffsetOp op && notNodeReg arg && i `rem` fromIntegral wORD_SIZE /= 0 + = cmmLintDubiousWordOffset e +_cmmCheckWordAddress _ + = return () + +-- No warnings for unaligned arithmetic with the node register, +-- which is used to extract fields from tagged constructor closures. +notNodeReg :: CmmExpr -> Bool +notNodeReg (CmmReg reg) | reg == nodeReg = False +notNodeReg _ = True + +lintCmmStmt :: Platform -> BlockSet -> CmmStmt -> CmmLint () +lintCmmStmt platform labels = lint + where lint (CmmNop) = return () + lint (CmmComment {}) = return () + lint stmt@(CmmAssign reg expr) = do + erep <- lintCmmExpr platform expr + let reg_ty = cmmRegType reg + if (erep `cmmEqType_ignoring_ptrhood` reg_ty) + then return () + else cmmLintAssignErr stmt erep reg_ty + lint (CmmStore l r) = do + _ <- lintCmmExpr platform l + _ <- lintCmmExpr platform r + return () + lint (CmmCall target _res args _) = + do lintTarget platform labels target + mapM_ (lintCmmExpr platform . 
hintlessCmm) args + lint (CmmCondBranch e id) = checkTarget id >> lintCmmExpr platform e >> checkCond e + lint (CmmSwitch e branches) = do + mapM_ checkTarget $ catMaybes branches + erep <- lintCmmExpr platform e + if (erep `cmmEqType_ignoring_ptrhood` bWord) + then return () + else cmmLintErr (text "switch scrutinee is not a word: " <> ppr e <> + text " :: " <> ppr erep) + lint (CmmJump e _) = lintCmmExpr platform e >> return () + lint (CmmReturn) = return () + lint (CmmBranch id) = checkTarget id + checkTarget id = if setMember id labels then return () + else cmmLintErr (text "Branch to nonexistent id" <+> ppr id) + +lintTarget :: Platform -> BlockSet -> CmmCallTarget -> CmmLint () +lintTarget platform _ (CmmCallee e _) = do _ <- lintCmmExpr platform e + return () +lintTarget _ _ (CmmPrim _ Nothing) = return () +lintTarget platform labels (CmmPrim _ (Just stmts)) + = mapM_ (lintCmmStmt platform labels) stmts + + +checkCond :: CmmExpr -> CmmLint () +checkCond (CmmMachOp mop _) | isComparisonMachOp mop = return () +checkCond (CmmLit (CmmInt x t)) | x == 0 || x == 1, t == wordWidth = return () -- constant values +checkCond expr + = cmmLintErr (hang (text "expression is not a conditional:") 2 + (ppr expr)) + +-- ----------------------------------------------------------------------------- +-- CmmLint monad + +-- just a basic error monad: + +newtype CmmLint a = CmmLint { unCL :: Either SDoc a } + +instance Monad CmmLint where + CmmLint m >>= k = CmmLint $ case m of + Left e -> Left e + Right a -> unCL (k a) + return a = CmmLint (Right a) + +cmmLintErr :: SDoc -> CmmLint a +cmmLintErr msg = CmmLint (Left msg) + +addLintInfo :: SDoc -> CmmLint a -> CmmLint a +addLintInfo info thing = CmmLint $ + case unCL thing of + Left err -> Left (hang info 2 err) + Right a -> Right a + +cmmLintMachOpErr :: CmmExpr -> [CmmType] -> [Width] -> CmmLint a +cmmLintMachOpErr expr argsRep opExpectsRep + = cmmLintErr (text "in MachOp application: " $$ + nest 2 (ppr expr) $$ + (text "op is 
expecting: " <+> ppr opExpectsRep) $$ + (text "arguments provide: " <+> ppr argsRep)) + +cmmLintAssignErr :: CmmStmt -> CmmType -> CmmType -> CmmLint a +cmmLintAssignErr stmt e_ty r_ty + = cmmLintErr (text "in assignment: " $$ + nest 2 (vcat [ppr stmt, + text "Reg ty:" <+> ppr r_ty, + text "Rhs ty:" <+> ppr e_ty])) + + + +cmmLintDubiousWordOffset :: CmmExpr -> CmmLint a +cmmLintDubiousWordOffset expr + = cmmLintErr (text "offset is not a multiple of words: " $$ + nest 2 (ppr expr)) diff --git a/compiler/cmm/OldPprCmm.hs b/compiler/cmm/OldPprCmm.hs index d6a12221fb..9990fd26a4 100644 --- a/compiler/cmm/OldPprCmm.hs +++ b/compiler/cmm/OldPprCmm.hs @@ -63,10 +63,6 @@ instance Outputable instr => Outputable (GenBasicBlock instr) where instance Outputable CmmStmt where ppr s = pprStmt s -instance Outputable CmmInfo where - ppr i = pprInfo i - - -- -------------------------------------------------------------------------- instance Outputable CmmSafety where ppr CmmUnsafe = ptext (sLit "_unsafe_call_") @@ -74,21 +70,6 @@ instance Outputable CmmSafety where ppr (CmmSafe srt) = ppr srt -- -------------------------------------------------------------------------- --- Info tables. The current pretty printer needs refinement --- but will work for now. --- --- For ideas on how to refine it, they used to be printed in the --- style of C--'s 'stackdata' declaration, just inside the proc body, --- and were labelled with the procedure name ++ "_info". -pprInfo :: CmmInfo -> SDoc -pprInfo (CmmInfo _gc_target update_frame info_table) = - vcat [{-ptext (sLit "gc_target: ") <> - maybe (ptext (sLit "<none>")) ppr gc_target,-} - ptext (sLit "update_frame: ") <> - maybe (ptext (sLit "<none>")) pprUpdateFrame update_frame, - ppr info_table] - --- -------------------------------------------------------------------------- -- Basic blocks look like assembly blocks. -- lbl: stmt ; stmt ; .. 
pprBBlock :: Outputable stmt => GenBasicBlock stmt -> SDoc @@ -156,18 +137,6 @@ pprStmt stmt = case stmt of instance (Outputable a) => Outputable (CmmHinted a) where ppr (CmmHinted a k) = ppr (a, k) -pprUpdateFrame :: UpdateFrame -> SDoc -pprUpdateFrame (UpdateFrame expr args) = - hcat [ ptext (sLit "jump") - , space - , if isTrivialCmmExpr expr - then pprExpr expr - else case expr of - CmmLoad (CmmReg _) _ -> pprExpr expr - _ -> parens (pprExpr expr) - , space - , parens ( commafy $ map ppr args ) ] - -- -------------------------------------------------------------------------- -- goto local label. [1], section 6.6 -- diff --git a/compiler/cmm/OptimizationFuel.hs b/compiler/cmm/OptimizationFuel.hs deleted file mode 100644 index a85b11bcc6..0000000000 --- a/compiler/cmm/OptimizationFuel.hs +++ /dev/null @@ -1,142 +0,0 @@ -{-# LANGUAGE TypeFamilies #-} --- | Optimisation fuel is used to control the amount of work the optimiser does. --- --- Every optimisation step consumes a certain amount of fuel and stops when --- it runs out of fuel. This can be used e.g. to debug optimiser bugs: Run --- the optimiser with varying amount of fuel to find out the exact number of --- steps where a bug is introduced in the output. -module OptimizationFuel - ( OptimizationFuel, amountOfFuel, tankFilledTo, unlimitedFuel, anyFuelLeft, oneLessFuel - , OptFuelState, initOptFuelState - , FuelConsumer, FuelUsingMonad, FuelState - , fuelGet, fuelSet, lastFuelPass, setFuelPass - , fuelExhausted, fuelDec1, tryWithFuel - , runFuelIO, runInfiniteFuelIO, fuelConsumingPass - , FuelUniqSM - , liftUniq - ) -where - -import Data.IORef -import Control.Monad -import StaticFlags (opt_Fuel) -import UniqSupply -import Panic -import Util - -import Compiler.Hoopl -import Compiler.Hoopl.GHC (getFuel, setFuel) - -#include "HsVersions.h" - - --- We limit the number of transactions executed using a record of flags --- stored in an HscEnv. 
The flags store the name of the last optimization --- pass and the amount of optimization fuel remaining. -data OptFuelState = - OptFuelState { pass_ref :: IORef String - , fuel_ref :: IORef OptimizationFuel - } -initOptFuelState :: IO OptFuelState -initOptFuelState = - do pass_ref' <- newIORef "unoptimized program" - fuel_ref' <- newIORef (tankFilledTo opt_Fuel) - return OptFuelState {pass_ref = pass_ref', fuel_ref = fuel_ref'} - -type FuelConsumer a = OptimizationFuel -> (a, OptimizationFuel) - -tankFilledTo :: Int -> OptimizationFuel -amountOfFuel :: OptimizationFuel -> Int - -anyFuelLeft :: OptimizationFuel -> Bool -oneLessFuel :: OptimizationFuel -> OptimizationFuel -unlimitedFuel :: OptimizationFuel - -newtype OptimizationFuel = OptimizationFuel Int - deriving Show - -tankFilledTo = OptimizationFuel -amountOfFuel (OptimizationFuel f) = f - -anyFuelLeft (OptimizationFuel f) = f > 0 -oneLessFuel (OptimizationFuel f) = ASSERT (f > 0) (OptimizationFuel (f - 1)) -unlimitedFuel = OptimizationFuel infiniteFuel - -data FuelState = FuelState { fs_fuel :: OptimizationFuel, fs_lastpass :: String } -newtype FuelUniqSM a = FUSM { unFUSM :: FuelState -> UniqSM (a, FuelState) } - -fuelConsumingPass :: String -> FuelConsumer a -> FuelUniqSM a -fuelConsumingPass name f = do setFuelPass name - fuel <- fuelGet - let (a, fuel') = f fuel - fuelSet fuel' - return a - -runFuelIO :: OptFuelState -> FuelUniqSM a -> IO a -runFuelIO fs (FUSM f) = - do pass <- readIORef (pass_ref fs) - fuel <- readIORef (fuel_ref fs) - u <- mkSplitUniqSupply 'u' - let (a, FuelState fuel' pass') = initUs_ u $ f (FuelState fuel pass) - writeIORef (pass_ref fs) pass' - writeIORef (fuel_ref fs) fuel' - return a - --- ToDo: Do we need the pass_ref when we are doing infinite fueld --- transformations? 
-runInfiniteFuelIO :: OptFuelState -> FuelUniqSM a -> IO a -runInfiniteFuelIO fs (FUSM f) = - do pass <- readIORef (pass_ref fs) - u <- mkSplitUniqSupply 'u' - let (a, FuelState _ pass') = initUs_ u $ f (FuelState unlimitedFuel pass) - writeIORef (pass_ref fs) pass' - return a - -instance Monad FuelUniqSM where - FUSM f >>= k = FUSM (\s -> f s >>= \(a, s') -> unFUSM (k a) s') - return a = FUSM (\s -> return (a, s)) - -instance MonadUnique FuelUniqSM where - getUniqueSupplyM = liftUniq getUniqueSupplyM - getUniqueM = liftUniq getUniqueM - getUniquesM = liftUniq getUniquesM - -liftUniq :: UniqSM x -> FuelUniqSM x -liftUniq x = FUSM (\s -> x >>= (\u -> return (u, s))) - -class Monad m => FuelUsingMonad m where - fuelGet :: m OptimizationFuel - fuelSet :: OptimizationFuel -> m () - lastFuelPass :: m String - setFuelPass :: String -> m () - -fuelExhausted :: FuelUsingMonad m => m Bool -fuelExhausted = fuelGet >>= return . anyFuelLeft - -fuelDec1 :: FuelUsingMonad m => m () -fuelDec1 = fuelGet >>= fuelSet . oneLessFuel - -tryWithFuel :: FuelUsingMonad m => a -> m (Maybe a) -tryWithFuel r = do f <- fuelGet - if anyFuelLeft f then fuelSet (oneLessFuel f) >> return (Just r) - else return Nothing - -instance FuelUsingMonad FuelUniqSM where - fuelGet = extract fs_fuel - lastFuelPass = extract fs_lastpass - fuelSet fuel = FUSM (\s -> return ((), s { fs_fuel = fuel })) - setFuelPass pass = FUSM (\s -> return ((), s { fs_lastpass = pass })) - -extract :: (FuelState -> a) -> FuelUniqSM a -extract f = FUSM (\s -> return (f s, s)) - -instance FuelMonad FuelUniqSM where - getFuel = liftM amountOfFuel fuelGet - setFuel = fuelSet . 
tankFilledTo - --- Don't bother to checkpoint the unique supply; it doesn't matter -instance CheckpointMonad FuelUniqSM where - type Checkpoint FuelUniqSM = FuelState - checkpoint = FUSM $ \fuel -> return (fuel, fuel) - restart fuel = FUSM $ \_ -> return ((), fuel) - diff --git a/compiler/cmm/PprCmm.hs b/compiler/cmm/PprCmm.hs index 183708c08e..dee6ee881e 100644 --- a/compiler/cmm/PprCmm.hs +++ b/compiler/cmm/PprCmm.hs @@ -146,8 +146,6 @@ pprConvention Slow = text "<slow-convention>" pprConvention GC = text "<gc-convention>" pprConvention PrimOpCall = text "<primop-call-convention>" pprConvention PrimOpReturn = text "<primop-ret-convention>" -pprConvention (Foreign c) = ppr c -pprConvention (Private {}) = text "<private-convention>" pprForeignConvention :: ForeignConvention -> SDoc pprForeignConvention (ForeignConvention c as rs) = ppr c <> ppr as <> ppr rs diff --git a/compiler/cmm/PprCmmExpr.hs b/compiler/cmm/PprCmmExpr.hs index 7503127555..119f2b7239 100644 --- a/compiler/cmm/PprCmmExpr.hs +++ b/compiler/cmm/PprCmmExpr.hs @@ -237,12 +237,8 @@ pprLocalReg (LocalReg uniq rep) -- Stack areas pprArea :: Area -> SDoc -pprArea (RegSlot r) = hcat [ text "slot<", ppr r, text ">" ] -pprArea (CallArea id) = pprAreaId id - -pprAreaId :: AreaId -> SDoc -pprAreaId Old = text "old" -pprAreaId (Young id) = hcat [ text "young<", ppr id, text ">" ] +pprArea Old = text "old" +pprArea (Young id) = hcat [ text "young<", ppr id, text ">" ] -- needs to be kept in syn with CmmExpr.hs.GlobalReg -- diff --git a/compiler/cmm/SMRep.lhs b/compiler/cmm/SMRep.lhs index ce30799bf6..8b3308ef97 100644 --- a/compiler/cmm/SMRep.lhs +++ b/compiler/cmm/SMRep.lhs @@ -21,6 +21,7 @@ module SMRep ( StgWord, StgHalfWord, hALF_WORD_SIZE, hALF_WORD_SIZE_IN_BITS, WordOff, ByteOff, + roundUpToWords, -- * Closure repesentation SMRep(..), -- CmmInfo sees the rep; no one else does @@ -57,6 +58,7 @@ import FastString import Data.Char( ord ) import Data.Word +import Data.Bits \end{code} @@ -69,6 +71,9 @@ import 
Data.Word \begin{code} type WordOff = Int -- Word offset, or word count type ByteOff = Int -- Byte offset, or byte count + +roundUpToWords :: ByteOff -> ByteOff +roundUpToWords n = (n + (wORD_SIZE - 1)) .&. (complement (wORD_SIZE - 1)) \end{code} StgWord is a type representing an StgWord on the target platform. @@ -93,6 +98,7 @@ hALF_WORD_SIZE_IN_BITS = 32 #endif \end{code} + %************************************************************************ %* * \subsubsection[SMRep-datatype]{@SMRep@---storage manager representation} diff --git a/compiler/cmm/cmm-notes b/compiler/cmm/cmm-notes index 81882c8c0e..0e6a2341f2 100644 --- a/compiler/cmm/cmm-notes +++ b/compiler/cmm/cmm-notes @@ -24,27 +24,10 @@ More notes (June 11) * Check in ClosureInfo:
-- NB: Results here should line up with the results of SMRep.rtsClosureType
-* Possible refactoring: Nuke AGraph in favour of
- mkIfThenElse :: Expr -> Graph -> Graph -> FCode Graph
- or even
- mkIfThenElse :: HasUniques m => Expr -> Graph -> Graph -> m Graph
- (Remmber that the .cmm file parser must use this function)
-
- or parameterise FCode over its envt; the CgState part seem useful for both
-
* "Remove redundant reloads" in CmmSpillReload should be redundant; since
insertLateReloads is now gone, every reload is reloading a live variable.
Test and nuke.
-* Stack layout is very like register assignment: find non-conflicting assigments.
- In particular we can use colouring or linear scan (etc).
-
- We'd fine-grain interference (on a word by word basis) to get maximum overlap.
- But that may make very big interference graphs. So linear scan might be
- more attactive.
-
- NB: linear scan does on-the-fly live range splitting.
-
* When stubbing dead slots be careful not to write into an area that
overlaps with an area that's in use. So stubbing needs to *follow*
stack layout.
@@ -81,10 +64,6 @@ Things to do: Old.Cmm. We should abstract it to work on both representations, it needs only to
convert a CmmInfoTable to [CmmStatic].
- - The MkGraph currenty uses a different semantics for <*> than Hoopl. Maybe
- we could convert codeGen/StgCmm* clients to the Hoopl's semantics?
- It's all deeply unsatisfactory.
-
- Improve performance of Hoopl.
A nofib comparison of -fasm vs -fnewcodegen nofib compilation parameters
@@ -101,18 +80,12 @@ Things to do: When compiling nofib, ghc-head + libraries compiled with -fnew-codegen
is 31.4% slower (http://fox.auryn.cz/msrc/0517_hoopl/32bit.oldghcoldgen.zipghcoldgen.txt).
- So we generate a bit better code, but it takes us longer!
-
EZY: Also importantly, Hoopl uses dramatically more memory than the
old code generator.
- Are all blockToNodeList and blockOfNodeList really needed? Maybe we could
splice blocks instead?
- In the CmmContFlowOpt.blockConcat, using Dataflow seems too clumsy. Still,
- a block catenation function would be probably nicer than blockToNodeList
- / blockOfNodeList combo.
-
- lowerSafeForeignCall seems too lowlevel. Just use Dataflow. After that
delete splitEntrySeq from HooplUtils.
@@ -129,10 +102,6 @@ Things to do: - NB that CmmProcPoint line 283 has a hack that works around a GADT-related
bug in 6.10.
- - SDM (2010-02-26) can we remove the Foreign constructor from Convention?
- Reason: we never generate code for a function with the Foreign
- calling convention, and the code for calling foreign calls is generated
-
- AsmCodeGen has a generic Cmm optimiser; move this into new pipeline
EZY (2011-04-16): The mini-inliner has been generalized and ported,
but the constant folding and other optimizations need to still be
@@ -161,16 +130,6 @@ Things to do: - Top-level SRT threading is a bit ugly
- - Add type/newtype for CmmModule = [CmmGroup] -- A module
- CmmGroup = [CmmTop] -- A .o file
- CmmTop = Proc | Data -- A procedure or data
-
- - This is a *change*: currently a CmmGroup is one function's-worth of code
- regardless of SplitObjs. Question: can we *always* generate M.o if there
- is just one element in the list (rather than M/M1.o, M/M2.o etc)
-
- One SRT per group.
-
- See "CAFs" below; we want to totally refactor the way SRTs are calculated
- Pull out Areas into its own module
diff --git a/compiler/codeGen/CgInfoTbls.hs b/compiler/codeGen/CgInfoTbls.hs index 6c77255a62..7cdb1b6f7e 100644 --- a/compiler/codeGen/CgInfoTbls.hs +++ b/compiler/codeGen/CgInfoTbls.hs @@ -36,7 +36,7 @@ import CgBindery import CgCallConv import CgUtils import CgMonad -import CmmBuildInfoTables +import CmmUtils import OldCmm import CLabel @@ -66,10 +66,9 @@ emitClosureCodeAndInfoTable cl_info args body -- Convert from 'ClosureInfo' to 'CmmInfo'. -- Not used for return points. (The 'smRepClosureTypeInt' call would panic.) -mkCmmInfo :: ClosureInfo -> FCode CmmInfo +mkCmmInfo :: ClosureInfo -> FCode CmmInfoTable mkCmmInfo cl_info - = return (CmmInfo gc_target Nothing $ - CmmInfoTable { cit_lbl = infoTableLabelFromCI cl_info, + = return (CmmInfoTable { cit_lbl = infoTableLabelFromCI cl_info, cit_rep = closureSMRep cl_info, cit_prof = prof, cit_srt = closureSRT cl_info }) @@ -79,14 +78,6 @@ mkCmmInfo cl_info ty_descr_w8 = stringToWord8s (closureTypeDescr cl_info) val_descr_w8 = stringToWord8s (closureValDescr cl_info) - -- The gc_target is to inform the CPS pass when it inserts a stack check. - -- Since that pass isn't used yet we'll punt for now. - -- When the CPS pass is fully integrated, this should - -- be replaced by the label that any heap check jumped to, - -- so that branch can be shared by both the heap (from codeGen) - -- and stack checks (from the CPS pass). 
- gc_target = panic "TODO: gc_target" - ------------------------------------------------------------------------- -- -- Generating the info table and code for a return point @@ -105,8 +96,7 @@ emitReturnTarget name stmts ; blks <- cgStmtsToBlocks stmts ; frame <- mkStackLayout ; let smrep = mkStackRep (mkLiveness frame) - info = CmmInfo gc_target Nothing info_tbl - info_tbl = CmmInfoTable { cit_lbl = info_lbl + info = CmmInfoTable { cit_lbl = info_lbl , cit_prof = NoProfilingInfo , cit_rep = smrep , cit_srt = srt_info } @@ -118,14 +108,6 @@ emitReturnTarget name stmts info_lbl = mkReturnInfoLabel uniq entry_lbl = mkReturnPtLabel uniq - -- The gc_target is to inform the CPS pass when it inserts a stack check. - -- Since that pass isn't used yet we'll punt for now. - -- When the CPS pass is fully integrated, this should - -- be replaced by the label that any heap check jumped to, - -- so that branch can be shared by both the heap (from codeGen) - -- and stack checks (from the CPS pass). - gc_target = panic "TODO: gc_target" - -- Build stack layout information from the state of the 'FCode' monad. -- Should go away once 'codeGen' starts using the CPS conversion -- pass to handle the stack. Until then, this is really just @@ -375,8 +357,8 @@ funInfoTable info_ptr emitInfoTableAndCode :: CLabel -- Label of entry or ret - -> CmmInfo -- ...the info table - -> [CmmFormal] -- ...args + -> CmmInfoTable -- ...the info table + -> [CmmFormal] -- ...args -> [CmmBasicBlock] -- ...and body -> Code diff --git a/compiler/codeGen/CgMonad.lhs b/compiler/codeGen/CgMonad.lhs index b96898f591..71da9e9ae0 100644 --- a/compiler/codeGen/CgMonad.lhs +++ b/compiler/codeGen/CgMonad.lhs @@ -13,8 +13,8 @@ stuff fits into the Big Picture. 
module CgMonad ( Code, FCode, - initC, thenC, thenFC, listCs, listFCs, mapCs, mapFCs, - returnFC, fixC, fixC_, checkedAbsC, + initC, runC, thenC, thenFC, listCs, listFCs, mapCs, mapFCs, + returnFC, fixC, fixC_, checkedAbsC, stmtC, stmtsC, labelC, emitStmts, nopC, whenC, newLabelC, newUnique, newUniqSupply, @@ -386,11 +386,12 @@ instance Monad FCode where {-# INLINE thenFC #-} {-# INLINE returnFC #-} -initC :: DynFlags -> Module -> FCode a -> IO a -initC dflags mod (FCode code) = do - uniqs <- mkSplitUniqSupply 'c' - case code (initCgInfoDown dflags mod) (initCgState uniqs) of - (res, _) -> return res +initC :: IO CgState +initC = do { uniqs <- mkSplitUniqSupply 'c' + ; return (initCgState uniqs) } + +runC :: DynFlags -> Module -> CgState -> FCode a -> (a,CgState) +runC dflags mod st (FCode code) = code (initCgInfoDown dflags mod) st returnFC :: a -> FCode a returnFC val = FCode $ \_ state -> (val, state) @@ -708,7 +709,7 @@ emitDecl decl = do state <- getState setState $ state { cgs_tops = cgs_tops state `snocOL` decl } -emitProc :: CmmInfo -> CLabel -> [CmmFormal] -> [CmmBasicBlock] -> Code +emitProc :: CmmInfoTable -> CLabel -> [CmmFormal] -> [CmmBasicBlock] -> Code emitProc info lbl [] blocks = do let proc_block = CmmProc info lbl (ListGraph blocks) state <- getState @@ -720,7 +721,7 @@ emitSimpleProc :: CLabel -> Code -> Code emitSimpleProc lbl code = do stmts <- getCgStmts code blks <- cgStmtsToBlocks stmts - emitProc (CmmInfo Nothing Nothing CmmNonInfoTable) lbl [] blks + emitProc CmmNonInfoTable lbl [] blks -- Get all the CmmTops (there should be no stmts) -- Return a single Cmm which may be split from other Cmms by diff --git a/compiler/codeGen/CodeGen.lhs b/compiler/codeGen/CodeGen.lhs index ce12d43bbf..c9b2bf8ab0 100644 --- a/compiler/codeGen/CodeGen.lhs +++ b/compiler/codeGen/CodeGen.lhs @@ -30,7 +30,7 @@ import CgHpc import CLabel import OldCmm -import OldPprCmm +import OldPprCmm () import StgSyn import PrelNames @@ -45,40 +45,52 @@ import TyCon import 
Module import ErrUtils import Panic +import Outputable import Util +import OrdList +import Stream (Stream, liftIO) +import qualified Stream + +import Data.IORef + codeGen :: DynFlags -> Module -- Module we are compiling -> [TyCon] -- Type constructors -> CollectedCCs -- (Local/global) cost-centres needing declaring/registering. -> [(StgBinding,[(Id,[Id])])] -- Bindings to convert, with SRTs -> HpcInfo -- Profiling info - -> IO [CmmGroup] + -> Stream IO CmmGroup () -- N.B. returning '[Cmm]' and not 'Cmm' here makes it -- possible for object splitting to split up the -- pieces later. -codeGen dflags this_mod data_tycons cost_centre_info stg_binds hpc_info = do - showPass dflags "CodeGen" - code_stuff <- - initC dflags this_mod $ do - cmm_binds <- mapM (getCmm . cgTopBinding dflags) stg_binds - cmm_tycons <- mapM cgTyCon data_tycons - cmm_init <- getCmm (mkModuleInit dflags cost_centre_info this_mod hpc_info) - return (cmm_init : cmm_binds ++ cmm_tycons) - -- Put datatype_stuff after code_stuff, because the - -- datatype closure table (for enumeration types) to - -- (say) PrelBase_True_closure, which is defined in - -- code_stuff - - -- Note [codegen-split-init] the cmm_init block must - -- come FIRST. This is because when -split-objs is on - -- we need to combine this block with its - -- initialisation routines; see Note - -- [pipeline-split-init]. - - dumpIfSet_dyn dflags Opt_D_dump_cmm "Cmm" (pprCmms code_stuff) - return code_stuff +codeGen dflags this_mod data_tycons cost_centre_info stg_binds hpc_info + + = do { liftIO $ showPass dflags "CodeGen" + + ; cgref <- liftIO $ newIORef =<< initC + ; let cg :: FCode CmmGroup -> Stream IO CmmGroup () + cg fcode = do + cmm <- liftIO $ do + st <- readIORef cgref + let (a,st') = runC dflags this_mod st fcode + + dumpIfSet_dyn dflags Opt_D_dump_cmm "Cmm" $ ppr a + + -- NB. stub-out cgs_tops and cgs_stmts. This fixes + -- a big space leak. DO NOT REMOVE! + writeIORef cgref $! 
st'{ cgs_tops = nilOL, + cgs_stmts = nilOL } + return a + Stream.yield cmm + + ; cg (getCmm $ mkModuleInit dflags cost_centre_info this_mod hpc_info) + + ; mapM_ (cg . getCmm . cgTopBinding dflags) stg_binds + + ; mapM_ (cg . cgTyCon) data_tycons + } mkModuleInit :: DynFlags diff --git a/compiler/codeGen/StgCmm.hs b/compiler/codeGen/StgCmm.hs index 17a7062559..696af8107e 100644 --- a/compiler/codeGen/StgCmm.hs +++ b/compiler/codeGen/StgCmm.hs @@ -46,6 +46,13 @@ import TyCon import Module import ErrUtils import Outputable +import Stream + +import OrdList +import MkGraph + +import Data.IORef +import Control.Monad (when) import Util codeGen :: DynFlags @@ -54,39 +61,51 @@ codeGen :: DynFlags -> CollectedCCs -- (Local/global) cost-centres needing declaring/registering. -> [(StgBinding,[(Id,[Id])])] -- Bindings to convert, with SRTs -> HpcInfo - -> IO [CmmGroup] -- Output + -> Stream IO CmmGroup () -- Output as a stream, so codegen can + -- be interleaved with output codeGen dflags this_mod data_tycons cost_centre_info stg_binds hpc_info - = do { showPass dflags "New CodeGen" - --- Why? --- ; mapM_ (\x -> seq x (return ())) data_tycons - - ; code_stuff <- initC dflags this_mod $ do - { cmm_binds <- mapM (getCmm . cgTopBinding dflags) stg_binds - ; cmm_tycons <- mapM cgTyCon data_tycons - ; cmm_init <- getCmm (mkModuleInit cost_centre_info - this_mod hpc_info) - ; return (cmm_init : cmm_binds ++ cmm_tycons) - } + = do { liftIO $ showPass dflags "New CodeGen" + + -- cg: run the code generator, and yield the resulting CmmGroup + -- Using an IORef to store the state is a bit crude, but otherwise + -- we would need to add a state monad layer. + ; cgref <- liftIO $ newIORef =<< initC + ; let cg :: FCode () -> Stream IO CmmGroup () + cg fcode = do + cmm <- liftIO $ do + st <- readIORef cgref + let (a,st') = runC dflags this_mod st (getCmm fcode) + + -- NB. stub-out cgs_tops and cgs_stmts. This fixes + -- a big space leak. DO NOT REMOVE! + writeIORef cgref $! 
st'{ cgs_tops = nilOL, + cgs_stmts = mkNop } + return a + yield cmm + + -- Note [codegen-split-init] the cmm_init block must come + -- FIRST. This is because when -split-objs is on we need to + -- combine this block with its initialisation routines; see + -- Note [pipeline-split-init]. + ; cg (mkModuleInit cost_centre_info this_mod hpc_info) + + ; mapM_ (cg . cgTopBinding dflags) stg_binds + -- Put datatype_stuff after code_stuff, because the -- datatype closure table (for enumeration types) to -- (say) PrelBase_True_closure, which is defined in -- code_stuff - - -- N.B. returning '[Cmm]' and not 'Cmm' here makes it - -- possible for object splitting to split up the - -- pieces later. - - -- Note [codegen-split-init] the cmm_init block must - -- come FIRST. This is because when -split-objs is on - -- we need to combine this block with its - -- initialisation routines; see Note - -- [pipeline-split-init]. - - ; return code_stuff } - + ; let do_tycon tycon = do + -- Generate a table of static closures for an + -- enumeration type Note that the closure pointers are + -- tagged. + when (isEnumerationTyCon tycon) $ cg (cgEnumerationTyCon tycon) + mapM_ (cg . cgDataCon) (tyConDataCons tycon) + + ; mapM_ do_tycon data_tycons + } --------------------------------------------------------------- -- Top-level bindings @@ -108,7 +127,7 @@ cgTopBinding dflags (StgNonRec id rhs, _srts) ; info <- cgTopRhs id' rhs ; addBindC (cg_id info) info -- Add the *un-externalised* Id to the envt, -- so we find it when we look up occurrences - } + } cgTopBinding dflags (StgRec pairs, _srts) = do { let (bndrs, rhss) = unzip pairs @@ -117,7 +136,7 @@ cgTopBinding dflags (StgRec pairs, _srts) ; fixC_(\ new_binds -> do { addBindsC new_binds ; mapFCs ( \ (b,e) -> cgTopRhs b e ) pairs' }) - ; return () } + ; return () } -- Urgh! 
I tried moving the forkStatics call from the rhss of cgTopRhs -- to enclose the listFCs in cgTopBinding, but that tickled the @@ -187,65 +206,19 @@ mkModuleInit cost_centre_info this_mod hpc_info ; emitDecl (CmmData Data (Statics (mkPlainModuleInitLabel this_mod) [])) } + --------------------------------------------------------------- -- Generating static stuff for algebraic data types --------------------------------------------------------------- -{- [These comments are rather out of date] - -Macro Kind of constructor -CONST_INFO_TABLE@ Zero arity (no info -- compiler uses static closure) -CHARLIKE_INFO_TABLE Charlike (no info -- compiler indexes fixed array) -INTLIKE_INFO_TABLE Intlike; the one macro generates both info tbls -SPEC_INFO_TABLE SPECish, and bigger than or equal to MIN_UPD_SIZE -GEN_INFO_TABLE GENish (hence bigger than or equal to MIN_UPD_SIZE@) -Possible info tables for constructor con: - -* _con_info: - Used for dynamically let(rec)-bound occurrences of - the constructor, and for updates. For constructors - which are int-like, char-like or nullary, when GC occurs, - the closure tries to get rid of itself. - -* _static_info: - Static occurrences of the constructor macro: STATIC_INFO_TABLE. - -For zero-arity constructors, \tr{con}, we NO LONGER generate a static closure; -it's place is taken by the top level defn of the constructor. - -For charlike and intlike closures there is a fixed array of static -closures predeclared. --} - -cgTyCon :: TyCon -> FCode CmmGroup -- All constructors merged together -cgTyCon tycon - = do { constrs <- mapM (getCmm . cgDataCon) (tyConDataCons tycon) - - -- Generate a table of static closures for an enumeration type - -- Put the table after the data constructor decls, because the - -- datatype closure table (for enumeration types) - -- to (say) PrelBase_$wTrue_closure, which is defined in code_stuff - -- Note that the closure pointers are tagged. - - -- N.B. 
comment says to put table after constructor decls, but - -- code puts it before --- NR 16 Aug 2007 - ; extra <- cgEnumerationTyCon tycon - - ; return (concat (extra ++ constrs)) - } - -cgEnumerationTyCon :: TyCon -> FCode [CmmGroup] +cgEnumerationTyCon :: TyCon -> FCode () cgEnumerationTyCon tycon - | isEnumerationTyCon tycon - = do { tbl <- getCmm $ - emitRODataLits (mkLocalClosureTableLabel (tyConName tycon) NoCafRefs) - [ CmmLabelOff (mkLocalClosureLabel (dataConName con) NoCafRefs) - (tagForCon con) - | con <- tyConDataCons tycon] - ; return [tbl] } - | otherwise - = return [] + = emitRODataLits (mkLocalClosureTableLabel (tyConName tycon) NoCafRefs) + [ CmmLabelOff (mkLocalClosureLabel (dataConName con) NoCafRefs) + (tagForCon con) + | con <- tyConDataCons tycon] + cgDataCon :: DataCon -> FCode () -- Generate the entry code, info tables, and (for niladic constructor) diff --git a/compiler/codeGen/StgCmmBind.hs b/compiler/codeGen/StgCmmBind.hs index 3b166e3b6a..f98283f737 100644 --- a/compiler/codeGen/StgCmmBind.hs +++ b/compiler/codeGen/StgCmmBind.hs @@ -76,17 +76,17 @@ cgTopRhsClosure :: Id cgTopRhsClosure id ccs _ upd_flag srt args body = do { -- LAY OUT THE OBJECT let name = idName id - ; lf_info <- mkClosureLFInfo id TopLevel [] upd_flag args - ; srt_info <- getSRTInfo srt + ; lf_info <- mkClosureLFInfo id TopLevel [] upd_flag args + ; has_srt <- getSRTInfo srt ; mod_name <- getModuleName ; dflags <- getDynFlags ; let descr = closureDescription dflags mod_name name - closure_info = mkClosureInfo True id lf_info 0 0 srt_info descr - closure_label = mkLocalClosureLabel name (idCafInfo id) + closure_info = mkClosureInfo True id lf_info 0 0 descr + closure_label = mkLocalClosureLabel name (idCafInfo id) cg_id_info = litIdInfo id lf_info (CmmLabel closure_label) caffy = idCafInfo id info_tbl = mkCmmInfo closure_info -- XXX short-cut - closure_rep = mkStaticClosureFields info_tbl ccs caffy [] + closure_rep = mkStaticClosureFields info_tbl ccs caffy has_srt [] -- 
BUILD THE OBJECT, AND GENERATE INFO TABLE (IF NECESSARY) ; emitDataLits closure_label closure_rep @@ -110,7 +110,7 @@ cgBind (StgNonRec name rhs) ; emit (init <*> body) } cgBind (StgRec pairs) - = do { ((new_binds, inits), body) <- getCodeR $ fixC (\ new_binds_inits -> + = do { ((new_binds, inits), body) <- getCodeR $ fixC (\ new_binds_inits -> do { addBindsC $ fst new_binds_inits -- avoid premature deconstruction ; liftM unzip $ listFCs [ cgRhs b e | (b,e) <- pairs ] }) ; addBindsC new_binds @@ -162,8 +162,8 @@ cgRhs :: Id -> StgRhs -> FCode (CgIdInfo, CmmAGraph) cgRhs name (StgRhsCon cc con args) = buildDynCon name cc con args -cgRhs name (StgRhsClosure cc bi fvs upd_flag srt args body) - = mkRhsClosure name cc bi (nonVoidIds fvs) upd_flag srt args body +cgRhs name (StgRhsClosure cc bi fvs upd_flag _srt args body) + = mkRhsClosure name cc bi (nonVoidIds fvs) upd_flag args body ------------------------------------------------------------------------ -- Non-constructor right hand sides @@ -171,7 +171,7 @@ cgRhs name (StgRhsClosure cc bi fvs upd_flag srt args body) mkRhsClosure :: Id -> CostCentreStack -> StgBinderInfo -> [NonVoid Id] -- Free vars - -> UpdateFlag -> SRT + -> UpdateFlag -> [Id] -- Args -> StgExpr -> FCode (CgIdInfo, CmmAGraph) @@ -215,8 +215,7 @@ for semi-obvious reasons. mkRhsClosure bndr cc bi [NonVoid the_fv] -- Just one free var upd_flag -- Updatable thunk - _srt - [] -- A thunk + [] -- A thunk body@(StgCase (StgApp scrutinee [{-no args-}]) _ _ _ _ -- ignore uniq, etc. 
(AlgAlt _) @@ -247,8 +246,7 @@ mkRhsClosure bndr cc bi mkRhsClosure bndr cc bi fvs upd_flag - _srt - [] -- No args; a thunk + [] -- No args; a thunk body@(StgApp fun_id args) | args `lengthIs` (arity-1) @@ -269,7 +267,7 @@ mkRhsClosure bndr cc bi arity = length fvs ---------- Default case ------------------ -mkRhsClosure bndr cc _ fvs upd_flag srt args body +mkRhsClosure bndr cc _ fvs upd_flag args body = do { -- LAY OUT THE OBJECT -- If the binder is itself a free variable, then don't store -- it in the closure. Instead, just bind it to Node on entry. @@ -288,17 +286,16 @@ mkRhsClosure bndr cc _ fvs upd_flag srt args body -- MAKE CLOSURE INFO FOR THIS CLOSURE ; lf_info <- mkClosureLFInfo bndr NotTopLevel fvs upd_flag args ; mod_name <- getModuleName - ; c_srt <- getSRTInfo srt - ; dflags <- getDynFlags - ; let name = idName bndr - descr = closureDescription dflags mod_name name - fv_details :: [(NonVoid Id, VirtualHpOffset)] + ; dflags <- getDynFlags + ; let name = idName bndr + descr = closureDescription dflags mod_name name + fv_details :: [(NonVoid Id, VirtualHpOffset)] (tot_wds, ptr_wds, fv_details) = mkVirtHeapOffsets (isLFThunk lf_info) (addIdReps (map stripNV reduced_fvs)) closure_info = mkClosureInfo False -- Not static bndr lf_info tot_wds ptr_wds - c_srt descr + descr -- BUILD ITS INFO TABLE AND CODE ; forkClosureBody $ @@ -345,8 +342,7 @@ cgStdThunk bndr _cc _bndr_info _body lf_info payload descr = closureDescription dflags mod_name (idName bndr) closure_info = mkClosureInfo False -- Not static bndr lf_info tot_wds ptr_wds - NoC_SRT -- No SRT for a std-form closure - descr + descr -- ; (use_cc, blame_cc) <- chooseDynCostCentres cc [{- no args-}] body ; let use_cc = curCCS; blame_cc = curCCS @@ -546,10 +542,10 @@ emitBlackHoleCode is_single_entry = do whenC eager_blackholing $ do tickyBlackHole (not is_single_entry) - emit (mkStore (cmmOffsetW (CmmReg nodeReg) fixedHdrSize) - (CmmReg (CmmGlobal CurrentTSO))) + emitStore (cmmOffsetW (CmmReg nodeReg) 
fixedHdrSize) + (CmmReg (CmmGlobal CurrentTSO)) emitPrimCall [] MO_WriteBarrier [] - emit (mkStore (CmmReg nodeReg) (CmmReg (CmmGlobal EagerBlackholeInfo))) + emitStore (CmmReg nodeReg) (CmmReg (CmmGlobal EagerBlackholeInfo)) setupUpdate :: ClosureInfo -> LocalReg -> FCode () -> FCode () -- Nota Bene: this function does not change Node (even if it's a CAF), @@ -598,7 +594,7 @@ pushUpdateFrame es body offset <- foldM push updfr es withUpdFrameOff offset body where push off e = - do emit (mkStore (CmmStackSlot (CallArea Old) base) e) + do emitStore (CmmStackSlot Old base) e return base where base = off + widthInBytes (cmmExprWidth e) @@ -666,13 +662,14 @@ link_caf _is_upd = do -- node is live, so save it. -- see Note [atomic CAF entry] in rts/sm/Storage.c - ; emit $ mkCmmIfThen - (CmmMachOp mo_wordEq [ CmmReg (CmmLocal ret), CmmLit zeroCLit]) $ + ; updfr <- getUpdFrameOff + ; emit =<< mkCmmIfThen + (CmmMachOp mo_wordEq [ CmmReg (CmmLocal ret), CmmLit zeroCLit]) -- re-enter R1. Doing this directly is slightly dodgy; we're -- assuming lots of things, like the stack pointer hasn't -- moved since we entered the CAF. - let target = entryCode (closureInfoPtr (CmmReg nodeReg)) in - mkJump target [] 0 + (let target = entryCode (closureInfoPtr (CmmReg nodeReg)) in + mkJump target [] updfr) ; return hp_rel } diff --git a/compiler/codeGen/StgCmmClosure.hs b/compiler/codeGen/StgCmmClosure.hs index 483a67c1fa..8023abddec 100644 --- a/compiler/codeGen/StgCmmClosure.hs +++ b/compiler/codeGen/StgCmmClosure.hs @@ -657,7 +657,6 @@ data ClosureInfo -- the rest is just an unpacked CmmInfoTable. 
closureInfoLabel :: !CLabel, closureSMRep :: !SMRep, -- representation used by storage mgr - closureSRT :: !C_SRT, -- What SRT applies to this closure closureProf :: !ProfilingInfo } @@ -667,7 +666,7 @@ mkCmmInfo ClosureInfo {..} = CmmInfoTable { cit_lbl = closureInfoLabel , cit_rep = closureSMRep , cit_prof = closureProf - , cit_srt = closureSRT } + , cit_srt = NoC_SRT } -------------------------------------- @@ -678,16 +677,14 @@ mkClosureInfo :: Bool -- Is static -> Id -> LambdaFormInfo -> Int -> Int -- Total and pointer words - -> C_SRT - -> String -- String descriptor + -> String -- String descriptor -> ClosureInfo -mkClosureInfo is_static id lf_info tot_wds ptr_wds srt_info val_descr +mkClosureInfo is_static id lf_info tot_wds ptr_wds val_descr = ClosureInfo { closureName = name, closureLFInfo = lf_info, - closureInfoLabel = info_lbl, - closureSMRep = sm_rep, -- These four fields are a - closureSRT = srt_info, -- CmmInfoTable - closureProf = prof } -- --- + closureInfoLabel = info_lbl, -- These three fields are + closureSMRep = sm_rep, -- (almost) an info table + closureProf = prof } -- (we don't have an SRT yet) where name = idName id sm_rep = mkHeapRep is_static ptr_wds nonptr_wds (lfClosureType lf_info) @@ -920,15 +917,21 @@ cafBlackHoleInfoTable , cit_prof = NoProfilingInfo , cit_srt = NoC_SRT } -staticClosureNeedsLink :: CmmInfoTable -> Bool +staticClosureNeedsLink :: Bool -> CmmInfoTable -> Bool -- A static closure needs a link field to aid the GC when traversing -- the static closure graph. But it only needs such a field if either -- a) it has an SRT -- b) it's a constructor with one or more pointer fields -- In case (b), the constructor's fields themselves play the role -- of the SRT. 
-staticClosureNeedsLink info_tbl@CmmInfoTable{ cit_rep = smrep } +-- +-- At this point, the cit_srt field has not been calculated (that +-- happens right at the end of the Cmm pipeline), but we do have the +-- VarSet of CAFs that CoreToStg attached, and if that is empty there +-- will definitely not be an SRT. +-- +staticClosureNeedsLink has_srt CmmInfoTable{ cit_rep = smrep } | isConRep smrep = not (isStaticNoCafCon smrep) - | otherwise = needsSRT (cit_srt info_tbl) -staticClosureNeedsLink _ = False + | otherwise = has_srt -- needsSRT (cit_srt info_tbl) +staticClosureNeedsLink _ _ = False diff --git a/compiler/codeGen/StgCmmCon.hs b/compiler/codeGen/StgCmmCon.hs index a7af5662e9..c348570a54 100644 --- a/compiler/codeGen/StgCmmCon.hs +++ b/compiler/codeGen/StgCmmCon.hs @@ -92,6 +92,7 @@ cgTopRhsCon id con args info_tbl dontCareCCS -- Because it's static data caffy -- Has CAF refs + False -- no SRT payload -- BUILD THE OBJECT diff --git a/compiler/codeGen/StgCmmEnv.hs b/compiler/codeGen/StgCmmEnv.hs index f128e3ad60..2edd09da12 100644 --- a/compiler/codeGen/StgCmmEnv.hs +++ b/compiler/codeGen/StgCmmEnv.hs @@ -27,7 +27,7 @@ module StgCmmEnv ( bindArgsToRegs, bindToReg, rebindToReg, bindArgToReg, idToReg, - getArgAmode, getNonVoidArgAmodes, + getArgAmode, getNonVoidArgAmodes, getCgIdInfo, maybeLetNoEscape, ) where @@ -212,7 +212,6 @@ getNonVoidArgAmodes (arg:args) ; amodes <- getNonVoidArgAmodes args ; return ( amode : amodes ) } - ------------------------------------------------------------------------ -- Interface functions for binding and re-binding names ------------------------------------------------------------------------ diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs index 9faad02f46..68bfb6d9fe 100644 --- a/compiler/codeGen/StgCmmExpr.hs +++ b/compiler/codeGen/StgCmmExpr.hs @@ -77,7 +77,7 @@ cgExpr (StgLetNoEscape _ _ binds expr) = ; let join_id = mkBlockId (uniqFromSupply us) ; cgLneBinds join_id binds ; cgExpr expr - ; emit $ 
mkLabel join_id} + ; emitLabel join_id} cgExpr (StgCase expr _live_vars _save_vars bndr srt alt_type alts) = cgCase expr bndr srt alt_type alts @@ -130,7 +130,7 @@ cgLetNoEscapeRhs cgLetNoEscapeRhs join_id local_cc bndr rhs = do { (info, rhs_body) <- getCodeR $ cgLetNoEscapeRhsBody local_cc bndr rhs ; let (bid, _) = expectJust "cgLetNoEscapeRhs" $ maybeLetNoEscape info - ; emit (outOfLine $ mkLabel bid <*> rhs_body <*> mkBranch join_id) + ; emitOutOfLine bid $ rhs_body <*> mkBranch join_id ; return info } @@ -278,21 +278,69 @@ Hence: two basic plans for data GcPlan = GcInAlts -- Put a GC check at the start the case alternatives, [LocalReg] -- which binds these registers - SRT -- using this SRT - | NoGcInAlts -- The scrutinee is a primitive value, or a call to a + | NoGcInAlts -- The scrutinee is a primitive value, or a call to a -- primitive op which does no GC. Absorb the allocation -- of the case alternative(s) into the upstream check ------------------------------------- --- See Note [case on Bool] cgCase :: StgExpr -> Id -> SRT -> AltType -> [StgAlt] -> FCode () + +cgCase (StgOpApp (StgPrimOp op) args _) bndr _srt (AlgAlt tycon) alts + | isEnumerationTyCon tycon -- Note [case on bool] + = do { tag_expr <- do_enum_primop op args + + -- If the binder is not dead, convert the tag to a constructor + -- and assign it. + ; when (not (isDeadBinder bndr)) $ do + { tmp_reg <- bindArgToReg (NonVoid bndr) + ; emitAssign (CmmLocal tmp_reg) + (tagToClosure tycon tag_expr) } + + ; (mb_deflt, branches) <- cgAlgAltRhss NoGcInAlts Nothing + (NonVoid bndr) alts + ; emitSwitch tag_expr branches mb_deflt 0 (tyConFamilySize tycon - 1) + } + where + do_enum_primop :: PrimOp -> [StgArg] -> FCode CmmExpr + do_enum_primop TagToEnumOp [arg] -- No code! 
+ = getArgAmode (NonVoid arg) + do_enum_primop primop args + = do tmp <- newTemp bWord + cgPrimOp [tmp] primop args + return (CmmReg (CmmLocal tmp)) + {- -cgCase (OpApp ) bndr srt AlgAlt [(DataAlt flase, a2] - | isBoolTy (idType bndr) - , isDeadBndr bndr - = +Note [case on bool] + +This special case handles code like + + case a <# b of + True -> + False -> + +If we let the ordinary case code handle it, we'll get something like + + tmp1 = a < b + tmp2 = Bool_closure_tbl[tmp1] + if (tmp2 & 7 != 0) then ... // normal tagged case + +but this junk won't optimise away. What we really want is just an +inline comparison: + + if (a < b) then ... + +So we add a special case to generate + + tmp1 = a < b + if (tmp1 == 0) then ... + +and later optimisations will further improve this. + +We should really change all these primops to return Int# instead, that +would make this special case go away. -} + -- Note [ticket #3132]: we might be looking at a case of a lifted Id -- that was cast to an unlifted type. The Id will always be bottom, -- but we don't want the code generator to fall over here. If we @@ -319,7 +367,7 @@ cgCase (StgApp v []) bndr _ alt_type@(PrimAlt _) alts do { when (not reps_compatible) $ panic "cgCase: reps do not match, perhaps a dodgy unsafeCoerce?" 
; v_info <- getCgIdInfo v - ; emit (mkAssign (CmmLocal (idToReg (NonVoid bndr))) (idInfoToAmode v_info)) + ; emitAssign (CmmLocal (idToReg (NonVoid bndr))) (idInfoToAmode v_info) ; _ <- bindArgsToRegs [NonVoid bndr] ; cgAlts NoGcInAlts (NonVoid bndr) alt_type alts } where @@ -330,8 +378,11 @@ cgCase scrut@(StgApp v []) _ _ (PrimAlt _) _ do { mb_cc <- maybeSaveCostCentre True ; withSequel (AssignTo [idToReg (NonVoid v)] False) (cgExpr scrut) ; restoreCurrentCostCentre mb_cc - ; emit $ mkComment $ mkFastString "should be unreachable code" - ; emit $ withFreshLabel "l" (\l -> mkLabel l <*> mkBranch l)} + ; emitComment $ mkFastString "should be unreachable code" + ; l <- newLabelC + ; emitLabel l + ; emit (mkBranch l) + } {- case seq# a s of v @@ -349,7 +400,7 @@ cgCase (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _) bndr srt alt_type alts = -- handle seq#, same return convention as vanilla 'a'. cgCase (StgApp a []) bndr srt alt_type alts -cgCase scrut bndr srt alt_type alts +cgCase scrut bndr _srt alt_type alts = -- the general case do { up_hp_usg <- getVirtHp -- Upstream heap usage ; let ret_bndrs = chooseReturnBndrs bndr alt_type alts @@ -359,7 +410,7 @@ cgCase scrut bndr srt alt_type alts | isSingleton alts = False | up_hp_usg > 0 = False | otherwise = True - gc_plan = if gcInAlts then GcInAlts alt_regs srt else NoGcInAlts + gc_plan = if gcInAlts then GcInAlts alt_regs else NoGcInAlts ; mb_cc <- maybeSaveCostCentre simple_scrut ; withSequel (AssignTo alt_regs gcInAlts) (cgExpr scrut) @@ -417,14 +468,14 @@ chooseReturnBndrs _ _ _ = panic "chooseReturnBndrs" cgAlts :: GcPlan -> NonVoid Id -> AltType -> [StgAlt] -> FCode () -- At this point the result of the case are in the binders cgAlts gc_plan _bndr PolyAlt [(_, _, _, rhs)] - = maybeAltHeapCheck gc_plan (cgExpr rhs) + = maybeAltHeapCheck gc_plan Nothing (cgExpr rhs) cgAlts gc_plan _bndr (UbxTupAlt _) [(_, _, _, rhs)] - = maybeAltHeapCheck gc_plan (cgExpr rhs) + = maybeAltHeapCheck gc_plan Nothing (cgExpr rhs) -- 
Here bndrs are *already* in scope, so don't rebind them cgAlts gc_plan bndr (PrimAlt _) alts - = do { tagged_cmms <- cgAltRhss gc_plan bndr alts + = do { tagged_cmms <- cgAltRhss gc_plan Nothing bndr alts ; let bndr_reg = CmmLocal (idToReg bndr) (DEFAULT,deflt) = head tagged_cmms @@ -433,20 +484,17 @@ cgAlts gc_plan bndr (PrimAlt _) alts tagged_cmms' = [(lit,code) | (LitAlt lit, code) <- tagged_cmms] - ; emit (mkCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' deflt) } + ; emitCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' deflt } cgAlts gc_plan bndr (AlgAlt tycon) alts - = do { tagged_cmms <- cgAltRhss gc_plan bndr alts - + = do { retry_lbl <- newLabelC + ; emitLabel retry_lbl -- Note [alg-alt heap checks] + + ; (mb_deflt, branches) <- cgAlgAltRhss gc_plan (Just retry_lbl) + bndr alts + ; let fam_sz = tyConFamilySize tycon bndr_reg = CmmLocal (idToReg bndr) - mb_deflt = case tagged_cmms of - ((DEFAULT,rhs) : _) -> Just rhs - _other -> Nothing - -- DEFAULT is always first, if present - - branches = [ (dataConTagZ con, cmm) - | (DataAlt con, cmm) <- tagged_cmms ] -- Is the constructor tag in the node reg? ; if isSmallFamily fam_sz @@ -467,23 +515,68 @@ cgAlts gc_plan bndr (AlgAlt tycon) alts cgAlts _ _ _ _ = panic "cgAlts" -- UbxTupAlt and PolyAlt have only one alternative + +-- Note [alg-alt heap check] +-- +-- In an algebraic case with more than one alternative, we will have +-- code like +-- +-- L0: +-- x = R1 +-- goto L1 +-- L1: +-- if (x & 7 >= 2) then goto L2 else goto L3 +-- L2: +-- Hp = Hp + 16 +-- if (Hp > HpLim) then goto L4 +-- ... 
+-- L4: +-- call gc() returns to L5 +-- L5: +-- x = R1 +-- goto L1 + ------------------- -cgAltRhss :: GcPlan -> NonVoid Id -> [StgAlt] -> FCode [(AltCon, CmmAGraph)] -cgAltRhss gc_plan bndr alts +cgAlgAltRhss :: GcPlan -> Maybe BlockId -> NonVoid Id -> [StgAlt] + -> FCode ( Maybe CmmAGraph + , [(ConTagZ, CmmAGraph)] ) +cgAlgAltRhss gc_plan retry_lbl bndr alts + = do { tagged_cmms <- cgAltRhss gc_plan retry_lbl bndr alts + + ; let { mb_deflt = case tagged_cmms of + ((DEFAULT,rhs) : _) -> Just rhs + _other -> Nothing + -- DEFAULT is always first, if present + + ; branches = [ (dataConTagZ con, cmm) + | (DataAlt con, cmm) <- tagged_cmms ] + } + + ; return (mb_deflt, branches) + } + + +------------------- +cgAltRhss :: GcPlan -> Maybe BlockId -> NonVoid Id -> [StgAlt] + -> FCode [(AltCon, CmmAGraph)] +cgAltRhss gc_plan retry_lbl bndr alts = forkAlts (map cg_alt alts) where base_reg = idToReg bndr cg_alt :: StgAlt -> FCode (AltCon, CmmAGraph) cg_alt (con, bndrs, _uses, rhs) = getCodeR $ - maybeAltHeapCheck gc_plan $ + maybeAltHeapCheck gc_plan retry_lbl $ do { _ <- bindConArgs con base_reg bndrs ; cgExpr rhs ; return con } -maybeAltHeapCheck :: GcPlan -> FCode a -> FCode a -maybeAltHeapCheck NoGcInAlts code = code -maybeAltHeapCheck (GcInAlts regs _) code = altHeapCheck regs code +maybeAltHeapCheck :: GcPlan -> Maybe BlockId -> FCode a -> FCode a +maybeAltHeapCheck NoGcInAlts _ code = code +maybeAltHeapCheck (GcInAlts regs) mlbl code = + case mlbl of + Nothing -> altHeapCheck regs code + Just retry_lbl -> altHeapCheckReturnsTo regs retry_lbl code ----------------------------------------------------------------------------- -- Tail calls @@ -517,8 +610,8 @@ cgIdApp fun_id args cgLneJump :: BlockId -> [LocalReg] -> [StgArg] -> FCode () cgLneJump blk_id lne_regs args -- Join point; discard sequel = do { cmm_args <- getNonVoidArgAmodes args - ; emit (mkMultiAssign lne_regs cmm_args - <*> mkBranch blk_id) } + ; emitMultiAssign lne_regs cmm_args + ; emit (mkBranch blk_id) } 
cgTailCall :: Id -> CgIdInfo -> [StgArg] -> FCode () cgTailCall fun_id fun_info args = do @@ -529,27 +622,21 @@ cgTailCall fun_id fun_info args = do ReturnIt -> emitReturn [fun] -- ToDo: does ReturnIt guarantee tagged? EnterIt -> ASSERT( null args ) -- Discarding arguments - do { let fun' = CmmLoad fun (cmmExprType fun) - ; [ret,call] <- forkAlts [ - getCode $ emitReturn [fun], -- Is tagged; no need to untag - getCode $ do -- emit (mkAssign nodeReg fun) - emitCall (NativeNodeCall, NativeReturn) - (entryCode fun') [fun]] -- Not tagged - ; emit (mkCmmIfThenElse (cmmIsTagged fun) ret call) } - - SlowCall -> do -- A slow function call via the RTS apply routines + emitEnter fun + + SlowCall -> do -- A slow function call via the RTS apply routines { tickySlowCall lf_info args - ; emit $ mkComment $ mkFastString "slowCall" + ; emitComment $ mkFastString "slowCall" ; slowCall fun args } -- A direct function call (possibly with some left-over arguments) DirectEntry lbl arity -> do { tickyDirectCall arity args ; if node_points then - do emit $ mkComment $ mkFastString "directEntry" - emit (mkAssign nodeReg fun) + do emitComment $ mkFastString "directEntry" + emitAssign nodeReg fun directCall lbl arity args - else do emit $ mkComment $ mkFastString "directEntry else" + else do emitComment $ mkFastString "directEntry else" directCall lbl arity args } JumpToIt {} -> panic "cgTailCall" -- ??? @@ -561,33 +648,67 @@ cgTailCall fun_id fun_info args = do node_points = nodeMustPointToIt lf_info -{- Note [case on Bool] - ~~~~~~~~~~~~~~~~~~~ -A case on a Boolean value does two things: - 1. It looks up the Boolean in a closure table and assigns the - result to the binder. - 2. It branches to the True or False case through analysis - of the closure assigned to the binder. -But the indirection through the closure table is unnecessary -if the assignment to the binder will be dead code (use isDeadBndr). 
+emitEnter :: CmmExpr -> FCode () +emitEnter fun = do + { adjustHpBackwards + ; sequel <- getSequel + ; updfr_off <- getUpdFrameOff + ; case sequel of + -- For a return, we have the option of generating a tag-test or + -- not. If the value is tagged, we can return directly, which + -- is quicker than entering the value. This is a code + -- size/speed trade-off: when optimising for speed rather than + -- size we could generate the tag test. + -- + -- Right now, we do what the old codegen did, and omit the tag + -- test, just generating an enter. + Return _ -> do + { let entry = entryCode $ closureInfoPtr $ CmmReg nodeReg + ; emit $ mkForeignJump NativeNodeCall entry + [cmmUntag fun] updfr_off + } + + -- The result will be scrutinised in the sequel. This is where + -- we generate a tag-test to avoid entering the closure if + -- possible. + -- + -- The generated code will be something like this: + -- + -- R1 = fun -- copyout + -- if (fun & 7 != 0) goto Lcall else goto Lret + -- Lcall: + -- call [fun] returns to Lret + -- Lret: + -- fun' = R1 -- copyin + -- ... + -- + -- Note in particular that the label Lret is used as a + -- destination by both the tag-test and the call. This is + -- becase Lret will necessarily be a proc-point, and we want to + -- ensure that we generate only one proc-point for this + -- sequence. + -- + AssignTo res_regs _ -> do + { lret <- newLabelC + ; lcall <- newLabelC + ; let area = Young lret + ; let (off, copyin) = copyInOflow NativeReturn area res_regs + (outArgs, copyout) = copyOutOflow NativeNodeCall Call area + [fun] updfr_off (0,[]) + -- refer to fun via nodeReg after the copyout, to avoid having + -- both live simultaneously; this sometimes enables fun to be + -- inlined in the RHS of the R1 assignment. 
+ ; let entry = entryCode (closureInfoPtr (CmmReg nodeReg)) + the_call = toCall entry (Just lret) updfr_off off outArgs + ; emit $ + copyout <*> + mkCbranch (cmmIsTagged (CmmReg nodeReg)) lret lcall <*> + outOfLine lcall the_call <*> + mkLabel lret <*> + copyin + } + } -The following example illustrates how badly the code turns out: - STG: - case <=## [ww_s7Hx y_s7HD] of wild2_sbH8 { - GHC.Types.False -> <true code> // sbH8 dead - GHC.Types.True -> <false code> // sbH8 dead - }; - Cmm: - _s7HD::F64 = F64[_sbH7::I64 + 7]; // MidAssign - _ccsW::I64 = %MO_F_Le_W64(_s7Hx::F64, _s7HD::F64); // MidAssign - // emitReturn // MidComment - _sbH8::I64 = I64[ghczmprim_GHCziBool_Bool_closure_tbl + (_ccsW::I64 << 3)]; // MidAssign - _ccsX::I64 = _sbH8::I64 & 7; // MidAssign - if (_ccsX::I64 >= 2) goto ccsH; else goto ccsI; // LastCondBranch - -The assignments to _sbH8 and _ccsX are completely unnecessary. -Instead, we should branch based on the value of _ccsW. --} {- Note [Better Alt Heap Checks] If two function calls can share a return point, then they will also diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs index 5bc0f7af4e..c67e0e0c95 100644 --- a/compiler/codeGen/StgCmmForeign.hs +++ b/compiler/codeGen/StgCmmForeign.hs @@ -22,6 +22,7 @@ import StgCmmEnv import StgCmmMonad import StgCmmUtils import StgCmmClosure +import StgCmmLayout import BlockId import Cmm @@ -45,15 +46,16 @@ import Control.Monad -- Code generation for Foreign Calls ----------------------------------------------------------------------------- -cgForeignCall :: [LocalReg] -- r1,r2 where to put the results - -> [ForeignHint] - -> ForeignCall -- the op +-- | emit code for a foreign call, and return the results to the sequel. 
+-- +cgForeignCall :: ForeignCall -- the op -> [StgArg] -- x,y arguments + -> Type -- result type -> FCode () --- Emits code for an unsafe foreign call: r1, r2 = foo( x, y, z ) -cgForeignCall results result_hints (CCall (CCallSpec target cconv safety)) stg_args +cgForeignCall (CCall (CCallSpec target cconv safety)) stg_args res_ty = do { cmm_args <- getFCallArgs stg_args + ; (res_regs, res_hints) <- newUnboxedTupleRegs res_ty ; let ((call_args, arg_hints), cmm_target) = case target of StaticTarget _ _ False -> @@ -63,7 +65,7 @@ cgForeignCall results result_hints (CCall (CCallSpec target cconv safety)) stg_a = case mPkgId of Nothing -> ForeignLabelInThisPackage Just pkgId -> ForeignLabelInPackage pkgId - size = call_size cmm_args + size = call_size cmm_args in ( unzip cmm_args , CmmLit (CmmLabel (mkForeignLabel lbl size labelSource IsFunction))) @@ -71,13 +73,31 @@ cgForeignCall results result_hints (CCall (CCallSpec target cconv safety)) stg_a DynamicTarget -> case cmm_args of (fn,_):rest -> (unzip rest, fn) [] -> panic "cgForeignCall []" - fc = ForeignConvention cconv arg_hints result_hints + fc = ForeignConvention cconv arg_hints res_hints call_target = ForeignTarget cmm_target fc - ; srt <- getSRTInfo NoSRT -- SLPJ: Not sure what SRT - -- is right here - -- JD: Does it matter in the new codegen? - ; emitForeignCall safety results call_target call_args srt CmmMayReturn } + -- we want to emit code for the call, and then emitReturn. + -- However, if the sequel is AssignTo, we shortcut a little + -- and generate a foreign call that assigns the results + -- directly. Otherwise we end up generating a bunch of + -- useless "r = r" assignments, which are not merely annoying: + -- they prevent the common block elimination from working correctly + -- in the case of a safe foreign call. 
+ -- See Note [safe foreign call convention] + -- + ; sequel <- getSequel + ; case sequel of + AssignTo assign_to_these _ -> + do { emitForeignCall safety assign_to_these call_target + call_args CmmMayReturn + } + + _something_else -> + do { emitForeignCall safety res_regs call_target + call_args CmmMayReturn + ; emitReturn (map (CmmReg . CmmLocal) res_regs) + } + } where -- in the stdcall calling convention, the symbol needs @size appended -- to it, where size is the total number of bytes of arguments. We @@ -88,16 +108,83 @@ cgForeignCall results result_hints (CCall (CCallSpec target cconv safety)) stg_a | otherwise = Nothing -- ToDo: this might not be correct for 64-bit API - arg_size (arg, _) = max (widthInBytes $ typeWidth $ cmmExprType arg) wORD_SIZE + arg_size (arg, _) = max (widthInBytes $ typeWidth $ cmmExprType arg) + wORD_SIZE + +{- Note [safe foreign call convention] + +The simple thing to do for a safe foreign call would be the same as an +unsafe one: just + + emitForeignCall ... + emitReturn ... + +but consider what happens in this case + + case foo x y z of + (# s, r #) -> ... + +The sequel is AssignTo [r]. The call to newUnboxedTupleRegs picks [r] +as the result reg, and we generate + + r = foo(x,y,z) returns to L1 -- emitForeignCall + L1: + r = r -- emitReturn + goto L2 +L2: + ... + +Now L1 is a proc point (by definition, it is the continuation of the +safe foreign call). If L2 does a heap check, then L2 will also be a +proc point. + +Furthermore, the stack layout algorithm has to arrange to save r +somewhere between the call and the jump to L1, which is annoying: we +would have to treat r differently from the other live variables, which +have to be saved *before* the call. + +So we adopt a special convention for safe foreign calls: the results +are copied out according to the NativeReturn convention by the call, +and the continuation of the call should copyIn the results. 
(The +copyOut code is actually inserted when the safe foreign call is +lowered later). The result regs attached to the safe foreign call are +only used temporarily to hold the results before they are copied out. + +We will now generate this: + + r = foo(x,y,z) returns to L1 + L1: + r = R1 -- copyIn, inserted by mkSafeCall + goto L2 + L2: + ... r ... + +And when the safe foreign call is lowered later (see Note [lower safe +foreign calls]) we get this: + + suspendThread() + r = foo(x,y,z) + resumeThread() + R1 = r -- copyOut, inserted by lowerSafeForeignCall + jump L1 + L1: + r = R1 -- copyIn, inserted by mkSafeCall + goto L2 + L2: + ... r ... + +Now consider what happens if L2 does a heap check: the Adams +optimisation kicks in and commons up L1 with the heap-check +continuation, resulting in just one proc point instead of two. Yay! +-} + emitCCall :: [(CmmFormal,ForeignHint)] -> CmmExpr -> [(CmmActual,ForeignHint)] -> FCode () emitCCall hinted_results fn hinted_args - = emitForeignCall PlayRisky results target args - NoC_SRT -- No SRT b/c we PlayRisky - CmmMayReturn + = emitForeignCall PlayRisky results target args CmmMayReturn where (args, arg_hints) = unzip hinted_args (results, result_hints) = unzip hinted_results @@ -107,7 +194,7 @@ emitCCall hinted_results fn hinted_args emitPrimCall :: [CmmFormal] -> CallishMachOp -> [CmmActual] -> FCode () emitPrimCall res op args - = emitForeignCall PlayRisky res (PrimTarget op) args NoC_SRT CmmMayReturn + = emitForeignCall PlayRisky res (PrimTarget op) args CmmMayReturn -- alternative entry point, used by CmmParse emitForeignCall @@ -115,11 +202,10 @@ emitForeignCall -> [CmmFormal] -- where to put the results -> ForeignTarget -- the op -> [CmmActual] -- arguments - -> C_SRT -- the SRT of the calls continuation -> CmmReturnInfo -- This can say "never returns" -- only RTS procedures do this -> FCode () -emitForeignCall safety results target args _srt _ret +emitForeignCall safety results target args _ret | not (playSafe 
safety) = do let (caller_save, caller_load) = callerSaveVolatileRegs emit caller_save @@ -129,7 +215,9 @@ emitForeignCall safety results target args _srt _ret | otherwise = do updfr_off <- getUpdFrameOff temp_target <- load_target_into_temp target - emit $ mkSafeCall temp_target results args updfr_off (playInterruptible safety) + emit =<< mkSafeCall temp_target results args updfr_off + (playInterruptible safety) + {- @@ -162,7 +250,7 @@ maybe_assign_temp e -- expressions, which are wrong here. -- this is a NonPtr because it only duplicates an existing reg <- newTemp (cmmExprType e) --TODO FIXME NOW - emit (mkAssign (CmmLocal reg) e) + emitAssign (CmmLocal reg) e return (CmmReg (CmmLocal reg)) -- ----------------------------------------------------------------------------- @@ -184,12 +272,12 @@ saveThreadState = emitSaveThreadState :: BlockId -> FCode () emitSaveThreadState bid = do -- CurrentTSO->stackobj->sp = Sp; - emit $ mkStore (cmmOffset (CmmLoad (cmmOffset stgCurrentTSO tso_stackobj) bWord) stack_SP) - (CmmStackSlot (CallArea (Young bid)) (widthInBytes (typeWidth gcWord))) + emitStore (cmmOffset (CmmLoad (cmmOffset stgCurrentTSO tso_stackobj) bWord) stack_SP) + (CmmStackSlot (Young bid) (widthInBytes (typeWidth gcWord))) emit closeNursery -- and save the current cost centre stack in the TSO when profiling: when opt_SccProfilingOn $ - emit (mkStore (cmmOffset stgCurrentTSO tso_CCCS) curCCS) + emitStore (cmmOffset stgCurrentTSO tso_CCCS) curCCS -- CurrentNursery->free = Hp+1; closeNursery :: CmmAGraph diff --git a/compiler/codeGen/StgCmmHeap.hs b/compiler/codeGen/StgCmmHeap.hs index 25161722f7..611304b5e0 100644 --- a/compiler/codeGen/StgCmmHeap.hs +++ b/compiler/codeGen/StgCmmHeap.hs @@ -10,7 +10,7 @@ module StgCmmHeap ( getVirtHp, setVirtHp, setRealHp, getHpRelOffset, hpRel, - entryHeapCheck, altHeapCheck, + entryHeapCheck, altHeapCheck, altHeapCheckReturnsTo, mkVirtHeapOffsets, mkVirtConstrOffsets, mkStaticClosureFields, mkStaticClosure, @@ -20,7 +20,6 @@ 
module StgCmmHeap ( #include "HsVersions.h" -import CmmType import StgSyn import CLabel import StgCmmLayout @@ -34,6 +33,7 @@ import StgCmmEnv import MkGraph +import Hoopl hiding ((<*>), mkBranch) import SMRep import Cmm import CmmUtils @@ -109,7 +109,7 @@ allocDynClosureCmm info_tbl lf_info use_cc _blame_cc amodes_w_offsets -- ALLOCATE THE OBJECT ; base <- getHpRelOffset info_offset - ; emit (mkComment $ mkFastString "allocDynClosure") + ; emitComment $ mkFastString "allocDynClosure" ; emitSetDynHdr base info_ptr use_cc ; let (cmm_args, offsets) = unzip amodes_w_offsets ; hpStore base cmm_args offsets @@ -151,9 +151,10 @@ mkStaticClosureFields :: CmmInfoTable -> CostCentreStack -> CafInfo + -> Bool -- SRT is non-empty? -> [CmmLit] -- Payload -> [CmmLit] -- The full closure -mkStaticClosureFields info_tbl ccs caf_refs payload +mkStaticClosureFields info_tbl ccs caf_refs has_srt payload = mkStaticClosure info_lbl ccs payload padding static_link_field saved_info_field where @@ -178,8 +179,10 @@ mkStaticClosureFields info_tbl ccs caf_refs payload | otherwise = ASSERT(null payload) [mkIntCLit 0] static_link_field - | is_caf || staticClosureNeedsLink info_tbl = [static_link_value] - | otherwise = [] + | is_caf || staticClosureNeedsLink has_srt info_tbl + = [static_link_value] + | otherwise + = [] saved_info_field | is_caf = [mkIntCLit 0] @@ -335,11 +338,12 @@ entryHeapCheck cl_info offset nodeSet arity args code args' = map (CmmReg . 
CmmLocal) args setN = case nodeSet of - Just n -> mkAssign nodeReg (CmmReg $ CmmLocal n) + Just _ -> mkNop -- No need to assign R1, it already + -- points to the closure Nothing -> mkAssign nodeReg $ CmmLit (CmmLabel $ staticClosureLabel cl_info) - {- Thunks: Set R1 = node, jump GCEnter1 + {- Thunks: jump GCEnter1 Function (fast): Set R1 = node, jump GCFun Function (slow): Set R1 = node, call generic_gc -} gc_call upd = setN <*> gc_lbl upd @@ -354,7 +358,10 @@ entryHeapCheck cl_info offset nodeSet arity args code - GC calls, but until then this fishy code works -} updfr_sz <- getUpdFrameOff - heapCheck True (gc_call updfr_sz) code + + loop_id <- newLabelC + emitLabel loop_id + heapCheck True (gc_call updfr_sz <*> mkBranch loop_id) code {- -- This code is slightly outdated now and we could easily keep the above @@ -400,21 +407,29 @@ entryHeapCheck cl_info offset nodeSet arity args code -} --------------------------------------------------------------- --- A heap/stack check at in a case alternative +-- ------------------------------------------------------------ +-- A heap/stack check in a case alternative altHeapCheck :: [LocalReg] -> FCode a -> FCode a altHeapCheck regs code + = do loop_id <- newLabelC + emitLabel loop_id + altHeapCheckReturnsTo regs loop_id code + +altHeapCheckReturnsTo :: [LocalReg] -> Label -> FCode a -> FCode a +altHeapCheckReturnsTo regs retry_lbl code = do updfr_sz <- getUpdFrameOff - heapCheck False (gc_call updfr_sz) code + gc_call_code <- gc_call updfr_sz + heapCheck False (gc_call_code <*> mkBranch retry_lbl) code where reg_exprs = map (CmmReg . 
CmmLocal) regs + -- Note [stg_gc arguments] gc_call sp = case rts_label regs of - Just gc -> mkCall (CmmLit gc) (GC, GC) regs reg_exprs sp - Nothing -> mkCall generic_gc (GC, GC) [] [] sp + Just gc -> mkCall (CmmLit gc) (GC, GC) regs reg_exprs sp (0,[]) + Nothing -> mkCall generic_gc (GC, GC) [] [] sp (0,[]) rts_label [reg] | isGcPtrType ty = Just (mkGcLabel "stg_gc_unpt_r1") @@ -432,6 +447,23 @@ altHeapCheck regs code rts_label _ = Nothing +-- Note [stg_gc arguments] +-- It might seem that we could avoid passing the arguments to the +-- stg_gc function, because they are already in the right registers. +-- While this is usually the case, it isn't always. Sometimes the +-- code generator has cleverly avoided the eval in a case, e.g. in +-- ffi/should_run/4221.hs we found +-- +-- case a_r1mb of z +-- FunPtr x y -> ... +-- +-- where a_r1mb is bound to a top-level constructor, and is known to be +-- evaluated. The codegen just assigns x, y and z, and continues; +-- R1 is never assigned. +-- +-- So we'll have to rely on optimisations to eliminate these +-- assignments where possible. + -- | The generic GC procedure; no params, no results generic_gc :: CmmExpr @@ -447,7 +479,7 @@ heapCheck checkStack do_gc code = getHeapUsage $ \ hpHw -> -- Emit heap checks, but be sure to do it lazily so -- that the conditionals on hpHw don't cause a black hole - do { emit $ do_checks checkStack hpHw do_gc + do { codeOnly $ do_checks checkStack hpHw do_gc ; tickyAllocHeap hpHw ; doGranAllocate hpHw ; setRealHp hpHw @@ -456,22 +488,25 @@ heapCheck checkStack do_gc code do_checks :: Bool -- Should we check the stack?
-> WordOff -- Heap headroom -> CmmAGraph -- What to do on failure - -> CmmAGraph -do_checks checkStack alloc do_gc - = withFreshLabel "gc" $ \ loop_id -> - withFreshLabel "gc" $ \ gc_id -> - mkLabel loop_id - <*> (let hpCheck = if alloc == 0 then mkNop - else mkAssign hpReg bump_hp <*> - mkCmmIfThen hp_oflo (alloc_n <*> mkBranch gc_id) - in if checkStack - then mkCmmIfThenElse sp_oflo (mkBranch gc_id) hpCheck - else hpCheck) - <*> mkComment (mkFastString "outOfLine should follow:") - <*> outOfLine (mkLabel gc_id - <*> mkComment (mkFastString "outOfLine here") - <*> do_gc - <*> mkBranch loop_id) + -> FCode () +do_checks checkStack alloc do_gc = do + gc_id <- newLabelC + hp_check <- if alloc == 0 + then return mkNop + else do + ifthen <- mkCmmIfThen hp_oflo (alloc_n <*> mkBranch gc_id) + return (mkAssign hpReg bump_hp <*> ifthen) + + if checkStack + then emit =<< mkCmmIfThenElse sp_oflo (mkBranch gc_id) hp_check + else emit hp_check + + emit $ mkComment (mkFastString "outOfLine should follow:") + + emitOutOfLine gc_id $ + mkComment (mkFastString "outOfLine here") <*> + do_gc -- this is expected to jump back somewhere + -- Test for stack pointer exhaustion, then -- bump heap pointer, and test for heap exhaustion -- Note that we don't move the heap pointer unless the diff --git a/compiler/codeGen/StgCmmLayout.hs b/compiler/codeGen/StgCmmLayout.hs index 86986efdfa..9593af1f50 100644 --- a/compiler/codeGen/StgCmmLayout.hs +++ b/compiler/codeGen/StgCmmLayout.hs @@ -15,7 +15,7 @@ module StgCmmLayout ( mkArgDescr, - emitCall, emitReturn, + emitCall, emitReturn, adjustHpBackwards, emitClosureProcAndInfoTable, emitClosureAndInfoTable, @@ -41,10 +41,12 @@ import StgCmmEnv import StgCmmTicky import StgCmmMonad import StgCmmUtils +import StgCmmProf import MkGraph import SMRep import Cmm +import CmmUtils import CLabel import StgSyn import Id @@ -52,6 +54,7 @@ import Name import TyCon ( PrimRep(..) 
) import BasicTypes ( RepArity ) import StaticFlags +import Module import Constants import Util @@ -63,38 +66,60 @@ import FastString -- Call and return sequences ------------------------------------------------------------------------ -emitReturn :: [CmmExpr] -> FCode () --- Return multiple values to the sequel +-- | Return multiple values to the sequel +-- +-- If the sequel is @Return@ +-- +-- > return (x,y) +-- +-- If the sequel is @AssignTo [p,q]@ +-- +-- > p=x; q=y; -- --- If the sequel is Return --- return (x,y) --- If the sequel is AssignTo [p,q] --- p=x; q=y; +emitReturn :: [CmmExpr] -> FCode () emitReturn results = do { sequel <- getSequel; ; updfr_off <- getUpdFrameOff - ; emit $ mkComment $ mkFastString ("emitReturn: " ++ show sequel) + ; emitComment $ mkFastString ("emitReturn: " ++ show sequel) ; case sequel of Return _ -> do { adjustHpBackwards ; emit (mkReturnSimple results updfr_off) } AssignTo regs adjust -> do { if adjust then adjustHpBackwards else return () - ; emit (mkMultiAssign regs results) } + ; emitMultiAssign regs results } } + +-- | @emitCall conv fun args@ makes a call to the entry-code of @fun@, +-- using the call/return convention @conv@, passing @args@, and +-- returning the results to the current sequel. +-- emitCall :: (Convention, Convention) -> CmmExpr -> [CmmExpr] -> FCode () --- (cgCall fun args) makes a call to the entry-code of 'fun', --- passing 'args', and returning the results to the current sequel -emitCall convs@(callConv, _) fun args +emitCall convs fun args + = emitCallWithExtraStack convs fun args noExtraStack + + +-- | @emitCallWithExtraStack conv fun args stack@ makes a call to the +-- entry-code of @fun@, using the call/return convention @conv@, +-- passing @args@, pushing some extra stack frames described by +-- @stack@, and returning the results to the current sequel. 
+-- +emitCallWithExtraStack + :: (Convention, Convention) -> CmmExpr -> [CmmExpr] + -> (ByteOff, [(CmmExpr,ByteOff)]) -> FCode () +emitCallWithExtraStack convs@(callConv, _) fun args extra_stack = do { adjustHpBackwards ; sequel <- getSequel ; updfr_off <- getUpdFrameOff - ; emit $ mkComment $ mkFastString ("emitCall: " ++ show sequel) - ; case sequel of - Return _ -> emit (mkForeignJump callConv fun args updfr_off) - AssignTo res_regs _ -> emit (mkCall fun convs res_regs args updfr_off) - } + ; emitComment $ mkFastString ("emitCallWithExtraStack: " ++ show sequel) + ; case sequel of + Return _ -> + emit $ mkForeignJumpExtra callConv fun args updfr_off extra_stack + AssignTo res_regs _ -> do + emit =<< mkCall fun convs res_regs args updfr_off extra_stack + } + adjustHpBackwards :: FCode () -- This function adjusts and heap pointers just before a tail call or @@ -127,59 +152,137 @@ adjustHpBackwards -- Making calls: directCall and slowCall ------------------------------------------------------------------------- +-- General plan is: +-- - we'll make *one* fast call, either to the function itself +-- (directCall) or to stg_ap_<pat>_fast (slowCall) +-- Any left-over arguments will be pushed on the stack, +-- +-- e.g. 
Sp[old+8] = arg1 +-- Sp[old+16] = arg2 +-- Sp[old+32] = stg_ap_pp_info +-- R2 = arg3 +-- R3 = arg4 +-- call f() return to Nothing updfr_off: 32 + + directCall :: CLabel -> RepArity -> [StgArg] -> FCode () -- (directCall f n args) -- calls f(arg1, ..., argn), and applies the result to the remaining args -- The function f has arity n, and there are guaranteed at least n args -- Both arity and args include void args directCall lbl arity stg_args - = do { cmm_args <- getNonVoidArgAmodes stg_args - ; direct_call "directCall" lbl arity cmm_args (argsReps stg_args) } + = do { argreps <- getArgRepsAmodes stg_args + ; direct_call "directCall" lbl arity argreps } + slowCall :: CmmExpr -> [StgArg] -> FCode () -- (slowCall fun args) applies fun to args, returning the results to Sequel slowCall fun stg_args - = do { cmm_args <- getNonVoidArgAmodes stg_args - ; slow_call fun cmm_args (argsReps stg_args) } + = do { dflags <- getDynFlags + ; argsreps <- getArgRepsAmodes stg_args + ; let (rts_fun, arity) = slowCallPattern (map fst argsreps) + ; call <- getCode $ direct_call "slow_call" + (mkRtsApFastLabel rts_fun) arity argsreps + ; emitComment $ mkFastString ("slow_call for " ++ + showSDoc dflags (ppr fun) ++ + " with pat " ++ unpackFS rts_fun) + ; emit (mkAssign nodeReg fun <*> call) + } + -------------- -direct_call :: String -> CLabel -> RepArity -> [CmmExpr] -> [ArgRep] -> FCode () --- NB1: (length args) may be less than (length reps), because --- the args exclude the void ones --- NB2: 'arity' refers to the *reps* -direct_call caller lbl arity args reps - | debugIsOn && arity > length reps -- Too few args +direct_call :: String -> CLabel -> RepArity -> [(ArgRep,Maybe CmmExpr)] -> FCode () +direct_call caller lbl arity args + | debugIsOn && arity > length args -- Too few args = do -- Caller should ensure that there enough args! 
- pprPanic "direct_call" (text caller <+> ppr arity - <+> ppr lbl <+> ppr (length reps) - <+> ppr args <+> ppr reps ) - - | null rest_reps -- Precisely the right number of arguments - = emitCall (NativeDirectCall, NativeReturn) target args - - | otherwise -- Over-saturated call - = ASSERT( arity == length initial_reps ) - do { pap_id <- newTemp gcWord - ; withSequel (AssignTo [pap_id] True) - (emitCall (NativeDirectCall, NativeReturn) target fast_args) - ; slow_call (CmmReg (CmmLocal pap_id)) - rest_args rest_reps } + pprPanic "direct_call" $ + text caller <+> ppr arity <+> + ppr lbl <+> ppr (length args) <+> + ppr (map snd args) <+> ppr (map fst args) + + | null rest_args -- Precisely the right number of arguments + = emitCall (NativeDirectCall, NativeReturn) target (nonVArgs args) + + | otherwise -- Note [over-saturated calls] + = emitCallWithExtraStack (NativeDirectCall, NativeReturn) + target (nonVArgs fast_args) (mkStkOffsets stack_args) where target = CmmLit (CmmLabel lbl) - (initial_reps, rest_reps) = splitAt arity reps - arg_arity = count isNonV initial_reps - (fast_args, rest_args) = splitAt arg_arity args + (fast_args, rest_args) = splitAt arity args + stack_args = slowArgs rest_args --------------- -slow_call :: CmmExpr -> [CmmExpr] -> [ArgRep] -> FCode () -slow_call fun args reps - = do dflags <- getDynFlags - call <- getCode $ direct_call "slow_call" (mkRtsApFastLabel rts_fun) arity args reps - emit $ mkComment $ mkFastString ("slow_call for " ++ showSDoc dflags (ppr fun) ++ - " with pat " ++ unpackFS rts_fun) - emit (mkAssign nodeReg fun <*> call) + +-- When constructing calls, it is easier to keep the ArgReps and the +-- CmmExprs zipped together. However, a void argument has no +-- representation, so we need to use Maybe CmmExpr (the alternative of +-- using zeroCLit or even undefined would work, but would be ugly). 
+-- +getArgRepsAmodes :: [StgArg] -> FCode [(ArgRep, Maybe CmmExpr)] +getArgRepsAmodes = mapM getArgRepAmode + where getArgRepAmode arg + | V <- rep = return (V, Nothing) + | otherwise = do expr <- getArgAmode (NonVoid arg) + return (rep, Just expr) + where rep = toArgRep (argPrimRep arg) + +nonVArgs :: [(ArgRep, Maybe CmmExpr)] -> [CmmExpr] +nonVArgs [] = [] +nonVArgs ((_,Nothing) : args) = nonVArgs args +nonVArgs ((_,Just arg) : args) = arg : nonVArgs args + +{- +Note [over-saturated calls] + +The natural thing to do for an over-saturated call would be to call +the function with the correct number of arguments, and then apply the +remaining arguments to the value returned, e.g. + + f a b c d (where f has arity 2) + --> + r = call f(a,b) + call r(c,d) + +but this entails + - saving c and d on the stack + - making a continuation info table + - at the continuation, loading c and d off the stack into regs + - finally, call r + +Note that since there are a fixed number of different r's +(e.g. stg_ap_pp_fast), we can also pre-compile continuations +that correspond to each of them, rather than generating a fresh +one for each over-saturated call. + +Not only does this generate much less code, it is faster too. We will +generate something like: + +Sp[old+16] = c +Sp[old+24] = d +Sp[old+32] = stg_ap_pp_info +call f(a,b) -- usual calling convention + +For the purposes of the CmmCall node, we count this extra stack as +just more arguments that we are passing on the stack (cml_args). +-} + +-- | 'slowArgs' takes a list of function arguments and prepares them for +-- pushing on the stack for "extra" arguments to a function which requires +-- fewer arguments than we currently have. 
+slowArgs :: [(ArgRep, Maybe CmmExpr)] -> [(ArgRep, Maybe CmmExpr)] +slowArgs [] = [] +slowArgs args -- careful: reps contains voids (V), but args does not + | opt_SccProfilingOn = save_cccs ++ this_pat ++ slowArgs rest_args + | otherwise = this_pat ++ slowArgs rest_args where - (rts_fun, arity) = slowCallPattern reps + (arg_pat, n) = slowCallPattern (map fst args) + (call_args, rest_args) = splitAt n args + + stg_ap_pat = mkCmmRetInfoLabel rtsPackageId arg_pat + this_pat = (N, Just (mkLblExpr stg_ap_pat)) : call_args + save_cccs = [(N, Just (mkLblExpr save_cccs_lbl)), (N, Just curCCS)] + save_cccs_lbl = mkCmmRetInfoLabel rtsPackageId (fsLit "stg_restore_cccs") + + -- These cases were found to cover about 99% of all slow calls: slowCallPattern :: [ArgRep] -> (FastString, RepArity) @@ -202,6 +305,30 @@ slowCallPattern [] = (fsLit "stg_ap_0", 0) ------------------------------------------------------------------------- +-- Fix the byte-offsets of a bunch of things to push on the stack + +-- This is used for pushing slow-call continuations. +-- See Note [over-saturated calls]. + +mkStkOffsets + :: [(ArgRep, Maybe CmmExpr)] -- things to make offsets for + -> ( ByteOff -- OUTPUTS: Topmost allocated word + , [(CmmExpr, ByteOff)] ) -- things with offsets (voids filtered out) +mkStkOffsets things + = loop 0 [] (reverse things) + where + loop offset offs [] = (offset,offs) + loop offset offs ((_,Nothing):things) = loop offset offs things + -- ignore Void arguments + loop offset offs ((rep,Just thing):things) + = loop thing_off ((thing, thing_off):offs) things + where + thing_off = offset + argRepSizeW rep * wORD_SIZE + -- offset of thing is offset+size, because we're + -- growing the stack *downwards* as the offsets increase. 
+ + +------------------------------------------------------------------------- -- Classifying arguments: ArgRep ------------------------------------------------------------------------- @@ -237,10 +364,7 @@ isNonV :: ArgRep -> Bool isNonV V = False isNonV _ = True -argsReps :: [StgArg] -> [ArgRep] -argsReps = map (toArgRep . argPrimRep) - -argRepSizeW :: ArgRep -> WordOff -- Size in words +argRepSizeW :: ArgRep -> WordOff -- Size in words argRepSizeW N = 1 argRepSizeW P = 1 argRepSizeW F = 1 diff --git a/compiler/codeGen/StgCmmMonad.hs b/compiler/codeGen/StgCmmMonad.hs index 4eea38e22c..cc9919a4a0 100644 --- a/compiler/codeGen/StgCmmMonad.hs +++ b/compiler/codeGen/StgCmmMonad.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE GADTs #-} ----------------------------------------------------------------------------- -- -- Monad for Stg to C-- code generation @@ -16,16 +17,21 @@ module StgCmmMonad ( FCode, -- type - initC, thenC, thenFC, listCs, listFCs, mapCs, mapFCs, + initC, runC, thenC, thenFC, listCs, listFCs, mapCs, mapFCs, returnFC, fixC, fixC_, nopC, whenC, newUnique, newUniqSupply, + newLabelC, emitLabel, + emit, emitDecl, emitProc, emitProcWithConvention, emitSimpleProc, + emitOutOfLine, emitAssign, emitStore, emitComment, getCmm, cgStmtsToBlocks, getCodeR, getCode, getHeapUsage, - forkClosureBody, forkStatics, forkAlts, forkProc, codeOnly, + mkCmmIfThenElse, mkCmmIfThen, mkCall, mkCmmCall, mkSafeCall, + + forkClosureBody, forkStatics, forkAlts, forkProc, codeOnly, ConTagZ, @@ -69,12 +75,12 @@ import VarEnv import OrdList import Unique import UniqSupply -import FastString(sLit) +import FastString import Outputable import Control.Monad import Data.List -import Prelude hiding( sequence ) +import Prelude hiding( sequence, succ ) import qualified Prelude( sequence ) infixr 9 `thenC` -- Right-associative! 
@@ -95,12 +101,12 @@ instance Monad FCode where {-# INLINE thenFC #-} {-# INLINE returnFC #-} -initC :: DynFlags -> Module -> FCode a -> IO a -initC dflags mod (FCode code) - = do { uniqs <- mkSplitUniqSupply 'c' - ; case code (initCgInfoDown dflags mod) (initCgState uniqs) of - (res, _) -> return res - } +initC :: IO CgState +initC = do { uniqs <- mkSplitUniqSupply 'c' + ; return (initCgState uniqs) } + +runC :: DynFlags -> Module -> CgState -> FCode a -> (a,CgState) +runC dflags mod st (FCode code) = code (initCgInfoDown dflags mod) st returnFC :: a -> FCode a returnFC val = FCode (\_info_down state -> (val, state)) @@ -270,6 +276,8 @@ data HeapUsage = type VirtualHpOffset = WordOff + + initCgState :: UniqSupply -> CgState initCgState uniqs = MkCgState { cgs_stmts = mkNop, cgs_tops = nilOL, @@ -308,7 +316,6 @@ initHpUsage = HeapUsage { virtHp = 0, realHp = 0 } maxHpHw :: HeapUsage -> VirtualHpOffset -> HeapUsage hp_usg `maxHpHw` hw = hp_usg { virtHp = virtHp hp_usg `max` hw } - -------------------------------------------------------- -- Operators for getting and setting the state and "info_down". 
-------------------------------------------------------- @@ -591,6 +598,33 @@ getHeapUsage fcode -- ---------------------------------------------------------------------------- -- Combinators for emitting code +emitCgStmt :: CgStmt -> FCode () +emitCgStmt stmt + = do { state <- getState + ; setState $ state { cgs_stmts = cgs_stmts state `snocOL` stmt } + } + +emitLabel :: BlockId -> FCode () +emitLabel id = emitCgStmt (CgLabel id) + +emitComment :: FastString -> FCode () +#if 0 /* def DEBUG */ +emitComment s = emitCgStmt (CgStmt (CmmComment s)) +#else +emitComment _ = return () +#endif + +emitAssign :: CmmReg -> CmmExpr -> FCode () +emitAssign l r = emitCgStmt (CgStmt (CmmAssign l r)) + +emitStore :: CmmExpr -> CmmExpr -> FCode () +emitStore l r = emitCgStmt (CgStmt (CmmStore l r)) + + +newLabelC :: FCode BlockId +newLabelC = do { u <- newUnique + ; return $ mkBlockId u } + emit :: CmmAGraph -> FCode () emit ag = do { state <- getState @@ -601,6 +635,9 @@ emitDecl decl = do { state <- getState ; setState $ state { cgs_tops = cgs_tops state `snocOL` decl } } +emitOutOfLine :: BlockId -> CmmAGraph -> FCode () +emitOutOfLine l stmts = emitCgStmt (CgFork l stmts) + emitProcWithConvention :: Convention -> CmmInfoTable -> CLabel -> [CmmFormal] -> CmmAGraph -> FCode () emitProcWithConvention conv info lbl args blocks @@ -629,6 +666,55 @@ getCmm code ; setState $ state2 { cgs_tops = cgs_tops state1 } ; return (fromOL (cgs_tops state2)) } + +mkCmmIfThenElse :: CmmExpr -> CmmAGraph -> CmmAGraph -> FCode CmmAGraph +mkCmmIfThenElse e tbranch fbranch = do + endif <- newLabelC + tid <- newLabelC + fid <- newLabelC + return $ mkCbranch e tid fid <*> + mkLabel tid <*> tbranch <*> mkBranch endif <*> + mkLabel fid <*> fbranch <*> mkLabel endif + +mkCmmIfThen :: CmmExpr -> CmmAGraph -> FCode CmmAGraph +mkCmmIfThen e tbranch = do + endif <- newLabelC + tid <- newLabelC + return $ mkCbranch e tid endif <*> + mkLabel tid <*> tbranch <*> mkLabel endif + + +mkCall :: CmmExpr -> 
(Convention, Convention) -> [CmmFormal] -> [CmmActual] + -> UpdFrameOffset -> (ByteOff,[(CmmExpr,ByteOff)]) -> FCode CmmAGraph +mkCall f (callConv, retConv) results actuals updfr_off extra_stack = do + k <- newLabelC + let area = Young k + (off, copyin) = copyInOflow retConv area results + copyout = mkCallReturnsTo f callConv actuals k off updfr_off extra_stack + return (copyout <*> mkLabel k <*> copyin) + +mkCmmCall :: CmmExpr -> [CmmFormal] -> [CmmActual] -> UpdFrameOffset + -> FCode CmmAGraph +mkCmmCall f results actuals updfr_off + = mkCall f (NativeDirectCall, NativeReturn) results actuals updfr_off (0,[]) + + +mkSafeCall :: ForeignTarget -> [CmmFormal] -> [CmmActual] + -> UpdFrameOffset -> Bool + -> FCode CmmAGraph +mkSafeCall t fs as upd i = do + k <- newLabelC + let (_off, copyout) = copyInOflow NativeReturn (Young k) fs + -- see Note [safe foreign call convention] + return + ( mkStore (CmmStackSlot (Young k) (widthInBytes wordWidth)) + (CmmLit (CmmBlock k)) + <*> mkLast (CmmForeignCall { tgt=t, res=fs, args=as, succ=k + , updfr=upd, intrbl=i }) + <*> mkLabel k + <*> copyout + ) + -- ---------------------------------------------------------------------------- -- CgStmts @@ -640,4 +726,3 @@ cgStmtsToBlocks :: CmmAGraph -> FCode CmmGraph cgStmtsToBlocks stmts = do { us <- newUniqSupply ; return (initUs_ us (lgraphOfAGraph stmts)) } - diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs index efa234b5a6..bd783a3b30 100644 --- a/compiler/codeGen/StgCmmPrim.hs +++ b/compiler/codeGen/StgCmmPrim.hs @@ -14,7 +14,9 @@ -- for details module StgCmmPrim ( - cgOpApp + cgOpApp, + cgPrimOp -- internal(ish), used by cgCase to get code for a + -- comparison without also turning it into a Bool. 
) where #include "HsVersions.h" @@ -67,14 +69,9 @@ cgOpApp :: StgOp -- The op -- Foreign calls cgOpApp (StgFCallOp fcall _) stg_args res_ty - = do { (res_regs, res_hints) <- newUnboxedTupleRegs res_ty - -- Choose result regs r1, r2 - -- Note [Foreign call results] - ; cgForeignCall res_regs res_hints fcall stg_args - -- r1, r2 = foo( x, y ) - ; emitReturn (map (CmmReg . CmmLocal) res_regs) } - -- return (r1, r2) - + = cgForeignCall fcall stg_args res_ty + -- Note [Foreign call results] + -- tagToEnum# is special: we need to pull the constructor -- out of the table, and perform an appropriate return. @@ -229,23 +226,23 @@ emitPrimOp [res] SparkOp [arg] [(tmp2,NoHint)] (CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit "newSpark")))) [(CmmReg (CmmGlobal BaseReg), AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)] - emit (mkAssign (CmmLocal res) (CmmReg (CmmLocal tmp))) + emitAssign (CmmLocal res) (CmmReg (CmmLocal tmp)) emitPrimOp [res] GetCCSOfOp [arg] - = emit (mkAssign (CmmLocal res) val) + = emitAssign (CmmLocal res) val where val | opt_SccProfilingOn = costCentreFrom (cmmUntag arg) | otherwise = CmmLit zeroCLit emitPrimOp [res] GetCurrentCCSOp [_dummy_arg] - = emit (mkAssign (CmmLocal res) curCCS) + = emitAssign (CmmLocal res) curCCS emitPrimOp [res] ReadMutVarOp [mutv] - = emit (mkAssign (CmmLocal res) (cmmLoadIndexW mutv fixedHdrSize gcWord)) + = emitAssign (CmmLocal res) (cmmLoadIndexW mutv fixedHdrSize gcWord) emitPrimOp [] WriteMutVarOp [mutv,var] = do - emit (mkStore (cmmOffsetW mutv fixedHdrSize) var) + emitStore (cmmOffsetW mutv fixedHdrSize) var emitCCall [{-no results-}] (CmmLit (CmmLabel mkDirty_MUT_VAR_Label)) @@ -269,32 +266,32 @@ emitPrimOp res@[] TouchOp args@[_arg] -- #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a) emitPrimOp [res] ByteArrayContents_Char [arg] - = emit (mkAssign (CmmLocal res) (cmmOffsetB arg arrWordsHdrSize)) + = emitAssign (CmmLocal res) (cmmOffsetB arg arrWordsHdrSize) -- #define stableNameToIntzh(r,s) (r = 
((StgStableName *)s)->sn) emitPrimOp [res] StableNameToIntOp [arg] - = emit (mkAssign (CmmLocal res) (cmmLoadIndexW arg fixedHdrSize bWord)) + = emitAssign (CmmLocal res) (cmmLoadIndexW arg fixedHdrSize bWord) -- #define eqStableNamezh(r,sn1,sn2) \ -- (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn)) emitPrimOp [res] EqStableNameOp [arg1,arg2] - = emit (mkAssign (CmmLocal res) (CmmMachOp mo_wordEq [ + = emitAssign (CmmLocal res) (CmmMachOp mo_wordEq [ cmmLoadIndexW arg1 fixedHdrSize bWord, cmmLoadIndexW arg2 fixedHdrSize bWord - ])) + ]) emitPrimOp [res] ReallyUnsafePtrEqualityOp [arg1,arg2] - = emit (mkAssign (CmmLocal res) (CmmMachOp mo_wordEq [arg1,arg2])) + = emitAssign (CmmLocal res) (CmmMachOp mo_wordEq [arg1,arg2]) -- #define addrToHValuezh(r,a) r=(P_)a emitPrimOp [res] AddrToAnyOp [arg] - = emit (mkAssign (CmmLocal res) arg) + = emitAssign (CmmLocal res) arg -- #define dataToTagzh(r,a) r=(GET_TAG(((StgClosure *)a)->header.info)) -- Note: argument may be tagged! emitPrimOp [res] DataToTagOp [arg] - = emit (mkAssign (CmmLocal res) (getConstrTag (cmmUntag arg))) + = emitAssign (CmmLocal res) (getConstrTag (cmmUntag arg)) {- Freezing arrays-of-ptrs requires changing an info table, for the benefit of the generational collector. 
It needs to scavenge mutable @@ -317,7 +314,7 @@ emitPrimOp [res] UnsafeFreezeArrayArrayOp [arg] -- #define unsafeFreezzeByteArrayzh(r,a) r=(a) emitPrimOp [res] UnsafeFreezeByteArrayOp [arg] - = emit (mkAssign (CmmLocal res) arg) + = emitAssign (CmmLocal res) arg -- Copying pointer arrays @@ -497,11 +494,11 @@ emitPrimOp [res] PopCntOp [w] = emitPopCntCall res w wordWidth -- The rest just translate straightforwardly emitPrimOp [res] op [arg] | nopOp op - = emit (mkAssign (CmmLocal res) arg) + = emitAssign (CmmLocal res) arg | Just (mop,rep) <- narrowOp op - = emit (mkAssign (CmmLocal res) $ - CmmMachOp (mop rep wordWidth) [CmmMachOp (mop wordWidth rep) [arg]]) + = emitAssign (CmmLocal res) $ + CmmMachOp (mop rep wordWidth) [CmmMachOp (mop wordWidth rep) [arg]] emitPrimOp r@[res] op args | Just prim <- callishOp op @@ -746,15 +743,15 @@ loadArrPtrsSize addr = CmmLoad (cmmOffsetB addr off) bWord mkBasicIndexedRead :: ByteOff -> Maybe MachOp -> CmmType -> LocalReg -> CmmExpr -> CmmExpr -> FCode () mkBasicIndexedRead off Nothing read_rep res base idx - = emit (mkAssign (CmmLocal res) (cmmLoadIndexOffExpr off read_rep base idx)) + = emitAssign (CmmLocal res) (cmmLoadIndexOffExpr off read_rep base idx) mkBasicIndexedRead off (Just cast) read_rep res base idx - = emit (mkAssign (CmmLocal res) (CmmMachOp cast [ - cmmLoadIndexOffExpr off read_rep base idx])) + = emitAssign (CmmLocal res) (CmmMachOp cast [ + cmmLoadIndexOffExpr off read_rep base idx]) mkBasicIndexedWrite :: ByteOff -> Maybe MachOp -> CmmExpr -> CmmExpr -> CmmExpr -> FCode () mkBasicIndexedWrite off Nothing base idx val - = emit (mkStore (cmmIndexOffExpr off (typeWidth (cmmExprType val)) base idx) val) + = emitStore (cmmIndexOffExpr off (typeWidth (cmmExprType val)) base idx) val mkBasicIndexedWrite off (Just cast) base idx val = mkBasicIndexedWrite off Nothing base idx (CmmMachOp cast [val]) @@ -805,7 +802,7 @@ doCopyMutableByteArrayOp = emitCopyByteArray copy getCode $ emitMemmoveCall dst_p src_p bytes 
(CmmLit (mkIntCLit 1)), getCode $ emitMemcpyCall dst_p src_p bytes (CmmLit (mkIntCLit 1)) ] - emit $ mkCmmIfThenElse (cmmEqWord src dst) moveCall cpyCall + emit =<< mkCmmIfThenElse (cmmEqWord src dst) moveCall cpyCall emitCopyByteArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()) @@ -875,7 +872,7 @@ doCopyMutableArrayOp = emitCopyArray copy getCode $ emitMemmoveCall dst_p src_p bytes (CmmLit (mkIntCLit wORD_SIZE)), getCode $ emitMemcpyCall dst_p src_p bytes (CmmLit (mkIntCLit wORD_SIZE)) ] - emit $ mkCmmIfThenElse (cmmEqWord src dst) moveCall cpyCall + emit =<< mkCmmIfThenElse (cmmEqWord src dst) moveCall cpyCall emitCopyArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()) diff --git a/compiler/codeGen/StgCmmProf.hs b/compiler/codeGen/StgCmmProf.hs index 6a53317385..9ff4d0be07 100644 --- a/compiler/codeGen/StgCmmProf.hs +++ b/compiler/codeGen/StgCmmProf.hs @@ -103,7 +103,7 @@ initUpdFrameProf :: CmmExpr -> FCode () -- Initialise the profiling field of an update frame initUpdFrameProf frame_amode = ifProfiling $ -- frame->header.prof.ccs = CCCS - emit (mkStore (cmmOffsetB frame_amode oFFSET_StgHeader_ccs) curCCS) + emitStore (cmmOffsetB frame_amode oFFSET_StgHeader_ccs) curCCS -- frame->header.prof.hp.rs = NULL (or frame-header.prof.hp.ldvw = 0) -- is unnecessary because it is not used anyhow. 
@@ -143,7 +143,7 @@ saveCurrentCostCentre = return Nothing | otherwise = do { local_cc <- newTemp ccType - ; emit (mkAssign (CmmLocal local_cc) curCCS) + ; emitAssign (CmmLocal local_cc) curCCS ; return (Just local_cc) } restoreCurrentCostCentre :: Maybe LocalReg -> FCode () @@ -338,9 +338,9 @@ ldvEnter cl_ptr -- if (era > 0) { -- LDVW((c)) = (LDVW((c)) & LDV_CREATE_MASK) | -- era | LDV_STATE_USE } - emit (mkCmmIfThenElse (CmmMachOp mo_wordUGt [loadEra, CmmLit zeroCLit]) + emit =<< mkCmmIfThenElse (CmmMachOp mo_wordUGt [loadEra, CmmLit zeroCLit]) (mkStore ldv_wd new_ldv_wd) - mkNop) + mkNop where -- don't forget to substract node's tag ldv_wd = ldvWord cl_ptr diff --git a/compiler/codeGen/StgCmmTicky.hs b/compiler/codeGen/StgCmmTicky.hs index d0432315ab..698bf32709 100644 --- a/compiler/codeGen/StgCmmTicky.hs +++ b/compiler/codeGen/StgCmmTicky.hs @@ -180,7 +180,7 @@ registerTickyCtr :: CLabel -> FCode () -- ticky_entry_ctrs = & (f_ct); /* mark it as "registered" */ -- f_ct.registeredp = 1 } registerTickyCtr ctr_lbl - = emit (mkCmmIfThen test (catAGraphs register_stmts)) + = emit =<< mkCmmIfThen test (catAGraphs register_stmts) where -- krc: code generator doesn't handle Not, so we test for Eq 0 instead test = CmmMachOp (MO_Eq wordWidth) @@ -352,7 +352,7 @@ bumpHistogram _lbl _n bumpHistogramE :: LitString -> CmmExpr -> FCode () bumpHistogramE lbl n = do t <- newTemp cLong - emit (mkAssign (CmmLocal t) n) + emitAssign (CmmLocal t) n emit (mkCmmIfThen (CmmMachOp (MO_U_Le cLongWidth) [CmmReg (CmmLocal t), eight]) (mkAssign (CmmLocal t) eight)) emit (addToMem cLong diff --git a/compiler/codeGen/StgCmmUtils.hs b/compiler/codeGen/StgCmmUtils.hs index bb4a653c05..273e59b0b5 100644 --- a/compiler/codeGen/StgCmmUtils.hs +++ b/compiler/codeGen/StgCmmUtils.hs @@ -18,12 +18,11 @@ module StgCmmUtils ( emitDataLits, mkDataLits, emitRODataLits, mkRODataLits, emitRtsCall, emitRtsCallWithVols, emitRtsCallWithResult, emitRtsCallGen, - assignTemp, newTemp, withTemp, + assignTemp, 
newTemp, newUnboxedTupleRegs, - mkMultiAssign, mkCmmSwitch, mkCmmLitSwitch, - emitSwitch, + emitMultiAssign, emitCmmLitSwitch, emitSwitch, tagToClosure, mkTaggedObjectLoad, @@ -72,6 +71,7 @@ import Module import Literal import Digraph import ListSetOps +import VarSet import Util import Unique import DynFlags @@ -204,14 +204,14 @@ emitRtsCallGen emitRtsCallGen res pkg fun args _vols safe = do { updfr_off <- getUpdFrameOff ; emit caller_save - ; emit $ call updfr_off + ; call updfr_off ; emit caller_load } where call updfr_off = if safe then - mkCmmCall fun_expr res' args' updfr_off + emit =<< mkCmmCall fun_expr res' args' updfr_off else - mkUnsafeCall (ForeignTarget fun_expr + emit $ mkUnsafeCall (ForeignTarget fun_expr (ForeignConvention CCallConv arg_hints res_hints)) res' args' (args', arg_hints) = unzip args (res', res_hints) = unzip res @@ -441,7 +441,7 @@ assignTemp :: CmmExpr -> FCode LocalReg assignTemp (CmmReg (CmmLocal reg)) = return reg assignTemp e = do { uniq <- newUnique ; let reg = LocalReg uniq (cmmExprType e) - ; emit (mkAssign (CmmLocal reg) e) + ; emitAssign (CmmLocal reg) e ; return reg } newTemp :: CmmType -> FCode LocalReg @@ -471,10 +471,10 @@ newUnboxedTupleRegs res_ty ------------------------------------------------------------------------- --- mkMultiAssign +-- emitMultiAssign ------------------------------------------------------------------------- -mkMultiAssign :: [LocalReg] -> [CmmExpr] -> CmmAGraph +emitMultiAssign :: [LocalReg] -> [CmmExpr] -> FCode () -- Emit code to perform the assignments in the -- input simultaneously, using temporary variables when necessary. @@ -489,14 +489,13 @@ type Stmt = (LocalReg, CmmExpr) -- r := e -- s1 assigns to something s2 uses -- that is, if s1 should *follow* s2 in the final order -mkMultiAssign [] [] = mkNop -mkMultiAssign [reg] [rhs] = mkAssign (CmmLocal reg) rhs -mkMultiAssign regs rhss = ASSERT( equalLength regs rhss ) - unscramble ([1..] 
`zip` (regs `zip` rhss)) +emitMultiAssign [] [] = return () +emitMultiAssign [reg] [rhs] = emitAssign (CmmLocal reg) rhs +emitMultiAssign regs rhss = ASSERT( equalLength regs rhss ) + unscramble ([1..] `zip` (regs `zip` rhss)) -unscramble :: [Vrtx] -> CmmAGraph -unscramble vertices - = catAGraphs (map do_component components) +unscramble :: [Vrtx] -> FCode () +unscramble vertices = mapM_ do_component components where edges :: [ (Vrtx, Key, [Key]) ] edges = [ (vertex, key1, edges_from stmt1) @@ -511,19 +510,19 @@ unscramble vertices -- do_components deal with one strongly-connected component -- Not cyclic, or singleton? Just do it - do_component :: SCC Vrtx -> CmmAGraph - do_component (AcyclicSCC (_,stmt)) = mk_graph stmt + do_component :: SCC Vrtx -> FCode () + do_component (AcyclicSCC (_,stmt)) = mk_graph stmt do_component (CyclicSCC []) = panic "do_component" do_component (CyclicSCC [(_,stmt)]) = mk_graph stmt -- Cyclic? Then go via temporaries. Pick one to -- break the loop and try again with the rest. 
- do_component (CyclicSCC ((_,first_stmt) : rest)) - = withUnique $ \u -> + do_component (CyclicSCC ((_,first_stmt) : rest)) = do + u <- newUnique let (to_tmp, from_tmp) = split u first_stmt - in mk_graph to_tmp - <*> unscramble rest - <*> mk_graph from_tmp + mk_graph to_tmp + unscramble rest + mk_graph from_tmp split :: Unique -> Stmt -> (Stmt, Stmt) split uniq (reg, rhs) @@ -532,8 +531,8 @@ unscramble vertices rep = cmmExprType rhs tmp = LocalReg uniq rep - mk_graph :: Stmt -> CmmAGraph - mk_graph (reg, rhs) = mkAssign (CmmLocal reg) rhs + mk_graph :: Stmt -> FCode () + mk_graph (reg, rhs) = emitAssign (CmmLocal reg) rhs mustFollow :: Stmt -> Stmt -> Bool (reg, _) `mustFollow` (_, rhs) = CmmLocal reg `regUsedIn` rhs @@ -551,7 +550,7 @@ emitSwitch :: CmmExpr -- Tag to switch on -> FCode () emitSwitch tag_expr branches mb_deflt lo_tag hi_tag = do { dflags <- getDynFlags - ; emit (mkCmmSwitch (via_C dflags) tag_expr branches mb_deflt lo_tag hi_tag) } + ; mkCmmSwitch (via_C dflags) tag_expr branches mb_deflt lo_tag hi_tag } where via_C dflags | HscC <- hscTarget dflags = True | otherwise = False @@ -563,38 +562,40 @@ mkCmmSwitch :: Bool -- True <=> never generate a conditional tree -> Maybe CmmAGraph -- Default branch (if any) -> ConTagZ -> ConTagZ -- Min and Max possible values; behaviour -- outside this range is undefined - -> CmmAGraph + -> FCode () -- First, two rather common cases in which there is no work to do -mkCmmSwitch _ _ [] (Just code) _ _ = code -mkCmmSwitch _ _ [(_,code)] Nothing _ _ = code +mkCmmSwitch _ _ [] (Just code) _ _ = emit code +mkCmmSwitch _ _ [(_,code)] Nothing _ _ = emit code -- Right, off we go -mkCmmSwitch via_C tag_expr branches mb_deflt lo_tag hi_tag - = withFreshLabel "switch join" $ \ join_lbl -> - label_default join_lbl mb_deflt $ \ mb_deflt -> - label_branches join_lbl branches $ \ branches -> - assignTemp' tag_expr $ \tag_expr' -> +mkCmmSwitch via_C tag_expr branches mb_deflt lo_tag hi_tag = do + join_lbl <- newLabelC + 
mb_deflt_lbl <- label_default join_lbl mb_deflt + branches_lbls <- label_branches join_lbl branches + tag_expr' <- assignTemp' tag_expr - mk_switch tag_expr' (sortBy (comparing fst) branches) mb_deflt - lo_tag hi_tag via_C - -- Sort the branches before calling mk_switch - <*> mkLabel join_lbl + emit =<< mk_switch tag_expr' (sortBy (comparing fst) branches_lbls) + mb_deflt_lbl lo_tag hi_tag via_C + + -- Sort the branches before calling mk_switch + + emitLabel join_lbl mk_switch :: CmmExpr -> [(ConTagZ, BlockId)] -> Maybe BlockId -> ConTagZ -> ConTagZ -> Bool - -> CmmAGraph + -> FCode CmmAGraph -- SINGLETON TAG RANGE: no case analysis to do mk_switch _tag_expr [(tag, lbl)] _ lo_tag hi_tag _via_C | lo_tag == hi_tag = ASSERT( tag == lo_tag ) - mkBranch lbl + return (mkBranch lbl) -- SINGLETON BRANCH, NO DEFAULT: no case analysis to do mk_switch _tag_expr [(_tag,lbl)] Nothing _ _ _ - = mkBranch lbl + = return (mkBranch lbl) -- The simplifier might have eliminated a case -- so we may have e.g. 
case xs of -- [] -> e @@ -603,7 +604,7 @@ mk_switch _tag_expr [(_tag,lbl)] Nothing _ _ _ -- SINGLETON BRANCH: one equality check to do mk_switch tag_expr [(tag,lbl)] (Just deflt) _ _ _ - = mkCbranch cond deflt lbl + = return (mkCbranch cond deflt lbl) where cond = cmmNeWord tag_expr (CmmLit (mkIntCLit tag)) -- We have lo_tag < hi_tag, but there's only one branch, @@ -636,30 +637,34 @@ mk_switch tag_expr branches mb_deflt lo_tag hi_tag via_C arms :: [Maybe BlockId] arms = [ find_branch i | i <- [real_lo_tag..real_hi_tag]] in - mkSwitch (cmmOffset tag_expr (- real_lo_tag)) arms + return (mkSwitch (cmmOffset tag_expr (- real_lo_tag)) arms) -- if we can knock off a bunch of default cases with one if, then do so | Just deflt <- mb_deflt, (lowest_branch - lo_tag) >= n_branches - = mkCmmIfThenElse + = do stmts <- mk_switch tag_expr branches mb_deflt + lowest_branch hi_tag via_C + mkCmmIfThenElse (cmmULtWord tag_expr (CmmLit (mkIntCLit lowest_branch))) (mkBranch deflt) - (mk_switch tag_expr branches mb_deflt - lowest_branch hi_tag via_C) + stmts | Just deflt <- mb_deflt, (hi_tag - highest_branch) >= n_branches - = mkCmmIfThenElse + = do stmts <- mk_switch tag_expr branches mb_deflt + lo_tag highest_branch via_C + mkCmmIfThenElse (cmmUGtWord tag_expr (CmmLit (mkIntCLit highest_branch))) (mkBranch deflt) - (mk_switch tag_expr branches mb_deflt - lo_tag highest_branch via_C) + stmts | otherwise -- Use an if-tree - = mkCmmIfThenElse + = do lo_stmts <- mk_switch tag_expr lo_branches mb_deflt + lo_tag (mid_tag-1) via_C + hi_stmts <- mk_switch tag_expr hi_branches mb_deflt + mid_tag hi_tag via_C + mkCmmIfThenElse (cmmUGeWord tag_expr (CmmLit (mkIntCLit mid_tag))) - (mk_switch tag_expr hi_branches mb_deflt - mid_tag hi_tag via_C) - (mk_switch tag_expr lo_branches mb_deflt - lo_tag (mid_tag-1) via_C) + hi_stmts + lo_stmts -- we test (e >= mid_tag) rather than (e < mid_tag), because -- the former works better when e is a comparison, and there -- are two tags 0 & 1 (mid_tag == 1). 
In this case, the code @@ -714,30 +719,31 @@ mk_switch tag_expr branches mb_deflt lo_tag hi_tag via_C is_lo (t,_) = t < mid_tag -------------- -mkCmmLitSwitch :: CmmExpr -- Tag to switch on +emitCmmLitSwitch :: CmmExpr -- Tag to switch on -> [(Literal, CmmAGraph)] -- Tagged branches -> CmmAGraph -- Default branch (always) - -> CmmAGraph -- Emit the code + -> FCode () -- Emit the code -- Used for general literals, whose size might not be a word, -- where there is always a default case, and where we don't know -- the range of values for certain. For simplicity we always generate a tree. -- -- ToDo: for integers we could do better here, perhaps by generalising -- mk_switch and using that. --SDM 15/09/2004 -mkCmmLitSwitch _scrut [] deflt = deflt -mkCmmLitSwitch scrut branches deflt - = assignTemp' scrut $ \ scrut' -> - withFreshLabel "switch join" $ \ join_lbl -> - label_code join_lbl deflt $ \ deflt -> - label_branches join_lbl branches $ \ branches -> - mk_lit_switch scrut' deflt (sortBy (comparing fst) branches) - <*> mkLabel join_lbl +emitCmmLitSwitch _scrut [] deflt = emit deflt +emitCmmLitSwitch scrut branches deflt = do + scrut' <- assignTemp' scrut + join_lbl <- newLabelC + deflt_lbl <- label_code join_lbl deflt + branches_lbls <- label_branches join_lbl branches + emit =<< mk_lit_switch scrut' deflt_lbl + (sortBy (comparing fst) branches_lbls) + emitLabel join_lbl mk_lit_switch :: CmmExpr -> BlockId -> [(Literal,BlockId)] - -> CmmAGraph + -> FCode CmmAGraph mk_lit_switch scrut deflt [(lit,blk)] - = mkCbranch (CmmMachOp ne [scrut, CmmLit cmm_lit]) deflt blk + = return (mkCbranch (CmmMachOp ne [scrut, CmmLit cmm_lit]) deflt blk) where cmm_lit = mkSimpleLit lit cmm_ty = cmmLitType cmm_lit @@ -745,9 +751,9 @@ mk_lit_switch scrut deflt [(lit,blk)] ne = if isFloatType cmm_ty then MO_F_Ne rep else MO_Ne rep mk_lit_switch scrut deflt_blk_id branches - = mkCmmIfThenElse cond - (mk_lit_switch scrut deflt_blk_id lo_branches) - (mk_lit_switch scrut deflt_blk_id 
hi_branches) + = do lo_blk <- mk_lit_switch scrut deflt_blk_id lo_branches + hi_blk <- mk_lit_switch scrut deflt_blk_id hi_branches + mkCmmIfThenElse cond lo_blk hi_blk where n_branches = length branches (mid_lit,_) = branches !! (n_branches `div` 2) @@ -761,49 +767,42 @@ mk_lit_switch scrut deflt_blk_id branches -------------- -label_default :: BlockId -> Maybe CmmAGraph - -> (Maybe BlockId -> CmmAGraph) - -> CmmAGraph -label_default _ Nothing thing_inside - = thing_inside Nothing -label_default join_lbl (Just code) thing_inside - = label_code join_lbl code $ \ lbl -> - thing_inside (Just lbl) +label_default :: BlockId -> Maybe CmmAGraph -> FCode (Maybe BlockId) +label_default _ Nothing + = return Nothing +label_default join_lbl (Just code) + = do lbl <- label_code join_lbl code + return (Just lbl) -------------- -label_branches :: BlockId -> [(a,CmmAGraph)] - -> ([(a,BlockId)] -> CmmAGraph) - -> CmmAGraph -label_branches _join_lbl [] thing_inside - = thing_inside [] -label_branches join_lbl ((tag,code):branches) thing_inside - = label_code join_lbl code $ \ lbl -> - label_branches join_lbl branches $ \ branches' -> - thing_inside ((tag,lbl):branches') +label_branches :: BlockId -> [(a,CmmAGraph)] -> FCode [(a,BlockId)] +label_branches _join_lbl [] + = return [] +label_branches join_lbl ((tag,code):branches) + = do lbl <- label_code join_lbl code + branches' <- label_branches join_lbl branches + return ((tag,lbl):branches') -------------- -label_code :: BlockId -> CmmAGraph -> (BlockId -> CmmAGraph) -> CmmAGraph --- (label_code J code fun) +label_code :: BlockId -> CmmAGraph -> FCode BlockId +-- label_code J code -- generates --- [L: code; goto J] fun L -label_code join_lbl code thing_inside - = withFreshLabel "switch" $ \lbl -> - outOfLine (mkLabel lbl <*> code <*> mkBranch join_lbl) - <*> thing_inside lbl - +-- [L: code; goto J] +-- and returns L +label_code join_lbl code = do + lbl <- newLabelC + emitOutOfLine lbl (code <*> mkBranch join_lbl) + return lbl 
-------------- -assignTemp' :: CmmExpr -> (CmmExpr -> CmmAGraph) -> CmmAGraph -assignTemp' e thing_inside - | isTrivialCmmExpr e = thing_inside e - | otherwise = withTemp (cmmExprType e) $ \ lreg -> - let reg = CmmLocal lreg in - mkAssign reg e <*> thing_inside (CmmReg reg) - -withTemp :: CmmType -> (LocalReg -> CmmAGraph) -> CmmAGraph -withTemp rep thing_inside - = withUnique $ \uniq -> thing_inside (LocalReg uniq rep) - +assignTemp' :: CmmExpr -> FCode CmmExpr +assignTemp' e + | isTrivialCmmExpr e = return e + | otherwise = do + lreg <- newTemp (cmmExprType e) + let reg = CmmLocal lreg + emitAssign reg e + return (CmmReg reg) ------------------------------------------------------------------------- -- @@ -811,36 +810,13 @@ withTemp rep thing_inside -- ------------------------------------------------------------------------- --- There is just one SRT for each top level binding; all the nested --- bindings use sub-sections of this SRT. The label is passed down to --- the nested bindings via the monad. - -getSRTInfo :: SRT -> FCode C_SRT -getSRTInfo (SRTEntries {}) = panic "getSRTInfo" - -getSRTInfo (SRT off len bmp) - | len > hALF_WORD_SIZE_IN_BITS || bmp == [fromIntegral srt_escape] - = do { id <- newUnique - -- ; top_srt <- getSRTLabel - ; let srt_desc_lbl = mkLargeSRTLabel id - -- JD: We're not constructing and emitting SRTs in the back end, - -- which renders this code wrong (it now names a now-non-existent label). - -- ; emitRODataLits srt_desc_lbl - -- ( cmmLabelOffW top_srt off - -- : mkWordCLit (fromIntegral len) - -- : map mkWordCLit bmp) - ; return (C_SRT srt_desc_lbl 0 srt_escape) } - - | otherwise - = do { top_srt <- getSRTLabel - ; return (C_SRT top_srt off (fromIntegral (head bmp))) } - -- The fromIntegral converts to StgHalfWord - -getSRTInfo NoSRT - = -- TODO: Should we panic in this case? - -- Someone obviously thinks there should be an SRT - return NoC_SRT - +-- | Returns 'True' if there is a non-empty SRT, or 'False' otherwise +-- NB. 
the SRT attached to an StgBind is still used in the new codegen +-- to decide whether we need a static link field on a static closure +-- or not. +getSRTInfo :: SRT -> FCode Bool +getSRTInfo (SRTEntries vs) = return (not (isEmptyVarSet vs)) +getSRTInfo _ = return False srt_escape :: StgHalfWord srt_escape = -1 diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in index 4371bca95e..3c13bb4704 100644 --- a/compiler/ghc.cabal.in +++ b/compiler/ghc.cabal.in @@ -185,16 +185,15 @@ Library CmmOpt CmmParse CmmProcPoint - CmmSpillReload CmmRewriteAssignments - CmmStackLayout CmmType CmmUtils + CmmLayoutStack MkGraph OldCmm + OldCmmLint OldCmmUtils OldPprCmm - OptimizationFuel PprBase PprC PprCmm @@ -440,6 +439,7 @@ Library Pretty Serialized State + Stream StringBuffer UniqFM UniqSet @@ -473,6 +473,8 @@ Library Vectorise.Env Vectorise.Exp Vectorise + Hoopl.Dataflow + Hoopl Exposed-Modules: AsmCodeGen diff --git a/compiler/ghc.mk b/compiler/ghc.mk index f541841046..1ea6159812 100644 --- a/compiler/ghc.mk +++ b/compiler/ghc.mk @@ -355,7 +355,7 @@ ifeq "$(GhcProfiled)" "YES" # parts of the compiler of interest, and then add further cost centres # as necessary. 
Turn on -auto-all for individual modules like this: -compiler/main/DriverPipeline_HC_OPTS += -auto-all +# compiler/main/DriverPipeline_HC_OPTS += -auto-all compiler/main/GhcMake_HC_OPTS += -auto-all compiler/main/GHC_HC_OPTS += -auto-all diff --git a/compiler/main/CodeOutput.lhs b/compiler/main/CodeOutput.lhs index 24906671cd..e92eb4f34c 100644 --- a/compiler/main/CodeOutput.lhs +++ b/compiler/main/CodeOutput.lhs @@ -15,22 +15,22 @@ import UniqSupply ( mkSplitUniqSupply ) import Finder ( mkStubPaths ) import PprC ( writeCs ) -import CmmLint ( cmmLint ) +import OldCmmLint ( cmmLint ) import Packages import OldCmm ( RawCmmGroup ) import HscTypes import DynFlags import Config import SysTools +import Stream (Stream) +import qualified Stream import ErrUtils import Outputable import Module -import Maybes ( firstJusts ) import SrcLoc import Control.Exception -import Control.Monad import System.Directory import System.FilePath import System.IO @@ -48,19 +48,26 @@ codeOutput :: DynFlags -> ModLocation -> ForeignStubs -> [PackageId] - -> [RawCmmGroup] -- Compiled C-- + -> Stream IO RawCmmGroup () -- Compiled C-- -> IO (Bool{-stub_h_exists-}, Maybe FilePath{-stub_c_exists-}) -codeOutput dflags this_mod location foreign_stubs pkg_deps flat_abstractC +codeOutput dflags this_mod location foreign_stubs pkg_deps cmm_stream = - do { when (dopt Opt_DoCmmLinting dflags) $ do + do { + -- Lint each CmmGroup as it goes past + ; let linted_cmm_stream = + if dopt Opt_DoCmmLinting dflags + then Stream.mapM do_lint cmm_stream + else cmm_stream + + do_lint cmm = do { showPass dflags "CmmLint" - ; let lints = map (cmmLint (targetPlatform dflags)) flat_abstractC - ; case firstJusts lints of + ; case cmmLint (targetPlatform dflags) cmm of Just err -> do { log_action dflags dflags SevDump noSrcSpan defaultDumpStyle err ; ghcExit dflags 1 } Nothing -> return () + ; return cmm } ; showPass dflags "CodeOutput" @@ -68,9 +75,9 @@ codeOutput dflags this_mod location foreign_stubs pkg_deps 
flat_abstractC ; stubs_exist <- outputForeignStubs dflags this_mod location foreign_stubs ; case hscTarget dflags of { HscInterpreted -> return (); - HscAsm -> outputAsm dflags filenm flat_abstractC; - HscC -> outputC dflags filenm flat_abstractC pkg_deps; - HscLlvm -> outputLlvm dflags filenm flat_abstractC; + HscAsm -> outputAsm dflags filenm linted_cmm_stream; + HscC -> outputC dflags filenm linted_cmm_stream pkg_deps; + HscLlvm -> outputLlvm dflags filenm linted_cmm_stream; HscNothing -> panic "codeOutput: HscNothing" } ; return stubs_exist @@ -90,12 +97,16 @@ doOutput filenm io_action = bracket (openFile filenm WriteMode) hClose io_action \begin{code} outputC :: DynFlags -> FilePath - -> [RawCmmGroup] + -> Stream IO RawCmmGroup () -> [PackageId] -> IO () -outputC dflags filenm flat_absC packages +outputC dflags filenm cmm_stream packages = do + -- ToDo: make the C backend consume the C-- incrementally, by + -- pushing the cmm_stream inside (c.f. nativeCodeGen) + rawcmms <- Stream.collect cmm_stream + -- figure out which header files to #include in the generated .hc file: -- -- * extra_includes from packages @@ -117,7 +128,7 @@ outputC dflags filenm flat_absC packages doOutput filenm $ \ h -> do hPutStr h ("/* GHC_PACKAGES " ++ unwords pkg_names ++ "\n*/\n") hPutStr h cc_injects - writeCs dflags h flat_absC + writeCs dflags h rawcmms \end{code} @@ -128,14 +139,14 @@ outputC dflags filenm flat_absC packages %************************************************************************ \begin{code} -outputAsm :: DynFlags -> FilePath -> [RawCmmGroup] -> IO () -outputAsm dflags filenm flat_absC +outputAsm :: DynFlags -> FilePath -> Stream IO RawCmmGroup () -> IO () +outputAsm dflags filenm cmm_stream | cGhcWithNativeCodeGen == "YES" = do ncg_uniqs <- mkSplitUniqSupply 'n' {-# SCC "OutputAsm" #-} doOutput filenm $ \f -> {-# SCC "NativeCodeGen" #-} - nativeCodeGen dflags f ncg_uniqs flat_absC + nativeCodeGen dflags f ncg_uniqs cmm_stream | otherwise = panic "This compiler 
was built without a native code generator" @@ -149,12 +160,17 @@ outputAsm dflags filenm flat_absC %************************************************************************ \begin{code} -outputLlvm :: DynFlags -> FilePath -> [RawCmmGroup] -> IO () -outputLlvm dflags filenm flat_absC +outputLlvm :: DynFlags -> FilePath -> Stream IO RawCmmGroup () -> IO () +outputLlvm dflags filenm cmm_stream = do ncg_uniqs <- mkSplitUniqSupply 'n' + + -- ToDo: make the LLVM backend consume the C-- incrementally, + -- by pushing the cmm_stream inside (c.f. nativeCodeGen) + rawcmms <- Stream.collect cmm_stream + {-# SCC "llvm_output" #-} doOutput filenm $ \f -> {-# SCC "llvm_CodeGen" #-} - llvmCodeGen dflags f ncg_uniqs flat_absC + llvmCodeGen dflags f ncg_uniqs rawcmms \end{code} @@ -240,4 +256,3 @@ outputForeignStubs_help fname doc_str header footer = do writeFile fname (header ++ doc_str ++ '\n':footer ++ "\n") return True \end{code} - diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs index 53aa39f04e..60b6e82bb7 100644 --- a/compiler/main/DynFlags.hs +++ b/compiler/main/DynFlags.hs @@ -164,9 +164,9 @@ data DynFlag = Opt_D_dump_cmm | Opt_D_dump_raw_cmm | Opt_D_dump_cmmz - | Opt_D_dump_cmmz_pretty -- All of the cmmz subflags (there are a lot!) 
Automatically -- enabled if you run -ddump-cmmz + | Opt_D_dump_cmmz_cfg | Opt_D_dump_cmmz_cbe | Opt_D_dump_cmmz_proc | Opt_D_dump_cmmz_spills @@ -1675,7 +1675,7 @@ dynamic_flags = [ , Flag "ddump-cmm" (setDumpFlag Opt_D_dump_cmm) , Flag "ddump-raw-cmm" (setDumpFlag Opt_D_dump_raw_cmm) , Flag "ddump-cmmz" (setDumpFlag Opt_D_dump_cmmz) - , Flag "ddump-cmmz-pretty" (setDumpFlag Opt_D_dump_cmmz_pretty) + , Flag "ddump-cmmz-cfg" (setDumpFlag Opt_D_dump_cmmz_cbe) , Flag "ddump-cmmz-cbe" (setDumpFlag Opt_D_dump_cmmz_cbe) , Flag "ddump-cmmz-spills" (setDumpFlag Opt_D_dump_cmmz_spills) , Flag "ddump-cmmz-proc" (setDumpFlag Opt_D_dump_cmmz_proc) diff --git a/compiler/main/HscMain.hs b/compiler/main/HscMain.hs index 562332d52a..0b03e83029 100644 --- a/compiler/main/HscMain.hs +++ b/compiler/main/HscMain.hs @@ -119,13 +119,12 @@ import TyCon import Name import SimplStg ( stg2stg ) import CodeGen ( codeGen ) -import OldCmm as Old ( CmmGroup ) -import PprCmm ( pprCmms ) +import qualified OldCmm as Old +import qualified Cmm as New import CmmParse ( parseCmmFile ) import CmmBuildInfoTables import CmmPipeline import CmmInfo -import OptimizationFuel ( initOptFuelState ) import CmmCvt import CodeOutput import NameEnv ( emptyNameEnv ) @@ -147,6 +146,9 @@ import UniqFM ( emptyUFM ) import UniqSupply ( initUs_ ) import Bag import Exception +import qualified Stream +import Stream (Stream) + import Util import Data.List @@ -172,7 +174,6 @@ newHscEnv dflags = do nc_var <- newIORef (initNameCache us knownKeyNames) fc_var <- newIORef emptyUFM mlc_var <- newIORef emptyModuleEnv - optFuel <- initOptFuelState return HscEnv { hsc_dflags = dflags, hsc_targets = [], hsc_mod_graph = [], @@ -182,7 +183,6 @@ newHscEnv dflags = do hsc_NC = nc_var, hsc_FC = fc_var, hsc_MLC = mlc_var, - hsc_OptFuel = optFuel, hsc_type_env_var = Nothing } @@ -1276,20 +1276,27 @@ hscGenHardCode cgguts mod_summary = do cost_centre_info stg_binds hpc_info else {-# SCC "CodeGen" #-} - codeGen dflags this_mod data_tycons - 
cost_centre_info - stg_binds hpc_info + return (codeGen dflags this_mod data_tycons + cost_centre_info + stg_binds hpc_info) + ------------------ Code output ----------------------- - rawcmms <- {-# SCC "cmmToRawCmm" #-} + rawcmms0 <- {-# SCC "cmmToRawCmm" #-} cmmToRawCmm platform cmms - dumpIfSet_dyn dflags Opt_D_dump_raw_cmm "Raw Cmm" (ppr rawcmms) + + let dump a = do dumpIfSet_dyn dflags Opt_D_dump_raw_cmm "Raw Cmm" + (ppr a) + return a + rawcmms1 = Stream.mapM dump rawcmms0 + (_stub_h_exists, stub_c_exists) <- {-# SCC "codeOutput" #-} codeOutput dflags this_mod location foreign_stubs - dependencies rawcmms + dependencies rawcmms1 return stub_c_exists + hscInteractive :: (ModIface, ModDetails, CgGuts) -> ModSummary -> Hsc (InteractiveStatus, ModIface, ModDetails) @@ -1335,7 +1342,7 @@ hscCompileCmmFile hsc_env filename = runHsc hsc_env $ do let dflags = hsc_dflags hsc_env cmm <- ioMsgMaybe $ parseCmmFile dflags filename liftIO $ do - rawCmms <- cmmToRawCmm (targetPlatform dflags) [cmm] + rawCmms <- cmmToRawCmm (targetPlatform dflags) (Stream.yield cmm) _ <- codeOutput dflags no_mod no_loc NoStubs [] rawCmms return () where @@ -1350,24 +1357,52 @@ tryNewCodeGen :: HscEnv -> Module -> [TyCon] -> CollectedCCs -> [(StgBinding,[(Id,[Id])])] -> HpcInfo - -> IO [Old.CmmGroup] + -> IO (Stream IO Old.CmmGroup ()) + -- Note we produce a 'Stream' of CmmGroups, so that the + -- backend can be run incrementally. Otherwise it generates all + -- the C-- up front, which has a significant space cost. 
tryNewCodeGen hsc_env this_mod data_tycons cost_centre_info stg_binds hpc_info = do let dflags = hsc_dflags hsc_env - prog <- StgCmm.codeGen dflags this_mod data_tycons + + let cmm_stream :: Stream IO New.CmmGroup () + cmm_stream = {-# SCC "StgCmm" #-} + StgCmm.codeGen dflags this_mod data_tycons cost_centre_info stg_binds hpc_info - dumpIfSet_dyn dflags Opt_D_dump_cmmz "Cmm produced by new codegen" - (pprCmms prog) + + -- codegen consumes a stream of CmmGroup, and produces a new + -- stream of CmmGroup (not necessarily synchronised: one + -- CmmGroup on input may produce many CmmGroups on output due + -- to proc-point splitting). + + let dump1 a = do dumpIfSet_dyn dflags Opt_D_dump_cmmz + "Cmm produced by new codegen" (ppr a) + return a + + ppr_stream1 = Stream.mapM dump1 cmm_stream -- We are building a single SRT for the entire module, so -- we must thread it through all the procedures as we cps-convert them. us <- mkSplitUniqSupply 'S' let initTopSRT = initUs_ us emptySRT - (topSRT, prog) <- foldM (cmmPipeline hsc_env) (initTopSRT, []) prog - let prog' = map cmmOfZgraph (srtToData topSRT : prog) - dumpIfSet_dyn dflags Opt_D_dump_cmmz "Output Cmm" (ppr prog') - return prog' + let run_pipeline topSRT cmmgroup = do + (topSRT, cmmgroup) <- cmmPipeline hsc_env topSRT cmmgroup + return (topSRT,cmmOfZgraph cmmgroup) + + let pipeline_stream = {-# SCC "cmmPipeline" #-} do + topSRT <- Stream.mapAccumL run_pipeline initTopSRT ppr_stream1 + Stream.yield (cmmOfZgraph (srtToData topSRT)) + + let + dump2 a = do dumpIfSet_dyn dflags Opt_D_dump_cmmz "Output Cmm" $ ppr a + return a + + ppr_stream2 = Stream.mapM dump2 pipeline_stream + + return ppr_stream2 + + myCoreToStg :: DynFlags -> Module -> CoreProgram -> IO ( [(StgBinding,[(Id,[Id])])] -- output program diff --git a/compiler/main/HscTypes.lhs b/compiler/main/HscTypes.lhs index 156f081d3e..adaa9a3171 100644 --- a/compiler/main/HscTypes.lhs +++ b/compiler/main/HscTypes.lhs @@ -142,7 +142,6 @@ import Packages hiding ( 
Version(..) ) import DynFlags import DriverPhases import BasicTypes -import OptimizationFuel ( OptFuelState ) import IfaceSyn import CoreSyn ( CoreRule, CoreVect ) import Maybes @@ -318,11 +317,6 @@ data HscEnv -- ^ This caches the location of modules, so we don't have to -- search the filesystem multiple times. See also 'hsc_FC'. - hsc_OptFuel :: OptFuelState, - -- ^ Settings to control the use of \"optimization fuel\": - -- by limiting the number of transformations, - -- we can use binary search to help find compiler bugs. - hsc_type_env_var :: Maybe (Module, IORef TypeEnv) -- ^ Used for one-shot compilation only, to initialise -- the 'IfGblEnv'. See 'TcRnTypes.tcg_type_env_var' for diff --git a/compiler/nativeGen/AsmCodeGen.lhs b/compiler/nativeGen/AsmCodeGen.lhs index 51adf46005..4b49fe304e 100644 --- a/compiler/nativeGen/AsmCodeGen.lhs +++ b/compiler/nativeGen/AsmCodeGen.lhs @@ -71,6 +71,8 @@ import FastString import UniqSet import ErrUtils import Module +import Stream (Stream) +import qualified Stream -- DEBUGGING ONLY --import OrdList @@ -147,7 +149,7 @@ data NcgImpl statics instr jumpDest = NcgImpl { } -------------------- -nativeCodeGen :: DynFlags -> Handle -> UniqSupply -> [RawCmmGroup] -> IO () +nativeCodeGen :: DynFlags -> Handle -> UniqSupply -> Stream IO RawCmmGroup () -> IO () nativeCodeGen dflags h us cmms = let platform = targetPlatform dflags nCG' :: (Outputable statics, Outputable instr, Instruction instr) => NcgImpl statics instr jumpDest -> IO () @@ -209,16 +211,16 @@ nativeCodeGen dflags h us cmms nativeCodeGen' :: (Outputable statics, Outputable instr, Instruction instr) => DynFlags -> NcgImpl statics instr jumpDest - -> Handle -> UniqSupply -> [RawCmmGroup] -> IO () + -> Handle -> UniqSupply -> Stream IO RawCmmGroup () -> IO () nativeCodeGen' dflags ncgImpl h us cmms = do let platform = targetPlatform dflags - split_cmms = concat $ map add_split cmms + split_cmms = Stream.map add_split cmms -- BufHandle is a performance hack. 
We could hide it inside -- Pretty if it weren't for the fact that we do lots of little -- printDocs here (in order to do codegen in constant space). bufh <- newBufHandle h - (imports, prof) <- cmmNativeGens dflags ncgImpl bufh us split_cmms [] [] 0 + (imports, prof) <- cmmNativeGenStream dflags ncgImpl bufh us split_cmms [] [] 0 bFlush bufh let (native, colorStats, linearStats) @@ -272,6 +274,34 @@ nativeCodeGen' dflags ncgImpl h us cmms split_marker = CmmProc Nothing mkSplitMarkerLabel (ListGraph []) +cmmNativeGenStream :: (Outputable statics, Outputable instr, Instruction instr) + => DynFlags + -> NcgImpl statics instr jumpDest + -> BufHandle + -> UniqSupply + -> Stream IO RawCmmGroup () + -> [[CLabel]] + -> [ ([NatCmmDecl statics instr], + Maybe [Color.RegAllocStats statics instr], + Maybe [Linear.RegAllocStats]) ] + -> Int + -> IO ( [[CLabel]], + [([NatCmmDecl statics instr], + Maybe [Color.RegAllocStats statics instr], + Maybe [Linear.RegAllocStats])] ) + +cmmNativeGenStream dflags ncgImpl h us cmm_stream impAcc profAcc count + = do + r <- Stream.runStream cmm_stream + case r of + Left () -> return (reverse impAcc, reverse profAcc) + Right (cmms, cmm_stream') -> do + (impAcc,profAcc,us') <- cmmNativeGens dflags ncgImpl h us cmms + impAcc profAcc count + cmmNativeGenStream dflags ncgImpl h us' cmm_stream' + impAcc profAcc count + + -- | Do native code generation on all these cmms. 
-- cmmNativeGens :: (Outputable statics, Outputable instr, Instruction instr) @@ -287,11 +317,12 @@ cmmNativeGens :: (Outputable statics, Outputable instr, Instruction instr) -> Int -> IO ( [[CLabel]], [([NatCmmDecl statics instr], - Maybe [Color.RegAllocStats statics instr], - Maybe [Linear.RegAllocStats])] ) + Maybe [Color.RegAllocStats statics instr], + Maybe [Linear.RegAllocStats])], + UniqSupply ) -cmmNativeGens _ _ _ _ [] impAcc profAcc _ - = return (reverse impAcc, reverse profAcc) +cmmNativeGens _ _ _ us [] impAcc profAcc _ + = return (impAcc,profAcc,us) cmmNativeGens dflags ncgImpl h us (cmm : cmms) impAcc profAcc count = do @@ -817,7 +848,11 @@ Ideas for other things we could do (put these in Hoopl please!): cmmToCmm :: DynFlags -> RawCmmDecl -> (RawCmmDecl, [CLabel]) cmmToCmm _ top@(CmmData _ _) = (top, []) cmmToCmm dflags (CmmProc info lbl (ListGraph blocks)) = runCmmOpt dflags $ do - blocks' <- mapM cmmBlockConFold (cmmMiniInline dflags (cmmEliminateDeadBlocks blocks)) + let reachable_blocks | dopt Opt_TryNewCodeGen dflags = blocks + | otherwise = cmmEliminateDeadBlocks blocks + -- The new codegen path has already eliminated unreachable blocks by now + + blocks' <- mapM cmmBlockConFold (cmmMiniInline dflags reachable_blocks) return $ CmmProc info lbl (ListGraph blocks') newtype CmmOptM a = CmmOptM (([CLabel], DynFlags) -> (# a, [CLabel] #)) @@ -911,7 +946,8 @@ cmmExprConFold referenceKind expr = do dflags <- getDynFlags -- Skip constant folding if new code generator is running -- (this optimization is done in Hoopl) - let expr' = if dopt Opt_TryNewCodeGen dflags + -- SDM: re-enabled for now, while cmmRewriteAssignments is turned off + let expr' = if False -- dopt Opt_TryNewCodeGen dflags then expr else cmmExprCon (targetPlatform dflags) expr cmmExprNative referenceKind expr' diff --git a/compiler/simplStg/SimplStg.lhs b/compiler/simplStg/SimplStg.lhs index d5024ab2e0..635df3ce41 100644 --- a/compiler/simplStg/SimplStg.lhs +++ 
b/compiler/simplStg/SimplStg.lhs @@ -55,7 +55,10 @@ stg2stg dflags module_name binds ; (processed_binds, _, cost_centres) <- foldl_mn do_stg_pass (binds', us0, ccs) (getStgToDo dflags) - ; let srt_binds = computeSRTs (unarise us1 processed_binds) + ; let un_binds = unarise us1 processed_binds + ; let srt_binds + | dopt Opt_TryNewCodeGen dflags = zip un_binds (repeat []) + | otherwise = computeSRTs un_binds ; dumpIfSet_dyn dflags Opt_D_dump_stg "STG syntax:" (pprStgBindingsWithSRTs srt_binds) diff --git a/compiler/utils/OrdList.lhs b/compiler/utils/OrdList.lhs index a56cdf3f58..7e3b24a5da 100644 --- a/compiler/utils/OrdList.lhs +++ b/compiler/utils/OrdList.lhs @@ -27,11 +27,14 @@ infixl 5 `snocOL` infixr 5 `consOL` data OrdList a - = Many [a] -- Invariant: non-empty + = None + | One a + | Many [a] -- Invariant: non-empty + | Cons a (OrdList a) + | Snoc (OrdList a) a | Two (OrdList a) -- Invariant: non-empty (OrdList a) -- Invariant: non-empty - | One a - | None + nilOL :: OrdList a isNilOL :: OrdList a -> Bool @@ -44,22 +47,33 @@ concatOL :: [OrdList a] -> OrdList a nilOL = None unitOL as = One as -snocOL None b = One b -snocOL as b = Two as (One b) -consOL a None = One a -consOL a bs = Two (One a) bs +snocOL as b = Snoc as b +consOL a bs = Cons a bs concatOL aas = foldr appOL None aas isNilOL None = True isNilOL _ = False -appOL None bs = bs -appOL as None = as -appOL as bs = Two as bs +None `appOL` b = b +a `appOL` None = a +One a `appOL` b = Cons a b +a `appOL` One b = Snoc a b +a `appOL` b = Two a b + +fromOL :: OrdList a -> [a] +fromOL a = go a [] + where go None acc = acc + go (One a) acc = a : acc + go (Cons a b) acc = a : go b acc + go (Snoc a b) acc = go a (b:acc) + go (Two a b) acc = go a (go b acc) + go (Many xs) acc = xs ++ acc mapOL :: (a -> b) -> OrdList a -> OrdList b mapOL _ None = None mapOL f (One x) = One (f x) +mapOL f (Cons x xs) = Cons (f x) (mapOL f xs) +mapOL f (Snoc xs x) = Snoc (mapOL f xs) (f x) mapOL f (Two x y) = Two (mapOL f x) (mapOL f 
y) mapOL f (Many xs) = Many (map f xs) @@ -69,24 +83,19 @@ instance Functor OrdList where foldrOL :: (a->b->b) -> b -> OrdList a -> b foldrOL _ z None = z foldrOL k z (One x) = k x z +foldrOL k z (Cons x xs) = k x (foldrOL k z xs) +foldrOL k z (Snoc xs x) = foldrOL k (k x z) xs foldrOL k z (Two b1 b2) = foldrOL k (foldrOL k z b2) b1 foldrOL k z (Many xs) = foldr k z xs foldlOL :: (b->a->b) -> b -> OrdList a -> b foldlOL _ z None = z foldlOL k z (One x) = k z x +foldlOL k z (Cons x xs) = foldlOL k (k z x) xs +foldlOL k z (Snoc xs x) = k (foldlOL k z xs) x foldlOL k z (Two b1 b2) = foldlOL k (foldlOL k z b1) b2 foldlOL k z (Many xs) = foldl k z xs -fromOL :: OrdList a -> [a] -fromOL ol - = flat ol [] - where - flat None rest = rest - flat (One x) rest = x:rest - flat (Two a b) rest = flat a (flat b rest) - flat (Many xs) rest = xs ++ rest - toOL :: [a] -> OrdList a toOL [] = None toOL xs = Many xs diff --git a/compiler/utils/Stream.hs b/compiler/utils/Stream.hs new file mode 100644 index 0000000000..2fa76d2345 --- /dev/null +++ b/compiler/utils/Stream.hs @@ -0,0 +1,97 @@ +-- ----------------------------------------------------------------------------- +-- +-- (c) The University of Glasgow 2012 +-- +-- Monadic streams +-- +-- ----------------------------------------------------------------------------- + +module Stream ( + Stream(..), yield, liftIO, + collect, fromList, + Stream.map, Stream.mapM, Stream.mapAccumL + ) where + +-- | +-- @Stream m a b@ is a computation in some Monad @m@ that delivers a sequence +-- of elements of type @a@ followed by a result of type @b@. +-- +-- More concretely, a value of type @Stream m a b@ can be run using @runStream@ +-- in the Monad @m@, and it delivers either +-- +-- * the final result: @Left b@, or +-- * @Right (a,str)@, where @a@ is the next element in the stream, and @str@ +-- is a computation to get the rest of the stream. 
+-- +-- Stream is itself a Monad, and provides an operation 'yield' that +-- produces a new element of the stream. This makes it convenient to turn +-- existing monadic computations into streams. +-- +-- The idea is that Stream is useful for making a monadic computation +-- that produces values from time to time. This can be used for +-- knitting together two complex monadic operations, so that the +-- producer does not have to produce all its values before the +-- consumer starts consuming them. We make the producer into a +-- Stream, and the consumer pulls on the stream each time it wants a +-- new value. +-- +newtype Stream m a b = Stream { runStream :: m (Either b (a, Stream m a b)) } + +instance Monad m => Monad (Stream m a) where + return a = Stream (return (Left a)) + + Stream m >>= k = Stream $ do + r <- m + case r of + Left b -> runStream (k b) + Right (a,str) -> return (Right (a, str >>= k)) + +yield :: Monad m => a -> Stream m a () +yield a = Stream (return (Right (a, return ()))) + +liftIO :: IO a -> Stream IO b a +liftIO io = Stream $ io >>= return . Left + +-- | Turn a Stream into an ordinary list, by demanding all the elements. +collect :: Monad m => Stream m a () -> m [a] +collect str = go str [] + where + go str acc = do + r <- runStream str + case r of + Left () -> return (reverse acc) + Right (a, str') -> go str' (a:acc) + +-- | Turn a list into a 'Stream', by yielding each element in turn. 
+fromList :: Monad m => [a] -> Stream m a () +fromList = mapM_ yield + +-- | Apply a function to each element of a 'Stream', lazilly +map :: Monad m => (a -> b) -> Stream m a x -> Stream m b x +map f str = Stream $ do + r <- runStream str + case r of + Left x -> return (Left x) + Right (a, str') -> return (Right (f a, Stream.map f str')) + +-- | Apply a monadic operation to each element of a 'Stream', lazilly +mapM :: Monad m => (a -> m b) -> Stream m a x -> Stream m b x +mapM f str = Stream $ do + r <- runStream str + case r of + Left x -> return (Left x) + Right (a, str') -> do + b <- f a + return (Right (b, Stream.mapM f str')) + +-- | analog of the list-based 'mapAccumL' on Streams. This is a simple +-- way to map over a Stream while carrying some state around. +mapAccumL :: Monad m => (c -> a -> m (c,b)) -> c -> Stream m a () + -> Stream m b c +mapAccumL f c str = Stream $ do + r <- runStream str + case r of + Left () -> return (Left c) + Right (a, str') -> do + (c',b) <- f c a + return (Right (b, mapAccumL f c' str')) |