diff options
Diffstat (limited to 'compiler')
58 files changed, 4629 insertions, 2425 deletions
| diff --git a/compiler/basicTypes/UniqSupply.lhs b/compiler/basicTypes/UniqSupply.lhs index bb40be7ac1..f3fb28ac21 100644 --- a/compiler/basicTypes/UniqSupply.lhs +++ b/compiler/basicTypes/UniqSupply.lhs @@ -109,7 +109,7 @@ takeUniqFromSupply (MkSplitUniqSupply n s1 _) = (mkUniqueGrimily (iBox n), s1)  \begin{code}  -- | A monad which just gives the ability to obtain 'Unique's -newtype UniqSM result = USM { unUSM :: UniqSupply -> (result, UniqSupply) } +newtype UniqSM result = USM { unUSM :: UniqSupply -> (# result, UniqSupply #) }  instance Monad UniqSM where    return = returnUs @@ -118,21 +118,21 @@ instance Monad UniqSM where  instance Functor UniqSM where      fmap f (USM x) = USM (\us -> case x us of -                                 (r, us') -> (f r, us')) +                                 (# r, us' #) -> (# f r, us' #))  instance Applicative UniqSM where      pure = returnUs      (USM f) <*> (USM x) = USM $ \us -> case f us of -                            (ff, us')  -> case x us' of -                              (xx, us'') -> (ff xx, us'') +                            (# ff, us' #)  -> case x us' of +                              (# xx, us'' #) -> (# ff xx, us'' #)  -- | Run the 'UniqSM' action, returning the final 'UniqSupply'  initUs :: UniqSupply -> UniqSM a -> (a, UniqSupply) -initUs init_us m = case unUSM m init_us of { (r,us) -> (r,us) } +initUs init_us m = case unUSM m init_us of { (# r, us #) -> (r,us) }  -- | Run the 'UniqSM' action, discarding the final 'UniqSupply'  initUs_ :: UniqSupply -> UniqSM a -> a -initUs_ init_us m = case unUSM m init_us of { (r, _) -> r } +initUs_ init_us m = case unUSM m init_us of { (# r, _ #) -> r }  {-# INLINE thenUs #-}  {-# INLINE lazyThenUs #-} @@ -142,27 +142,30 @@ initUs_ init_us m = case unUSM m init_us of { (r, _) -> r }  @thenUs@ is where we split the @UniqSupply@.  \begin{code} +liftUSM :: UniqSM a -> UniqSupply -> (a, UniqSupply) +liftUSM (USM m) us = case m us of (# a, us' #) -> (a, us') +  instance MonadFix UniqSM where -    mfix m = USM (\us -> let (r,us') = unUSM (m r) us in (r,us')) +    mfix m = USM (\us -> let (r,us') = liftUSM (m r) us in (# r,us' #))  thenUs :: UniqSM a -> (a -> UniqSM b) -> UniqSM b  thenUs (USM expr) cont    = USM (\us -> case (expr us) of -                   (result, us') -> unUSM (cont result) us') +                   (# result, us' #) -> unUSM (cont result) us')  lazyThenUs :: UniqSM a -> (a -> UniqSM b) -> UniqSM b -lazyThenUs (USM expr) cont -  = USM (\us -> let (result, us') = expr us in unUSM (cont result) us') +lazyThenUs expr cont +  = USM (\us -> let (result, us') = liftUSM expr us in unUSM (cont result) us')  thenUs_ :: UniqSM a -> UniqSM b -> UniqSM b  thenUs_ (USM expr) (USM cont) -  = USM (\us -> case (expr us) of { (_, us') -> cont us' }) +  = USM (\us -> case (expr us) of { (# _, us' #) -> cont us' })  returnUs :: a -> UniqSM a -returnUs result = USM (\us -> (result, us)) +returnUs result = USM (\us -> (# result, us #))  getUs :: UniqSM UniqSupply -getUs = USM (\us -> splitUniqSupply us) +getUs = USM (\us -> case splitUniqSupply us of (us1,us2) -> (# us1, us2 #))  -- | A monad for generating unique identifiers  class Monad m => MonadUnique m where @@ -177,17 +180,17 @@ class Monad m => MonadUnique m where      getUniquesM = liftM uniqsFromSupply getUniqueSupplyM  instance MonadUnique UniqSM where -    getUniqueSupplyM = USM (\us -> splitUniqSupply us) +    getUniqueSupplyM = getUs      getUniqueM  = getUniqueUs      getUniquesM = getUniquesUs  getUniqueUs :: UniqSM Unique  getUniqueUs = USM (\us -> case splitUniqSupply us of -                          (us1,us2) -> (uniqFromSupply us1, us2)) +                          (us1,us2) -> (# uniqFromSupply us1, us2 #))  getUniquesUs :: UniqSM [Unique]  getUniquesUs = USM (\us -> case splitUniqSupply us of -                           (us1,us2) -> (uniqsFromSupply us1, us2)) +                           (us1,us2) -> (# uniqsFromSupply us1, us2 #))  \end{code}  \begin{code} diff --git a/compiler/cmm/BlockId.hs b/compiler/cmm/BlockId.hs index feeacb553d..d5a8e045bf 100644 --- a/compiler/cmm/BlockId.hs +++ b/compiler/cmm/BlockId.hs @@ -31,15 +31,9 @@ compilation unit in which it appears.  type BlockId = Hoopl.Label -instance Uniquable BlockId where -  getUnique label = getUnique (uniqueToInt $ lblToUnique label) -  mkBlockId :: Unique -> BlockId  mkBlockId unique = uniqueToLbl $ intToUnique $ getKey unique -instance Outputable BlockId where -  ppr label = ppr (getUnique label) -  retPtLbl :: BlockId -> CLabel  retPtLbl label = mkReturnPtLabel $ getUnique label diff --git a/compiler/cmm/Cmm.hs b/compiler/cmm/Cmm.hs index f1318c1dc9..d70fd8c835 100644 --- a/compiler/cmm/Cmm.hs +++ b/compiler/cmm/Cmm.hs @@ -69,8 +69,6 @@ type CmmGroup = GenCmmGroup CmmStatics CmmTopInfo CmmGraph  --   (a) C--, i.e. populated with various C-- constructs  --       (Cmm and RawCmm in OldCmm.hs)  --   (b) Native code, populated with data/instructions --- --- A second family of instances based on Hoopl is in Cmm.hs.  -- | A top-level chunk, abstracted over the type of the contents of  -- the basic blocks (Cmm or instructions are the likely instantiations). @@ -103,11 +101,15 @@ type CmmBwdRewrite f = BwdRewrite FuelUniqSM CmmNode f  --     Info Tables  ----------------------------------------------------------------------------- -data CmmTopInfo   = TopInfo {info_tbl :: CmmInfoTable, stack_info :: CmmStackInfo} +data CmmTopInfo   = TopInfo { info_tbl :: CmmInfoTable +                            , stack_info :: CmmStackInfo }  data CmmStackInfo     = StackInfo { -       arg_space :: ByteOff,            -- XXX: comment? +       arg_space :: ByteOff, +               -- number of bytes of arguments on the stack on entry to the +               -- the proc.  This is filled in by StgCmm.codeGen, and used +               -- by the stack allocator later.         updfr_space :: Maybe ByteOff     -- XXX: comment?     } diff --git a/compiler/cmm/CmmBuildInfoTables.hs b/compiler/cmm/CmmBuildInfoTables.hs index 011947f55d..2378988b68 100644 --- a/compiler/cmm/CmmBuildInfoTables.hs +++ b/compiler/cmm/CmmBuildInfoTables.hs @@ -14,12 +14,10 @@  {-# OPTIONS_GHC -fno-warn-warnings-deprecations #-}  module CmmBuildInfoTables      ( CAFSet, CAFEnv, cafAnal, localCAFInfo, mkTopCAFInfo -    , setInfoTableSRT, setInfoTableStackMap +    , setInfoTableSRT      , TopSRT, emptySRT, srtToData      , bundleCAFs -    , lowerSafeForeignCalls -    , cafTransfers, liveSlotTransfers -    , mkLiveness ) +    , cafTransfers )  where  #include "HsVersions.h" @@ -39,7 +37,6 @@ import Bitmap  import CLabel  import Cmm  import CmmUtils -import CmmStackLayout  import Module  import FastString  import ForeignCall @@ -54,129 +51,25 @@ import Outputable  import SMRep  import UniqSupply -import Compiler.Hoopl +import Hoopl  import Data.Map (Map)  import qualified Data.Map as Map +import Data.Set (Set) +import qualified Data.Set as Set  import qualified FiniteMap as Map +#if __GLASGOW_HASKELL__ < 704 +foldSet = Set.fold +#else +foldSet = Set.foldr +#endif +  ----------------------------------------------------------------  -- Building InfoTables  ----------------------------------------------------------------------- --- Stack Maps - --- Given a block ID, we return a representation of the layout of the stack, --- as suspended before entering that block. --- (For a return site to a function call, the layout does not include the ---  parameter passing area (or the "return address" on the stack)). --- If the element is `Nothing`, then it represents a word of the stack that --- does not contain a live pointer. --- If the element is `Just` a register, then it represents a live spill slot --- for a pointer; we assume that a pointer is the size of a word. --- The head of the list represents the young end of the stack where the infotable --- pointer for the block `Bid` is stored. --- The infotable pointer itself is not included in the list. --- Call areas are also excluded from the list: besides the stuff in the update --- frame (and the return infotable), call areas should never be live across --- function calls. - --- RTS Invariant: All pointers must be word-aligned because each bit in the bitmap --- represents a word. Consequently, we have to be careful when we see a live slot --- on the stack: if we have packed multiple sub-word values into a word, --- we have to make sure that we only mark the entire word as a non-pointer. - --- Also, don't forget to stop at the old end of the stack (oldByte), --- which may differ depending on whether there is an update frame. - -type RegSlotInfo -   = ( Int	  -- Offset from oldest byte of Old area -     , LocalReg   -- The register -     , Int)       -- Width of the register - -live_ptrs :: ByteOff -> BlockEnv SubAreaSet -> AreaMap -> BlockId -> StackLayout -live_ptrs oldByte slotEnv areaMap bid = -  -- pprTrace "live_ptrs for" (ppr bid <+> text (show oldByte ++ "-" ++ show youngByte) <+> -  --                           ppr liveSlots) $ -  -- pprTrace ("stack layout for " ++ show bid ++ ": ") (ppr res) $ res -  res -  where  -        res = mkLiveness (reverse $ slotsToList youngByte liveSlots []) -  -        slotsToList :: Int -> [RegSlotInfo] -> [Maybe LocalReg] -> [Maybe LocalReg] -        -- n starts at youngByte and is decremented down to oldByte -	-- Returns a list, one element per word, with  -	--    (Just r) meaning 'pointer register r is saved here',  -	--    Nothing  meaning 'non-pointer or empty' - -        slotsToList n [] results | n == oldByte = results -- at old end of stack frame - -        slotsToList n (s : _) _  | n == oldByte = -          pprPanic "slot left off live_ptrs" (ppr s <+> ppr oldByte <+> -               ppr n <+> ppr liveSlots <+> ppr youngByte) - -        slotsToList n _ _ | n < oldByte = -          panic "stack slots not allocated on word boundaries?" - -        slotsToList n l@((n', r, w) : rst) results = -          if n == (n' + w) then -- slot's young byte is at n -            ASSERT (not (isPtr r) || -                    (n `mod` wORD_SIZE == 0 && w == wORD_SIZE)) -- ptrs must be aligned -            slotsToList next (dropWhile (non_ptr_younger_than next) rst) -                        (stack_rep : results) -          else slotsToList next (dropWhile (non_ptr_younger_than next) l) -                           (Nothing : results) -          where next = n - wORD_SIZE -                stack_rep = if isPtr r then Just r else Nothing - -        slotsToList n [] results = slotsToList (n - wORD_SIZE) [] (Nothing : results) - -        non_ptr_younger_than next (n', r, w) = -          n' + w > next && -            ASSERT (not (isPtr r)) -            True -        isPtr = isGcPtrType . localRegType - -        liveSlots :: [RegSlotInfo] -        liveSlots = sortBy (\ (off,_,_) (off',_,_) -> compare off' off) -                           (Map.foldRightWithKey (\_ -> flip $ foldl add_slot) [] slots) -                     -        add_slot :: [RegSlotInfo] -> SubArea -> [RegSlotInfo] -        add_slot rst (a@(RegSlot r@(LocalReg _ ty)), off, w) =  -          if off == w && widthInBytes (typeWidth ty) == w then -            (expectJust "add_slot" (Map.lookup a areaMap), r, w) : rst -          else panic "live_ptrs: only part of a variable live at a proc point" -        add_slot rst (CallArea Old, _, _) = -          rst -- the update frame (or return infotable) should be live -              -- would be nice to check that only that part of the callarea is live... -        add_slot rst ((CallArea _), _, _) = -          rst -          -- JD: THIS ISN'T CURRENTLY A CORRECTNESS PROBLEM, BUT WE SHOULD REALLY -          -- MAKE LIVENESS INFO AROUND CALLS MORE PRECISE -- FOR NOW, A 32-BIT -          -- FLOAT PADS OUT TO 64 BITS, BUT WE ASSUME THE WHOLE PARAMETER-PASSING -          -- AREA IS LIVE (WHICH IT ISN'T...).  WE SHOULD JUST PUT THE LIVE AREAS -          -- IN THE CALL NODES, WHICH SHOULD EVENTUALLY HAVE LIVE REGISTER AS WELL, -          -- SO IT'S ALL GOING IN THE SAME DIRECTION. -          -- pprPanic "CallAreas must not be live across function calls" (ppr bid <+> ppr c) - -        slots :: SubAreaSet	 -- The SubAreaSet for 'bid' -        slots = expectJust "live_ptrs slots" $ mapLookup bid slotEnv -        youngByte = expectJust "live_ptrs bid_pos" $ Map.lookup (CallArea (Young bid)) areaMap - --- Construct the stack maps for a procedure _if_ it needs an infotable. --- When wouldn't a procedure need an infotable? If it is a procpoint that --- is not the successor of a call. -setInfoTableStackMap :: SlotEnv -> AreaMap -> CmmDecl -> CmmDecl -setInfoTableStackMap slotEnv areaMap -     t@(CmmProc (TopInfo {stack_info=StackInfo {updfr_space = Just updfr_off}}) _  -                (CmmGraph {g_entry = eid})) -  = updInfo (const (live_ptrs updfr_off slotEnv areaMap eid)) id t -setInfoTableStackMap _ _ t = t -                  - - ------------------------------------------------------------------------  -- SRTs  -- WE NEED AN EXAMPLE HERE. @@ -191,14 +84,14 @@ setInfoTableStackMap _ _ t = t  -----------------------------------------------------------------------  -- Finding the CAFs used by a procedure -type CAFSet = Map CLabel () +type CAFSet = Set CLabel  type CAFEnv = BlockEnv CAFSet  -- First, an analysis to find live CAFs.  cafLattice :: DataflowLattice CAFSet -cafLattice = DataflowLattice "live cafs" Map.empty add -  where add _ (OldFact old) (NewFact new) = case old `Map.union` new of -                                              new' -> (changeIf $ Map.size new' > Map.size old, new') +cafLattice = DataflowLattice "live cafs" Set.empty add +  where add _ (OldFact old) (NewFact new) = case old `Set.union` new of +                                              new' -> (changeIf $ Set.size new' > Set.size old, new')  cafTransfers :: BwdTransfer CmmNode CAFSet  cafTransfers = mkBTransfer3 first middle last @@ -210,11 +103,11 @@ cafTransfers = mkBTransfer3 first middle last                 CmmLit (CmmLabelOff c _)         -> add c set                 CmmLit (CmmLabelDiffOff c1 c2 _) -> add c1 $ add c2 set                 _ -> set -        add l s = if hasCAF l then Map.insert (toClosureLbl l) () s +        add l s = if hasCAF l then Set.insert (toClosureLbl l) s                                else s -cafAnal :: CmmGraph -> FuelUniqSM CAFEnv -cafAnal g = liftM snd $ dataflowPassBwd g [] $ analBwd cafLattice cafTransfers +cafAnal :: Platform -> CmmGraph -> CAFEnv +cafAnal g = dataflowAnalBwd g [] $ analBwd cafLattice cafTransfers  -----------------------------------------------------------------------  -- Building the SRTs @@ -266,13 +159,13 @@ srtToData srt = [CmmData RelocatableReadOnlyData (Statics (lbl srt) tbl)]  buildSRTs :: TopSRT -> Map CLabel CAFSet -> CAFSet ->               FuelUniqSM (TopSRT, Maybe CmmDecl, C_SRT)  buildSRTs topSRT topCAFMap cafs = -  do let liftCAF lbl () z = -- get CAFs for functions without static closures -           case Map.lookup lbl topCAFMap of Just cafs -> z `Map.union` cafs -                                            Nothing   -> Map.insert lbl () z +  do let liftCAF lbl z = -- get CAFs for functions without static closures +           case Map.lookup lbl topCAFMap of Just cafs -> z `Set.union` cafs +                                            Nothing   -> Set.insert lbl z           -- For each label referring to a function f without a static closure,           -- replace it with the CAFs that are reachable from f.           sub_srt topSRT localCafs = -           let cafs = Map.keys (Map.foldRightWithKey liftCAF Map.empty localCafs) +           let cafs = Set.elems (foldSet liftCAF Set.empty localCafs)                 mkSRT topSRT =                   do localSRTs <- procpointSRT (lbl topSRT) (elt_map topSRT) cafs                      return (topSRT, localSRTs) @@ -373,21 +266,21 @@ mkTopCAFInfo localCAFs = foldl addToTop Map.empty g            Map.insert l (flatten env cafset) env          addToTop env (CyclicSCC nodes) =            let (lbls, cafsets) = unzip nodes -              cafset  = lbls `Map.deleteList` foldl Map.union Map.empty cafsets +              cafset  = foldr Set.delete (foldl Set.union Set.empty cafsets) lbls            in foldl (\env l -> Map.insert l (flatten env cafset) env) env lbls -        flatten env cafset = Map.foldRightWithKey (lookup env) Map.empty cafset -        lookup env caf () cafset' = -          case Map.lookup caf env of Just cafs -> Map.foldRightWithKey add cafset' cafs -                                     Nothing -> add caf () cafset' -        add caf () cafset' = Map.insert caf () cafset' +        flatten env cafset = foldSet (lookup env) Set.empty cafset +        lookup env caf cafset' = +          case Map.lookup caf env of Just cafs -> foldSet add cafset' cafs +                                     Nothing -> add caf cafset' +        add caf cafset' = Set.insert caf cafset'          g = stronglyConnCompFromEdgedVertices -              (map (\n@(l, cafs) -> (n, l, Map.keys cafs)) localCAFs) +              (map (\n@(l, cafs) -> (n, l, Set.elems cafs)) localCAFs)  -- Bundle the CAFs used at a procpoint.  bundleCAFs :: CAFEnv -> CmmDecl -> (CAFSet, CmmDecl)  bundleCAFs cafEnv t@(CmmProc _ _ (CmmGraph {g_entry=entry})) =    (expectJust "bundleCAFs" (mapLookup entry cafEnv), t) -bundleCAFs _ t = (Map.empty, t) +bundleCAFs _ t = (Set.empty, t)  -- Construct the SRTs for the given procedure.  setInfoTableSRT :: Map CLabel CAFSet -> TopSRT -> (CAFSet, CmmDecl) -> @@ -418,91 +311,3 @@ updInfoTbl toVars toSrt info_tbl@(CmmInfoTable {})                             StackRep ls -> StackRep (toVars ls)                             other       -> other }  updInfoTbl _ _ t@CmmNonInfoTable = t -   ----------------------------------------------------------------- --- Safe foreign calls: We need to insert the code that suspends and resumes --- the thread before and after a safe foreign call. --- Why do we do this so late in the pipeline? --- Because we need this code to appear without interrruption: you can't rely on the --- value of the stack pointer between the call and resetting the thread state; --- you need to have an infotable on the young end of the stack both when --- suspending the thread and making the foreign call. --- All of this is much easier if we insert the suspend and resume calls here. - --- At the same time, we prepare for the stages of the compiler that --- build the proc points. We have to do this at the same time because --- the safe foreign calls need special treatment with respect to infotables. --- A safe foreign call needs an infotable even though it isn't --- a procpoint. The following datatype captures the information --- needed to generate the infotables along with the Cmm data and procedures. - --- JD: Why not do this while splitting procedures? -lowerSafeForeignCalls :: AreaMap -> CmmDecl -> FuelUniqSM CmmDecl -lowerSafeForeignCalls _ t@(CmmData _ _) = return t -lowerSafeForeignCalls areaMap (CmmProc info l g@(CmmGraph {g_entry=entry})) = do -  let block b mblocks = mblocks >>= lowerSafeCallBlock entry areaMap b -  blocks <- foldGraphBlocks block (return mapEmpty) g -  return $ CmmProc info l (ofBlockMap entry blocks) - --- If the block ends with a safe call in the block, lower it to an unsafe --- call (with appropriate saves and restores before and after). -lowerSafeCallBlock :: BlockId -> AreaMap -> CmmBlock -> BlockEnv CmmBlock -                              -> FuelUniqSM (BlockEnv CmmBlock) -lowerSafeCallBlock entry areaMap b blocks = -  case blockToNodeList b of -    (JustC (CmmEntry id), m, JustC l@(CmmForeignCall {})) -> lowerSafeForeignCall entry areaMap blocks id m l -    _                                                    -> return $ insertBlock b blocks - --- Late in the code generator, we want to insert the code necessary --- to lower a safe foreign call to a sequence of unsafe calls. -lowerSafeForeignCall :: BlockId -> AreaMap -> BlockEnv CmmBlock -> BlockId -> [CmmNode O O] -> CmmNode O C -                                -> FuelUniqSM (BlockEnv CmmBlock) -lowerSafeForeignCall entry areaMap blocks bid m -    (CmmForeignCall {tgt=tgt, res=rs, args=as, succ=succ, updfr = updfr_off, intrbl = intrbl}) = - do let newTemp rep = getUniqueM >>= \u -> return (LocalReg u rep) -    -- Both 'id' and 'new_base' are KindNonPtr because they're -    -- RTS-only objects and are not subject to garbage collection -    id <- newTemp bWord -    new_base <- newTemp (cmmRegType (CmmGlobal BaseReg)) -    let (caller_save, caller_load) = callerSaveVolatileRegs -    load_tso <- newTemp gcWord -- TODO FIXME NOW -    load_stack <- newTemp gcWord -- TODO FIXME NOW -    let (<**>) = (M.<*>) -    let suspendThread = foreignLbl "suspendThread" -        resumeThread  = foreignLbl "resumeThread" -        foreignLbl name = CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit name))) -        suspend = saveThreadState <**> -                  caller_save <**> -                  mkUnsafeCall (ForeignTarget suspendThread -                                (ForeignConvention CCallConv [AddrHint, NoHint] [AddrHint])) -                               [id] [CmmReg (CmmGlobal BaseReg), CmmLit (CmmInt (fromIntegral (fromEnum intrbl)) wordWidth)] -        midCall = mkUnsafeCall tgt rs as -        resume  = mkUnsafeCall (ForeignTarget resumeThread -                                (ForeignConvention CCallConv [AddrHint] [AddrHint])) -                     [new_base] [CmmReg (CmmLocal id)] <**> -                  -- Assign the result to BaseReg: we -                  -- might now have a different Capability! -                  mkAssign (CmmGlobal BaseReg) (CmmReg (CmmLocal new_base)) <**> -                  caller_load <**> -                  loadThreadState load_tso load_stack -        -- We have to save the return value on the stack because its next use -        -- may appear in a different procedure due to procpoint splitting... -        saveRetVals = foldl (<**>) emptyAGraph $ map (M.mkMiddle . spill) rs -        spill r = CmmStore (regSlot r) (CmmReg $ CmmLocal r) -        regSlot r@(LocalReg _ _) = CmmRegOff (CmmGlobal Sp) (sp_off - offset) -          where offset = w + expectJust "lowerForeign" (Map.lookup (RegSlot r) areaMap) -                sp_off = wORD_SIZE + expectJust "lowerForeign" (Map.lookup (CallArea area) areaMap) -                area = if succ == entry then Old else Young succ -                w = widthInBytes $ typeWidth $ localRegType r -        -- Note: The successor must be a procpoint, and we have already split, -        --       so we use a jump, not a branch. -        succLbl = CmmLit (CmmLabel (infoTblLbl succ)) -        jump = CmmCall { cml_target  = succLbl, cml_cont = Nothing -                       , cml_args    = widthInBytes wordWidth ,cml_ret_args = 0 -                       , cml_ret_off = updfr_off} -    graph' <- liftUniq $ labelAGraph bid $ catAGraphs (map M.mkMiddle m) <**> -                                           suspend <**> midCall <**> -                                           resume  <**> saveRetVals <**> M.mkLast jump -    return $ blocks `mapUnion` toBlockMap graph' -lowerSafeForeignCall _ _ _ _ _ _ = panic "lowerSafeForeignCall was passed something else" - diff --git a/compiler/cmm/CmmCommonBlockElim.hs b/compiler/cmm/CmmCommonBlockElim.hs index abbfd01156..4df7304acf 100644 --- a/compiler/cmm/CmmCommonBlockElim.hs +++ b/compiler/cmm/CmmCommonBlockElim.hs @@ -13,22 +13,23 @@ where  import BlockId  import Cmm  import CmmUtils +import CmmContFlowOpt  import Prelude hiding (iterate, succ, unzip, zip) -import Compiler.Hoopl +import Hoopl hiding (ChangeFlag)  import Data.Bits  import qualified Data.List as List  import Data.Word  import FastString -import Control.Monad  import Outputable  import UniqFM -import Unique  my_trace :: String -> SDoc -> a -> a  my_trace = if False then pprTrace else \_ _ a -> a --- Eliminate common blocks: +-- ----------------------------------------------------------------------------- +-- Eliminate common blocks +  -- If two blocks are identical except for the label on the first node,  -- then we can eliminate one of the blocks. To ensure that the semantics  -- of the program are preserved, we have to rewrite each predecessor of the @@ -42,59 +43,50 @@ my_trace = if False then pprTrace else \_ _ a -> a  -- TODO: Use optimization fuel  elimCommonBlocks :: CmmGraph -> CmmGraph -elimCommonBlocks g = -    upd_graph g . snd $ iterate common_block reset hashed_blocks -                                (emptyUFM, mapEmpty) -      where hashed_blocks    = map (\b -> (hash_block b, b)) (reverse (postorderDfs g)) -            reset (_, subst) = (emptyUFM, subst) +elimCommonBlocks g = replaceLabels env g +  where +     env = iterate hashed_blocks mapEmpty +     hashed_blocks = map (\b -> (hash_block b, b)) $ postorderDfs g  -- Iterate over the blocks until convergence -iterate :: (t -> a -> (Bool, t)) -> (t -> t) -> [a] -> t -> t -iterate upd reset blocks state = -  case foldl upd' (False, state) blocks of -    (True,  state') -> iterate upd reset blocks (reset state') -    (False, state') -> state' -  where upd' (b, s) a = let (b', s') = upd s a in (b || b', s') -- lift to track changes +iterate :: [(HashCode,CmmBlock)] -> BlockEnv BlockId -> BlockEnv BlockId +iterate blocks subst = +  case foldl common_block (False, emptyUFM, subst) blocks of +    (changed,  _, subst) +       | changed   -> iterate blocks subst +       | otherwise -> subst + +type State  = (ChangeFlag, UniqFM [CmmBlock], BlockEnv BlockId) + +type ChangeFlag = Bool +type HashCode = Int  -- Try to find a block that is equal (or ``common'') to b. -type BidMap = BlockEnv BlockId -type State  = (UniqFM [CmmBlock], BidMap) -common_block :: (Outputable h, Uniquable h) =>  State -> (h, CmmBlock) -> (Bool, State) -common_block (bmap, subst) (hash, b) = +common_block :: State -> (HashCode, CmmBlock) -> State +common_block (old_change, bmap, subst) (hash, b) =    case lookupUFM bmap hash of      Just bs -> case (List.find (eqBlockBodyWith (eqBid subst) b) bs,                       mapLookup bid subst) of                   (Just b', Nothing)                         -> addSubst b'                   (Just b', Just b'') | entryLabel b' /= b'' -> addSubst b' -                 _ -> (False, (addToUFM bmap hash (b : bs), subst)) -    Nothing -> (False, (addToUFM bmap hash [b], subst)) +                                     | otherwise -> (old_change, bmap, subst) +                 _ -> (old_change, addToUFM bmap hash (b : bs), subst) +    Nothing -> (old_change, addToUFM bmap hash [b], subst)    where bid = entryLabel b -        addSubst b' = my_trace "found new common block" (ppr (entryLabel b')) $ -                      (True, (bmap, mapInsert bid (entryLabel b') subst)) - --- Given the map ``subst'' from BlockId -> BlockId, we rewrite the graph. -upd_graph :: CmmGraph -> BidMap -> CmmGraph -upd_graph g subst = mapGraphNodes (id, middle, last) g -  where middle = mapExpDeep exp -        last l = last' (mapExpDeep exp l) -        last' :: CmmNode O C -> CmmNode O C -        last' (CmmBranch bid)              = CmmBranch $ sub bid -        last' (CmmCondBranch p t f)        = cond p (sub t) (sub f) -        last' (CmmCall t (Just bid) a r o) = CmmCall t (Just $ sub bid) a r o -        last' l@(CmmCall _ Nothing _ _ _)  = l -        last' (CmmForeignCall t r a bid u i) = CmmForeignCall t r a (sub bid) u i -        last' (CmmSwitch e bs)             = CmmSwitch e $ map (liftM sub) bs -        cond p t f = if t == f then CmmBranch t else CmmCondBranch p t f -        exp (CmmStackSlot (CallArea (Young id))       off) = -             CmmStackSlot (CallArea (Young (sub id))) off -        exp (CmmLit (CmmBlock id)) = CmmLit (CmmBlock (sub id)) -        exp e = e -        sub = lookupBid subst +        addSubst b' = my_trace "found new common block" (ppr bid <> char '=' <> ppr (entryLabel b')) $ +                      (True, bmap, mapInsert bid (entryLabel b') subst) + + +-- ----------------------------------------------------------------------------- +-- Hashing and equality on blocks + +-- Below here is mostly boilerplate: hashing blocks ignoring labels, +-- and comparing blocks modulo a label mapping.  -- To speed up comparisons, we hash each basic block modulo labels.  -- The hashing is a bit arbitrary (the numbers are completely arbitrary),  -- but it should be fast and good enough. -hash_block :: CmmBlock -> Int +hash_block :: CmmBlock -> HashCode  hash_block block =    fromIntegral (foldBlockNodesB3 (hash_fst, hash_mid, hash_lst) block (0 :: Word32) .&. (0x7fffffff :: Word32))    -- UniqFM doesn't like negative Ints @@ -103,11 +95,11 @@ hash_block block =          hash_lst m h = hash_node m + h `shiftL` 1          hash_node :: CmmNode O x -> Word32 -        hash_node (CmmComment (FastString u _ _ _ _)) = cvt u +        hash_node (CmmComment (FastString u _ _ _ _)) = 0 -- don't care          hash_node (CmmAssign r e) = hash_reg r + hash_e e          hash_node (CmmStore e e') = hash_e e + hash_e e'          hash_node (CmmUnsafeForeignCall t _ as) = hash_tgt t + hash_list hash_e as -        hash_node (CmmBranch _) = 23 -- would be great to hash these properly +        hash_node (CmmBranch _) = 23 -- NB. ignore the label          hash_node (CmmCondBranch p _ _) = hash_e p          hash_node (CmmCall e _ _ _ _) = hash_e e          hash_node (CmmForeignCall t _ _ _ _ _) = hash_tgt t @@ -143,18 +135,60 @@ hash_block block =  -- Utilities: equality and substitution on the graph.  -- Given a map ``subst'' from BlockID -> BlockID, we define equality. -eqBid :: BidMap -> BlockId -> BlockId -> Bool +eqBid :: BlockEnv BlockId -> BlockId -> BlockId -> Bool  eqBid subst bid bid' = lookupBid subst bid == lookupBid subst bid' -lookupBid :: BidMap -> BlockId -> BlockId +lookupBid :: BlockEnv BlockId -> BlockId -> BlockId  lookupBid subst bid = case mapLookup bid subst of                          Just bid  -> lookupBid subst bid                          Nothing -> bid --- Equality on the body of a block, modulo a function mapping block IDs to block IDs. +-- Middle nodes and expressions can contain BlockIds, in particular in +-- CmmStackSlot and CmmBlock, so we have to use a special equality for +-- these. +-- +eqMiddleWith :: (BlockId -> BlockId -> Bool) +             -> CmmNode O O -> CmmNode O O -> Bool +eqMiddleWith eqBid (CmmComment _) (CmmComment _) = True +eqMiddleWith eqBid (CmmAssign r1 e1) (CmmAssign r2 e2) +  = r1 == r2 && eqExprWith eqBid e1 e2 +eqMiddleWith eqBid (CmmStore l1 r1) (CmmStore l2 r2) +  = eqExprWith eqBid l1 l2 && eqExprWith eqBid r1 r2 +eqMiddleWith eqBid (CmmUnsafeForeignCall t1 r1 a1) +                   (CmmUnsafeForeignCall t2 r2 a2) +  = t1 == t2 && r1 == r2 && and (zipWith (eqExprWith eqBid) a1 a2) +eqMiddleWith _ _ _ = False + +eqExprWith :: (BlockId -> BlockId -> Bool) +           -> CmmExpr -> CmmExpr -> Bool +eqExprWith eqBid = eq + where +  CmmLit l1          `eq` CmmLit l2          = eqLit l1 l2 +  CmmLoad e1 _       `eq` CmmLoad e2 _       = e1 `eq` e2 +  CmmReg r1          `eq` CmmReg r2          = r1==r2 +  CmmRegOff r1 i1    `eq` CmmRegOff r2 i2    = r1==r2 && i1==i2 +  CmmMachOp op1 es1  `eq` CmmMachOp op2 es2  = op1==op2 && es1 `eqs` es2 +  CmmStackSlot a1 i1 `eq` CmmStackSlot a2 i2 = eqArea a1 a2 && i1==i2 +  _e1                `eq` _e2                = False + +  xs `eqs` ys = and (zipWith eq xs ys) + +  eqLit (CmmBlock id1) (CmmBlock id2) = eqBid id1 id2 +  eqLit l1 l2 = l1 == l2 + +  eqArea Old Old = True +  eqArea (Young id1) (Young id2) = eqBid id1 id2 +  eqArea _ _ = False + +-- Equality on the body of a block, modulo a function mapping block +-- IDs to block IDs.  eqBlockBodyWith :: (BlockId -> BlockId -> Bool) -> CmmBlock -> CmmBlock -> Bool -eqBlockBodyWith eqBid block block' = middles == middles' && eqLastWith eqBid last last' -  where (_, middles , JustC last  :: MaybeC C (CmmNode O C)) = blockToNodeList block -        (_, middles', JustC last' :: MaybeC C (CmmNode O C)) = blockToNodeList block' +eqBlockBodyWith eqBid block block' +  = and (zipWith (eqMiddleWith eqBid) (blockToList m) (blockToList m')) && +    eqLastWith eqBid l l' +  where (_,m,l)   = blockSplit block +        (_,m',l') = blockSplit block' + +  eqLastWith :: (BlockId -> BlockId -> Bool) -> CmmNode O C -> CmmNode O C -> Bool  eqLastWith eqBid (CmmBranch bid1) (CmmBranch bid2) = eqBid bid1 bid2 diff --git a/compiler/cmm/CmmContFlowOpt.hs b/compiler/cmm/CmmContFlowOpt.hs index 73ce57e93f..36e7b8ec62 100644 --- a/compiler/cmm/CmmContFlowOpt.hs +++ b/compiler/cmm/CmmContFlowOpt.hs @@ -2,19 +2,19 @@  {-# OPTIONS_GHC -fno-warn-warnings-deprecations -fno-warn-incomplete-patterns #-}  module CmmContFlowOpt -    ( runCmmContFlowOpts -    , removeUnreachableBlocks, replaceBranches +    ( cmmCfgOpts +    , cmmCfgOptsProc +    , removeUnreachableBlocks +    , replaceLabels      )  where  import BlockId  import Cmm  import CmmUtils -import Digraph  import Maybes -import Outputable -import Compiler.Hoopl +import Hoopl  import Control.Monad  import Prelude hiding (succ, unzip, zip) @@ -24,104 +24,158 @@ import Prelude hiding (succ, unzip, zip)  --  ----------------------------------------------------------------------------- -runCmmContFlowOpts :: CmmGroup -> CmmGroup -runCmmContFlowOpts = map (optProc cmmCfgOpts) -  cmmCfgOpts :: CmmGraph -> CmmGraph -cmmCfgOpts = removeUnreachableBlocks . blockConcat . branchChainElim -        -- Here branchChainElim can ultimately be replaced -        -- with a more exciting combination of optimisations +cmmCfgOpts = removeUnreachableBlocks . blockConcat + +cmmCfgOptsProc :: CmmDecl -> CmmDecl +cmmCfgOptsProc = optProc cmmCfgOpts  optProc :: (g -> g) -> GenCmmDecl d h g -> GenCmmDecl d h g  optProc opt (CmmProc info lbl g) = CmmProc info lbl (opt g)  optProc _   top                  = top +  -----------------------------------------------------------------------------  -- --- Branch Chain Elimination +-- Block concatenation  --  ----------------------------------------------------------------------------- --- | Remove any basic block of the form L: goto L', and replace L with --- L' everywhere else, unless L is the successor of a call instruction --- and L' is the entry block. You don't want to set the successor of a --- function call to the entry block because there is no good way to --- store both the infotables for the call and from the callee, while --- putting the stack pointer in a consistent place. +-- This optimisation does two things: +--   - If a block finishes with an unconditional branch, then we may +--     be able to concatenate the block it points to and remove the +--     branch.  We do this either if the destination block is small +--     (e.g. just another branch), or if this is the only jump to +--     this particular destination block. +-- +--   - If a block finishes in a call whose continuation block is a +--     goto, then we can shortcut the destination, making the +--     continuation block the destination of the goto. +-- +-- Both transformations are improved by working from the end of the +-- graph towards the beginning, because we may be able to perform many +-- shortcuts in one go. + + +-- We need to walk over the blocks from the end back to the +-- beginning.  We are going to maintain the "current" graph +-- (BlockEnv CmmBlock) as we go, and also a mapping from BlockId +-- to BlockId, representing continuation labels that we have +-- renamed.  This latter mapping is important because we might +-- shortcut a CmmCall continuation.  For example: +-- +--    Sp[0] = L +--    call g returns to L +-- +--    L: goto M  -- --- JD isn't quite sure when it's safe to share continuations for different --- function calls -- have to think about where the SP will be, --- so we'll table that problem for now by leaving all call successors alone. - -branchChainElim :: CmmGraph -> CmmGraph -branchChainElim g -  | null lone_branch_blocks = g    -- No blocks to remove -  | otherwise               = {- pprTrace "branchChainElim" (ppr forest) $ -} -                              replaceLabels (mapFromList edges) g +--    M: ... +-- +-- So when we shortcut the L block, we need to replace not only +-- the continuation of the call, but also references to L in the +-- code (e.g. the assignment Sp[0] = L).  So we keep track of +-- which labels we have renamed and apply the mapping at the end +-- with replaceLabels. + +blockConcat  :: CmmGraph -> CmmGraph +blockConcat g@CmmGraph { g_entry = entry_id } +  = replaceLabels shortcut_map $ ofBlockMap new_entry new_blocks    where -    blocks = toBlockList g - -    lone_branch_blocks :: [(BlockId, BlockId)] -      -- each (L,K) is a block of the form -      --   L : goto K -    lone_branch_blocks = mapCatMaybes isLoneBranch blocks - -    call_succs = foldl add emptyBlockSet blocks -      where add :: BlockSet -> CmmBlock -> BlockSet -            add succs b = -              case lastNode b of -                (CmmCall _ (Just k) _ _ _) -> setInsert k succs -                (CmmForeignCall {succ=k})  -> setInsert k succs -                _                          -> succs - -    isLoneBranch :: CmmBlock -> Maybe (BlockId, BlockId) -    isLoneBranch block -      | (JustC (CmmEntry id), [], JustC (CmmBranch target)) <- blockToNodeList block -      , not (setMember id call_succs) -      = Just (id,target) -      | otherwise -      = Nothing - -    -- We build a graph from lone_branch_blocks (every node has only -    -- one out edge).  Then we -    --   - topologically sort the graph: if from A we can reach B, -    --     then A occurs before B in the result list. -    --   - depth-first search starting from the nodes in this list. -    --     This gives us a [[node]], in which each list is a dependency -    --     chain. -    --   - for each list [a1,a2,...an] replace branches to ai with an. -    -- -    -- This approach nicely deals with cycles by ignoring them. -    -- Branches in a cycle will be redirected to somewhere in the -    -- cycle, but we don't really care where.  A cycle should be dead code, -    -- and so will be eliminated by removeUnreachableBlocks. -    -- -    fromNode (b,_) = b -    toNode   a     = (a,a) - -    all_block_ids :: LabelSet -    all_block_ids = setFromList (map fst lone_branch_blocks) -                      `setUnion` -                    setFromList (map snd lone_branch_blocks) - -    forest = dfsTopSortG $ graphFromVerticesAndAdjacency nodes lone_branch_blocks -        where nodes = map toNode $ setElems $ all_block_ids - -    edges  = [ (fromNode y, fromNode x) -             | (x:xs) <- map reverse forest, y <- xs ] +     -- we might be able to shortcut the entry BlockId itself +     new_entry +       | Just entry_blk <- mapLookup entry_id new_blocks +       , Just dest      <- canShortcut entry_blk +       = dest +       | otherwise +       = entry_id ----------------------------------------------------------------- +     blocks = postorderDfs g + +     (new_blocks, shortcut_map) = +           foldr maybe_concat (toBlockMap g, mapEmpty) blocks + +     maybe_concat :: CmmBlock +                  -> (BlockEnv CmmBlock, BlockEnv BlockId) +                  -> (BlockEnv CmmBlock, BlockEnv BlockId) +     maybe_concat block unchanged@(blocks, shortcut_map) +        | CmmBranch b' <- last +        , Just blk' <- mapLookup b' blocks +        , shouldConcatWith b' blk' +        = (mapInsert bid (splice head blk') blocks, shortcut_map) + +        -- calls: if we can shortcut the continuation label, then +        -- we must *also* remember to substitute for the label in the +        -- code, because we will push it somewhere. +        | Just b'   <- callContinuation_maybe last +        , Just blk' <- mapLookup b' blocks +        , Just dest <- canShortcut blk' +        = (blocks, mapInsert b' dest shortcut_map) +           -- replaceLabels will substitute dest for b' everywhere, later + +        -- non-calls: see if we can shortcut any of the successors. +        | Nothing <- callContinuation_maybe last +        = ( mapInsert bid (blockJoinTail head shortcut_last) blocks +          , shortcut_map ) + +        | otherwise +        = (blocks, shortcut_map) +        where +          (head, last) = blockSplitTail block +          bid = entryLabel block +          shortcut_last = mapSuccessors shortcut last +          shortcut l = +             case mapLookup l blocks of +               Just b | Just dest <- canShortcut b  -> dest +               _otherwise -> l + +     shouldConcatWith b block +       | num_preds b == 1    = True  -- only one predecessor: go for it +       | okToDuplicate block = True  -- short enough to duplicate +       | otherwise           = False +       where num_preds bid = mapLookup bid backEdges `orElse` 0 + +     canShortcut :: CmmBlock -> Maybe BlockId +     canShortcut block +       | (_, middle, CmmBranch dest) <- blockSplit block +       , isEmptyBlock middle +       = Just dest +       | otherwise +       = Nothing + +     backEdges :: BlockEnv Int -- number of predecessors for each block +     backEdges = mapInsertWith (+) entry_id 1 $ -- add 1 for the entry id +                   mapMap setSize $ predMap blocks + +     splice :: Block CmmNode C O -> CmmBlock -> CmmBlock +     splice head rest = head `blockAppend` snd (blockSplitHead rest) + + +callContinuation_maybe :: CmmNode O C -> Maybe BlockId +callContinuation_maybe (CmmCall { cml_cont = Just b }) = Just b +callContinuation_maybe (CmmForeignCall { succ = b })   = Just b +callContinuation_maybe _ = Nothing + +okToDuplicate :: CmmBlock -> Bool +okToDuplicate block +  = case blockSplit block of (_, m, _) -> isEmptyBlock m +  -- cheap and cheerful; we might expand this in the future to +  -- e.g. spot blocks that represent a single instruction or two + +------------------------------------------------------------------------ +-- Map over the CmmGraph, replacing each label with its mapping in the +-- supplied BlockEnv.  replaceLabels :: BlockEnv BlockId -> CmmGraph -> CmmGraph -replaceLabels env = -  replace_eid . mapGraphNodes1 txnode +replaceLabels env g +  | mapNull env = g +  | otherwise   = replace_eid $ mapGraphNodes1 txnode g     where       replace_eid g = g {g_entry = lookup (g_entry g)}       lookup id = mapLookup id env `orElse` id       txnode :: CmmNode e x -> CmmNode e x       txnode (CmmBranch bid)         = CmmBranch (lookup bid) -     txnode (CmmCondBranch p t f)   = CmmCondBranch (exp p) (lookup t) (lookup f) +     txnode (CmmCondBranch p t f)   = mkCmmCondBranch (exp p) (lookup t) (lookup f)       txnode (CmmSwitch e arms)      = CmmSwitch (exp e) (map (liftM lookup) arms)       txnode (CmmCall t k a res r)   = CmmCall (exp t) (liftM lookup k) a res r       txnode fc@CmmForeignCall{}     = fc{ args = map exp (args fc) @@ -130,90 +184,25 @@ replaceLabels env =       exp :: CmmExpr -> CmmExpr       exp (CmmLit (CmmBlock bid))                = CmmLit (CmmBlock (lookup bid)) -     exp (CmmStackSlot (CallArea (Young id)) i) = CmmStackSlot (CallArea (Young (lookup id))) i +     exp (CmmStackSlot (Young id) i) = CmmStackSlot (Young (lookup id)) i       exp e                                      = e - -replaceBranches :: BlockEnv BlockId -> CmmGraph -> CmmGraph -replaceBranches env g = mapGraphNodes (id, id, last) g -  where -    last :: CmmNode O C -> CmmNode O C -    last (CmmBranch id)          = CmmBranch (lookup id) -    last (CmmCondBranch e ti fi) = CmmCondBranch e (lookup ti) (lookup fi) -    last (CmmSwitch e tbl)       = CmmSwitch e (map (fmap lookup) tbl) -    last l@(CmmCall {})          = l -    last l@(CmmForeignCall {})   = l -    lookup id = fmap lookup (mapLookup id env) `orElse` id -            -- XXX: this is a recursive lookup, it follows chains until the lookup -            -- returns Nothing, at which point we return the last BlockId +mkCmmCondBranch :: CmmExpr -> Label -> Label -> CmmNode O C +mkCmmCondBranch p t f = if t == f then CmmBranch t else CmmCondBranch p t f  ----------------------------------------------------------------  -- Build a map from a block to its set of predecessors. Very useful. +  predMap :: [CmmBlock] -> BlockEnv BlockSet  predMap blocks = foldr add_preds mapEmpty blocks -- find the back edges    where add_preds block env = foldl (add (entryLabel block)) env (successors block)          add bid env b' =            mapInsert b' (setInsert bid (mapLookup b' env `orElse` setEmpty)) env ------------------------------------------------------------------------------ --- --- Block concatenation --- ------------------------------------------------------------------------------ - --- If a block B branches to a label L, L is not the entry block, --- and L has no other predecessors, --- then we can splice the block starting with L onto the end of B. --- Order matters, so we work bottom up (reverse postorder DFS). --- This optimization can be inhibited by unreachable blocks, but --- the reverse postorder DFS returns only reachable blocks. --- --- To ensure correctness, we have to make sure that the BlockId of the block --- we are about to eliminate is not named in another instruction. --- --- Note: This optimization does _not_ subsume branch chain elimination. - -blockConcat  :: CmmGraph -> CmmGraph -blockConcat g@(CmmGraph {g_entry=eid}) = -  replaceLabels concatMap $ ofBlockMap (g_entry g) blocks' -  where -     blocks = postorderDfs g - -     (blocks', concatMap) = -           foldr maybe_concat (toBlockMap g, mapEmpty) $ blocks - -     maybe_concat :: CmmBlock -> (LabelMap CmmBlock, LabelMap Label) -> (LabelMap CmmBlock, LabelMap Label) -     maybe_concat b unchanged@(blocks', concatMap) = -       let bid = entryLabel b -       in case blockToNodeList b of -            (JustC h, m, JustC (CmmBranch b')) -> -               if canConcatWith b' then -                 (mapInsert bid (splice blocks' h m b') blocks', -                  mapInsert b' bid concatMap) -               else unchanged -            _ -> unchanged - -     num_preds bid = liftM setSize (mapLookup bid backEdges) `orElse` 0 - -     canConcatWith b' = b' /= eid && num_preds b' == 1 - -     backEdges = predMap blocks - -     splice :: forall map n e x. -               IsMap map => -               map (Block n e x) -> n C O -> [n O O] -> KeyOf map -> Block n C x -     splice blocks' h m bid' = -          case mapLookup bid' blocks' of -            Nothing -> panic "unknown successor block" -            Just block | (_, m', l') <- blockToNodeList block -                -> blockOfNodeList (JustC h, (m ++ m'), l') -  -----------------------------------------------------------------------------  --  -- Removing unreachable blocks --- ------------------------------------------------------------------------------  removeUnreachableBlocks :: CmmGraph -> CmmGraph  removeUnreachableBlocks g diff --git a/compiler/cmm/CmmCvt.hs b/compiler/cmm/CmmCvt.hs index 80c6079aac..e72eee041c 100644 --- a/compiler/cmm/CmmCvt.hs +++ b/compiler/cmm/CmmCvt.hs @@ -12,29 +12,25 @@ import CmmUtils  import qualified OldCmm as Old  import OldPprCmm () -import Compiler.Hoopl hiding ((<*>), mkLabel, mkBranch) +import Hoopl hiding ((<*>), mkLabel, mkBranch)  import Data.Maybe  import Maybes  import Outputable  cmmOfZgraph :: CmmGroup -> Old.CmmGroup  cmmOfZgraph tops = map mapTop tops -  where mapTop (CmmProc h l g) = CmmProc (Old.CmmInfo Nothing Nothing (info_tbl h)) l (ofZgraph g) +  where mapTop (CmmProc h l g) = CmmProc (info_tbl h) l (ofZgraph g)          mapTop (CmmData s ds) = CmmData s ds  data ValueDirection = Arguments | Results -add_hints :: Convention -> ValueDirection -> [a] -> [Old.CmmHinted a] +add_hints :: ForeignTarget -> ValueDirection -> [a] -> [Old.CmmHinted a]  add_hints conv vd args = zipWith Old.CmmHinted args (get_hints conv vd) -get_hints :: Convention -> ValueDirection -> [ForeignHint] -get_hints (Foreign (ForeignConvention _ hints _)) Arguments = hints -get_hints (Foreign (ForeignConvention _ _ hints)) Results   = hints -get_hints _other_conv                             _vd       = repeat NoHint - -get_conv :: ForeignTarget -> Convention -get_conv (PrimTarget _)       = NativeNodeCall -- JD: SUSPICIOUS -get_conv (ForeignTarget _ fc) = Foreign fc +get_hints :: ForeignTarget -> ValueDirection -> [ForeignHint] +get_hints (ForeignTarget _ (ForeignConvention _ hints _)) Arguments = hints +get_hints (ForeignTarget _ (ForeignConvention _ _ hints)) Results   = hints +get_hints (PrimTarget _) _vd = repeat NoHint  cmm_target :: ForeignTarget -> Old.CmmCallTarget  cmm_target (PrimTarget op) = Old.CmmPrim op Nothing @@ -89,8 +85,8 @@ ofZgraph g = Old.ListGraph $ mapMaybe convert_block $ postorderDfs g                              CmmUnsafeForeignCall (PrimTarget MO_Touch) _ _ -> Old.CmmNop                              CmmUnsafeForeignCall target ress args          ->                                 Old.CmmCall (cmm_target target) -                                          (add_hints (get_conv target) Results   ress) -                                          (add_hints (get_conv target) Arguments args) +                                          (add_hints target Results   ress) +                                          (add_hints target Arguments args)                                            Old.CmmMayReturn                    last :: CmmNode O C -> () -> [Old.CmmStmt] diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs index 6eb91e89ba..939d4b7ca9 100644 --- a/compiler/cmm/CmmExpr.hs +++ b/compiler/cmm/CmmExpr.hs @@ -14,11 +14,11 @@ module CmmExpr      , GlobalReg(..), globalRegType, spReg, hpReg, spLimReg, nodeReg, node, baseReg      , VGcPtr(..), vgcFlag 	-- Temporary!      , DefinerOfLocalRegs, UserOfLocalRegs, foldRegsDefd, foldRegsUsed, filterRegsUsed -    , DefinerOfSlots, UserOfSlots, foldSlotsDefd, foldSlotsUsed      , RegSet, emptyRegSet, elemRegSet, extendRegSet, deleteFromRegSet, mkRegSet -            , plusRegSet, minusRegSet, timesRegSet -    , regUsedIn, regSlot -    , Area(..), AreaId(..), SubArea, SubAreaSet, AreaMap, isStackSlotOf +            , plusRegSet, minusRegSet, timesRegSet, sizeRegSet, nullRegSet +            , regSetToList +    , regUsedIn +    , Area(..)      , module CmmMachOp      , module CmmType      ) @@ -31,9 +31,10 @@ import CmmMachOp  import BlockId  import CLabel  import Unique -import UniqSet  import Data.Map (Map) +import Data.Set (Set) +import qualified Data.Set as Set  -----------------------------------------------------------------------------  --		CmmExpr @@ -42,11 +43,12 @@ import Data.Map (Map)  data CmmExpr    = CmmLit CmmLit               -- Literal -  | CmmLoad CmmExpr CmmType     -- Read memory location -  | CmmReg CmmReg		-- Contents of register +  | CmmLoad !CmmExpr !CmmType   -- Read memory location +  | CmmReg !CmmReg              -- Contents of register    | CmmMachOp MachOp [CmmExpr]  -- Machine operation (+, -, *, etc.) -  | CmmStackSlot Area Int       -- addressing expression of a stack slot -  | CmmRegOff CmmReg Int	 +  | CmmStackSlot Area {-# UNPACK #-} !Int +                                -- addressing expression of a stack slot +  | CmmRegOff !CmmReg Int  	-- CmmRegOff reg i  	--        ** is shorthand only, meaning **  	-- CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt (fromIntegral i) rep)] @@ -62,20 +64,16 @@ instance Eq CmmExpr where	-- Equality ignores the types    _e1               == _e2               = False  data CmmReg  -  = CmmLocal  LocalReg +  = CmmLocal  {-# UNPACK #-} !LocalReg    | CmmGlobal GlobalReg    deriving( Eq, Ord )  -- | A stack area is either the stack slot where a variable is spilled  -- or the stack space where function arguments and results are passed.  data Area -  = RegSlot  LocalReg -  | CallArea AreaId -  deriving (Eq, Ord) - -data AreaId    = Old            -- See Note [Old Area] -  | Young BlockId +  | Young {-# UNPACK #-} !BlockId  -- Invariant: must be a continuation BlockId +                   -- See Note [Continuation BlockId] in CmmNode.    deriving (Eq, Ord)  {- Note [Old Area]  @@ -94,15 +92,8 @@ necessarily at the young end of the Old area.  End of note -} -type SubArea    = (Area, Int, Int) -- area, offset, width -type SubAreaSet = Map Area [SubArea] - -type AreaMap    = Map Area Int -     -- Byte offset of the oldest byte of the Area,  -     -- relative to the oldest byte of the Old Area -  data CmmLit -  = CmmInt Integer  Width +  = CmmInt !Integer  Width  	-- Interpretation: the 2's complement representation of the value  	-- is truncated to the specified size.  This is easier than trying  	-- to keep the value within range, because we don't know whether @@ -120,7 +111,11 @@ data CmmLit          -- It is also used inside the NCG during when generating          -- position-independent code.     | CmmLabelDiffOff CLabel CLabel Int   -- label1 - label2 + offset -  | CmmBlock BlockId			-- Code label + +  | CmmBlock {-# UNPACK #-} !BlockId     -- Code label +        -- Invariant: must be a continuation BlockId +        -- See Note [Continuation BlockId] in CmmNode. +    | CmmHighStackMark -- stands for the max stack space used during a procedure    deriving Eq @@ -163,7 +158,7 @@ maybeInvertCmmExpr _ = Nothing  -----------------------------------------------------------------------------  data LocalReg -  = LocalReg !Unique CmmType +  = LocalReg {-# UNPACK #-} !Unique CmmType      -- ^ Parameters:      --   1. Identifier      --   2. Type @@ -189,22 +184,35 @@ localRegType (LocalReg _ rep) = rep  -----------------------------------------------------------------------------  -- | Sets of local registers -type RegSet              =  UniqSet LocalReg + +-- These are used for dataflow facts, and a common operation is taking +-- the union of two RegSets and then asking whether the union is the +-- same as one of the inputs.  UniqSet isn't good here, because +-- sizeUniqSet is O(n) whereas Set.size is O(1), so we use ordinary +-- Sets. + +type RegSet              =  Set LocalReg  emptyRegSet             :: RegSet +nullRegSet              :: RegSet -> Bool  elemRegSet              :: LocalReg -> RegSet -> Bool  extendRegSet            :: RegSet -> LocalReg -> RegSet  deleteFromRegSet        :: RegSet -> LocalReg -> RegSet  mkRegSet                :: [LocalReg] -> RegSet  minusRegSet, plusRegSet, timesRegSet :: RegSet -> RegSet -> RegSet - -emptyRegSet      = emptyUniqSet -elemRegSet       = elementOfUniqSet -extendRegSet     = addOneToUniqSet -deleteFromRegSet = delOneFromUniqSet -mkRegSet         = mkUniqSet -minusRegSet      = minusUniqSet -plusRegSet       = unionUniqSets -timesRegSet      = intersectUniqSets +sizeRegSet              :: RegSet -> Int +regSetToList            :: RegSet -> [LocalReg] + +emptyRegSet      = Set.empty +nullRegSet       = Set.null +elemRegSet       = Set.member +extendRegSet     = flip Set.insert +deleteFromRegSet = flip Set.delete +mkRegSet         = Set.fromList +minusRegSet      = Set.difference +plusRegSet       = Set.union +timesRegSet      = Set.intersection +sizeRegSet       = Set.size +regSetToList     = Set.toList  class UserOfLocalRegs a where    foldRegsUsed :: (b -> LocalReg -> b) -> b -> a -> b @@ -236,7 +244,7 @@ instance DefinerOfLocalRegs LocalReg where      foldRegsDefd f z r = f z r  instance UserOfLocalRegs RegSet where -    foldRegsUsed f = foldUniqSet (flip f) +    foldRegsUsed f = Set.fold (flip f)  instance UserOfLocalRegs CmmExpr where    foldRegsUsed f z e = expr z e @@ -271,49 +279,6 @@ reg `regUsedIn` CmmMachOp _ es   = any (reg `regUsedIn`) es  _   `regUsedIn` CmmStackSlot _ _ = False  ----------------------------------------------------------------------------- ---    Stack slots ------------------------------------------------------------------------------ - -isStackSlotOf :: CmmExpr -> LocalReg -> Bool -isStackSlotOf (CmmStackSlot (RegSlot r) _) r' = r == r' -isStackSlotOf _ _ = False - -regSlot :: LocalReg -> CmmExpr -regSlot r = CmmStackSlot (RegSlot r) (widthInBytes $ typeWidth $ localRegType r) - ------------------------------------------------------------------------------ ---    Stack slot use information for expressions and other types [_$_] ------------------------------------------------------------------------------ - --- Fold over the area, the offset into the area, and the width of the subarea. -class UserOfSlots a where -  foldSlotsUsed :: (b -> SubArea -> b) -> b -> a -> b - -class DefinerOfSlots a where -  foldSlotsDefd :: (b -> SubArea -> b) -> b -> a -> b - -instance UserOfSlots CmmExpr where -  foldSlotsUsed f z e = expr z e -    where expr z (CmmLit _)          = z -          expr z (CmmLoad (CmmStackSlot a i) ty) = f z (a, i, widthInBytes $ typeWidth ty) -          expr z (CmmLoad addr _)    = foldSlotsUsed f z addr -          expr z (CmmReg _)          = z -          expr z (CmmMachOp _ exprs) = foldSlotsUsed f z exprs -          expr z (CmmRegOff _ _)     = z -          expr z (CmmStackSlot _ _)  = z - -instance UserOfSlots a => UserOfSlots [a] where -  foldSlotsUsed _ set [] = set -  foldSlotsUsed f set (x:xs) = foldSlotsUsed f (foldSlotsUsed f set x) xs - -instance DefinerOfSlots a => DefinerOfSlots [a] where -  foldSlotsDefd _ set [] = set -  foldSlotsDefd f set (x:xs) = foldSlotsDefd f (foldSlotsDefd f set x) xs - -instance DefinerOfSlots SubArea where -    foldSlotsDefd f z a = f z a - ------------------------------------------------------------------------------  --		Global STG registers  ----------------------------------------------------------------------------- diff --git a/compiler/cmm/CmmInfo.hs b/compiler/cmm/CmmInfo.hs index fe0c104d1c..a171faa057 100644 --- a/compiler/cmm/CmmInfo.hs +++ b/compiler/cmm/CmmInfo.hs @@ -19,6 +19,8 @@ import CmmUtils  import CLabel  import SMRep  import Bitmap +import Stream (Stream) +import qualified Stream  import Maybes  import Constants @@ -40,10 +42,16 @@ mkEmptyContInfoTable info_lbl                   , cit_prof = NoProfilingInfo                   , cit_srt  = NoC_SRT } -cmmToRawCmm :: Platform -> [Old.CmmGroup] -> IO [Old.RawCmmGroup] +cmmToRawCmm :: Platform -> Stream IO Old.CmmGroup () +            -> IO (Stream IO Old.RawCmmGroup ())  cmmToRawCmm platform cmms    = do { uniqs <- mkSplitUniqSupply 'i' -       ; return (initUs_ uniqs (mapM (concatMapM (mkInfoTable platform)) cmms)) } +       ; let do_one uniqs cmm = do +                case initUs uniqs $ concatMapM (mkInfoTable platform) cmm of +                  (b,uniqs') -> return (uniqs',b) +                  -- NB. strictness fixes a space leak.  DO NOT REMOVE. +       ; return (Stream.mapAccumL do_one uniqs cmms >> return ()) +       }  -- Make a concrete info table, represented as a list of CmmStatic  -- (it can't be simply a list of Word, because the SRT field is @@ -82,7 +90,7 @@ mkInfoTable :: Platform -> CmmDecl -> UniqSM [RawCmmDecl]  mkInfoTable _ (CmmData sec dat)     = return [CmmData sec dat] -mkInfoTable platform (CmmProc (CmmInfo _ _ info) entry_label blocks) +mkInfoTable platform (CmmProc info entry_label blocks)    | CmmNonInfoTable <- info   -- Code without an info table.  Easy.    = return [CmmProc Nothing entry_label blocks] @@ -91,7 +99,8 @@ mkInfoTable platform (CmmProc (CmmInfo _ _ info) entry_label blocks)         ; return (top_decls  ++                   mkInfoTableAndCode info_lbl info_cts                                      entry_label blocks) } -  | otherwise = panic "mkInfoTable"  -- Patern match overlap check not clever enough +  | otherwise = panic "mkInfoTable" +                  -- Patern match overlap check not clever enough  -----------------------------------------------------  type InfoTableContents = ( [CmmLit]	     -- The standard part diff --git a/compiler/cmm/CmmLayoutStack.hs b/compiler/cmm/CmmLayoutStack.hs new file mode 100644 index 0000000000..660506e7dc --- /dev/null +++ b/compiler/cmm/CmmLayoutStack.hs @@ -0,0 +1,1045 @@ +{-# LANGUAGE RecordWildCards, GADTs #-} +module CmmLayoutStack ( +       cmmLayoutStack, setInfoTableStackMap, cmmSink +  ) where + +import StgCmmUtils      ( callerSaveVolatileRegs ) -- XXX +import StgCmmForeign    ( saveThreadState, loadThreadState ) -- XXX + +import Cmm +import BlockId +import CLabel +import CmmUtils +import MkGraph +import Module +import ForeignCall +import CmmLive +import CmmProcPoint +import SMRep +import Hoopl hiding ((<*>), mkLast, mkMiddle) +import OptimizationFuel +import Constants +import UniqSupply +import Maybes +import UniqFM +import Util + +import FastString +import Outputable +import Data.Map (Map) +import qualified Data.Map as Map +import qualified Data.Set as Set +import Control.Monad.Fix +import Data.Array as Array +import Data.Bits +import Data.List (nub, partition) +import Control.Monad (liftM) + +#include "HsVersions.h" + + +data StackSlot = Occupied | Empty +     -- Occupied: a return address or part of an update frame + +instance Outputable StackSlot where +  ppr Occupied = ptext (sLit "XXX") +  ppr Empty    = ptext (sLit "---") + +-- All stack locations are expressed as positive byte offsets from the +-- "base", which is defined to be the address above the return address +-- on the stack on entry to this CmmProc. +-- +-- Lower addresses have higher StackLocs. +-- +type StackLoc = ByteOff + +{- + A StackMap describes the stack at any given point.  At a continuation + it has a particular layout, like this: + +         |             | <- base +         |-------------| +         |     ret0    | <- base + 8 +         |-------------| +         .  upd frame  . <- base + sm_ret_off +         |-------------| +         |             | +         .    vars     . +         . (live/dead) . +         |             | <- base + sm_sp - sm_args +         |-------------| +         |    ret1     | +         .  ret vals   . <- base + sm_sp    (<--- Sp points here) +         |-------------| + +Why do we include the final return address (ret0) in our stack map?  I +have absolutely no idea, but it seems to be done that way consistently +in the rest of the code generator, so I played along here. --SDM + +Note that we will be constructing an info table for the continuation +(ret1), which needs to describe the stack down to, but not including, +the update frame (or ret0, if there is no update frame). +-} + +data StackMap = StackMap + {  sm_sp   :: StackLoc +       -- ^ the offset of Sp relative to the base on entry +       -- to this block. + ,  sm_args :: ByteOff +       -- ^ the number of bytes of arguments in the area for this block +       -- Defn: the offset of young(L) relative to the base is given by +       -- (sm_sp - sm_args) of the StackMap for block L. + ,  sm_ret_off :: ByteOff +       -- ^ Number of words of stack that we do not describe with an info +       -- table, because it contains an update frame. + ,  sm_regs :: UniqFM (LocalReg,StackLoc) +       -- ^ regs on the stack + } + +instance Outputable StackMap where +  ppr StackMap{..} = +     text "Sp = " <> int sm_sp $$ +     text "sm_args = " <> int sm_args $$ +     text "sm_ret_off = " <> int sm_ret_off $$ +     text "sm_regs = " <> ppr (eltsUFM sm_regs) + + +cmmLayoutStack :: ProcPointSet -> ByteOff -> CmmGraph +               -> FuelUniqSM (CmmGraph, BlockEnv StackMap) +cmmLayoutStack procpoints entry_args +               graph0@(CmmGraph { g_entry = entry }) +  = do +    pprTrace "cmmLayoutStack" (ppr entry_args) $ return () +    (graph, liveness) <- removeDeadAssignments graph0 +    pprTrace "liveness" (ppr liveness) $ return () +    let blocks = postorderDfs graph + +    (final_stackmaps, final_high_sp, new_blocks) <- liftUniq $ +          mfix $ \ ~(rec_stackmaps, rec_high_sp, _new_blocks) -> +            layout procpoints liveness entry entry_args +                   rec_stackmaps rec_high_sp blocks + +    new_blocks' <- liftUniq $ mapM lowerSafeForeignCall new_blocks + +    pprTrace ("Sp HWM") (ppr final_high_sp) $ +       return (ofBlockList entry new_blocks', final_stackmaps) + + + +layout :: BlockSet                      -- proc points +       -> BlockEnv CmmLive              -- liveness +       -> BlockId                       -- entry +       -> ByteOff                       -- stack args on entry + +       -> BlockEnv StackMap             -- [final] stack maps +       -> ByteOff                       -- [final] Sp high water mark + +       -> [CmmBlock]                    -- [in] blocks + +       -> UniqSM +          ( BlockEnv StackMap           -- [out] stack maps +          , ByteOff                     -- [out] Sp high water mark +          , [CmmBlock]                  -- [out] new blocks +          ) + +layout procpoints liveness entry entry_args final_stackmaps final_hwm blocks +  = go blocks init_stackmap entry_args [] +  where +    (updfr, cont_info)  = collectContInfo blocks + +    init_stackmap = mapSingleton entry StackMap{ sm_sp   = entry_args +                                               , sm_args = entry_args +                                               , sm_ret_off = updfr +                                               , sm_regs = emptyUFM +                                               } + +    go [] acc_stackmaps acc_hwm acc_blocks +      = return (acc_stackmaps, acc_hwm, acc_blocks) + +    go (b0 : bs) acc_stackmaps acc_hwm acc_blocks +      = do +       let (entry0@(CmmEntry entry_lbl), middle0, last0) = blockSplit b0 +     +       let stack0@StackMap { sm_sp = sp0 } +               = mapFindWithDefault +                     (pprPanic "no stack map for" (ppr entry_lbl)) +                     entry_lbl acc_stackmaps +     +       pprTrace "layout" (ppr entry_lbl <+> ppr stack0) $ return () +     +       -- (a) Update the stack map to include the effects of +       --     assignments in this block +       let stack1 = foldBlockNodesF (procMiddle acc_stackmaps) middle0 stack0 +     +       -- (b) Insert assignments to reload all the live variables if this +       --     block is a proc point +       let middle1 = if entry_lbl `setMember` procpoints +                        then foldr blockCons middle0 (insertReloads stack0) +                        else middle0 +     +       -- (c) Look at the last node and if we are making a call or +       --     jumping to a proc point, we must save the live +       --     variables, adjust Sp, and construct the StackMaps for +       --     each of the successor blocks.  See handleLastNode for +       --     details. +       (middle2, sp_off, last1, fixup_blocks, out) +           <- handleLastNode procpoints liveness cont_info +                             acc_stackmaps stack1 middle0 last0 +     +       pprTrace "layout(out)" (ppr out) $ return () + +       -- (d) Manifest Sp: run over the nodes in the block and replace +       --     CmmStackSlot with CmmLoad from Sp with a concrete offset. +       -- +       -- our block: +       --    middle1          -- the original middle nodes +       --    middle2          -- live variable saves from handleLastNode +       --    Sp = Sp + sp_off -- Sp adjustment goes here +       --    last1            -- the last node +       -- +       let middle_pre = blockToList $ foldl blockSnoc middle1 middle2 + +           sp_high = final_hwm - entry_args +              -- The stack check value is adjusted by the Sp offset on +              -- entry to the proc, which is entry_args.  We are +              -- assuming that we only do a stack check at the +              -- beginning of a proc, and we don't modify Sp before the +              -- check. + +           final_blocks = manifestSp final_stackmaps stack0 sp0 sp_high entry0 +                              middle_pre sp_off last1 fixup_blocks + +           acc_stackmaps' = mapUnion acc_stackmaps out + +           hwm' = maximum (acc_hwm : (sp0 - sp_off) : map sm_sp (mapElems out)) + +       go bs acc_stackmaps' hwm' (final_blocks ++ acc_blocks) + + +-- ----------------------------------------------------------------------------- + +-- This doesn't seem right somehow.  We need to find out whether this +-- proc will push some update frame material at some point, so that we +-- can avoid using that area of the stack for spilling.  The +-- updfr_space field of the CmmProc *should* tell us, but it doesn't +-- (I think maybe it gets filled in later when we do proc-point +-- splitting). +-- +-- So we'll just take the max of all the cml_ret_offs.  This could be +-- unnecessarily pessimistic, but probably not in the code we +-- generate. + +collectContInfo :: [CmmBlock] -> (ByteOff, BlockEnv ByteOff) +collectContInfo blocks +  = (maximum ret_offs, mapFromList (catMaybes mb_argss)) + where +  (mb_argss, ret_offs) = mapAndUnzip get_cont blocks + +  get_cont b = +     case lastNode b of +        CmmCall { cml_cont = Just l, .. } +           -> (Just (l, cml_ret_args), cml_ret_off) +        CmmForeignCall { .. } +           -> (Just (succ, 0), updfr) -- ?? +        _other -> (Nothing, 0) + + +-- ----------------------------------------------------------------------------- +-- Updating the StackMap from middle nodes + +-- Look for loads from stack slots, and update the StackMap.  This is +-- purelyu for optimisation reasons, so that we can avoid saving a +-- variable back to a different stack slot if it is already on the +-- stack. +-- +-- This happens a lot: for example when function arguments are passed +-- on the stack and need to be immediately saved across a call, we +-- want to just leave them where they are on the stack. +-- +procMiddle :: BlockEnv StackMap -> CmmNode e x -> StackMap -> StackMap +procMiddle stackmaps node sm +  = case node of +     CmmAssign (CmmLocal r) (CmmLoad (CmmStackSlot area off) _) +       -> sm { sm_regs = addToUFM (sm_regs sm) r (r,loc) } +        where loc = getStackLoc area off stackmaps +     CmmAssign (CmmLocal r) _other +       -> sm { sm_regs = delFromUFM (sm_regs sm) r } +     _other +       -> sm + +getStackLoc :: Area -> ByteOff -> BlockEnv StackMap -> StackLoc +getStackLoc Old       n _         = n +getStackLoc (Young l) n stackmaps = +  case mapLookup l stackmaps of +    Nothing -> pprPanic "getStackLoc" (ppr l) +    Just sm -> sm_sp sm - sm_args sm + n + + +-- ----------------------------------------------------------------------------- +-- Handling stack allocation for a last node + +-- We take a single last node and turn it into: +-- +--    C1 (some statements) +--    Sp = Sp + N +--    C2 (some more statements) +--    call f()          -- the actual last node +-- +-- plus possibly some more blocks (we may have to add some fixup code +-- between the last node and the continuation). +-- +-- C1: is the code for saving the variables across this last node onto +-- the stack, if the continuation is a call or jumps to a proc point. +-- +-- C2: if the last node is a safe foreign call, we have to inject some +-- extra code that goes *after* the Sp adjustment. + +handleLastNode +   :: ProcPointSet -> BlockEnv CmmLive -> BlockEnv ByteOff +   -> BlockEnv StackMap -> StackMap +   -> Block CmmNode O O +   -> CmmNode O C +   -> UniqSM +      ( [CmmNode O O]      -- nodes to go *before* the Sp adjustment +      , ByteOff            -- amount to adjust Sp +      , CmmNode O C        -- new last node +      , [CmmBlock]         -- new blocks +      , BlockEnv StackMap  -- stackmaps for the continuations +      ) + +handleLastNode procpoints liveness cont_info stackmaps +               stack0@StackMap { sm_sp = sp0 } middle last + = case last of +    --  At each return / tail call, +    --  adjust Sp to point to the last argument pushed, which +    --  is cml_args, after popping any other junk from the stack. +    CmmCall{ cml_cont = Nothing, .. } -> do +      let sp_off = sp0 - cml_args +      return ([], sp_off, last, [], mapEmpty) + +    --  At each CmmCall with a continuation: +    CmmCall{ cml_cont = Just cont_lbl, .. } -> +       return $ lastCall cont_lbl cml_args cml_ret_args cml_ret_off + +    CmmForeignCall{ succ = cont_lbl, .. } -> do +       return $ lastCall cont_lbl wORD_SIZE wORD_SIZE (sm_ret_off stack0) +            -- one word each for args and results: the return address + +    CmmBranch{..}     ->  handleProcPoints +    CmmCondBranch{..} ->  handleProcPoints +    CmmSwitch{..}     ->  handleProcPoints + +  where +     -- Calls and ForeignCalls are handled the same way: +     lastCall :: BlockId -> ByteOff -> ByteOff -> ByteOff +              -> ( [CmmNode O O] +                 , ByteOff +                 , CmmNode O C +                 , [CmmBlock] +                 , BlockEnv StackMap +                 ) +     lastCall lbl cml_args cml_ret_args cml_ret_off +      =  ( assignments +         , spOffsetForCall sp0 cont_stack cml_args +         , last +         , [] -- no new blocks +         , mapSingleton lbl cont_stack ) +      where +         (assignments, cont_stack) = prepareStack lbl cml_ret_args cml_ret_off + + +     prepareStack lbl cml_ret_args cml_ret_off +       | Just cont_stack <- mapLookup lbl stackmaps +             -- If we have already seen this continuation before, then +             -- we just have to make the stack look the same: +       = (fixupStack stack0 cont_stack, cont_stack) +             -- Otherwise, we have to allocate the stack frame +       | otherwise +       = (save_assignments, new_cont_stack) +       where +        (new_cont_stack, save_assignments) +           = setupStackFrame lbl liveness cml_ret_off cml_ret_args stack0 + + +     -- proc point, we have to set up the stack to match what the proc +     -- point is expecting. +     -- +     handleProcPoints :: UniqSM ( [CmmNode O O] +                                , ByteOff +                                , CmmNode O C +                                , [CmmBlock] +                                , BlockEnv StackMap ) + +     handleProcPoints +         -- Note [diamond proc point] +       | Just l <- futureContinuation middle +       , (nub $ filter (`setMember` procpoints) $ successors last) == [l] +       = do +         let cont_args = mapFindWithDefault 0 l cont_info +             (assigs, cont_stack) = prepareStack l cont_args (sm_ret_off stack0) +             out = mapFromList [ (l', cont_stack) +                               | l' <- successors last ] +         return ( assigs +                , spOffsetForCall sp0 cont_stack wORD_SIZE +                , last +                , [] +                , out) + +        | otherwise = do +          pps <- mapM handleProcPoint (successors last) +          let lbl_map :: LabelMap Label +              lbl_map = mapFromList [ (l,tmp) | (l,tmp,_,_) <- pps ] +              fix_lbl l = mapLookup l lbl_map `orElse` l +          return ( [] +                 , 0 +                 , mapSuccessors fix_lbl last +                 , concat [ blk | (_,_,_,blk) <- pps ] +                 , mapFromList [ (l, sm) | (l,_,sm,_) <- pps ] ) + +     -- For each proc point that is a successor of this block +     --   (a) if the proc point already has a stackmap, we need to +     --       shuffle the current stack to make it look the same. +     --       We have to insert a new block to make this happen. +     --   (b) otherwise, call "allocate live stack0" to make the +     --       stack map for the proc point +     handleProcPoint :: BlockId +                     -> UniqSM (BlockId, BlockId, StackMap, [CmmBlock]) +     handleProcPoint l +        | not (l `setMember` procpoints) = return (l, l, stack0, []) +        | otherwise = do +           tmp_lbl <- liftM mkBlockId $ getUniqueM +           let +               (stack2, assigs) = +                  case mapLookup l stackmaps of +                    Just pp_sm -> (pp_sm, fixupStack stack0 pp_sm) +                    Nothing    -> +                      pprTrace "first visit to proc point" +                                   (ppr l <+> ppr stack1) $ +                      (stack1, assigs) +                      where +                       cont_args = mapFindWithDefault 0 l cont_info +                       (stack1, assigs) = +                           setupStackFrame l liveness (sm_ret_off stack0) +                                                       cont_args stack0 + +               sp_off = sp0 - sm_sp stack2 + +               block = blockJoin (CmmEntry tmp_lbl) +                                 (maybeAddSpAdj sp_off (blockFromList assigs)) +                                 (CmmBranch l) +           -- +           return (l, tmp_lbl, stack2, [block]) + + +   +-- Sp is currently pointing to current_sp, +-- we want it to point to +--    (sm_sp cont_stack - sm_args cont_stack + args) +-- so the difference is +--    sp0 - (sm_sp cont_stack - sm_args cont_stack + args) +spOffsetForCall :: ByteOff -> StackMap -> ByteOff -> ByteOff +spOffsetForCall current_sp cont_stack args +  = current_sp - (sm_sp cont_stack - sm_args cont_stack + args) + + +-- | create a sequence of assignments to establish the new StackMap, +-- given the old StackMap. +fixupStack :: StackMap -> StackMap -> [CmmNode O O] +fixupStack old_stack new_stack = concatMap move new_locs + where +     old_map :: Map LocalReg ByteOff +     old_map  = Map.fromList (stackSlotRegs old_stack) +     new_locs = stackSlotRegs new_stack + +     move (r,n) +       | Just m <- Map.lookup r old_map, n == m = [] +       | otherwise = [CmmStore (CmmStackSlot Old n) +                               (CmmReg (CmmLocal r))] + + + +setupStackFrame +             :: BlockId                 -- label of continuation +             -> BlockEnv CmmLive        -- liveness +             -> ByteOff      -- updfr +             -> ByteOff      -- bytes of return values on stack +             -> StackMap     -- current StackMap +             -> (StackMap, [CmmNode O O]) + +setupStackFrame lbl liveness updfr_off ret_args stack0 +  = (cont_stack, assignments) +  where +      -- get the set of LocalRegs live in the continuation +      live = mapFindWithDefault Set.empty lbl liveness + +      -- the stack from the base to updfr_off is off-limits. +      -- our new stack frame contains: +      --   * saved live variables +      --   * the return address [young(C) + 8] +      --   * the args for the call, +      --     which are replaced by the return values at the return +      --     point. + +      -- everything up to updfr_off is off-limits +      -- stack1 contains updfr_off, plus everything we need to save +      (stack1, assignments) = allocate updfr_off live stack0 + +      -- And the Sp at the continuation is: +      --   sm_sp stack1 + ret_args +      cont_stack = stack1{ sm_sp = sm_sp stack1 + ret_args +                         , sm_args = ret_args +                         , sm_ret_off = updfr_off +                         } + + +-- ----------------------------------------------------------------------------- +-- Note [diamond proc point] +-- +-- This special case looks for the pattern we get from a typical +-- tagged case expression: +-- +--    Sp[young(L1)] = L1 +--    if (R1 & 7) != 0 goto L1 else goto L2 +--  L2: +--    call [R1] returns to L1 +--  L1: live: {y} +--    x = R1 +-- +-- If we let the generic case handle this, we get +-- +--    Sp[-16] = L1 +--    if (R1 & 7) != 0 goto L1a else goto L2 +--  L2: +--    Sp[-8] = y +--    Sp = Sp - 16 +--    call [R1] returns to L1 +--  L1a: +--    Sp[-8] = y +--    Sp = Sp - 16 +--    goto L1 +--  L1: +--    x = R1 +-- +-- The code for saving the live vars is duplicated in each branch, and +-- furthermore there is an extra jump in the fast path (assuming L1 is +-- a proc point, which it probably is if there is a heap check). +-- +-- So to fix this we want to set up the stack frame before the +-- conditional jump.  How do we know when to do this, and when it is +-- safe?  The basic idea is, when we see the assignment +--  +--   Sp[young(L)] = L +--  +-- we know that +--   * we are definitely heading for L +--   * there can be no more reads from another stack area, because young(L) +--     overlaps with it. +-- +-- We don't necessarily know that everything live at L is live now +-- (some might be assigned between here and the jump to L).  So we +-- simplify and only do the optimisation when we see +-- +--   (1) a block containing an assignment of a return address L +--   (2) ending in a branch where one (and only) continuation goes to L, +--       and no other continuations go to proc points. +-- +-- then we allocate the stack frame for L at the end of the block, +-- before the branch. +-- +-- We could generalise (2), but that would make it a bit more +-- complicated to handle, and this currently catches the common case. + +futureContinuation :: Block CmmNode O O -> Maybe BlockId +futureContinuation middle = foldBlockNodesB f middle Nothing +   where f :: CmmNode a b -> Maybe BlockId -> Maybe BlockId +         f (CmmStore (CmmStackSlot (Young l) _) (CmmLit (CmmBlock _))) _ +               = Just l +         f _ r = r + +-- ----------------------------------------------------------------------------- +-- Saving live registers + +-- | Given a set of live registers and a StackMap, save all the registers +-- on the stack and return the new StackMap and the assignments to do +-- the saving. +-- +allocate :: ByteOff -> RegSet -> StackMap -> (StackMap, [CmmNode O O]) +allocate ret_off live stackmap@StackMap{ sm_sp = sp0 +                                       , sm_regs = regs0 } + = +  pprTrace "allocate" (ppr live $$ ppr stackmap) $ + +   -- we only have to save regs that are not already in a slot +   let to_save = filter (not . (`elemUFM` regs0)) (Set.elems live) +       regs1   = filterUFM (\(r,_) -> elemRegSet r live) regs0 +   in + +   -- make a map of the stack +   let stack = reverse $ Array.elems $ +               accumArray (\_ x -> x) Empty (1, toWords (max sp0 ret_off)) $ +                 ret_words ++ live_words +            where ret_words = +                   [ (x, Occupied) +                   | x <- [ 1 .. toWords ret_off] ] +                  live_words = +                   [ (toWords x, Occupied) +                   | (r,off) <- eltsUFM regs1, +                     let w = localRegBytes r, +                     x <- [ off, off-wORD_SIZE .. off - w + 1] ] +   in + +   -- Pass over the stack: find slots to save all the new live variables, +   -- choosing the oldest slots first (hence a foldr). +   let +       save slot ([], stack, n, assigs, regs) -- no more regs to save +          = ([], slot:stack, n `plusW` 1, assigs, regs) +       save slot (to_save, stack, n, assigs, regs) +          = case slot of +               Occupied ->  (to_save, Occupied:stack, n `plusW` 1, assigs, regs) +               Empty +                 | Just (stack', r, to_save') <- +                       select_save to_save (slot:stack) +                 -> let assig = CmmStore (CmmStackSlot Old n') +                                         (CmmReg (CmmLocal r)) +                        n' = n `plusW` 1 +                   in +                        (to_save', stack', n', assig : assigs, (r,(r,n')):regs) + +                 | otherwise +                 -> (to_save, slot:stack, n `plusW` 1, assigs, regs) + +       -- we should do better here: right now we'll fit the smallest first, +       -- but it would make more sense to fit the biggest first. +       select_save :: [LocalReg] -> [StackSlot] +                   -> Maybe ([StackSlot], LocalReg, [LocalReg]) +       select_save regs stack = go regs [] +         where go []     _no_fit = Nothing +               go (r:rs) no_fit +                 | Just rest <- dropEmpty words stack +                 = Just (replicate words Occupied ++ rest, r, rs++no_fit) +                 | otherwise +                 = go rs (r:no_fit) +                 where words = localRegWords r + +       -- fill in empty slots as much as possible +       (still_to_save, save_stack, n, save_assigs, save_regs) +          = foldr save (to_save, [], 0, [], []) stack + +       -- push any remaining live vars on the stack +       (push_sp, push_assigs, push_regs) +          = foldr push (n, [], []) still_to_save +          where +              push r (n, assigs, regs) +                = (n', assig : assigs, (r,(r,n')) : regs) +                where +                  n' = n + localRegBytes r +                  assig = CmmStore (CmmStackSlot Old n') +                                   (CmmReg (CmmLocal r)) + +       trim_sp +          | not (null push_regs) = push_sp +          | otherwise +          = n `plusW` (- length (takeWhile isEmpty save_stack)) + +       final_regs = regs1 `addListToUFM` push_regs +                          `addListToUFM` save_regs + +   in +  -- XXX should be an assert +   if ( n /= max sp0 ret_off ) then pprPanic "allocate" (ppr n <+> ppr sp0 <+> ppr ret_off) else + +   if (trim_sp .&. (wORD_SIZE - 1)) /= 0  then pprPanic "allocate2" (ppr trim_sp <+> ppr final_regs <+> ppr push_sp) else + +   ( stackmap { sm_regs = final_regs , sm_sp = trim_sp } +   , push_assigs ++ save_assigs ) + + +-- ----------------------------------------------------------------------------- +-- Manifesting Sp + +-- | Manifest Sp: turn all the CmmStackSlots into CmmLoads from Sp.  The +-- block looks like this: +-- +--    middle_pre       -- the middle nodes +--    Sp = Sp + sp_off -- Sp adjustment goes here +--    last             -- the last node +-- +-- And we have some extra blocks too (that don't contain Sp adjustments) +-- +-- The adjustment for middle_pre will be different from that for +-- middle_post, because the Sp adjustment intervenes. +-- +manifestSp +   :: BlockEnv StackMap  -- StackMaps for other blocks +   -> StackMap           -- StackMap for this block +   -> ByteOff            -- Sp on entry to the block +   -> ByteOff            -- SpHigh +   -> CmmNode C O        -- first node +   -> [CmmNode O O]      -- middle +   -> ByteOff            -- sp_off +   -> CmmNode O C        -- last node +   -> [CmmBlock]         -- new blocks +   -> [CmmBlock]         -- final blocks with Sp manifest + +manifestSp stackmaps stack0 sp0 sp_high +           first middle_pre sp_off last fixup_blocks +  = final_block : fixup_blocks' +  where +    area_off = getAreaOff stackmaps + +    adj_pre_sp, adj_post_sp :: CmmNode e x -> CmmNode e x +    adj_pre_sp  = mapExpDeep (areaToSp sp0            sp_high area_off) +    adj_post_sp = mapExpDeep (areaToSp (sp0 - sp_off) sp_high area_off) + +    final_middle = maybeAddSpAdj sp_off $ +                   blockFromList $ +                   map adj_pre_sp $ +                   elimStackStores stack0 stackmaps area_off $ +                   middle_pre + +    final_last    = optStackCheck (adj_post_sp last) + +    final_block   = blockJoin first final_middle final_last + +    fixup_blocks' = map (blockMapNodes3 (id, adj_post_sp, id)) fixup_blocks + + +getAreaOff :: BlockEnv StackMap -> (Area -> StackLoc) +getAreaOff _ Old = 0 +getAreaOff stackmaps (Young l) = +  case mapLookup l stackmaps of +    Just sm -> sm_sp sm - sm_args sm +    Nothing -> pprPanic "getAreaOff" (ppr l) + + +maybeAddSpAdj :: ByteOff -> Block CmmNode O O -> Block CmmNode O O +maybeAddSpAdj 0      block = block +maybeAddSpAdj sp_off block +   = block `blockSnoc` CmmAssign spReg (cmmOffset (CmmReg spReg) sp_off) + + +{- +Sp(L) is the Sp offset on entry to block L relative to the base of the +OLD area. + +SpArgs(L) is the size of the young area for L, i.e. the number of +arguments. + + - in block L, each reference to [old + N] turns into +   [Sp + Sp(L) - N] + + - in block L, each reference to [young(L') + N] turns into +   [Sp + Sp(L) - Sp(L') + SpArgs(L') - N] + + - be careful with the last node of each block: Sp has already been adjusted +   to be Sp + Sp(L) - Sp(L') +-} + +areaToSp :: ByteOff -> ByteOff -> (Area -> StackLoc) -> CmmExpr -> CmmExpr +areaToSp sp_old _sp_hwm area_off (CmmStackSlot area n) = +  cmmOffset (CmmReg spReg) (sp_old - area_off area - n) +areaToSp _ sp_hwm _ (CmmLit CmmHighStackMark) = CmmLit (mkIntCLit sp_hwm) +areaToSp _ _ _ (CmmMachOp (MO_U_Lt _)  -- Note [null stack check] +                      [CmmMachOp (MO_Sub _) +                              [ CmmReg (CmmGlobal Sp) +                              , CmmLit (CmmInt 0 _)], +                       CmmReg (CmmGlobal SpLim)]) = CmmLit (CmmInt 0 wordWidth) +areaToSp _ _ _ other = other + +-- ----------------------------------------------------------------------------- +-- Note [null stack check] +-- +-- If the high-water Sp is zero, then we end up with +-- +--   if (Sp - 0 < SpLim) then .. else .. +-- +-- and possibly some dead code for the failure case.  Optimising this +-- away depends on knowing that SpLim <= Sp, so it is really the job +-- of the stack layout algorithm, hence we do it now.  This is also +-- convenient because control-flow optimisation later will drop the +-- dead code. + +optStackCheck :: CmmNode O C -> CmmNode O C +optStackCheck n = -- Note [null stack check] + case n of +   CmmCondBranch (CmmLit (CmmInt 0 _)) _true false -> CmmBranch false +   other -> other + + +-- ----------------------------------------------------------------------------- + +-- | Eliminate stores of the form +-- +--    Sp[area+n] = r +-- +-- when we know that r is already in the same slot as Sp[area+n].  We +-- could do this in a later optimisation pass, but that would involve +-- a separate analysis and we already have the information to hand +-- here.  It helps clean up some extra stack stores in common cases. +-- +-- Note that we may have to modify the StackMap as we walk through the +-- code using procMiddle, since an assignment to a variable in the +-- StackMap will invalidate its mapping there. +-- +elimStackStores :: StackMap +                -> BlockEnv StackMap +                -> (Area -> ByteOff) +                -> [CmmNode O O] +                -> [CmmNode O O] +elimStackStores stackmap stackmaps area_off nodes +  = go stackmap nodes +  where +    go _stackmap [] = [] +    go stackmap (n:ns) +     = case n of +         CmmStore (CmmStackSlot area m) (CmmReg (CmmLocal r)) +            | Just (_,off) <- lookupUFM (sm_regs stackmap) r +            , area_off area + m == off +            -> pprTrace "eliminated a node!" (ppr r) $ go stackmap ns +         _otherwise +            -> n : go (procMiddle stackmaps n stackmap) ns + + +-- ----------------------------------------------------------------------------- +-- Update info tables to include stack liveness + + +setInfoTableStackMap :: BlockEnv StackMap -> CmmDecl -> CmmDecl +setInfoTableStackMap stackmaps +    (CmmProc top_info@TopInfo{..} l g@CmmGraph{g_entry = eid}) +  = CmmProc top_info{ info_tbl = fix_info info_tbl } l g +  where +    fix_info info_tbl@CmmInfoTable{ cit_rep = StackRep _ } = +       info_tbl { cit_rep = StackRep (get_liveness eid) } +    fix_info other = other + +    get_liveness :: BlockId -> Liveness +    get_liveness lbl +      = case mapLookup lbl stackmaps of +          Nothing -> pprPanic "setInfoTableStackMap" (ppr lbl) +          Just sm -> stackMapToLiveness sm + +setInfoTableStackMap _ d = d + + +stackMapToLiveness :: StackMap -> Liveness +stackMapToLiveness StackMap{..} = +   reverse $ Array.elems $ +        accumArray (\_ x -> x) True (toWords sm_ret_off + 1, +                                     toWords (sm_sp - sm_args)) live_words +   where +     live_words =  [ (toWords off, False) +                   | (r,off) <- eltsUFM sm_regs, isGcPtrType (localRegType r) ] + + +-- ----------------------------------------------------------------------------- +-- Lowering safe foreign calls + +{- +Note [lower safe foreign calls] + +We start with + +   Sp[young(L1)] = L1 + ,----------------------- + | r1 = foo(x,y,z) returns to L1 + '----------------------- + L1: +   R1 = r1 -- copyIn, inserted by mkSafeCall +   ... + +the stack layout algorithm will arrange to save and reload everything +live across the call.  Our job now is to expand the call so we get + +   Sp[young(L1)] = L1 + ,----------------------- + | SAVE_THREAD_STATE() + | token = suspendThread(BaseReg, interruptible) + | r = foo(x,y,z) + | BaseReg = resumeThread(token) + | LOAD_THREAD_STATE() + | R1 = r  -- copyOut + | jump L1 + '----------------------- + L1: +   r = R1 -- copyIn, inserted by mkSafeCall +   ... + +Note the copyOut, which saves the results in the places that L1 is +expecting them (see Note {safe foreign call convention]). +-} + +lowerSafeForeignCall :: CmmBlock -> UniqSM CmmBlock +lowerSafeForeignCall block +  | (entry, middle, CmmForeignCall { .. }) <- blockSplit block +  = do +    -- Both 'id' and 'new_base' are KindNonPtr because they're +    -- RTS-only objects and are not subject to garbage collection +    id <- newTemp bWord +    new_base <- newTemp (cmmRegType (CmmGlobal BaseReg)) +    let (caller_save, caller_load) = callerSaveVolatileRegs +    load_tso <- newTemp gcWord +    load_stack <- newTemp gcWord +    let suspend = saveThreadState <*> +                  caller_save <*> +                  mkMiddle (callSuspendThread id intrbl) +        midCall = mkUnsafeCall tgt res args +        resume  = mkMiddle (callResumeThread new_base id) <*> +                  -- Assign the result to BaseReg: we +                  -- might now have a different Capability! +                  mkAssign (CmmGlobal BaseReg) (CmmReg (CmmLocal new_base)) <*> +                  caller_load <*> +                  loadThreadState load_tso load_stack +        -- Note: The successor must be a procpoint, and we have already split, +        --       so we use a jump, not a branch. +        succLbl = CmmLit (CmmLabel (infoTblLbl succ)) + +        (ret_args, copyout) = copyOutOflow NativeReturn Jump (Young succ) +                                           (map (CmmReg . CmmLocal) res) +                                           updfr (0, []) + +        jump = CmmCall { cml_target   = succLbl +                       , cml_cont     = Just succ +                       , cml_args     = widthInBytes wordWidth +                       , cml_ret_args = ret_args +                       , cml_ret_off  = updfr } + +    graph' <- lgraphOfAGraph $ suspend <*> +                               midCall <*> +                               resume  <*> +                               copyout <*> +                               mkLast jump + +    case toBlockList graph' of +      [one] -> let (_, middle', last) = blockSplit one +               in return (blockJoin entry (middle `blockAppend` middle') last) +      _ -> panic "lowerSafeForeignCall0" + +  -- Block doesn't end in a safe foreign call: +  | otherwise = return block + + +foreignLbl :: FastString -> CmmExpr +foreignLbl name = CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId name)) + +newTemp :: CmmType -> UniqSM LocalReg +newTemp rep = getUniqueM >>= \u -> return (LocalReg u rep) + +callSuspendThread :: LocalReg -> Bool -> CmmNode O O +callSuspendThread id intrbl = +  CmmUnsafeForeignCall +       (ForeignTarget (foreignLbl (fsLit "suspendThread")) +             (ForeignConvention CCallConv [AddrHint, NoHint] [AddrHint])) +       [id] [CmmReg (CmmGlobal BaseReg), CmmLit (mkIntCLit (fromEnum intrbl))] + +callResumeThread :: LocalReg -> LocalReg -> CmmNode O O +callResumeThread new_base id = +  CmmUnsafeForeignCall +       (ForeignTarget (foreignLbl (fsLit "resumeThread")) +            (ForeignConvention CCallConv [AddrHint] [AddrHint])) +       [new_base] [CmmReg (CmmLocal id)] + +-- ----------------------------------------------------------------------------- + +plusW :: ByteOff -> WordOff -> ByteOff +plusW b w = b + w * wORD_SIZE + +dropEmpty :: WordOff -> [StackSlot] -> Maybe [StackSlot] +dropEmpty 0 ss           = Just ss +dropEmpty n (Empty : ss) = dropEmpty (n-1) ss +dropEmpty _ _            = Nothing + +isEmpty :: StackSlot -> Bool +isEmpty Empty = True +isEmpty _ = False + +localRegBytes :: LocalReg -> ByteOff +localRegBytes r = roundUpToWords (widthInBytes (typeWidth (localRegType r))) + +localRegWords :: LocalReg -> WordOff +localRegWords = toWords . localRegBytes + +toWords :: ByteOff -> WordOff +toWords x = x `quot` wORD_SIZE + + +insertReloads :: StackMap -> [CmmNode O O] +insertReloads stackmap = +   [ CmmAssign (CmmLocal r) (CmmLoad (CmmStackSlot Old sp) +                                     (localRegType r)) +   | (r,sp) <- stackSlotRegs stackmap +   ] + + +stackSlotRegs :: StackMap -> [(LocalReg, StackLoc)] +stackSlotRegs sm = eltsUFM (sm_regs sm) + +-- ----------------------------------------------------------------------------- + +-- If we do this *before* stack layout, we might be able to avoid +-- saving some things across calls/procpoints. +-- +-- *but*, that will invalidate the liveness analysis, and we'll have +-- to re-do it. + +cmmSink :: CmmGraph -> FuelUniqSM CmmGraph +cmmSink graph = do +  let liveness = cmmLiveness graph +  return $ cmmSink' liveness graph + +cmmSink' :: BlockEnv CmmLive -> CmmGraph -> CmmGraph +cmmSink' liveness graph +  = ofBlockList (g_entry graph) $ sink mapEmpty $ postorderDfs graph +  where + +  sink :: BlockEnv [(LocalReg, CmmExpr)] -> [CmmBlock] -> [CmmBlock] +  sink _ [] = [] +  sink sunk (b:bs) = +    pprTrace "sink" (ppr l) $ +    blockJoin first final_middle last : sink sunk' bs +    where +      l = entryLabel b +      (first, middle, last) = blockSplit b +      (middle', assigs) = walk (blockToList middle) emptyBlock +                               (mapFindWithDefault [] l sunk) + +      (dropped_last, assigs') = partition (`conflictsWithLast` last) assigs + +      final_middle = foldl blockSnoc middle' (toNodes dropped_last) + +      sunk' = mapUnion sunk $ +                 mapFromList [ (l, filt assigs' (getLive l)) +                             | l <- successors last ] +           where +               getLive l = mapFindWithDefault Set.empty l liveness +               filt as live = [ (r,e) | (r,e) <- as, r `Set.member` live ] + + +walk :: [CmmNode O O] -> Block CmmNode O O -> [(LocalReg, CmmExpr)] +     -> (Block CmmNode O O, [(LocalReg, CmmExpr)]) + +walk []     acc as = (acc, as) +walk (n:ns) acc as +  | Just a <- collect_it  = walk ns acc (a:as) +  | otherwise             = walk ns (foldr (flip blockSnoc) acc (n:drop_nodes)) as' +  where +    collect_it = case n of +                   CmmAssign (CmmLocal r) e@(CmmReg (CmmGlobal _)) -> Just (r,e) +--                   CmmAssign (CmmLocal r) e@(CmmLoad addr _) | +--                      foldRegsUsed (\b r -> False) True addr -> Just (r,e) +                   _ -> Nothing + +    drop_nodes = toNodes dropped +    (dropped, as') = partition should_drop as +       where should_drop a = a `conflicts` n + +toNodes as = [ CmmAssign (CmmLocal r) rhs | (r,rhs) <- as ] + +-- We only sink "r = G" assignments right now, so conflicts is very simple: +(r, rhs) `conflicts` CmmAssign reg  _  | reg `regUsedIn` rhs = True +--(r, CmmLoad _ _) `conflicts` CmmStore _ _ = True +(r, _)   `conflicts` node +  = foldRegsUsed (\b r' -> r == r' || b) False node + +(r, _) `conflictsWithLast` node +  = foldRegsUsed (\b r' -> r == r' || b) False node diff --git a/compiler/cmm/CmmLint.hs b/compiler/cmm/CmmLint.hs index 01ebac6254..cd0558616e 100644 --- a/compiler/cmm/CmmLint.hs +++ b/compiler/cmm/CmmLint.hs @@ -1,67 +1,71 @@  -----------------------------------------------------------------------------  -- --- (c) The University of Glasgow 2004-2006 +-- (c) The University of Glasgow 2011  --  -- CmmLint: checking the correctness of Cmm statements and expressions  --  ----------------------------------------------------------------------------- - -{-# OPTIONS -fno-warn-tabs #-} --- The above warning supression flag is a temporary kludge. --- While working on this module you are encouraged to remove it and --- detab the module (please do the detabbing in a separate patch). See ---     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#TabsvsSpaces --- for details - +{-# LANGUAGE GADTs #-}  module CmmLint ( -  cmmLint, cmmLintTop +    cmmLint, cmmLintDecl, cmmLintGraph    ) where +import Hoopl +import Cmm +import CmmUtils +import PprCmm ()  import BlockId -import OldCmm +import FastString  import CLabel  import Outputable -import OldPprCmm()  import Constants -import FastString -import Platform  import Data.Maybe +-- Things to check: +--     - invariant on CmmBlock in CmmExpr (see comment there) +--     - check for branches to blocks that don't exist +--     - check types +  -- -----------------------------------------------------------------------------  -- Exported entry points:  cmmLint :: (Outputable d, Outputable h) -        => Platform -> GenCmmGroup d h (ListGraph CmmStmt) -> Maybe SDoc -cmmLint platform tops = runCmmLint platform (mapM_ (lintCmmDecl platform)) tops +        => GenCmmGroup d h (ListGraph CmmStmt) -> Maybe SDoc +cmmLint tops = runCmmLint (mapM_ lintCmmDecl) tops -cmmLintTop :: (Outputable d, Outputable h) -           => Platform -> GenCmmDecl d h (ListGraph CmmStmt) -> Maybe SDoc -cmmLintTop platform top = runCmmLint platform (lintCmmDecl platform) top +cmmLintGraph :: CmmGraph -> Maybe SDoc +cmmLintGraph g = runCmmLint lintCmmGraph g -runCmmLint :: Outputable a -           => Platform -> (a -> CmmLint b) -> a -> Maybe SDoc -runCmmLint _ l p = +runCmmLint :: Outputable a => (a -> CmmLint b) -> a -> Maybe SDoc +runCmmLint l p =     case unCL (l p) of -   Left err -> Just (vcat [ptext $ sLit ("Cmm lint error:"), -                           nest 2 err, -                           ptext $ sLit ("Program was:"), -                           nest 2 (ppr p)]) -   Right _  -> Nothing - -lintCmmDecl :: Platform -> (GenCmmDecl h i (ListGraph CmmStmt)) -> CmmLint () -lintCmmDecl platform (CmmProc _ lbl (ListGraph blocks)) -  = addLintInfo (text "in proc " <> pprCLabel platform lbl) $ -        let labels = foldl (\s b -> setInsert (blockId b) s) setEmpty blocks -	in  mapM_ (lintCmmBlock platform labels) blocks - -lintCmmDecl _ (CmmData {}) +     Left err -> Just (vcat [ptext $ sLit ("Cmm lint error:"), +                             nest 2 err, +                             ptext $ sLit ("Program was:"), +                             nest 2 (ppr p)]) +     Right _  -> Nothing + +lintCmmDecl :: GenCmmDecl h i CmmGraph -> CmmLint () +lintCmmDecl (CmmProc _ lbl g) +  = addLintInfo (text "in proc " <> ppr lbl) $ lintCmmGraph g +lintCmmDecl (CmmData {})    = return () -lintCmmBlock :: Platform -> BlockSet -> GenBasicBlock CmmStmt -> CmmLint () -lintCmmBlock platform labels (BasicBlock id stmts) -  = addLintInfo (text "in basic block " <> ppr id) $ -	mapM_ (lintCmmStmt platform labels) stmts + +lintCmmGraph :: CmmGraph -> CmmLint () +lintCmmGraph g = mapM_ (lintCmmBlock labels) blocks +  where +       blocks = toBlockList g +       labels = setFromList (map entryLabel blocks) + + +lintCmmBlock :: BlockSet -> CmmBlock -> CmmLint () +lintCmmBlock labels block +  = addLintInfo (\_ -> text "in basic block " <> ppr (entryLabel block)) $ do +        let (_, middle, last) = blockSplit block +        mapM_ lintCmmMiddle (blockToList middle) +        lintCmmLast labels last  -- -----------------------------------------------------------------------------  -- lintCmmExpr @@ -69,24 +73,24 @@ lintCmmBlock platform labels (BasicBlock id stmts)  -- Checks whether a CmmExpr is "type-correct", and check for obvious-looking  -- byte/word mismatches. -lintCmmExpr :: Platform -> CmmExpr -> CmmLint CmmType -lintCmmExpr platform (CmmLoad expr rep) = do -  _ <- lintCmmExpr platform expr +lintCmmExpr :: CmmExpr -> CmmLint CmmType +lintCmmExpr (CmmLoad expr rep) = do +  _ <- lintCmmExpr expr    -- Disabled, if we have the inlining phase before the lint phase,    -- we can have funny offsets due to pointer tagging. -- EZY    -- when (widthInBytes (typeWidth rep) >= wORD_SIZE) $    --   cmmCheckWordAddress expr    return rep -lintCmmExpr platform expr@(CmmMachOp op args) = do -  tys <- mapM (lintCmmExpr platform) args +lintCmmExpr expr@(CmmMachOp op args) = do +  tys <- mapM lintCmmExpr args    if map (typeWidth . cmmExprType) args == machOpArgReps op -  	then cmmCheckMachOp op args tys -	else cmmLintMachOpErr expr (map cmmExprType args) (machOpArgReps op) -lintCmmExpr platform (CmmRegOff reg offset) -  = lintCmmExpr platform (CmmMachOp (MO_Add rep) -		[CmmReg reg, CmmLit (CmmInt (fromIntegral offset) rep)]) +        then cmmCheckMachOp op args tys +        else cmmLintMachOpErr expr (map cmmExprType args) (machOpArgReps op) +lintCmmExpr (CmmRegOff reg offset) +  = lintCmmExpr (CmmMachOp (MO_Add rep) +                [CmmReg reg, CmmLit (CmmInt (fromIntegral offset) rep)])    where rep = typeWidth (cmmRegType reg) -lintCmmExpr _ expr = +lintCmmExpr expr =    return (cmmExprType expr)  -- Check for some common byte/word mismatches (eg. Sp + 1) @@ -119,43 +123,61 @@ notNodeReg :: CmmExpr -> Bool  notNodeReg (CmmReg reg) | reg == nodeReg = False  notNodeReg _                             = True -lintCmmStmt :: Platform -> BlockSet -> CmmStmt -> CmmLint () -lintCmmStmt platform labels = lint -    where lint (CmmNop) = return () -          lint (CmmComment {}) = return () -          lint stmt@(CmmAssign reg expr) = do -            erep <- lintCmmExpr platform expr -	    let reg_ty = cmmRegType reg +lintCmmMiddle :: CmmNode O O -> CmmLint () +lintCmmMiddle node = case node of +  CmmComment _ -> return () + +  CmmAssign reg expr -> do +            erep <- lintCmmExpr expr +            let reg_ty = cmmRegType reg              if (erep `cmmEqType_ignoring_ptrhood` reg_ty)                  then return () -                else cmmLintAssignErr stmt erep reg_ty -          lint (CmmStore l r) = do -            _ <- lintCmmExpr platform l -            _ <- lintCmmExpr platform r +                else cmmLintAssignErr (CmmAssign reg expr) erep reg_ty + +  CmmStore l r -> do +            _ <- lintCmmExpr l +            _ <- lintCmmExpr r              return () -          lint (CmmCall target _res args _) = -              do lintTarget platform labels target -                 mapM_ (lintCmmExpr platform . hintlessCmm) args -          lint (CmmCondBranch e id) = checkTarget id >> lintCmmExpr platform e >> checkCond e -          lint (CmmSwitch e branches) = do + +  CmmUnsafeForeignCall target _formals actuals -> do +            lintTarget target +            mapM_ lintCmmExpr actuals + + +lintCmmLast :: BlockSet -> CmmNode O C -> CmmLint () +lintCmmLast labels node = case node of +  CmmBranch id -> checkTarget id + +  CmmCondBranch e t f -> do +            mapM_ checkTarget [t,f] +            _ <- lintCmmExpr e +            checkCond e + +  CmmSwitch e branches -> do              mapM_ checkTarget $ catMaybes branches -            erep <- lintCmmExpr platform e +            erep <- lintCmmExpr e              if (erep `cmmEqType_ignoring_ptrhood` bWord)                then return () -              else cmmLintErr (text "switch scrutinee is not a word: " <> ppr e <> -                               text " :: " <> ppr erep) -          lint (CmmJump e _) = lintCmmExpr platform e >> return () -          lint (CmmReturn) = return () -          lint (CmmBranch id) = checkTarget id -          checkTarget id = if setMember id labels then return () -                           else cmmLintErr (text "Branch to nonexistent id" <+> ppr id) - -lintTarget :: Platform -> BlockSet -> CmmCallTarget -> CmmLint () -lintTarget platform _      (CmmCallee e _) = do _ <- lintCmmExpr platform e -                                                return () -lintTarget _        _      (CmmPrim _ Nothing) = return () -lintTarget platform labels (CmmPrim _ (Just stmts)) -    = mapM_ (lintCmmStmt platform labels) stmts +              else cmmLintErr (text "switch scrutinee is not a word: " <> +                               ppr e <> text " :: " <> ppr erep) + +  CmmCall { cml_target = target, cml_cont = cont } -> do +          _ <- lintCmmExpr target +          maybe (return ()) checkTarget cont + +  CmmForeignCall tgt _ args succ _ _ -> do +          lintTarget tgt +          mapM_ lintCmmExpr args +          checkTarget succ + where +  checkTarget id +     | setMember id labels = return () +     | otherwise = cmmLintErr (\_ -> text "Branch to nonexistent id" <+> ppr id) + + +lintTarget :: ForeignTarget -> CmmLint () +lintTarget (ForeignTarget e _) = lintCmmExpr e >> return () +lintTarget (PrimTarget {})     = return ()  checkCond :: CmmExpr -> CmmLint () @@ -163,7 +185,7 @@ checkCond (CmmMachOp mop _) | isComparisonMachOp mop = return ()  checkCond (CmmLit (CmmInt x t)) | x == 0 || x == 1, t == wordWidth = return () -- constant values  checkCond expr      = cmmLintErr (hang (text "expression is not a conditional:") 2 -                       (ppr expr)) +                         (ppr expr))  -- -----------------------------------------------------------------------------  -- CmmLint monad @@ -173,37 +195,36 @@ checkCond expr  newtype CmmLint a = CmmLint { unCL :: Either SDoc a }  instance Monad CmmLint where -  CmmLint m >>= k = CmmLint $ case m of  -				Left e -> Left e -				Right a -> unCL (k a) -  return a = CmmLint (Right a) +  CmmLint m >>= k = CmmLint $ \p -> case m p of +                                      Left e -> Left e +                                      Right a -> unCL (k a) p +  return a = CmmLint (\_ -> Right a)  cmmLintErr :: SDoc -> CmmLint a -cmmLintErr msg = CmmLint (Left msg) +cmmLintErr msg = CmmLint (\p -> Left (msg p))  addLintInfo :: SDoc -> CmmLint a -> CmmLint a -addLintInfo info thing = CmmLint $  -   case unCL thing of -	Left err -> Left (hang info 2 err) -	Right a  -> Right a +addLintInfo info thing = CmmLint $ \p -> +   case unCL thing p of +        Left err -> Left (hang (info p) 2 err) +        Right a  -> Right a  cmmLintMachOpErr :: CmmExpr -> [CmmType] -> [Width] -> CmmLint a  cmmLintMachOpErr expr argsRep opExpectsRep -     = cmmLintErr (text "in MachOp application: " $$  -					nest 2 (ppr expr) $$ -				        (text "op is expecting: " <+> ppr opExpectsRep) $$ -					(text "arguments provide: " <+> ppr argsRep)) +     = cmmLintErr (text "in MachOp application: " $$ +                   nest 2 (ppr  expr) $$ +                      (text "op is expecting: " <+> ppr opExpectsRep) $$ +                      (text "arguments provide: " <+> ppr argsRep)) -cmmLintAssignErr :: CmmStmt -> CmmType -> CmmType -> CmmLint a +cmmLintAssignErr :: CmmNode e x -> CmmType -> CmmType -> CmmLint a  cmmLintAssignErr stmt e_ty r_ty -  = cmmLintErr (text "in assignment: " $$  -		nest 2 (vcat [ppr stmt,  -			      text "Reg ty:" <+> ppr r_ty, -			      text "Rhs ty:" <+> ppr e_ty])) -			  -					 +  = cmmLintErr (text "in assignment: " $$ +                nest 2 (vcat [ppr stmt, +                              text "Reg ty:" <+> ppr r_ty, +                              text "Rhs ty:" <+> ppr e_ty])) +  cmmLintDubiousWordOffset :: CmmExpr -> CmmLint a  cmmLintDubiousWordOffset expr     = cmmLintErr (text "offset is not a multiple of words: " $$ -			nest 2 (ppr expr)) +                 nest 2 (ppr expr)) diff --git a/compiler/cmm/CmmLive.hs b/compiler/cmm/CmmLive.hs index 9a5bb2d5ae..ac9c38b448 100644 --- a/compiler/cmm/CmmLive.hs +++ b/compiler/cmm/CmmLive.hs @@ -18,10 +18,9 @@ import Control.Monad  import OptimizationFuel  import PprCmmExpr () -import Compiler.Hoopl +import Hoopl  import Maybes  import Outputable -import UniqSet  -----------------------------------------------------------------------------  -- Calculating what variables are live on entry to a basic block @@ -33,8 +32,10 @@ type CmmLive = RegSet  -- | The dataflow lattice  liveLattice :: DataflowLattice CmmLive  liveLattice = DataflowLattice "live LocalReg's" emptyRegSet add -    where add _ (OldFact old) (NewFact new) = case unionUniqSets old new of -            join -> (changeIf $ sizeUniqSet join > sizeUniqSet old, join) +    where add _ (OldFact old) (NewFact new) = +               (changeIf $ sizeRegSet join > sizeRegSet old, join) +              where !join = plusRegSet old new +  -- | A mapping from block labels to the variables live on entry  type BlockEntryLiveness = BlockEnv CmmLive @@ -43,16 +44,17 @@ type BlockEntryLiveness = BlockEnv CmmLive  -- | Calculated liveness info for a CmmGraph  ----------------------------------------------------------------------------- -cmmLiveness :: CmmGraph -> FuelUniqSM BlockEntryLiveness +cmmLiveness :: CmmGraph -> BlockEntryLiveness  cmmLiveness graph = -  liftM check $ liftM snd $ dataflowPassBwd graph [] $ analBwd liveLattice xferLive +  check $ dataflowAnalBwd graph [] $ analBwd liveLattice xferLive    where entry = g_entry graph -        check facts = noLiveOnEntry entry (expectJust "check" $ mapLookup entry facts) facts +        check facts = noLiveOnEntry entry +                        (expectJust "check" $ mapLookup entry facts) facts  -- | On entry to the procedure, there had better not be any LocalReg's live-in.  noLiveOnEntry :: BlockId -> CmmLive -> a -> a  noLiveOnEntry bid in_fact x = -  if isEmptyUniqSet in_fact then x +  if nullRegSet in_fact then x    else pprPanic "LocalReg's live-in to graph" (ppr bid <+> ppr in_fact)  -- | The transfer equations use the traditional 'gen' and 'kill' @@ -60,42 +62,42 @@ noLiveOnEntry bid in_fact x =  gen  :: UserOfLocalRegs a    => a -> RegSet -> RegSet  gen  a live = foldRegsUsed extendRegSet      live a  kill :: DefinerOfLocalRegs a => a -> RegSet -> RegSet -kill a live = foldRegsDefd delOneFromUniqSet live a +kill a live = foldRegsDefd deleteFromRegSet live a -gen_kill :: (DefinerOfLocalRegs a, UserOfLocalRegs a) => a -> CmmLive -> CmmLive +gen_kill :: (DefinerOfLocalRegs a, UserOfLocalRegs a) +          => a -> CmmLive -> CmmLive  gen_kill a = gen a . kill a  -- | The transfer function --- EZY: Bits of this analysis are duplicated in CmmSpillReload, though --- it's not really easy to efficiently reuse all of this.  Keep in mind --- if you need to update this analysis.  xferLive :: BwdTransfer CmmNode CmmLive  xferLive = mkBTransfer3 fst mid lst    where fst _ f = f          mid :: CmmNode O O -> CmmLive -> CmmLive          mid n f = gen_kill n f          lst :: CmmNode O C -> FactBase CmmLive -> CmmLive -        -- slightly inefficient: kill is unnecessary for emptyRegSet -        lst n f = gen_kill n -                $ case n of CmmCall{}        -> emptyRegSet -                            CmmForeignCall{} -> emptyRegSet -                            _                -> joinOutFacts liveLattice n f +        lst n f = gen_kill n $ joinOutFacts liveLattice n f  -----------------------------------------------------------------------------  -- Removing assignments to dead variables  ----------------------------------------------------------------------------- -removeDeadAssignments :: CmmGraph -> FuelUniqSM CmmGraph +removeDeadAssignments :: CmmGraph -> FuelUniqSM (CmmGraph, BlockEnv CmmLive)  removeDeadAssignments g = -   liftM fst $ dataflowPassBwd g [] $ analRewBwd liveLattice xferLive rewrites -   where rewrites = deepBwdRw3 nothing middle nothing -         -- Beware: deepBwdRw with one polymorphic function seems more reasonable here, -         -- but GHC panics while compiling, see bug #4045. +   dataflowPassBwd g [] $ analRewBwd liveLattice xferLive rewrites +   where rewrites = mkBRewrite3 nothing middle nothing +         -- SDM: no need for deepBwdRw here, we only rewrite to empty +         -- Beware: deepBwdRw with one polymorphic function seems more +         -- reasonable here, but GHC panics while compiling, see bug +         -- #4045.           middle :: CmmNode O O -> Fact O CmmLive -> CmmReplGraph O O -         middle (CmmAssign (CmmLocal reg') _) live | not (reg' `elemRegSet` live) = return $ Just emptyGraph +         middle (CmmAssign (CmmLocal reg') _) live +                 | not (reg' `elemRegSet` live) +                 = return $ Just emptyGraph           -- XXX maybe this should be somewhere else... -         middle (CmmAssign lhs (CmmReg rhs))   _ | lhs == rhs = return $ Just emptyGraph -         middle (CmmStore lhs (CmmLoad rhs _)) _ | lhs == rhs = return $ Just emptyGraph +         middle (CmmAssign lhs (CmmReg rhs))   _ | lhs == rhs +                 = return $ Just emptyGraph +         middle (CmmStore lhs (CmmLoad rhs _)) _ | lhs == rhs +                 = return $ Just emptyGraph           middle _ _ = return Nothing           nothing :: CmmNode e x -> Fact x CmmLive -> CmmReplGraph e x diff --git a/compiler/cmm/CmmNode.hs b/compiler/cmm/CmmNode.hs index 4844af9d9a..cd46794580 100644 --- a/compiler/cmm/CmmNode.hs +++ b/compiler/cmm/CmmNode.hs @@ -18,7 +18,7 @@ module CmmNode (       CmmNode(..), ForeignHint(..), CmmFormal, CmmActual,       UpdFrameOffset, Convention(..), ForeignConvention(..), ForeignTarget(..),       mapExp, mapExpDeep, wrapRecExp, foldExp, foldExpDeep, wrapRecExpf, -     mapExpM, mapExpDeepM, wrapRecExpM +     mapExpM, mapExpDeepM, wrapRecExpM, mapSuccessors    ) where  import CmmExpr @@ -35,15 +35,17 @@ import Prelude hiding (succ)  ------------------------  -- CmmNode +#define ULabel {-# UNPACK #-} !Label +  data CmmNode e x where -  CmmEntry :: Label -> CmmNode C O +  CmmEntry :: ULabel -> CmmNode C O    CmmComment :: FastString -> CmmNode O O -  CmmAssign :: CmmReg -> CmmExpr -> CmmNode O O +  CmmAssign :: !CmmReg -> !CmmExpr -> CmmNode O O      -- Assign to register -  CmmStore :: CmmExpr -> CmmExpr -> CmmNode O O +  CmmStore :: !CmmExpr -> !CmmExpr -> CmmNode O O      -- Assign to memory location.  Size is      -- given by cmmExprType of the rhs. @@ -60,11 +62,12 @@ data CmmNode e x where        --            bug for what can be put in arguments, see        --            Note [Register Parameter Passing] -  CmmBranch :: Label -> CmmNode O C  -- Goto another block in the same procedure +  CmmBranch :: ULabel -> CmmNode O C +                                   -- Goto another block in the same procedure    CmmCondBranch :: {                 -- conditional branch        cml_pred :: CmmExpr, -      cml_true, cml_false :: Label +      cml_true, cml_false :: ULabel    } -> CmmNode O C    CmmSwitch :: CmmExpr -> [Maybe Label] -> CmmNode O C -- Table branch @@ -78,6 +81,11 @@ data CmmNode e x where        cml_cont :: Maybe Label,            -- Label of continuation (Nothing for return or tail call) +          -- +          -- Note [Continuation BlockId]: these BlockIds are called +          -- Continuation BlockIds, and are the only BlockIds that can +          -- occur in CmmExprs, namely as (CmmLit (CmmBlock b)) or +          -- (CmmStackSlot (Young b) _).  -- ToDO: add this:  --       cml_args_regs :: [GlobalReg], @@ -117,7 +125,7 @@ data CmmNode e x where        tgt   :: ForeignTarget,   -- call target and convention        res   :: [CmmFormal],     -- zero or more results        args  :: [CmmActual],     -- zero or more arguments; see Note [Register parameter passing] -      succ  :: Label,           -- Label of continuation +      succ  :: ULabel,          -- Label of continuation        updfr :: UpdFrameOffset,  -- where the update frame is (for building infotable)        intrbl:: Bool             -- whether or not the call is interruptible    } -> CmmNode O C @@ -218,14 +226,6 @@ data Convention    | GC               -- Entry to the garbage collector: uses the node reg!    | PrimOpCall       -- Calling prim ops    | PrimOpReturn     -- Returning from prim ops -  | Foreign          -- Foreign call/return -        ForeignConvention -  | Private -        -- Used for control transfers within a (pre-CPS) procedure All -        -- jump sites known, never pushed on the stack (hence no SRT) -        -- You can choose whatever calling convention you please -        -- (provided you make sure all the call sites agree)! -        -- This data type eventually to be extended to record the convention.    deriving( Eq )  data ForeignConvention @@ -283,37 +283,6 @@ instance DefinerOfLocalRegs (CmmNode e x) where            fold f z n = foldRegsDefd f z n -instance UserOfSlots (CmmNode e x) where -  foldSlotsUsed f z n = case n of -    CmmAssign _ expr -> fold f z expr -    CmmStore addr rval -> fold f (fold f z addr) rval -    CmmUnsafeForeignCall _ _ args -> fold f z args -    CmmCondBranch expr _ _ -> fold f z expr -    CmmSwitch expr _ -> fold f z expr -    CmmCall {cml_target=tgt} -> fold f z tgt -    CmmForeignCall {tgt=tgt, args=args} -> fold f (fold f z tgt) args -    _ -> z -    where fold :: forall a b. -                       UserOfSlots a => -                       (b -> SubArea -> b) -> b -> a -> b -          fold f z n = foldSlotsUsed f z n - -instance UserOfSlots ForeignTarget where -  foldSlotsUsed  f z (ForeignTarget e _) = foldSlotsUsed f z e -  foldSlotsUsed _f z (PrimTarget _)      = z - -instance DefinerOfSlots (CmmNode e x) where -  foldSlotsDefd f z n = case n of -    CmmStore (CmmStackSlot a i) expr -> f z (a, i, widthInBytes $ typeWidth $ cmmExprType expr) -    CmmForeignCall {res=res} -> fold f z $ map foreign_call_slot res -    _ -> z -    where -          fold :: forall a b. -                  DefinerOfSlots a => -                  (b -> SubArea -> b) -> b -> a -> b -          fold f z n = foldSlotsDefd f z n -          foreign_call_slot r = case widthInBytes $ typeWidth $ localRegType r of w -> (RegSlot r, w, w) -  -----------------------------------  -- mapping Expr in CmmNode @@ -416,4 +385,20 @@ foldExp f (CmmCall {cml_target=tgt}) z            = f tgt z  foldExp f (CmmForeignCall {tgt=tgt, args=args}) z = foldr f (foldExpForeignTarget f tgt z) args  foldExpDeep :: (CmmExpr -> z -> z) -> CmmNode e x -> z -> z -foldExpDeep f = foldExp $ wrapRecExpf f +foldExpDeep f = foldExp go +  where -- go :: CmmExpr -> z -> z +        go e@(CmmMachOp _ es) z = gos es $! f e z +        go e@(CmmLoad addr _) z = go addr $! f e z +        go e                  z = f e z + +        gos [] z = z +        gos (e:es) z = gos es $! f e z + +-- ----------------------------------------------------------------------------- + +mapSuccessors :: (Label -> Label) -> CmmNode O C -> CmmNode O C +mapSuccessors f (CmmBranch bid)        = CmmBranch (f bid) +mapSuccessors f (CmmCondBranch p y n)  = CmmCondBranch p (f y) (f n) +mapSuccessors f (CmmSwitch e arms)     = CmmSwitch e (map (fmap f) arms) +mapSuccessors f n = n + diff --git a/compiler/cmm/CmmOpt.hs b/compiler/cmm/CmmOpt.hs index 8cc18fc1ca..7c7ed393d9 100644 --- a/compiler/cmm/CmmOpt.hs +++ b/compiler/cmm/CmmOpt.hs @@ -145,7 +145,7 @@ To inline _smi:  -}  countUses :: UserOfLocalRegs a => a -> UniqFM Int -countUses a = foldRegsUsed (\m r -> addToUFM m r (count m r + 1)) emptyUFM a +countUses a = foldRegsUsed (\m r -> addToUFM_C (+) m r 1) emptyUFM a    where count m r = lookupWithDefaultUFM m (0::Int) r  cmmMiniInline :: DynFlags -> [CmmBasicBlock] -> [CmmBasicBlock] @@ -157,25 +157,16 @@ cmmMiniInlineStmts :: DynFlags -> UniqFM Int -> [CmmStmt] -> [CmmStmt]  cmmMiniInlineStmts _      _    [] = []  cmmMiniInlineStmts dflags uses (stmt@(CmmAssign (CmmLocal (LocalReg u _)) expr) : stmts)          -- not used: just discard this assignment -  | Nothing <- lookupUFM uses u -  = cmmMiniInlineStmts dflags uses stmts +  | 0 <- lookupWithDefaultUFM uses 0 u +  = cmmMiniInlineStmts uses stmts -        -- used (literal): try to inline at all the use sites -  | Just n <- lookupUFM uses u, isLit expr -  = -     ncgDebugTrace ("nativeGen: inlining " ++ showSDoc dflags (pprStmt stmt)) $ -     case lookForInlineLit u expr stmts of -         (m, stmts') -             | n == m -> cmmMiniInlineStmts dflags (delFromUFM uses u) stmts' -             | otherwise -> -                 stmt : cmmMiniInlineStmts dflags (adjustUFM (\x -> x - m) uses u) stmts' - -        -- used (foldable to literal): try to inline at all the use sites +        -- used (foldable to small thing): try to inline at all the use sites    | Just n <- lookupUFM uses u, -    e@(CmmLit _) <- wrapRecExp foldExp expr +    e <- wrapRecExp foldExp expr, +    isTiny e    = -     ncgDebugTrace ("nativeGen: inlining " ++ showSDoc dflags (pprStmt stmt)) $ -     case lookForInlineLit u e stmts of +     ncgDebugTrace ("nativeGen: inlining " ++ showSDoc (pprStmt stmt)) $ +     case lookForInlineMany u e stmts of           (m, stmts')               | n == m -> cmmMiniInlineStmts dflags (delFromUFM uses u) stmts'               | otherwise -> @@ -188,6 +179,11 @@ cmmMiniInlineStmts dflags uses (stmt@(CmmAssign (CmmLocal (LocalReg u _)) expr)       ncgDebugTrace ("nativeGen: inlining " ++ showSDoc dflags (pprStmt stmt)) $       cmmMiniInlineStmts dflags uses stmts'   where +  isTiny (CmmLit _) = True +  isTiny (CmmReg (CmmGlobal _)) = True +         -- not CmmLocal: that might invalidate the usage analysis results +  isTiny _ = False +    platform = targetPlatform dflags    foldExp (CmmMachOp op args) = cmmMachOpFold platform op args    foldExp e = e @@ -201,26 +197,28 @@ cmmMiniInlineStmts platform uses (stmt:stmts)  -- register, and a list of statements.  Inlines the expression at all  -- use sites of the register.  Returns the number of substituations  -- made and the, possibly modified, list of statements. -lookForInlineLit :: Unique -> CmmExpr -> [CmmStmt] -> (Int, [CmmStmt]) -lookForInlineLit _ _ [] = (0, []) -lookForInlineLit u expr stmts@(stmt : rest) -  | Just n <- lookupUFM (countUses stmt) u -  = case lookForInlineLit u expr rest of -      (m, stmts) -> let z = n + m -                    in z `seq` (z, inlineStmt u expr stmt : stmts) - -  | ok_to_skip -  = case lookForInlineLit u expr rest of +lookForInlineMany :: Unique -> CmmExpr -> [CmmStmt] -> (Int, [CmmStmt]) +lookForInlineMany u expr stmts = lookForInlineMany' u expr regset stmts +    where regset = foldRegsUsed extendRegSet emptyRegSet expr + +lookForInlineMany' :: Unique -> CmmExpr -> RegSet -> [CmmStmt] -> (Int, [CmmStmt]) +lookForInlineMany' _ _ _ [] = (0, []) +lookForInlineMany' u expr regset stmts@(stmt : rest) +  | Just n <- lookupUFM (countUses stmt) u, okToInline expr stmt +  = let stmt' = inlineStmt u expr stmt in +    if okToSkip stmt' u expr regset +       then case lookForInlineMany' u expr regset rest of +                       (m, stmts) -> let z = n + m +                                     in z `seq` (z, stmt' : stmts) +       else (n, stmt' : rest) + +  | okToSkip stmt u expr regset +  = case lookForInlineMany' u expr regset rest of        (n, stmts) -> (n, stmt : stmts)    | otherwise    = (0, stmts) -  where -    -- We skip over assignments to registers, unless the register -    -- being assigned to is the one we're inlining. -    ok_to_skip = case stmt of -        CmmAssign (CmmLocal (LocalReg u' _)) _ | u' == u -> False -        _other -> True +  lookForInline :: Unique -> CmmExpr -> [CmmStmt] -> Maybe [CmmStmt]  lookForInline u expr stmts = lookForInline' u expr regset stmts @@ -229,10 +227,10 @@ lookForInline u expr stmts = lookForInline' u expr regset stmts  lookForInline' :: Unique -> CmmExpr -> RegSet -> [CmmStmt] -> Maybe [CmmStmt]  lookForInline' _ _    _      [] = panic "lookForInline' []"  lookForInline' u expr regset (stmt : rest) -  | Just 1 <- lookupUFM (countUses stmt) u, ok_to_inline +  | Just 1 <- lookupUFM (countUses stmt) u, okToInline expr stmt    = Just (inlineStmt u expr stmt : rest) -  | ok_to_skip +  | okToSkip stmt u expr regset    = case lookForInline' u expr regset rest of             Nothing    -> Nothing             Just stmts -> Just (stmt:stmts) @@ -240,31 +238,36 @@ lookForInline' u expr regset (stmt : rest)    | otherwise     = Nothing -  where -        -- we don't inline into CmmCall if the expression refers to global -        -- registers.  This is a HACK to avoid global registers clashing with -        -- C argument-passing registers, really the back-end ought to be able -        -- to handle it properly, but currently neither PprC nor the NCG can -        -- do it.  See also CgForeignCall:load_args_into_temps. -    ok_to_inline = case stmt of -                     CmmCall{} -> hasNoGlobalRegs expr -                     _ -> True - -   -- Expressions aren't side-effecting.  Temporaries may or may not -   -- be single-assignment depending on the source (the old code -   -- generator creates single-assignment code, but hand-written Cmm -   -- and Cmm from the new code generator is not single-assignment.) -   -- So we do an extra check to make sure that the register being -   -- changed is not one we were relying on.  I don't know how much of a -   -- performance hit this is (we have to create a regset for every -   -- instruction.) -- EZY -    ok_to_skip = case stmt of -                 CmmNop -> True -                 CmmComment{} -> True -                 CmmAssign (CmmLocal r@(LocalReg u' _)) _rhs | u' /= u && not (r `elemRegSet` regset) -> True -                 CmmAssign g@(CmmGlobal _) _rhs -> not (g `regUsedIn` expr) -                 _other -> False +-- we don't inline into CmmCall if the expression refers to global +-- registers.  This is a HACK to avoid global registers clashing with +-- C argument-passing registers, really the back-end ought to be able +-- to handle it properly, but currently neither PprC nor the NCG can +-- do it.  See also CgForeignCall:load_args_into_temps. +okToInline :: CmmExpr -> CmmStmt -> Bool +okToInline expr CmmCall{} = hasNoGlobalRegs expr +okToInline _ _ = True + +-- Expressions aren't side-effecting.  Temporaries may or may not +-- be single-assignment depending on the source (the old code +-- generator creates single-assignment code, but hand-written Cmm +-- and Cmm from the new code generator is not single-assignment.) +-- So we do an extra check to make sure that the register being +-- changed is not one we were relying on.  I don't know how much of a +-- performance hit this is (we have to create a regset for every +-- instruction.) -- EZY +okToSkip stmt u expr regset +   = case stmt of +         CmmNop -> True +         CmmComment{} -> True +         CmmAssign (CmmLocal r@(LocalReg u' _)) _rhs | u' /= u && not (r `elemRegSet` regset) -> True +         CmmAssign g@(CmmGlobal _) _rhs -> not (g `regUsedIn` expr) +         CmmStore _ _ -> not_a_load expr +         _other -> False +  where +    not_a_load (CmmMachOp _ args) = all not_a_load args +    not_a_load (CmmLoad _ _) = False +    not_a_load _ = True  inlineStmt :: Unique -> CmmExpr -> CmmStmt -> CmmStmt  inlineStmt u a (CmmAssign r e) = CmmAssign r (inlineExpr u a e) diff --git a/compiler/cmm/CmmParse.y b/compiler/cmm/CmmParse.y index 075ed22ea9..f46d49e022 100644 --- a/compiler/cmm/CmmParse.y +++ b/compiler/cmm/CmmParse.y @@ -230,35 +230,31 @@ lits	:: { [ExtFCode CmmExpr] }  cmmproc :: { ExtCode }  -- TODO: add real SRT/info tables to parsed Cmm -	: info maybe_formals_without_hints maybe_gc_block maybe_frame '{' body '}' -		{ do ((entry_ret_label, info, live, formals, gc_block, frame), stmts) <- +        : info maybe_formals_without_hints '{' body '}' +                { do ((entry_ret_label, info, live, formals), stmts) <-  		       getCgStmtsEC' $ loopDecls $ do {  		         (entry_ret_label, info, live) <- $1;  		         formals <- sequence $2; -		         gc_block <- $3; -		         frame <- $4; -		         $6; -		         return (entry_ret_label, info, live, formals, gc_block, frame) } +                         $4; +                         return (entry_ret_label, info, live, formals) }  		     blks <- code (cgStmtsToBlocks stmts) -		     code (emitInfoTableAndCode entry_ret_label (CmmInfo gc_block frame info) formals blks) } +                     code (emitInfoTableAndCode entry_ret_label info formals blks) }  	| info maybe_formals_without_hints ';'  		{ do (entry_ret_label, info, live) <- $1;  		     formals <- sequence $2; -		     code (emitInfoTableAndCode entry_ret_label (CmmInfo Nothing Nothing info) formals []) } +                     code (emitInfoTableAndCode entry_ret_label info formals []) } -	| NAME maybe_formals_without_hints maybe_gc_block maybe_frame '{' body '}' +        | NAME maybe_formals_without_hints '{' body '}'  		{% withThisPackage $ \pkg ->  		   do	newFunctionName $1 pkg -		   	((formals, gc_block, frame), stmts) <- +                        (formals, stmts) <-  			 	getCgStmtsEC' $ loopDecls $ do {  		          		formals <- sequence $2; -		          		gc_block <- $3; -			  		frame <- $4; -		          		$6; -		          		return (formals, gc_block, frame) } +                                        $4; +                                        return formals }  			blks <- code (cgStmtsToBlocks stmts) -			code (emitProc (CmmInfo gc_block frame CmmNonInfoTable) (mkCmmCodeLabel pkg $1) formals blks) } +                        code (emitProc CmmNonInfoTable (mkCmmCodeLabel pkg $1) formals blks) }  info	:: { ExtFCode (CLabel, CmmInfoTable, [Maybe LocalReg]) }  	: 'INFO_TABLE' '(' NAME ',' INT ',' INT ',' INT ',' STRING ',' STRING ')' @@ -599,18 +595,7 @@ formals_without_hints :: { [ExtFCode LocalReg] }  formal_without_hint :: { ExtFCode LocalReg }  	: type NAME		{ newLocal $1 $2 } -maybe_frame :: { ExtFCode (Maybe UpdateFrame) } -	: {- empty -}			{ return Nothing } -	| 'jump' expr '(' exprs0 ')'	{ do { target <- $2; -					       args <- sequence $4; -					       return $ Just (UpdateFrame target args) } } - -maybe_gc_block :: { ExtFCode (Maybe BlockId) } -	: {- empty -}			{ return Nothing } -	| 'goto' NAME -		{ do l <- lookupLabel $2; return (Just l) } - -type	:: { CmmType } +type    :: { CmmType }  	: 'bits8'		{ b8 }  	| typenot8		{ $1 } @@ -1073,7 +1058,8 @@ parseCmmFile dflags filename = do          let msg = mkPlainErrMsg dflags span err          return ((emptyBag, unitBag msg), Nothing)      POk pst code -> do -        cmm <- initC dflags no_module (getCmm (unEC code initEnv [] >> return ())) +        st <- initC +        let (cmm,_) = runC dflags no_module st (getCmm (unEC code initEnv [] >> return ()))          let ms = getMessages pst          if (errorsFound dflags ms)           then return (ms, Nothing) diff --git a/compiler/cmm/CmmPipeline.hs b/compiler/cmm/CmmPipeline.hs index 7361bbf385..adc27ab1ff 100644 --- a/compiler/cmm/CmmPipeline.hs +++ b/compiler/cmm/CmmPipeline.hs @@ -11,15 +11,17 @@ module CmmPipeline (  import CLabel  import Cmm +import CmmLint  import CmmLive  import CmmBuildInfoTables  import CmmCommonBlockElim  import CmmProcPoint -import CmmSpillReload  import CmmRewriteAssignments -import CmmStackLayout  import CmmContFlowOpt  import OptimizationFuel +import CmmLayoutStack +import Hoopl +import CmmUtils  import DynFlags  import ErrUtils @@ -28,6 +30,8 @@ import Data.Maybe  import Control.Monad  import Data.Map (Map)  import qualified Data.Map as Map +import Data.Set (Set) +import qualified Data.Set as Set  import Outputable  import StaticFlags @@ -53,32 +57,28 @@ import StaticFlags  --    we actually need to do the initial pass.  cmmPipeline  :: HscEnv -- Compilation env including                         -- dynamic flags: -dcmm-lint -ddump-cps-cmm -             -> (TopSRT, [CmmGroup])    -- SRT table and accumulating list of compiled procs +             -> TopSRT     -- SRT table and accumulating list of compiled procs               -> CmmGroup             -- Input C-- with Procedures -             -> IO (TopSRT, [CmmGroup]) -- Output CPS transformed C-- -cmmPipeline hsc_env (topSRT, rst) prog = +             -> IO (TopSRT, CmmGroup) -- Output CPS transformed C-- +cmmPipeline hsc_env topSRT prog =    do let dflags = hsc_dflags hsc_env       --       showPass dflags "CPSZ" -     let tops = runCmmContFlowOpts prog -     (cafEnvs, tops) <- liftM unzip $ mapM (cpsTop hsc_env) tops +     (cafEnvs, tops) <- {-# SCC "tops" #-} liftM unzip $ mapM (cpsTop hsc_env) prog       -- tops :: [[(CmmDecl,CAFSet]]  (one list per group) -     let topCAFEnv = mkTopCAFInfo (concat cafEnvs) +     let topCAFEnv = {-# SCC "topCAFEnv" #-} mkTopCAFInfo (concat cafEnvs)       -- folding over the groups -     (topSRT, tops) <- foldM (toTops hsc_env topCAFEnv) (topSRT, []) tops +     (topSRT, tops) <- {-# SCC "toTops" #-} foldM (toTops hsc_env topCAFEnv) (topSRT, []) tops       let cmms :: CmmGroup           cmms = reverse (concat tops)       dumpIfSet_dyn dflags Opt_D_dump_cps_cmm "Post CPS Cmm" (ppr cmms) -     -- SRT is not affected by control flow optimization pass -     let prog' = runCmmContFlowOpts cmms - -     return (topSRT, prog' : rst) +     return (topSRT, cmms)  {- [Note global fuel]  ~~~~~~~~~~~~~~~~~~~~~ @@ -92,75 +92,63 @@ global to one compiler session.  -- -ddump-cmmz  cpsTop :: HscEnv -> CmmDecl -> IO ([(CLabel, CAFSet)], [(CAFSet, CmmDecl)]) -cpsTop _ p@(CmmData {}) = return ([], [(Map.empty, p)]) +cpsTop _ p@(CmmData {}) = return ([], [(Set.empty, p)])  cpsTop hsc_env (CmmProc h@(TopInfo {stack_info=StackInfo {arg_space=entry_off}}) l g) =      do -       -- Why bother doing these early: dualLivenessWithInsertion, -       -- insertLateReloads, rewriteAssignments? +       ----------- Control-flow optimisations --------------- +       g <- {-# SCC "cmmCfgOpts(1)" #-} return $ cmmCfgOpts g +       dump Opt_D_dump_cmmz_cfg "Post control-flow optimsations" g         ----------- Eliminate common blocks ------------------- -       g <- return $ elimCommonBlocks g +       g <- {-# SCC "elimCommonBlocks" #-} return $ elimCommonBlocks g         dump Opt_D_dump_cmmz_cbe "Post common block elimination" g -       -- Any work storing block Labels must be performed _after_ elimCommonBlocks +       -- Any work storing block Labels must be performed _after_ +       -- elimCommonBlocks         ----------- Proc points ------------------- -       let callPPs = callProcPoints g -       procPoints <- run $ minimalProcPointSet (targetPlatform dflags) callPPs g -       g <- run $ addProcPointProtocols callPPs procPoints g -       dump Opt_D_dump_cmmz_proc "Post Proc Points Added" g - -       ----------- Spills and reloads ------------------- -       g <- run $ dualLivenessWithInsertion procPoints g -       dump Opt_D_dump_cmmz_spills "Post spills and reloads" g - -       ----------- Sink and inline assignments ------------------- -       g <- runOptimization $ rewriteAssignments platform g -       dump Opt_D_dump_cmmz_rewrite "Post rewrite assignments" g - -       ----------- Eliminate dead assignments ------------------- -       g <- runOptimization $ removeDeadAssignments g -       dump Opt_D_dump_cmmz_dead "Post remove dead assignments" g - -       ----------- Zero dead stack slots (Debug only) --------------- -       -- Debugging: stubbing slots on death can cause crashes early -       g <- if opt_StubDeadValues -                then run $ stubSlotsOnDeath g -                else return g -       dump Opt_D_dump_cmmz_stub "Post stub dead stack slots" g - -       --------------- Stack layout ---------------- -       slotEnv <- run $ liveSlotAnal g -       let spEntryMap = getSpEntryMap entry_off g -       mbpprTrace "live slot analysis results: " (ppr slotEnv) $ return () -       let areaMap = layout procPoints spEntryMap slotEnv entry_off g -       mbpprTrace "areaMap" (ppr areaMap) $ return () - -       ------------  Manifest the stack pointer -------- -       g  <- run $ manifestSP spEntryMap areaMap entry_off g -       dump Opt_D_dump_cmmz_sp "Post manifestSP" g -       -- UGH... manifestSP can require updates to the procPointMap. -       -- We can probably do something quicker here for the update... +       let callPPs = {-# SCC "callProcPoints" #-} callProcPoints g +       procPoints <- {-# SCC "minimalProcPointSet" #-} run $ +                     minimalProcPointSet (targetPlatform dflags) callPPs g + +       ----------- Layout the stack and manifest Sp --------------- +       -- (also does: removeDeadAssignments, and lowerSafeForeignCalls) +       (g, stackmaps) <- {-# SCC "layoutStack" #-} +                         run $ cmmLayoutStack procPoints entry_off g +       dump Opt_D_dump_cmmz_sp "Layout Stack" g + +       g <- {-# SCC "sink" #-} run $ cmmSink g +       dump Opt_D_dump_cmmz_rewrite "Sink assignments" g + +--       ----------- Sink and inline assignments ------------------- +--       g <- {-# SCC "rewriteAssignments" #-} runOptimization $ +--            rewriteAssignments platform g +--       dump Opt_D_dump_cmmz_rewrite "Post rewrite assignments" g         ------------- Split into separate procedures ------------ -       procPointMap  <- run $ procPointAnalysis procPoints g -       dump Opt_D_dump_cmmz_procmap "procpoint map" procPointMap -       gs <- run $ splitAtProcPoints l callPPs procPoints procPointMap -                                       (CmmProc h l g) -       mapM_ (dump Opt_D_dump_cmmz_split "Post splitting") gs - -       ------------- More CAFs and foreign calls ------------ -       cafEnv <- run $ cafAnal g -       let localCAFs = catMaybes $ map (localCAFInfo cafEnv) gs +       procPointMap  <- {-# SCC "procPointAnalysis" #-} run $ +                        procPointAnalysis procPoints g +       dumpWith dflags Opt_D_dump_cmmz_procmap "procpoint map" procPointMap +       gs <- {-# SCC "splitAtProcPoints" #-} run $ +             splitAtProcPoints l callPPs procPoints procPointMap (CmmProc h l g) +       dumps Opt_D_dump_cmmz_split "Post splitting" gs + +       ------------- More CAFs ------------------------------ +       let cafEnv = {-# SCC "cafAnal" #-} cafAnal platform g +       let localCAFs = {-# SCC "localCAFs" #-} catMaybes $ map (localCAFInfo platform cafEnv) gs         mbpprTrace "localCAFs" (ppr localCAFs) $ return () -       gs <- run $ mapM (lowerSafeForeignCalls areaMap) gs -       mapM_ (dump Opt_D_dump_cmmz_lower "Post lowerSafeForeignCalls") gs -         -- NO MORE GRAPH TRANSFORMATION AFTER HERE -- JUST MAKING INFOTABLES -       gs <- return $ map (setInfoTableStackMap slotEnv areaMap) gs -       mapM_ (dump Opt_D_dump_cmmz_info "after setInfoTableStackMap") gs -       gs <- return $ map (bundleCAFs cafEnv) gs -       mapM_ (dump Opt_D_dump_cmmz_cafs "after bundleCAFs") gs +       gs <- {-# SCC "setInfoTableStackMap" #-} +             return $ map (setInfoTableStackMap stackmaps) gs +       dumps Opt_D_dump_cmmz_info "after setInfoTableStackMap" gs + +       ----------- Control-flow optimisations --------------- +       gs <- {-# SCC "cmmCfgOpts(2)" #-} return $ map cmmCfgOptsProc gs +       dumps Opt_D_dump_cmmz_cfg "Post control-flow optimsations" gs + +       gs <- {-# SCC "bundleCAFs" #-} return $ map (bundleCAFs cafEnv) gs +       dumps Opt_D_dump_cmmz_cafs "after bundleCAFs" gs +         return (localCAFs, gs)                -- gs        :: [ (CAFSet, CmmDecl) ] @@ -168,21 +156,40 @@ cpsTop hsc_env (CmmProc h@(TopInfo {stack_info=StackInfo {arg_space=entry_off}})    where dflags = hsc_dflags hsc_env          platform = targetPlatform dflags -        mbpprTrace x y z = if dopt Opt_D_dump_cmmz dflags then pprTrace x y z else z -        dump f = dumpWith ppr f -        dumpWith pprFun f txt g = do -            -- ToDo: No easy way of say "dump all the cmmz, *and* split -            -- them into files."  Also, -ddump-cmmz doesn't play nicely -            -- with -ddump-to-file, since the headers get omitted. -            dumpIfSet_dyn dflags f txt (pprFun g) -            when (not (dopt f dflags)) $ -                dumpIfSet_dyn dflags Opt_D_dump_cmmz txt (pprFun g) +        mbpprTrace x y z | dopt Opt_D_dump_cmmz dflags = pprTrace x y z +                         | otherwise = z +        dump = dumpGraph dflags + +        dumps flag name +           = mapM_ (dumpWith dflags flag name) +          -- Runs a required transformation/analysis          run = runInfiniteFuelIO (hsc_OptFuel hsc_env)          -- Runs an optional transformation/analysis (and should          -- thus be subject to optimization fuel)          runOptimization = runFuelIO (hsc_OptFuel hsc_env) + +dumpGraph :: DynFlags -> DynFlag -> String -> CmmGraph -> IO () +dumpGraph dflags flag name g = do +  when (dopt Opt_DoCmmLinting dflags) $ do_lint g +  dumpWith dflags flag name g + where +  do_lint g = case cmmLintGraph (targetPlatform dflags) g of +                 Just err -> do { printDump err +                                ; ghcExit dflags 1 +                                } +                 Nothing  -> return () + +dumpWith :: Outputable a => DynFlags -> DynFlag -> String -> a -> IO () +dumpWith dflags flag txt g = do +         -- ToDo: No easy way of say "dump all the cmmz, *and* split +         -- them into files."  Also, -ddump-cmmz doesn't play nicely +         -- with -ddump-to-file, since the headers get omitted. +   dumpIfSet_dyn dflags flag txt (ppr g) +   when (not (dopt flag dflags)) $ +      dumpIfSet_dyn dflags Opt_D_dump_cmmz txt (ppr g) +  -- This probably belongs in CmmBuildInfoTables?  -- We're just finishing the job here: once we know what CAFs are defined  -- in non-static closures, we can build the SRTs. diff --git a/compiler/cmm/CmmProcPoint.hs b/compiler/cmm/CmmProcPoint.hs index f50d850b3a..8dda51b9b7 100644 --- a/compiler/cmm/CmmProcPoint.hs +++ b/compiler/cmm/CmmProcPoint.hs @@ -4,7 +4,7 @@  module CmmProcPoint      ( ProcPointSet, Status(..)      , callProcPoints, minimalProcPointSet -    , addProcPointProtocols, splitAtProcPoints, procPointAnalysis +    , splitAtProcPoints, procPointAnalysis      )  where @@ -28,7 +28,7 @@ import Platform  import UniqSet  import UniqSupply -import Compiler.Hoopl +import Hoopl  import qualified Data.Map as Map @@ -103,34 +103,50 @@ instance Outputable Status where                      (hsep $ punctuate comma $ map ppr $ setElems ps)    ppr ProcPoint = text "<procpt>" -lattice :: DataflowLattice Status -lattice = DataflowLattice "direct proc-point reachability" unreached add_to -    where unreached = ReachedBy setEmpty -          add_to _ (OldFact ProcPoint) _ = (NoChange, ProcPoint) -          add_to _ _ (NewFact ProcPoint) = (SomeChange, ProcPoint) -- because of previous case -          add_to _ (OldFact (ReachedBy p)) (NewFact (ReachedBy p')) = -              let union = setUnion p' p -              in  if setSize union > setSize p then (SomeChange, ReachedBy union) -                                               else (NoChange, ReachedBy p)  -------------------------------------------------- +-- Proc point analysis + +procPointAnalysis :: ProcPointSet -> CmmGraph -> FuelUniqSM (BlockEnv Status) +-- Once you know what the proc-points are, figure out +-- what proc-points each block is reachable from +procPointAnalysis procPoints g = +  -- pprTrace "procPointAnalysis" (ppr procPoints) $ +  dataflowAnalFwdBlocks g initProcPoints $ analFwd lattice forward +  where initProcPoints = [(id, ProcPoint) | id <- setElems procPoints] +  -- transfer equations  forward :: FwdTransfer CmmNode Status -forward = mkFTransfer3 first middle ((mkFactBase lattice . ) . last) -    where first :: CmmNode C O -> Status -> Status -          first (CmmEntry id) ProcPoint = ReachedBy $ setSingleton id -          first  _ x = x +forward = mkFTransfer3 first middle last +    where +      first :: CmmNode C O -> Status -> Status +      first (CmmEntry id) ProcPoint = ReachedBy $ setSingleton id +      first  _ x = x -          middle _ x = x +      middle _ x = x -          last :: CmmNode O C -> Status -> [(Label, Status)] -          last (CmmCall {cml_cont = Just k}) _ = [(k, ProcPoint)] -          last (CmmForeignCall {succ = k})   _ = [(k, ProcPoint)] -          last l x = map (\id -> (id, x)) (successors l) +      last :: CmmNode O C -> Status -> FactBase Status +      last l x = mkFactBase lattice $ map (\id -> (id, x)) (successors l) --- It is worth distinguishing two sets of proc points: --- those that are induced by calls in the original graph --- and those that are introduced because they're reachable from multiple proc points. +lattice :: DataflowLattice Status +lattice = DataflowLattice "direct proc-point reachability" unreached add_to +    where unreached = ReachedBy setEmpty +          add_to _ (OldFact ProcPoint) _ = (NoChange, ProcPoint) +          add_to _ _ (NewFact ProcPoint) = (SomeChange, ProcPoint) +                       -- because of previous case +          add_to _ (OldFact (ReachedBy p)) (NewFact (ReachedBy p')) +             | setSize union > setSize p = (SomeChange, ReachedBy union) +             | otherwise                 = (NoChange, ReachedBy p) +           where +             union = setUnion p' p + +---------------------------------------------------------------------- + +-- It is worth distinguishing two sets of proc points: those that are +-- induced by calls in the original graph and those that are +-- introduced because they're reachable from multiple proc points. +-- +-- Extract the set of Continuation BlockIds, see Note [Continuation BlockIds].  callProcPoints      :: CmmGraph -> ProcPointSet  callProcPoints g = foldGraphBlocks add (setSingleton (g_entry g)) g    where add :: CmmBlock -> BlockSet -> BlockSet @@ -139,21 +155,17 @@ callProcPoints g = foldGraphBlocks add (setSingleton (g_entry g)) g                        CmmForeignCall {succ=k}     -> setInsert k set                        _ -> set -minimalProcPointSet :: Platform -> ProcPointSet -> CmmGraph -> FuelUniqSM ProcPointSet +minimalProcPointSet :: Platform -> ProcPointSet -> CmmGraph +                    -> FuelUniqSM ProcPointSet  -- Given the set of successors of calls (which must be proc-points)  -- figure out the minimal set of necessary proc-points -minimalProcPointSet platform callProcPoints g = extendPPSet platform g (postorderDfs g) callProcPoints - -procPointAnalysis :: ProcPointSet -> CmmGraph -> FuelUniqSM (BlockEnv Status) --- Once you know what the proc-points are, figure out --- what proc-points each block is reachable from -procPointAnalysis procPoints g = -  liftM snd $ dataflowPassFwd g initProcPoints $ analFwd lattice forward -  where initProcPoints = [(id, ProcPoint) | id <- setElems procPoints] +minimalProcPointSet platform callProcPoints g +  = extendPPSet platform g (postorderDfs g) callProcPoints  extendPPSet :: Platform -> CmmGraph -> [CmmBlock] -> ProcPointSet -> FuelUniqSM ProcPointSet  extendPPSet platform g blocks procPoints =      do env <- procPointAnalysis procPoints g +       -- pprTrace "extensPPSet" (ppr env) $ return ()         let add block pps = let id = entryLabel block                             in  case mapLookup id env of                                   Just ProcPoint -> setInsert id pps @@ -179,183 +191,13 @@ extendPPSet platform g blocks procPoints =             pps -> extendPPSet g blocks                      (foldl extendBlockSet procPoints' pps)  -} -       case newPoint of Just id -> -                          if setMember id procPoints' then panic "added old proc pt" -                          else extendPPSet platform g blocks (setInsert id procPoints') -                        Nothing -> return procPoints' - - ------------------------------------------------------------------------- ---                    Computing Proc-Point Protocols                  -- ------------------------------------------------------------------------- +       case newPoint of +         Just id -> +             if setMember id procPoints' +                then panic "added old proc pt" +                else extendPPSet platform g blocks (setInsert id procPoints') +         Nothing -> return procPoints' -{- - -There is one major trick, discovered by Michael Adams, which is that -we want to choose protocols in a way that enables us to optimize away -some continuations.  The optimization is very much like branch-chain -elimination, except that it involves passing results as well as -control.  The idea is that if a call's continuation k does nothing but -CopyIn its results and then goto proc point P, the call's continuation -may be changed to P, *provided* P's protocol is identical to the -protocol for the CopyIn.  We choose protocols to make this so. - -Here's an explanatory example; we begin with the source code (lines -separate basic blocks): - -  ..1..; -  x, y = g(); -  goto P; -  ------- -  P: ..2..; - -Zipperization converts this code as follows: - -  ..1..; -  call g() returns to k; -  ------- -  k: CopyIn(x, y); -     goto P; -  ------- -  P: ..2..; - -What we'd like to do is assign P the same CopyIn protocol as k, so we -can eliminate k: - -  ..1..; -  call g() returns to P; -  ------- -  P: CopyIn(x, y); ..2..; - -Of course, P may be the target of more than one continuation, and -different continuations may have different protocols.  Michael Adams -implemented a voting mechanism, but he thinks a simple greedy -algorithm would be just as good, so that's what we do. - --} - -data Protocol = Protocol Convention [CmmFormal] Area -  deriving Eq -instance Outputable Protocol where -  ppr (Protocol c fs a) = text "Protocol" <+> ppr c <+> ppr fs <+> ppr a - --- | Function 'optimize_calls' chooses protocols only for those proc --- points that are relevant to the optimization explained above. --- The others are assigned by 'add_unassigned', which is not yet clever. - -addProcPointProtocols :: ProcPointSet -> ProcPointSet -> CmmGraph -> FuelUniqSM CmmGraph -addProcPointProtocols callPPs procPoints g = -  do liveness <- cmmLiveness g -     (protos, g') <- optimize_calls liveness g -     blocks'' <- add_CopyOuts protos procPoints g' -     return $ ofBlockMap (g_entry g) blocks'' -    where optimize_calls liveness g =  -- see Note [Separate Adams optimization] -            do let (protos, blocks') = -                       foldGraphBlocks maybe_add_call (mapEmpty, mapEmpty) g -                   protos' = add_unassigned liveness procPoints protos -               let g' = ofBlockMap (g_entry g) (add_CopyIns callPPs protos' blocks') -               return (protos', removeUnreachableBlocks g') -          maybe_add_call :: CmmBlock -> (BlockEnv Protocol, BlockEnv CmmBlock) -                         -> (BlockEnv Protocol, BlockEnv CmmBlock) -          -- ^ If the block is a call whose continuation goes to a proc point -          -- whose protocol either matches the continuation's or is not yet set, -          -- redirect the call (cf 'newblock') and set the protocol if necessary -          maybe_add_call block (protos, blocks) = -              case lastNode block of -                CmmCall tgt (Just k) args res s -                    | Just proto <- mapLookup k protos, -                      Just pee   <- branchesToProcPoint k -                    -> let newblock = replaceLastNode block (CmmCall tgt (Just pee) -                                                                     args res s) -                           changed_blocks   = insertBlock newblock blocks -                           unchanged_blocks = insertBlock block    blocks -                       in case mapLookup pee protos of -                            Nothing -> (mapInsert pee proto protos, changed_blocks) -                            Just proto' -> -                              if proto == proto' then (protos, changed_blocks) -                              else (protos, unchanged_blocks) -                _ -> (protos, insertBlock block blocks) - -          branchesToProcPoint :: BlockId -> Maybe BlockId -          -- ^ Tells whether the named block is just a branch to a proc point -          branchesToProcPoint id = -              let block = mapLookup id (toBlockMap g) `orElse` -                                    panic "branch out of graph" -              in case blockToNodeList block of -                   (_, [], JustC (CmmBranch pee)) | setMember pee procPoints -> Just pee -                   _                                                         -> Nothing - --- | For now, following a suggestion by Ben Lippmeier, we pass all --- live variables as arguments, hoping that a clever register --- allocator might help. - -add_unassigned :: BlockEnv CmmLive -> ProcPointSet -> BlockEnv Protocol -> -                  BlockEnv Protocol -add_unassigned = pass_live_vars_as_args - -pass_live_vars_as_args :: BlockEnv CmmLive -> ProcPointSet -> -                          BlockEnv Protocol -> BlockEnv Protocol -pass_live_vars_as_args _liveness procPoints protos = protos' -  where protos' = setFold addLiveVars protos procPoints -        addLiveVars :: BlockId -> BlockEnv Protocol -> BlockEnv Protocol -        addLiveVars id protos = -            case mapLookup id protos of -              Just _  -> protos -              Nothing -> let live = emptyRegSet -                                    --lookupBlockEnv _liveness id `orElse` -                                    --panic ("no liveness at block " ++ show id) -                             formals = uniqSetToList live -                             prot = Protocol Private formals $ CallArea $ Young id -                         in  mapInsert id prot protos - - --- | Add copy-in instructions to each proc point that did not arise from a call --- instruction. (Proc-points that arise from calls already have their copy-in instructions.) - -add_CopyIns :: ProcPointSet -> BlockEnv Protocol -> BlockEnv CmmBlock -> BlockEnv CmmBlock -add_CopyIns callPPs protos blocks = mapFold maybe_insert_CopyIns mapEmpty blocks -    where maybe_insert_CopyIns block blocks -             | not $ setMember bid callPPs -             , Just (Protocol c fs _area) <- mapLookup bid protos -             = let nodes     = copyInSlot c fs -                   (h, m, l) = blockToNodeList block -               in insertBlock (blockOfNodeList (h, nodes ++ m, l)) blocks -             | otherwise = insertBlock block blocks -           where bid = entryLabel block - - --- | Add a CopyOut node before each procpoint. --- If the predecessor is a call, then the copy outs should already be done by the callee. --- Note: If we need to add copy-out instructions, they may require stack space, --- so we accumulate a map from the successors to the necessary stack space, --- then update the successors after we have finished inserting the copy-outs. - -add_CopyOuts :: BlockEnv Protocol -> ProcPointSet -> CmmGraph -> -                FuelUniqSM (BlockEnv CmmBlock) -add_CopyOuts protos procPoints g = foldGraphBlocks mb_copy_out (return mapEmpty) g -    where mb_copy_out :: CmmBlock -> FuelUniqSM (BlockEnv CmmBlock) -> -                                     FuelUniqSM (BlockEnv CmmBlock) -          mb_copy_out b z | entryLabel b == g_entry g = skip b z -          mb_copy_out b z = -            case lastNode b of -              CmmCall {}        -> skip b z -- copy out done by callee -              CmmForeignCall {} -> skip b z -- copy out done by callee -              _ -> copy_out b z -          copy_out b z = foldr trySucc init (successors b) >>= finish -            where init = (\bmap -> (b, bmap)) `liftM` z -                  trySucc succId z = -                    if setMember succId procPoints then -                      case mapLookup succId protos of -                        Nothing -> z -                        Just (Protocol c fs _area) -> insert z succId $ copyOutSlot c fs -                    else z -                  insert z succId m = -                    do (b, bmap) <- z -                       (b, bs)   <- insertBetween b m succId -                       -- pprTrace "insert for succ" (ppr succId <> ppr m) $ do -                       return $ (b, foldl (flip insertBlock) bmap bs) -                  finish (b, bmap) = return $ insertBlock b bmap -          skip b bs = insertBlock b `liftM` bs  -- At this point, we have found a set of procpoints, each of which should be  -- the entry point of a procedure. @@ -384,15 +226,18 @@ splitAtProcPoints entry_label callPPs procPoints procMap                   []   -> graphEnv                   [id] -> add graphEnv id bid b                    _    -> panic "Each block should be reachable from only one ProcPoint" -             Nothing -> pprPanic "block not reached by a proc point?" (ppr bid) +             Nothing -> graphEnv             where bid = entryLabel b           add graphEnv procId bid b = mapInsert procId graph' graphEnv                 where graph  = mapLookup procId graphEnv `orElse` mapEmpty                       graph' = mapInsert bid b graph +       graphEnv <- return $ foldGraphBlocks addBlock emptyBlockMap g +       -- Build a map from proc point BlockId to pairs of:       --  * Labels for their new procedures -     --  * Labels for the info tables of their new procedures (only if the proc point is a callPP) +     --  * Labels for the info tables of their new procedures (only if +     --    the proc point is a callPP)       -- Due to common blockification, we may overestimate the set of procpoints.       let add_label map pp = Map.insert pp lbls map             where lbls | pp == entry = (entry_label, Just entry_info_lbl) @@ -401,30 +246,15 @@ splitAtProcPoints entry_label callPPs procPoints procMap                   entry_info_lbl = cit_lbl info_tbl           procLabels = foldl add_label Map.empty                              (filter (flip mapMember (toBlockMap g)) (setElems procPoints)) -     -- For each procpoint, we need to know the SP offset on entry. -     -- If the procpoint is: -     --  - continuation of a call, the SP offset is in the call -     --  - otherwise, 0 (and left out of the spEntryMap) -     let add_sp_off :: CmmBlock -> BlockEnv CmmStackInfo -> BlockEnv CmmStackInfo -         add_sp_off b env = -           case lastNode b of -             CmmCall {cml_cont = Just succ, cml_ret_args = off, cml_ret_off = updfr_off} -> -               mapInsert succ (StackInfo { arg_space = off, updfr_space = Just updfr_off}) env -             CmmForeignCall {succ = succ, updfr = updfr_off} -> -               mapInsert succ (StackInfo { arg_space = wORD_SIZE, updfr_space = Just updfr_off}) env -             _ -> env -         spEntryMap = foldGraphBlocks add_sp_off (mapInsert entry stack_info emptyBlockMap) g -         getStackInfo id = mapLookup id spEntryMap `orElse` StackInfo {arg_space = 0, updfr_space = Nothing}       -- In each new graph, add blocks jumping off to the new procedures,       -- and replace branches to procpoints with branches to the jump-off blocks       let add_jump_block (env, bs) (pp, l) =             do bid <- liftM mkBlockId getUniqueM -              let b = blockOfNodeList (JustC (CmmEntry bid), [], JustC jump) -                  StackInfo {arg_space = argSpace, updfr_space = off} = getStackInfo pp -                  jump = CmmCall (CmmLit (CmmLabel l)) Nothing argSpace 0 -                                 (off `orElse` 0) -- Jump's shouldn't need the offset... +              let b = blockJoin (CmmEntry bid) emptyBlock jump +                  jump = CmmCall (CmmLit (CmmLabel l)) Nothing 0 0 0                return (mapInsert pp bid env, b : bs) -         add_jumps (newGraphEnv) (ppId, blockEnv) = + +         add_jumps newGraphEnv (ppId, blockEnv) =             do let needed_jumps = -- find which procpoints we currently branch to                      mapFold add_if_branch_to_pp [] blockEnv                    add_if_branch_to_pp :: CmmBlock -> [(BlockId, CLabel)] -> [(BlockId, CLabel)] @@ -441,17 +271,16 @@ splitAtProcPoints entry_label callPPs procPoints procMap                   foldM add_jump_block (mapEmpty, []) needed_jumps                    -- update the entry block                let b = expectJust "block in env" $ mapLookup ppId blockEnv -                  off = getStackInfo ppId                    blockEnv' = mapInsert ppId b blockEnv                    -- replace branches to procpoints with branches to jumps                    blockEnv'' = toBlockMap $ replaceBranches jumpEnv $ ofBlockMap ppId blockEnv'                    -- add the jump blocks to the graph                    blockEnv''' = foldl (flip insertBlock) blockEnv'' jumpBlocks -              let g' = (off, ofBlockMap ppId blockEnv''') +              let g' = ofBlockMap ppId blockEnv'''                -- pprTrace "g' pre jumps" (ppr g') $ do                return (mapInsert ppId g' newGraphEnv)       graphEnv <- foldM add_jumps emptyBlockMap $ mapToList graphEnv -     let to_proc (bid, (stack_info, g)) = case expectJust "pp label" $ Map.lookup bid procLabels of +     let to_proc (bid, g) = case expectJust "pp label" $ Map.lookup bid procLabels of               (lbl, Just info_lbl)                 | bid == entry                 -> CmmProc (TopInfo {info_tbl=info_tbl, stack_info=stack_info}) @@ -462,15 +291,22 @@ splitAtProcPoints entry_label callPPs procPoints procMap               (lbl, Nothing)                 -> CmmProc (TopInfo {info_tbl=CmmNonInfoTable, stack_info=stack_info})                            lbl (replacePPIds g) -         -- References to procpoint IDs can now be replaced with the infotable's label -         replacePPIds g = mapGraphNodes (id, mapExp repl, mapExp repl) g +            where +             stack_info = panic "No StackInfo" + +         -- References to procpoint IDs can now be replaced with the +         -- infotable's label +         replacePPIds g = {-# SCC "replacePPIds" #-} +                          mapGraphNodes (id, mapExp repl, mapExp repl) g             where repl e@(CmmLit (CmmBlock bid)) =                     case Map.lookup bid procLabels of                       Just (_, Just info_lbl)  -> CmmLit (CmmLabel info_lbl)                       _ -> e                   repl e = e -     -- The C back end expects to see return continuations before the call sites. -     -- Here, we sort them in reverse order -- it gets reversed later. + +     -- The C back end expects to see return continuations before the +     -- call sites.  Here, we sort them in reverse order -- it gets +     -- reversed later.       let (_, block_order) = foldl add_block_num (0::Int, emptyBlockMap) (postorderDfs g)           add_block_num (i, map) block = (i+1, mapInsert (entryLabel block) i map)           sort_fn (bid, _) (bid', _) = @@ -482,6 +318,27 @@ splitAtProcPoints entry_label callPPs procPoints procMap              procs  splitAtProcPoints _ _ _ _ t@(CmmData _ _) = return [t] + +-- Only called from CmmProcPoint.splitAtProcPoints. NB. does a +-- recursive lookup, see comment below. +replaceBranches :: BlockEnv BlockId -> CmmGraph -> CmmGraph +replaceBranches env cmmg +  = {-# SCC "replaceBranches" #-} +    ofBlockMap (g_entry cmmg) $ mapMap f $ toBlockMap cmmg +  where +    f block = replaceLastNode block $ last (lastNode block) + +    last :: CmmNode O C -> CmmNode O C +    last (CmmBranch id)          = CmmBranch (lookup id) +    last (CmmCondBranch e ti fi) = CmmCondBranch e (lookup ti) (lookup fi) +    last (CmmSwitch e tbl)       = CmmSwitch e (map (fmap lookup) tbl) +    last l@(CmmCall {})          = l +    last l@(CmmForeignCall {})   = l +    lookup id = fmap lookup (mapLookup id env) `orElse` id +            -- XXX: this is a recursive lookup, it follows chains +            -- until the lookup returns Nothing, at which point we +            -- return the last BlockId +  ----------------------------------------------------------------  {- diff --git a/compiler/cmm/CmmRewriteAssignments.hs b/compiler/cmm/CmmRewriteAssignments.hs index ecf3f7e0c3..2c33b7b5ac 100644 --- a/compiler/cmm/CmmRewriteAssignments.hs +++ b/compiler/cmm/CmmRewriteAssignments.hs @@ -27,7 +27,7 @@ import UniqFM  import Unique  import BlockId -import Compiler.Hoopl hiding (Unique) +import Hoopl  import Data.Maybe  import Prelude hiding (succ, zip) @@ -404,8 +404,8 @@ clobbers (CmmReg (CmmGlobal Hp), _) (_, _) = False  -- ToDo: Also catch MachOp case  clobbers (ss@CmmStackSlot{}, CmmReg (CmmLocal r)) (u, CmmLoad (ss'@CmmStackSlot{}) _)      | getUnique r == u, ss == ss' = False -- No-op on the stack slot (XXX: Do we need this special case?) -clobbers (CmmStackSlot (CallArea a) o, rhs) (_, expr) = f expr -    where f (CmmLoad (CmmStackSlot (CallArea a') o') t) +clobbers (CmmStackSlot a o, rhs) (_, expr) = f expr +    where f (CmmLoad (CmmStackSlot a' o') t)              = (a, o, widthInBytes (cmmExprWidth rhs)) `overlaps` (a', o', widthInBytes (typeWidth t))            f (CmmLoad e _)    = containsStackSlot e            f (CmmMachOp _ es) = or (map f es) @@ -416,9 +416,6 @@ clobbers (CmmStackSlot (CallArea a) o, rhs) (_, expr) = f expr            containsStackSlot (CmmMachOp _ es) = or (map containsStackSlot es)            containsStackSlot (CmmStackSlot{}) = True            containsStackSlot _ = False -clobbers (CmmStackSlot (RegSlot l) _, _) (_, expr) = f expr -    where f (CmmLoad (CmmStackSlot (RegSlot l') _) _) = l == l' -          f _ = False  clobbers _ (_, e) = f e      where f (CmmLoad (CmmStackSlot _ _) _) = False            f (CmmLoad{}) = True -- conservative @@ -432,7 +429,7 @@ clobbers _ (_, e) = f e  --      [ I32  ]  --      [    F64     ]  --      s'   -w'-    o' -type CallSubArea = (AreaId, Int, Int) -- area, offset, width +type CallSubArea = (Area, Int, Int) -- area, offset, width  overlaps :: CallSubArea -> CallSubArea -> Bool  overlaps (a, _, _) (a', _, _) | a /= a' = False  overlaps (_, o, w) (_, o', w') = @@ -457,7 +454,7 @@ invalidateVolatile :: BlockId -> AssignmentMap -> AssignmentMap  invalidateVolatile k m = mapUFM p m    where p (AlwaysInline e) = if exp e then AlwaysInline e else NeverOptimize              where exp CmmLit{} = True -                  exp (CmmLoad (CmmStackSlot (CallArea (Young k')) _) _) +                  exp (CmmLoad (CmmStackSlot (Young k') _) _)                      | k' == k = False                    exp (CmmLoad (CmmStackSlot _ _) _) = True                    exp (CmmMachOp _ es) = and (map exp es) @@ -596,10 +593,6 @@ assignmentRewrite = mkFRewrite3 first middle last                            where rep = typeWidth (localRegType r)                _ -> old          -- See Note [Soundness of store rewriting] -        inlineExp assign old@(CmmLoad (CmmStackSlot (RegSlot r) _) _) -          = case lookupUFM assign r of -              Just (AlwaysInline x) -> x -              _ -> old          inlineExp _ old = old          inlinable :: CmmNode e x -> Bool diff --git a/compiler/cmm/CmmSpillReload.hs b/compiler/cmm/CmmSpillReload.hs deleted file mode 100644 index 9e762fe48a..0000000000 --- a/compiler/cmm/CmmSpillReload.hs +++ /dev/null @@ -1,166 +0,0 @@ -{-# LANGUAGE GADTs, NoMonoLocalBinds, FlexibleContexts #-} --- Norman likes local bindings --- If this module lives on I'd like to get rid of this flag in due course - -{-# OPTIONS_GHC -fno-warn-warnings-deprecations #-} - --- TODO: Get rid of this flag: -{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-} - -module CmmSpillReload -  ( dualLivenessWithInsertion -  ) -where - -import BlockId -import Cmm -import CmmUtils -import CmmLive -import OptimizationFuel - -import Control.Monad -import Outputable hiding (empty) -import qualified Outputable as PP -import UniqSet - -import Compiler.Hoopl hiding (Unique) -import Data.Maybe -import Prelude hiding (succ, zip) - -{- Note [Overview of spill/reload] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The point of this module is to insert spills and reloads to establish -the invariant that at a call or any proc point with an established -protocol all live variables not expected in registers are sitting on the -stack.  We use a backward dual liveness analysis (both traditional -register liveness as well as register slot liveness on the stack) to -insert spills and reloads.  It should be followed by a forward -transformation to sink reloads as deeply as possible, so as to reduce -register pressure: this transformation is performed by -CmmRewriteAssignments. - -A variable can be expected to be live in a register, live on the -stack, or both.  This analysis ensures that spills and reloads are -inserted as needed to make sure that every live variable needed -after a call is available on the stack.  Spills are placed immediately -after their reaching definitions, but reloads are placed immediately -after a return from a call (the entry point.) - -Note that we offer no guarantees about the consistency of the value -in memory and the value in the register, except that they are -equal across calls/procpoints.  If the variable is changed, this -mapping breaks: but as the original value of the register may still -be useful in a different context, the memory location is not updated. --} - -data DualLive = DualLive { on_stack :: RegSet, in_regs :: RegSet } - -changeStack, changeRegs :: (RegSet -> RegSet) -> DualLive -> DualLive -changeStack f live = live { on_stack = f (on_stack live) } -changeRegs  f live = live { in_regs  = f (in_regs  live) } - -dualLiveLattice :: DataflowLattice DualLive -dualLiveLattice = DataflowLattice "variables live in registers and on stack" empty add -    where empty = DualLive emptyRegSet emptyRegSet -          add _ (OldFact old) (NewFact new) = (changeIf $ change1 || change2, DualLive stack regs) -            where (change1, stack) = add1 (on_stack old) (on_stack new) -                  (change2, regs)  = add1 (in_regs old)  (in_regs new) -          add1 old new = if sizeUniqSet join > sizeUniqSet old then (True, join) else (False, old) -            where join = unionUniqSets old new - -dualLivenessWithInsertion :: BlockSet -> CmmGraph -> FuelUniqSM CmmGraph -dualLivenessWithInsertion procPoints g = -  liftM fst $ dataflowPassBwd g [] $ analRewBwd dualLiveLattice -                                                (dualLiveTransfers (g_entry g) procPoints) -                                                (insertSpillsAndReloads g procPoints) - --- Note [Live registers on entry to procpoints] --- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- Remember that the transfer function is only ever run on the rewritten --- version of a graph, and the rewrite function for spills and reloads --- enforces the invariant that no local registers are live on entry to --- a procpoint.  Accordingly, we check for this invariant here.  An old --- version of this code incorrectly claimed that any live registers were --- live on the stack before entering the function: this is wrong, but --- didn't cause bugs because it never actually was invoked. - -dualLiveTransfers :: BlockId -> BlockSet -> (BwdTransfer CmmNode DualLive) -dualLiveTransfers entry procPoints = mkBTransfer3 first middle last -    where first :: CmmNode C O -> DualLive -> DualLive -          first (CmmEntry id) live -- See Note [Live registers on entry to procpoints] -            | id == entry || setMember id procPoints = noLiveOnEntry id (in_regs live) live -            | otherwise                              = live - -          middle :: CmmNode O O -> DualLive -> DualLive -          middle m = changeStack updSlots -                   . changeRegs  updRegs -            where -- Reuse middle of liveness analysis from CmmLive -                  updRegs = case getBTransfer3 xferLive of (_, middle, _) -> middle m - -                  updSlots live = foldSlotsUsed reload (foldSlotsDefd spill live m) m -                  spill  live s@(RegSlot r, _, _) = check s $ deleteFromRegSet live r -                  spill  live _ = live -                  reload live s@(RegSlot r, _, _) = check s $ extendRegSet live r -                  reload live _ = live -                  -- Ensure the assignment refers to the entirety of the -                  -- register slot (and not just a slice). -                  check (RegSlot (LocalReg _ ty), o, w) x -                     | o == w && w == widthInBytes (typeWidth ty) = x -                  check _ _ = panic "dualLiveTransfers: slices unsupported" - -          -- Register analysis is identical to liveness analysis from CmmLive. -          last :: CmmNode O C -> FactBase DualLive -> DualLive -          last l fb = changeRegs (gen_kill l) $ case l of -            CmmCall {cml_cont=Nothing} -> empty -            CmmCall {cml_cont=Just k}  -> keep_stack_only k -            CmmForeignCall {succ=k}    -> keep_stack_only k -            _                          -> joinOutFacts dualLiveLattice l fb -            where empty = fact_bot dualLiveLattice -                  lkp k = fromMaybe empty (lookupFact k fb) -                  keep_stack_only k = DualLive (on_stack (lkp k)) emptyRegSet - -insertSpillsAndReloads :: CmmGraph -> BlockSet -> CmmBwdRewrite DualLive -insertSpillsAndReloads graph procPoints = deepBwdRw3 first middle nothing -  -- Beware: deepBwdRw with one polymorphic function seems more reasonable here, -  -- but GHC miscompiles it, see bug #4044. -    where first :: CmmNode C O -> Fact O DualLive -> CmmReplGraph C O -          first e@(CmmEntry id) live = return $ -            if id /= (g_entry graph) && setMember id procPoints then -              case map reload (uniqSetToList (in_regs live)) of -                [] -> Nothing -                is -> Just $ mkFirst e <*> mkMiddles is -            else Nothing -          -- EZY: There was some dead code for handling the case where -          -- we were not splitting procedures.  Check Git history if -          -- you're interested (circa e26ea0f41). - -          middle :: CmmNode O O -> Fact O DualLive -> CmmReplGraph O O -          -- Don't add spills next to reloads. -          middle (CmmAssign (CmmLocal reg) (CmmLoad (CmmStackSlot (RegSlot reg') _) _)) _ | reg == reg' = return Nothing -          -- Spill if register is live on stack. -          middle m@(CmmAssign (CmmLocal reg) _) live -            | reg `elemRegSet` on_stack live = return (Just (mkMiddles [m, spill reg])) -          middle _ _ = return Nothing - -          nothing _ _ = return Nothing - -spill, reload :: LocalReg -> CmmNode O O -spill  r = CmmStore  (regSlot r) (CmmReg $ CmmLocal r) -reload r = CmmAssign (CmmLocal r) (CmmLoad (regSlot r) $ localRegType r) - ---------------------- --- prettyprinting - -ppr_regs :: String -> RegSet -> SDoc -ppr_regs s regs = text s <+> commafy (map ppr $ uniqSetToList regs) -  where commafy xs = hsep $ punctuate comma xs - -instance Outputable DualLive where -  ppr (DualLive {in_regs = regs, on_stack = stack}) = -      if isEmptyUniqSet regs && isEmptyUniqSet stack then -          text "<nothing-live>" -      else -          nest 2 $ fsep [if isEmptyUniqSet regs then PP.empty -                         else (ppr_regs "live in regs =" regs), -                         if isEmptyUniqSet stack then PP.empty -                         else (ppr_regs "live on stack =" stack)] diff --git a/compiler/cmm/CmmUtils.hs b/compiler/cmm/CmmUtils.hs index 0756c87583..d831a8aba5 100644 --- a/compiler/cmm/CmmUtils.hs +++ b/compiler/cmm/CmmUtils.hs @@ -60,13 +60,14 @@ module CmmUtils(          -- * Operations that probably don't belong here          modifyGraph, -        lastNode, replaceLastNode, insertBetween, +        lastNode, replaceLastNode,          ofBlockMap, toBlockMap, insertBlock,          ofBlockList, toBlockList, bodyToBlockList,          foldGraphBlocks, mapGraphNodes, postorderDfs, mapGraphNodes1,          analFwd, analBwd, analRewFwd, analRewBwd, -        dataflowPassFwd, dataflowPassBwd +        dataflowPassFwd, dataflowPassBwd, dataflowAnalFwd, dataflowAnalBwd, +        dataflowAnalFwdBlocks    ) where  #include "HsVersions.h" @@ -89,7 +90,7 @@ import Data.Word  import Data.Maybe  import Data.Bits  import Control.Monad -import Compiler.Hoopl hiding ( Unique ) +import Hoopl  ---------------------------------------------------  -- @@ -402,13 +403,13 @@ mkLiveness (reg:regs)  modifyGraph :: (Graph n C C -> Graph n' C C) -> GenCmmGraph n -> GenCmmGraph n'  modifyGraph f g = CmmGraph {g_entry=g_entry g, g_graph=f (g_graph g)} -toBlockMap :: CmmGraph -> LabelMap CmmBlock +toBlockMap :: CmmGraph -> BlockEnv CmmBlock  toBlockMap (CmmGraph {g_graph=GMany NothingO body NothingO}) = body -ofBlockMap :: BlockId -> LabelMap CmmBlock -> CmmGraph +ofBlockMap :: BlockId -> BlockEnv CmmBlock -> CmmGraph  ofBlockMap entry bodyMap = CmmGraph {g_entry=entry, g_graph=GMany NothingO bodyMap NothingO} -insertBlock :: CmmBlock -> LabelMap CmmBlock -> LabelMap CmmBlock +insertBlock :: CmmBlock -> BlockEnv CmmBlock -> BlockEnv CmmBlock  insertBlock block map =    ASSERT (isNothing $ mapLookup id map)    mapInsert id block map @@ -418,7 +419,8 @@ toBlockList :: CmmGraph -> [CmmBlock]  toBlockList g = mapElems $ toBlockMap g  ofBlockList :: BlockId -> [CmmBlock] -> CmmGraph -ofBlockList entry blocks = CmmGraph {g_entry=entry, g_graph=GMany NothingO body NothingO} +ofBlockList entry blocks = CmmGraph { g_entry = entry +                                    , g_graph = GMany NothingO body NothingO }    where body = foldr addBlock emptyBody blocks  bodyToBlockList :: Body CmmNode -> [CmmBlock] @@ -439,87 +441,67 @@ foldGraphBlocks :: (CmmBlock -> a -> a) -> a -> CmmGraph -> a  foldGraphBlocks k z g = mapFold k z $ toBlockMap g  postorderDfs :: CmmGraph -> [CmmBlock] -postorderDfs g = postorder_dfs_from (toBlockMap g) (g_entry g) - -------------------------------------------------- --- Manipulating CmmBlocks - -lastNode :: CmmBlock -> CmmNode O C -lastNode block = foldBlockNodesF3 (nothing, nothing, const) block () -  where nothing :: a -> b -> () -        nothing _ _ = () - -replaceLastNode :: Block CmmNode e C -> CmmNode O C -> Block CmmNode e C -replaceLastNode block last = blockOfNodeList (first, middle, JustC last) -  where (first, middle, _) = blockToNodeList block - ----------------------------------------------------------------------- ------ Splicing between blocks --- Given a middle node, a block, and a successor BlockId, --- we can insert the middle node between the block and the successor. --- We return the updated block and a list of new blocks that must be added --- to the graph. --- The semantics is a bit tricky. We consider cases on the last node: --- o For a branch, we can just insert before the branch, ---   but sometimes the optimizer does better if we actually insert ---   a fresh basic block, enabling some common blockification. --- o For a conditional branch, switch statement, or call, we must insert ---   a new basic block. --- o For a jump or return, this operation is impossible. - -insertBetween :: MonadUnique m => CmmBlock -> [CmmNode O O] -> BlockId -> m (CmmBlock, [CmmBlock]) -insertBetween b ms succId = insert $ lastNode b -  where insert :: MonadUnique m => CmmNode O C -> m (CmmBlock, [CmmBlock]) -        insert (CmmBranch bid) = -          if bid == succId then -            do (bid', bs) <- newBlocks -               return (replaceLastNode b (CmmBranch bid'), bs) -          else panic "tried invalid block insertBetween" -        insert (CmmCondBranch c t f) = -          do (t', tbs) <- if t == succId then newBlocks else return $ (t, []) -             (f', fbs) <- if f == succId then newBlocks else return $ (f, []) -             return (replaceLastNode b (CmmCondBranch c t' f'), tbs ++ fbs) -        insert (CmmSwitch e ks) = -          do (ids, bs) <- mapAndUnzipM mbNewBlocks ks -             return (replaceLastNode b (CmmSwitch e ids), join bs) -        insert (CmmCall {}) = -          panic "unimp: insertBetween after a call -- probably not a good idea" -        insert (CmmForeignCall {}) = -          panic "unimp: insertBetween after a foreign call -- probably not a good idea" - -        newBlocks :: MonadUnique m => m (BlockId, [CmmBlock]) -        newBlocks = do id <- liftM mkBlockId $ getUniqueM -                       return $ (id, [blockOfNodeList (JustC (CmmEntry id), ms, JustC (CmmBranch succId))]) -        mbNewBlocks :: MonadUnique m => Maybe BlockId -> m (Maybe BlockId, [CmmBlock]) -        mbNewBlocks (Just k) = if k == succId then liftM fstJust newBlocks -                               else return (Just k, []) -        mbNewBlocks Nothing  = return (Nothing, []) -        fstJust (id, bs) = (Just id, bs) +postorderDfs g = {-# SCC "postorderDfs" #-} postorder_dfs_from (toBlockMap g) (g_entry g)  -------------------------------------------------  -- Running dataflow analysis and/or rewrites  -- Constructing forward and backward analysis-only pass -analFwd    :: Monad m => DataflowLattice f -> FwdTransfer n f -> FwdPass m n f -analBwd    :: Monad m => DataflowLattice f -> BwdTransfer n f -> BwdPass m n f +analFwd    :: DataflowLattice f -> FwdTransfer n f -> FwdPass FuelUniqSM n f +analBwd    :: DataflowLattice f -> BwdTransfer n f -> BwdPass FuelUniqSM n f  analFwd lat xfer = analRewFwd lat xfer noFwdRewrite  analBwd lat xfer = analRewBwd lat xfer noBwdRewrite  -- Constructing forward and backward analysis + rewrite pass -analRewFwd :: Monad m => DataflowLattice f -> FwdTransfer n f -> FwdRewrite m n f -> FwdPass m n f -analRewBwd :: Monad m => DataflowLattice f -> BwdTransfer n f -> BwdRewrite m n f -> BwdPass m n f +analRewFwd :: DataflowLattice f -> FwdTransfer n f +           -> FwdRewrite FuelUniqSM n f +           -> FwdPass FuelUniqSM n f + +analRewBwd :: DataflowLattice f +           -> BwdTransfer n f +           -> BwdRewrite FuelUniqSM n f +           -> BwdPass FuelUniqSM n f  analRewFwd lat xfer rew = FwdPass {fp_lattice = lat, fp_transfer = xfer, fp_rewrite = rew}  analRewBwd lat xfer rew = BwdPass {bp_lattice = lat, bp_transfer = xfer, bp_rewrite = rew}  -- Running forward and backward dataflow analysis + optional rewrite -dataflowPassFwd :: NonLocal n => GenCmmGraph n -> [(BlockId, f)] -> FwdPass FuelUniqSM n f -> FuelUniqSM (GenCmmGraph n, BlockEnv f) +dataflowPassFwd :: NonLocal n => +                   GenCmmGraph n -> [(BlockId, f)] +                -> FwdPass FuelUniqSM n f +                -> FuelUniqSM (GenCmmGraph n, BlockEnv f)  dataflowPassFwd (CmmGraph {g_entry=entry, g_graph=graph}) facts fwd = do    (graph, facts, NothingO) <- analyzeAndRewriteFwd fwd (JustC [entry]) graph (mkFactBase (fp_lattice fwd) facts)    return (CmmGraph {g_entry=entry, g_graph=graph}, facts) -dataflowPassBwd :: NonLocal n => GenCmmGraph n -> [(BlockId, f)] -> BwdPass FuelUniqSM n f -> FuelUniqSM (GenCmmGraph n, BlockEnv f) +dataflowAnalFwd :: NonLocal n => +                   GenCmmGraph n -> [(BlockId, f)] +                -> FwdPass FuelUniqSM n f +                -> BlockEnv f +dataflowAnalFwd (CmmGraph {g_entry=entry, g_graph=graph}) facts fwd = +  analyzeFwd fwd (JustC [entry]) graph (mkFactBase (fp_lattice fwd) facts) + +dataflowAnalFwdBlocks :: NonLocal n => +                   GenCmmGraph n -> [(BlockId, f)] +                -> FwdPass FuelUniqSM n f +                -> FuelUniqSM (BlockEnv f) +dataflowAnalFwdBlocks (CmmGraph {g_entry=entry, g_graph=graph}) facts fwd = do +--  (graph, facts, NothingO) <- analyzeAndRewriteFwd fwd (JustC [entry]) graph (mkFactBase (fp_lattice fwd) facts) +--  return facts +  return (analyzeFwdBlocks fwd (JustC [entry]) graph (mkFactBase (fp_lattice fwd) facts)) + +dataflowAnalBwd :: NonLocal n => +                   GenCmmGraph n -> [(BlockId, f)] +                -> BwdPass FuelUniqSM n f +                -> BlockEnv f +dataflowAnalBwd (CmmGraph {g_entry=entry, g_graph=graph}) facts bwd = +  analyzeBwd bwd (JustC [entry]) graph (mkFactBase (bp_lattice bwd) facts) + +dataflowPassBwd :: NonLocal n => +                   GenCmmGraph n -> [(BlockId, f)] +                -> BwdPass FuelUniqSM n f +                -> FuelUniqSM (GenCmmGraph n, BlockEnv f)  dataflowPassBwd (CmmGraph {g_entry=entry, g_graph=graph}) facts bwd = do    (graph, facts, NothingO) <- analyzeAndRewriteBwd bwd (JustC [entry]) graph (mkFactBase (bp_lattice bwd) facts)    return (CmmGraph {g_entry=entry, g_graph=graph}, facts) diff --git a/compiler/cmm/Hoopl.hs b/compiler/cmm/Hoopl.hs new file mode 100644 index 0000000000..404482e047 --- /dev/null +++ b/compiler/cmm/Hoopl.hs @@ -0,0 +1,124 @@ +module Hoopl ( +    module Compiler.Hoopl, +    module Hoopl.Dataflow, +    deepBwdRw3, deepBwdRw, +    thenFwdRw +  ) where + +import Compiler.Hoopl hiding +  ( Unique, +    FwdTransfer(..), FwdRewrite(..), FwdPass(..), +    BwdTransfer(..), BwdRewrite(..), BwdPass(..), +    noFwdRewrite, noBwdRewrite, +--    analyzeAndRewriteFwd, analyzeAndRewriteBwd, +    mkFactBase, Fact, +    mkBRewrite, mkBRewrite3, mkBTransfer, mkBTransfer3, +    mkFRewrite, mkFRewrite3, mkFTransfer, mkFTransfer3, +    deepFwdRw, deepFwdRw3, thenFwdRw, iterFwdRw, +    deepBwdRw, deepBwdRw3, thenBwdRw, iterBwdRw +  ) + +import Hoopl.Dataflow +import OptimizationFuel +import Control.Monad + +deepFwdRw3 :: (n C O -> f -> FuelUniqSM (Maybe (Graph n C O))) +           -> (n O O -> f -> FuelUniqSM (Maybe (Graph n O O))) +           -> (n O C -> f -> FuelUniqSM (Maybe (Graph n O C))) +           -> (FwdRewrite FuelUniqSM n f) +deepFwdRw :: (forall e x . n e x -> f -> FuelUniqSM (Maybe (Graph n e x))) -> FwdRewrite FuelUniqSM n f +deepFwdRw3 f m l = iterFwdRw $ mkFRewrite3 f m l +deepFwdRw f = deepFwdRw3 f f f + +-- N.B. rw3, rw3', and rw3a are triples of functions. +-- But rw and rw' are single functions. +thenFwdRw :: forall n f. +             FwdRewrite FuelUniqSM n f +          -> FwdRewrite FuelUniqSM n f  +          -> FwdRewrite FuelUniqSM n f +thenFwdRw rw3 rw3' = wrapFR2 thenrw rw3 rw3' + where +  thenrw :: forall e x t t1. +               (t -> t1 -> FuelUniqSM (Maybe (Graph n e x, FwdRewrite FuelUniqSM n f))) +            -> (t -> t1 -> FuelUniqSM (Maybe (Graph n e x, FwdRewrite FuelUniqSM n f))) +            -> t +            -> t1 +            -> FuelUniqSM (Maybe (Graph n e x, FwdRewrite FuelUniqSM n f)) +  thenrw rw rw' n f = rw n f >>= fwdRes +     where fwdRes Nothing   = rw' n f +           fwdRes (Just gr) = return $ Just $ fadd_rw rw3' gr + +iterFwdRw :: forall m n f. +             FwdRewrite FuelUniqSM n f +          -> FwdRewrite FuelUniqSM n f +iterFwdRw rw3 = wrapFR iter rw3 + where iter :: forall a e x t. +               (t -> a -> FuelUniqSM (Maybe (Graph n e x, FwdRewrite FuelUniqSM n f))) +               -> t +               -> a +               -> FuelUniqSM (Maybe (Graph n e x, FwdRewrite FuelUniqSM n f)) +       iter rw n = (liftM $ liftM $ fadd_rw (iterFwdRw rw3)) . rw n + +-- | Function inspired by 'rew' in the paper +_frewrite_cps :: ((Graph n e x, FwdRewrite FuelUniqSM n f) -> FuelUniqSM a) +             -> FuelUniqSM a +             -> (forall e x . n e x -> f -> FuelUniqSM (Maybe (Graph n e x, FwdRewrite FuelUniqSM n f))) +             -> n e x +             -> f +             -> FuelUniqSM a +_frewrite_cps j n rw node f = +    do mg <- rw node f +       case mg of Nothing -> n +                  Just gr -> j gr + + + +-- | Function inspired by 'add' in the paper +fadd_rw :: FwdRewrite FuelUniqSM n f +        -> (Graph n e x, FwdRewrite FuelUniqSM n f) +        -> (Graph n e x, FwdRewrite FuelUniqSM n f) +fadd_rw rw2 (g, rw1) = (g, rw1 `thenFwdRw` rw2) + + + +deepBwdRw3 :: +              (n C O -> f          -> FuelUniqSM (Maybe (Graph n C O))) +           -> (n O O -> f          -> FuelUniqSM (Maybe (Graph n O O))) +           -> (n O C -> FactBase f -> FuelUniqSM (Maybe (Graph n O C))) +           -> (BwdRewrite FuelUniqSM n f) +deepBwdRw  :: (forall e x . n e x -> Fact x f -> FuelUniqSM (Maybe (Graph n e x))) +           -> BwdRewrite FuelUniqSM n f +deepBwdRw3 f m l = iterBwdRw $ mkBRewrite3 f m l +deepBwdRw  f = deepBwdRw3 f f f + + +thenBwdRw :: forall n f. BwdRewrite FuelUniqSM n f -> BwdRewrite FuelUniqSM n f -> BwdRewrite FuelUniqSM n f +thenBwdRw rw1 rw2 = wrapBR2 f rw1 rw2 +  where f :: forall t t1 t2 e x. +             t +             -> (t1 -> t2 -> FuelUniqSM (Maybe (Graph n e x, BwdRewrite FuelUniqSM n f))) +             -> (t1 -> t2 -> FuelUniqSM (Maybe (Graph n e x, BwdRewrite FuelUniqSM n f))) +             -> t1 +             -> t2 +             -> FuelUniqSM (Maybe (Graph n e x, BwdRewrite FuelUniqSM n f)) +        f _ rw1 rw2' n f = do +          res1 <- rw1 n f +          case res1 of +            Nothing -> rw2' n f +            Just gr -> return $ Just $ badd_rw rw2 gr + +iterBwdRw :: forall n f. BwdRewrite FuelUniqSM n f -> BwdRewrite FuelUniqSM n f +iterBwdRw rw = wrapBR f rw +  where f :: forall t e x t1 t2. +             t +             -> (t1 -> t2 -> FuelUniqSM (Maybe (Graph n e x, BwdRewrite FuelUniqSM n f))) +             -> t1 +             -> t2 +             -> FuelUniqSM (Maybe (Graph n e x, BwdRewrite FuelUniqSM n f)) +        f _ rw' n f = liftM (liftM (badd_rw (iterBwdRw rw))) (rw' n f) + +-- | Function inspired by 'add' in the paper +badd_rw :: BwdRewrite FuelUniqSM n f +        -> (Graph n e x, BwdRewrite FuelUniqSM n f) +        -> (Graph n e x, BwdRewrite FuelUniqSM n f) +badd_rw rw2 (g, rw1) = (g, rw1 `thenBwdRw` rw2) diff --git a/compiler/cmm/Hoopl/Dataflow.hs b/compiler/cmm/Hoopl/Dataflow.hs new file mode 100644 index 0000000000..cdab2cd2fe --- /dev/null +++ b/compiler/cmm/Hoopl/Dataflow.hs @@ -0,0 +1,890 @@ +{-# LANGUAGE RankNTypes, ScopedTypeVariables, GADTs, EmptyDataDecls, PatternGuards, TypeFamilies, MultiParamTypeClasses #-} +#if __GLASGOW_HASKELL__ >= 703 +{-# OPTIONS_GHC -fprof-auto-top #-} +#endif +#if __GLASGOW_HASKELL__ >= 701 +{-# LANGUAGE Trustworthy #-} +#endif + +module Hoopl.Dataflow +  ( DataflowLattice(..), OldFact(..), NewFact(..), Fact, mkFactBase +  , ChangeFlag(..) +  , FwdPass(..), FwdTransfer, mkFTransfer, mkFTransfer3, getFTransfer3 +  -- * Respecting Fuel + +  -- $fuel +  , FwdRewrite,  mkFRewrite,  mkFRewrite3,  getFRewrite3, noFwdRewrite +  , wrapFR, wrapFR2 +  , BwdPass(..), BwdTransfer, mkBTransfer, mkBTransfer3, getBTransfer3 +  , wrapBR, wrapBR2 +  , BwdRewrite,  mkBRewrite,  mkBRewrite3,  getBRewrite3, noBwdRewrite +  , analyzeAndRewriteFwd,  analyzeAndRewriteBwd +  , analyzeFwd, analyzeFwdBlocks, analyzeBwd +  ) +where + +import OptimizationFuel + +import Data.Maybe +import Data.Array + +import Compiler.Hoopl.Collections +import Compiler.Hoopl.Fuel +import Compiler.Hoopl.Graph hiding (Graph) -- hiding so we can redefine +                                           -- and include definition in paper +import qualified Compiler.Hoopl.GraphUtil as U +import Compiler.Hoopl.Label +import Compiler.Hoopl.Dataflow (JoinFun) +import Compiler.Hoopl.Util + +import Compiler.Hoopl.Dataflow ( +    DataflowLattice(..), OldFact(..), NewFact(..), Fact +  , ChangeFlag(..), mkFactBase +  , FwdPass(..), FwdRewrite(..), FwdTransfer(..), mkFRewrite,  getFRewrite3, mkFTransfer, mkFTransfer3 +  , wrapFR, wrapFR2 +  , BwdPass(..), BwdRewrite(..),  BwdTransfer(..), mkBTransfer, mkBTransfer3, getBTransfer3 +  , wrapBR, wrapBR2 +  , mkBRewrite,  getBRewrite3 +  ) + +-- import Debug.Trace + +noRewrite :: a -> b -> FuelUniqSM (Maybe c) +noRewrite _ _ = return Nothing + +noFwdRewrite :: FwdRewrite FuelUniqSM n f +noFwdRewrite = FwdRewrite3 (noRewrite, noRewrite, noRewrite) + +-- | Functions passed to 'mkFRewrite3' should not be aware of the fuel supply. +-- The result returned by 'mkFRewrite3' respects fuel. +mkFRewrite3 :: forall n f. +               (n C O -> f -> FuelUniqSM (Maybe (Graph n C O))) +            -> (n O O -> f -> FuelUniqSM (Maybe (Graph n O O))) +            -> (n O C -> f -> FuelUniqSM (Maybe (Graph n O C))) +            -> FwdRewrite FuelUniqSM n f +mkFRewrite3 f m l = FwdRewrite3 (lift f, lift m, lift l) +  where lift :: forall t t1 a. (t -> t1 -> FuelUniqSM (Maybe a)) +                             -> t -> t1 -> FuelUniqSM (Maybe (a, FwdRewrite FuelUniqSM n f)) +        {-# INLINE lift #-} +        lift rw node fact = do +             a <- rw node fact +             case a of +               Nothing -> return Nothing +               Just a  -> do f <- getFuel +                             if f == 0 +                                then return Nothing +                                else setFuel (f-1) >> return (Just (a,noFwdRewrite)) + +noBwdRewrite :: BwdRewrite FuelUniqSM n f +noBwdRewrite = BwdRewrite3 (noRewrite, noRewrite, noRewrite) + +mkBRewrite3 :: forall n f. +               (n C O -> f          -> FuelUniqSM (Maybe (Graph n C O))) +            -> (n O O -> f          -> FuelUniqSM (Maybe (Graph n O O))) +            -> (n O C -> FactBase f -> FuelUniqSM (Maybe (Graph n O C))) +            -> BwdRewrite FuelUniqSM n f +mkBRewrite3 f m l = BwdRewrite3 (lift f, lift m, lift l) +  where lift :: forall t t1 a. (t -> t1 -> FuelUniqSM (Maybe a)) +                             -> t -> t1 -> FuelUniqSM (Maybe (a, BwdRewrite FuelUniqSM n f)) +        {-# INLINE lift #-} +        lift rw node fact = do +             a <- rw node fact +             case a of +               Nothing -> return Nothing +               Just a  -> do f <- getFuel +                             if f == 0 +                                then return Nothing +                                else setFuel (f-1) >> return (Just (a,noBwdRewrite)) + +----------------------------------------------------------------------------- +--              Analyze and rewrite forward: the interface +----------------------------------------------------------------------------- + +-- | if the graph being analyzed is open at the entry, there must +--   be no other entry point, or all goes horribly wrong... +analyzeAndRewriteFwd +   :: forall n f e x .  NonLocal n => +      FwdPass FuelUniqSM n f +   -> MaybeC e [Label] +   -> Graph n  e x -> Fact e f +   -> FuelUniqSM (Graph n e x, FactBase f, MaybeO x f) +analyzeAndRewriteFwd pass entries g f = +  do (rg, fout) <- arfGraph pass (fmap targetLabels entries) g f +     let (g', fb) = normalizeGraph rg +     return (g', fb, distinguishedExitFact g' fout) + +distinguishedExitFact :: forall n e x f . Graph n e x -> Fact x f -> MaybeO x f +distinguishedExitFact g f = maybe g +    where maybe :: Graph n e x -> MaybeO x f +          maybe GNil       = JustO f +          maybe (GUnit {}) = JustO f +          maybe (GMany _ _ x) = case x of NothingO -> NothingO +                                          JustO _  -> JustO f + +---------------------------------------------------------------- +--       Forward Implementation +---------------------------------------------------------------- + +type Entries e = MaybeC e [Label] + +arfGraph :: forall n f e x .  NonLocal n => +            FwdPass FuelUniqSM n f -> +            Entries e -> Graph n e x -> Fact e f -> FuelUniqSM (DG f n e x, Fact x f) +arfGraph pass@FwdPass { fp_lattice = lattice, +                        fp_transfer = transfer, +                        fp_rewrite  = rewrite } entries g in_fact = graph g in_fact +  where +    {- nested type synonyms would be so lovely here  +    type ARF  thing = forall e x . thing e x -> f        -> m (DG f n e x, Fact x f) +    type ARFX thing = forall e x . thing e x -> Fact e f -> m (DG f n e x, Fact x f) +    -} +    graph ::              Graph n e x -> Fact e f -> FuelUniqSM (DG f n e x, Fact x f) +    block :: forall e x . +             Block n e x -> f -> FuelUniqSM (DG f n e x, Fact x f) + +    body  :: [Label] -> LabelMap (Block n C C) +          -> Fact C f -> FuelUniqSM (DG f n C C, Fact C f) +                    -- Outgoing factbase is restricted to Labels *not* in +                    -- in the Body; the facts for Labels *in* +                    -- the Body are in the 'DG f n C C' + +    cat :: forall e a x f1 f2 f3. +           (f1 -> FuelUniqSM (DG f n e a, f2)) +        -> (f2 -> FuelUniqSM (DG f n a x, f3)) +        -> (f1 -> FuelUniqSM (DG f n e x, f3)) + +    graph GNil            f = return (dgnil, f) +    graph (GUnit blk)     f = block blk f +    graph (GMany e bdy x) f = ((e `ebcat` bdy) `cat` exit x) f +     where +      ebcat :: MaybeO e (Block n O C) -> Body n -> Fact e f -> FuelUniqSM (DG f n e C, Fact C f) +      exit  :: MaybeO x (Block n C O)           -> Fact C f -> FuelUniqSM (DG f n C x, Fact x f) +      exit (JustO blk) f = arfx block blk f +      exit NothingO    f = return (dgnilC, f) +      ebcat entry bdy f = c entries entry f +       where c :: MaybeC e [Label] -> MaybeO e (Block n O C) +                -> Fact e f -> FuelUniqSM (DG f n e C, Fact C f) +             c NothingC (JustO entry)   f = (block entry `cat` body (successors entry) bdy) f +             c (JustC entries) NothingO f = body entries bdy f +             c _ _ _ = error "bogus GADT pattern match failure" + +    -- Lift from nodes to blocks +    block BNil            f = return (dgnil, f) +    block (BlockCO n b)   f = (node n `cat` block b) f +    block (BlockCC l b n) f = (node l `cat` block b `cat` node n) f +    block (BlockOC   b n) f =              (block b `cat` node n) f + +    block (BMiddle n)     f = node n f +    block (BCat b1 b2)    f = (block b1 `cat` block b2) f +    block (BHead h n)     f = (block h  `cat` node n) f +    block (BTail n t)     f = (node  n  `cat` block t) f + +    {-# INLINE node #-} +    node :: forall e x . (ShapeLifter e x) +         => n e x -> f -> FuelUniqSM (DG f n e x, Fact x f) +    node n f +     = do { grw <- frewrite rewrite n f +          ; case grw of +              Nothing -> return ( singletonDG f n +                                , ftransfer transfer n f ) +              Just (g, rw) -> +                  let pass' = pass { fp_rewrite = rw } +                      f'    = fwdEntryFact n f +                  in  arfGraph pass' (fwdEntryLabel n) g f' } + +    -- | Compose fact transformers and concatenate the resulting +    -- rewritten graphs. +    {-# INLINE cat #-}  +    cat ft1 ft2 f = do { (g1,f1) <- ft1 f +                       ; (g2,f2) <- ft2 f1 +                       ; let !g = g1 `dgSplice` g2 +                       ; return (g, f2) } + +    arfx :: forall x . +            (Block n C x ->        f -> FuelUniqSM (DG f n C x, Fact x f)) +         -> (Block n C x -> Fact C f -> FuelUniqSM (DG f n C x, Fact x f)) +    arfx arf thing fb =  +      arf thing $ fromJust $ lookupFact (entryLabel thing) $ joinInFacts lattice fb +     -- joinInFacts adds debugging information + + +                    -- Outgoing factbase is restricted to Labels *not* in +                    -- in the Body; the facts for Labels *in* +                    -- the Body are in the 'DG f n C C' +    body entries blockmap init_fbase +      = fixpoint Fwd lattice do_block entries blockmap init_fbase +      where +        lattice = fp_lattice pass +        do_block :: forall x . Block n C x -> FactBase f +                 -> FuelUniqSM (DG f n C x, Fact x f) +        do_block b fb = block b entryFact +          where entryFact = getFact lattice (entryLabel b) fb + + +-- Join all the incoming facts with bottom. +-- We know the results _shouldn't change_, but the transfer +-- functions might, for example, generate some debugging traces. +joinInFacts :: DataflowLattice f -> FactBase f -> FactBase f +joinInFacts (lattice @ DataflowLattice {fact_bot = bot, fact_join = fj}) fb = +  mkFactBase lattice $ map botJoin $ mapToList fb +    where botJoin (l, f) = (l, snd $ fj l (OldFact bot) (NewFact f)) + +forwardBlockList :: (NonLocal n) +                 => [Label] -> Body n -> [Block n C C] +-- This produces a list of blocks in order suitable for forward analysis, +-- along with the list of Labels it may depend on for facts. +forwardBlockList entries blks = postorder_dfs_from blks entries + +---------------------------------------------------------------- +--       Forward Analysis only +---------------------------------------------------------------- + +-- | if the graph being analyzed is open at the entry, there must +--   be no other entry point, or all goes horribly wrong... +analyzeFwd +   :: forall n f e .  NonLocal n => +      FwdPass FuelUniqSM n f +   -> MaybeC e [Label] +   -> Graph n e C -> Fact e f +   -> FactBase f +analyzeFwd FwdPass { fp_lattice = lattice, +                     fp_transfer = FwdTransfer3 (ftr, mtr, ltr) } +  entries g in_fact = graph g in_fact +  where +    graph :: Graph n e C -> Fact e f -> FactBase f +    graph (GMany entry blockmap NothingO) +      = case (entries, entry) of +         (NothingC, JustO entry)   -> block entry `cat` body (successors entry) +         (JustC entries, NothingO) -> body entries +         _ -> error "bogus GADT pattern match failure" +     where +       body  :: [Label] -> Fact C f -> Fact C f +       body entries f +         = fixpointAnal Fwd lattice do_block entries blockmap f +         where +           do_block :: forall x . Block n C x -> FactBase f -> Fact x f +           do_block b fb = block b entryFact +             where entryFact = getFact lattice (entryLabel b) fb + +    -- NB. eta-expand block, GHC can't do this by itself.  See #5809. +    block :: forall e x . Block n e x -> f -> Fact x f +    block BNil            f = f +    block (BlockCO n b)   f = (ftr n `cat`  block b) f +    block (BlockCC l b n) f = (ftr l `cat` (block b `cat` ltr n)) f +    block (BlockOC   b n) f =              (block b `cat` ltr n) f + +    block (BMiddle n)     f = mtr n f +    block (BCat b1 b2)    f = (block b1 `cat` block b2) f +    block (BHead h n)     f = (block h  `cat` mtr n) f +    block (BTail n t)     f = (mtr  n   `cat` block t) f + +    {-# INLINE cat #-} +    cat :: forall f1 f2 f3 . (f1 -> f2) -> (f2 -> f3) -> (f1 -> f3) +    cat ft1 ft2 = \f -> ft2 $! ft1 f + +-- | if the graph being analyzed is open at the entry, there must +--   be no other entry point, or all goes horribly wrong... +analyzeFwdBlocks +   :: forall n f e .  NonLocal n => +      FwdPass FuelUniqSM n f +   -> MaybeC e [Label] +   -> Graph n e C -> Fact e f +   -> FactBase f +analyzeFwdBlocks FwdPass { fp_lattice = lattice, +                           fp_transfer = FwdTransfer3 (ftr, _, ltr) } +  entries g in_fact = graph g in_fact +  where +    graph :: Graph n e C -> Fact e f -> FactBase f +    graph (GMany entry blockmap NothingO) +      = case (entries, entry) of +         (NothingC, JustO entry)   -> block entry `cat` body (successors entry) +         (JustC entries, NothingO) -> body entries +         _ -> error "bogus GADT pattern match failure" +     where +       body  :: [Label] -> Fact C f -> Fact C f +       body entries f +         = fixpointAnal Fwd lattice do_block entries blockmap f +         where +           do_block :: forall x . Block n C x -> FactBase f -> Fact x f +           do_block b fb = block b entryFact +             where entryFact = getFact lattice (entryLabel b) fb + +    -- NB. eta-expand block, GHC can't do this by itself.  See #5809. +    block :: forall e x . Block n e x -> f -> Fact x f +    block BNil            f = f +    block (BlockCO n _)   f = ftr n f +    block (BlockCC l _ n) f = (ftr l `cat` ltr n) f +    block (BlockOC   _ n) f = ltr n f + +    {-# INLINE cat #-} +    cat :: forall f1 f2 f3 . (f1 -> f2) -> (f2 -> f3) -> (f1 -> f3) +    cat ft1 ft2 = \f -> ft2 $! ft1 f + +---------------------------------------------------------------- +--       Backward Analysis only +---------------------------------------------------------------- + +-- | if the graph being analyzed is open at the entry, there must +--   be no other entry point, or all goes horribly wrong... +analyzeBwd +   :: forall n f e .  NonLocal n => +      BwdPass FuelUniqSM n f +   -> MaybeC e [Label] +   -> Graph n e C -> Fact C f +   -> FactBase f +analyzeBwd BwdPass { bp_lattice = lattice, +                     bp_transfer = BwdTransfer3 (ftr, mtr, ltr) } +   entries g in_fact = graph g in_fact +  where +    graph :: Graph n e C -> Fact C f -> FactBase f +    graph (GMany entry blockmap NothingO) +      = case (entries, entry) of +         (NothingC, JustO entry)   -> body (successors entry) +         (JustC entries, NothingO) -> body entries +         _ -> error "bogus GADT pattern match failure" +     where +       body  :: [Label] -> Fact C f -> Fact C f +       body entries f +         = fixpointAnal Bwd lattice do_block entries blockmap f +         where +           do_block :: forall x . Block n C x -> Fact x f -> FactBase f +           do_block b fb = mapSingleton (entryLabel b) (block b fb) + +    -- NB. eta-expand block, GHC can't do this by itself.  See #5809. +    block :: forall e x . Block n e x -> Fact x f -> f +    block BNil            f = f +    block (BlockCO n b)   f = (ftr n `cat`  block b) f +    block (BlockCC l b n) f = ((ftr l `cat` block b) `cat` ltr n) f +    block (BlockOC   b n) f =              (block b `cat` ltr n) f + +    block (BMiddle n)     f = mtr n f +    block (BCat b1 b2)    f = (block b1 `cat` block b2) f +    block (BHead h n)     f = (block h  `cat` mtr n) f +    block (BTail n t)     f = (mtr  n   `cat` block t) f + +    {-# INLINE cat #-} +    cat :: forall f1 f2 f3 . (f2 -> f3) -> (f1 -> f2) -> (f1 -> f3) +    cat ft1 ft2 = \f -> ft1 $! ft2 f + +----------------------------------------------------------------------------- +--              Backward analysis and rewriting: the interface +----------------------------------------------------------------------------- + + +-- | if the graph being analyzed is open at the exit, I don't +--   quite understand the implications of possible other exits +analyzeAndRewriteBwd +   :: NonLocal n +   => BwdPass FuelUniqSM n f +   -> MaybeC e [Label] -> Graph n e x -> Fact x f +   -> FuelUniqSM (Graph n e x, FactBase f, MaybeO e f) +analyzeAndRewriteBwd pass entries g f = +  do (rg, fout) <- arbGraph pass (fmap targetLabels entries) g f +     let (g', fb) = normalizeGraph rg +     return (g', fb, distinguishedEntryFact g' fout) + +distinguishedEntryFact :: forall n e x f . Graph n e x -> Fact e f -> MaybeO e f +distinguishedEntryFact g f = maybe g +    where maybe :: Graph n e x -> MaybeO e f +          maybe GNil       = JustO f +          maybe (GUnit {}) = JustO f +          maybe (GMany e _ _) = case e of NothingO -> NothingO +                                          JustO _  -> JustO f + + +----------------------------------------------------------------------------- +--              Backward implementation +----------------------------------------------------------------------------- + +arbGraph :: forall n f e x . +            NonLocal n => +            BwdPass FuelUniqSM n f -> +            Entries e -> Graph n e x -> Fact x f -> FuelUniqSM (DG f n e x, Fact e f) +arbGraph pass@BwdPass { bp_lattice  = lattice, +                        bp_transfer = transfer, +                        bp_rewrite  = rewrite } entries g in_fact = graph g in_fact +  where +    {- nested type synonyms would be so lovely here  +    type ARB  thing = forall e x . thing e x -> Fact x f -> m (DG f n e x, f) +    type ARBX thing = forall e x . thing e x -> Fact x f -> m (DG f n e x, Fact e f) +    -} +    graph ::              Graph n e x -> Fact x f -> FuelUniqSM (DG f n e x, Fact e f) +    block :: forall e x . Block n e x -> Fact x f -> FuelUniqSM (DG f n e x, f) +    body  :: [Label] -> Body n -> Fact C f -> FuelUniqSM (DG f n C C, Fact C f) +    node  :: forall e x . (ShapeLifter e x)  +             => n e x       -> Fact x f -> FuelUniqSM (DG f n e x, f) +    cat :: forall e a x info info' info''. +           (info' -> FuelUniqSM (DG f n e a, info'')) +        -> (info  -> FuelUniqSM (DG f n a x, info')) +        -> (info  -> FuelUniqSM (DG f n e x, info'')) + +    graph GNil            f = return (dgnil, f) +    graph (GUnit blk)     f = block blk f +    graph (GMany e bdy x) f = ((e `ebcat` bdy) `cat` exit x) f +     where +      ebcat :: MaybeO e (Block n O C) -> Body n -> Fact C f -> FuelUniqSM (DG f n e C, Fact e f) +      exit  :: MaybeO x (Block n C O)           -> Fact x f -> FuelUniqSM (DG f n C x, Fact C f) +      exit (JustO blk) f = arbx block blk f +      exit NothingO    f = return (dgnilC, f) +      ebcat entry bdy f = c entries entry f +       where c :: MaybeC e [Label] -> MaybeO e (Block n O C) +                -> Fact C f -> FuelUniqSM (DG f n e C, Fact e f) +             c NothingC (JustO entry)   f = (block entry `cat` body (successors entry) bdy) f +             c (JustC entries) NothingO f = body entries bdy f +             c _ _ _ = error "bogus GADT pattern match failure" + +    -- Lift from nodes to blocks +    block BNil            f = return (dgnil, f) +    block (BlockCO n b)   f = (node n `cat` block b) f +    block (BlockCC l b n) f = (node l `cat` block b `cat` node n) f +    block (BlockOC   b n) f =              (block b `cat` node n) f + +    block (BMiddle n)     f = node n f +    block (BCat b1 b2)    f = (block b1 `cat` block b2) f +    block (BHead h n)     f = (block h  `cat` node n) f +    block (BTail n t)     f = (node  n  `cat` block t) f + +    {-# INLINE node #-} +    node n f +      = do { bwdres <- brewrite rewrite n f +           ; case bwdres of +               Nothing -> return (singletonDG entry_f n, entry_f) +                            where entry_f = btransfer transfer n f +               Just (g, rw) -> +                          do { let pass' = pass { bp_rewrite = rw } +                             ; (g, f) <- arbGraph pass' (fwdEntryLabel n) g f +                             ; return (g, bwdEntryFact lattice n f)} } + +    -- | Compose fact transformers and concatenate the resulting +    -- rewritten graphs. +    {-# INLINE cat #-}  +    cat ft1 ft2 f = do { (g2,f2) <- ft2 f +                       ; (g1,f1) <- ft1 f2 +                       ; let !g = g1 `dgSplice` g2 +                       ; return (g, f1) } + +    arbx :: forall x . +            (Block n C x -> Fact x f -> FuelUniqSM (DG f n C x, f)) +         -> (Block n C x -> Fact x f -> FuelUniqSM (DG f n C x, Fact C f)) + +    arbx arb thing f = do { (rg, f) <- arb thing f +                          ; let fb = joinInFacts (bp_lattice pass) $ +                                     mapSingleton (entryLabel thing) f +                          ; return (rg, fb) } +     -- joinInFacts adds debugging information + +                    -- Outgoing factbase is restricted to Labels *not* in +                    -- in the Body; the facts for Labels *in* +                    -- the Body are in the 'DG f n C C' +    body entries blockmap init_fbase +      = fixpoint Bwd (bp_lattice pass) do_block entries blockmap init_fbase +      where +        do_block :: forall x. Block n C x -> Fact x f -> FuelUniqSM (DG f n C x, LabelMap f) +        do_block b f = do (g, f) <- block b f +                          return (g, mapSingleton (entryLabel b) f) + + +{- + +The forward and backward cases are not dual.  In the forward case, the +entry points are known, and one simply traverses the body blocks from +those points.  In the backward case, something is known about the exit +points, but this information is essentially useless, because we don't +actually have a dual graph (that is, one with edges reversed) to +compute with.  (Even if we did have a dual graph, it would not avail +us---a backward analysis must include reachable blocks that don't +reach the exit, as in a procedure that loops forever and has side +effects.) + +-} + +----------------------------------------------------------------------------- +--      fixpoint +----------------------------------------------------------------------------- + +data Direction = Fwd | Bwd + +-- | fixpointing for analysis-only +-- +fixpointAnal :: forall n f. NonLocal n + => Direction + -> DataflowLattice f + -> (Block n C C -> Fact C f -> Fact C f) + -> [Label] + -> LabelMap (Block n C C) + -> Fact C f -> FactBase f + +fixpointAnal direction DataflowLattice{ fact_bot = bot, fact_join = join } +              do_block entries blockmap init_fbase +  = loop start init_fbase +  where +    blocks     = sortBlocks direction entries blockmap +    n          = length blocks +    block_arr  = {-# SCC "block_arr" #-} listArray (0,n-1) blocks +    start      = {-# SCC "start" #-} [0..n-1] +    dep_blocks = {-# SCC "dep_blocks" #-} mkDepBlocks direction blocks + +    loop +       :: IntHeap      -- blocks still to analyse +       -> FactBase f  -- current factbase (increases monotonically) +       -> FactBase f + +    loop []        fbase = fbase +    loop (ix:todo) fbase = +           let +               blk = block_arr ! ix + +               out_facts = {-# SCC "do_block" #-} do_block blk fbase + +               !(todo', fbase') = {-# SCC "mapFoldWithKey" #-} +                     mapFoldWithKey (updateFact join dep_blocks) +                                    (todo,fbase) out_facts +           in +           -- trace ("analysing: " ++ show (entryLabel blk)) $ +           -- trace ("fbase': " ++ show (mapKeys fbase')) $ return () +           -- trace ("changed: " ++ show changed) $ return () +           -- trace ("to analyse: " ++ show to_analyse) $ return () + +           loop todo' fbase' + + +-- | fixpointing for combined analysis/rewriting +-- +fixpoint :: forall n f. NonLocal n + => Direction + -> DataflowLattice f + -> (Block n C C -> Fact C f -> FuelUniqSM (DG f n C C, Fact C f)) + -> [Label] + -> LabelMap (Block n C C) + -> (Fact C f -> FuelUniqSM (DG f n C C, Fact C f)) + +fixpoint direction DataflowLattice{ fact_bot = bot, fact_join = join } +         do_block entries blockmap init_fbase +  = do +        -- trace ("fixpoint: " ++ show (case direction of Fwd -> True; Bwd -> False) ++ " " ++ show (mapKeys blockmap) ++ show entries ++ " " ++ show (mapKeys init_fbase)) $ return() +        (fbase, newblocks) <- loop start init_fbase mapEmpty +        -- trace ("fixpoint DONE: " ++ show (mapKeys fbase) ++ show (mapKeys newblocks)) $ return() +        return (GMany NothingO newblocks NothingO, +                mapDeleteList (mapKeys blockmap) fbase) +    -- The successors of the Graph are the the Labels +    -- for which we have facts and which are *not* in +    -- the blocks of the graph +  where +    blocks     = sortBlocks direction entries blockmap +    n          = length blocks +    block_arr  = {-# SCC "block_arr" #-} listArray (0,n-1) blocks +    start      = {-# SCC "start" #-} [0..n-1] +    dep_blocks = {-# SCC "dep_blocks" #-} mkDepBlocks direction blocks + +    loop +       :: IntHeap +       -> FactBase f  -- current factbase (increases monotonically) +       -> LabelMap (DBlock f n C C)  -- transformed graph +       -> FuelUniqSM (FactBase f, LabelMap (DBlock f n C C)) + +    loop [] fbase newblocks = return (fbase, newblocks) +    loop (ix:todo) fbase !newblocks = do +           let blk = block_arr ! ix + +           -- trace ("analysing: " ++ show (entryLabel blk)) $ return () +           (rg, out_facts) <- do_block blk fbase +           let !(todo', fbase') = +                  mapFoldWithKey (updateFact join dep_blocks) +                                 (todo,fbase) out_facts + +           -- trace ("fbase': " ++ show (mapKeys fbase')) $ return () +           -- trace ("changed: " ++ show changed) $ return () +           -- trace ("to analyse: " ++ show to_analyse) $ return () + +           let newblocks' = case rg of +                              GMany _ blks _ -> mapUnion blks newblocks +      +           loop todo' fbase' newblocks' + + +{-  Note [TxFactBase invariants] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The TxFactBase is used only during a fixpoint iteration (or "sweep"), +and accumulates facts (and the transformed code) during the fixpoint +iteration. + +* tfb_fbase increases monotonically, across all sweeps + +* At the beginning of each sweep +      tfb_cha  = NoChange +      tfb_lbls = {} + +* During each sweep we process each block in turn.  Processing a block +  is done thus: +    1.  Read from tfb_fbase the facts for its entry label (forward) +        or successors labels (backward) +    2.  Transform those facts into new facts for its successors (forward) +        or entry label (backward) +    3.  Augment tfb_fbase with that info +  We call the labels read in step (1) the "in-labels" of the sweep + +* The field tfb_lbls is the set of in-labels of all blocks that have +  been processed so far this sweep, including the block that is +  currently being processed.  tfb_lbls is initialised to {}.  It is a +  subset of the Labels of the *original* (not transformed) blocks. + +* The tfb_cha field is set to SomeChange iff we decide we need to +  perform another iteration of the fixpoint loop. It is initialsed to NoChange. + +  Specifically, we set tfb_cha to SomeChange in step (3) iff +    (a) The fact in tfb_fbase for a block L changes +    (b) L is in tfb_lbls +  Reason: until a label enters the in-labels its accumuated fact in tfb_fbase +  has not been read, hence cannot affect the outcome + +Note [Unreachable blocks] +~~~~~~~~~~~~~~~~~~~~~~~~~ +A block that is not in the domain of tfb_fbase is "currently unreachable". +A currently-unreachable block is not even analyzed.  Reason: consider  +constant prop and this graph, with entry point L1: +  L1: x:=3; goto L4 +  L2: x:=4; goto L4 +  L4: if x>3 goto L2 else goto L5 +Here L2 is actually unreachable, but if we process it with bottom input fact, +we'll propagate (x=4) to L4, and nuke the otherwise-good rewriting of L4. + +* If a currently-unreachable block is not analyzed, then its rewritten +  graph will not be accumulated in tfb_rg.  And that is good: +  unreachable blocks simply do not appear in the output. + +* Note that clients must be careful to provide a fact (even if bottom) +  for each entry point. Otherwise useful blocks may be garbage collected. + +* Note that updateFact must set the change-flag if a label goes from +  not-in-fbase to in-fbase, even if its fact is bottom.  In effect the +  real fact lattice is +       UNR +       bottom +       the points above bottom + +* Even if the fact is going from UNR to bottom, we still call the +  client's fact_join function because it might give the client +  some useful debugging information. + +* All of this only applies for *forward* ixpoints.  For the backward +  case we must treat every block as reachable; it might finish with a +  'return', and therefore have no successors, for example. +-} + + +----------------------------------------------------------------------------- +--  Pieces that are shared by fixpoint and fixpoint_anal +----------------------------------------------------------------------------- + +-- | Sort the blocks into the right order for analysis. +sortBlocks :: NonLocal n => Direction -> [Label] -> LabelMap (Block n C C) +           -> [Block n C C] +sortBlocks direction entries blockmap +   = case direction of Fwd -> fwd +                       Bwd -> reverse fwd +  where fwd = forwardBlockList entries blockmap + +-- | construct a mapping from L -> block indices.  If the fact for L +-- changes, re-analyse the given blocks. +mkDepBlocks :: NonLocal n => Direction -> [Block n C C] -> LabelMap [Int] +mkDepBlocks Fwd blocks = go blocks 0 mapEmpty +  where go []     !_  m = m +        go (b:bs) !n m = go bs (n+1) $! mapInsert (entryLabel b) [n] m +mkDepBlocks Bwd blocks = go blocks 0 mapEmpty +  where go []     !_ m = m +        go (b:bs) !n m = go bs (n+1) $! go' (successors b) m +            where go' [] m = m +                  go' (l:ls) m = go' ls (mapInsertWith (++) l [n] m) + + +-- | After some new facts have been generated by analysing a block, we +-- fold this function over them to generate (a) a list of block +-- indices to (re-)analyse, and (b) the new FactBase. +-- +updateFact :: JoinFun f -> LabelMap [Int] +           -> Label -> f       -- out fact +           -> (IntHeap, FactBase f) +           -> (IntHeap, FactBase f) + +updateFact fact_join dep_blocks lbl new_fact (todo, fbase) +  = case lookupFact lbl fbase of +      Nothing       -> let !z = mapInsert lbl new_fact fbase in (changed, z) +                           -- Note [no old fact] +      Just old_fact -> +        case fact_join lbl (OldFact old_fact) (NewFact new_fact) of +          (NoChange, _) -> (todo, fbase) +          (_,        f) -> let !z = mapInsert lbl f fbase in (changed, z) +  where +     changed = foldr insertIntHeap todo $ +                 mapFindWithDefault [] lbl dep_blocks + +{- +Note [no old fact] + +We know that the new_fact is >= _|_, so we don't need to join.  However, +if the new fact is also _|_, and we have already analysed its block, +we don't need to record a change.  So there's a tradeoff here.  It turns +out that always recording a change is faster. +-} + +----------------------------------------------------------------------------- +--      DG: an internal data type for 'decorated graphs' +--          TOTALLY internal to Hoopl; each block is decorated with a fact +----------------------------------------------------------------------------- + +type Graph = Graph' Block +type DG f  = Graph' (DBlock f) +data DBlock f n e x = DBlock f (Block n e x) -- ^ block decorated with fact + +instance NonLocal n => NonLocal (DBlock f n) where +  entryLabel (DBlock _ b) = entryLabel b +  successors (DBlock _ b) = successors b + +--- constructors + +dgnil  :: DG f n O O +dgnilC :: DG f n C C +dgSplice  :: NonLocal n => DG f n e a -> DG f n a x -> DG f n e x + +---- observers + +normalizeGraph :: forall n f e x . +                  NonLocal n => DG f n e x +               -> (Graph n e x, FactBase f) +                 -- A Graph together with the facts for that graph +                 -- The domains of the two maps should be identical + +normalizeGraph g = (graphMapBlocks dropFact g, facts g) +    where dropFact :: DBlock t t1 t2 t3 -> Block t1 t2 t3 +          dropFact (DBlock _ b) = b +          facts :: DG f n e x -> FactBase f +          facts GNil = noFacts +          facts (GUnit _) = noFacts +          facts (GMany _ body exit) = bodyFacts body `mapUnion` exitFacts exit +          exitFacts :: MaybeO x (DBlock f n C O) -> FactBase f +          exitFacts NothingO = noFacts +          exitFacts (JustO (DBlock f b)) = mapSingleton (entryLabel b) f +          bodyFacts :: LabelMap (DBlock f n C C) -> FactBase f +          bodyFacts body = mapFoldWithKey f noFacts body +            where f :: forall t a x. (NonLocal t) => Label -> DBlock a t C x -> LabelMap a -> LabelMap a +                  f lbl (DBlock f _) fb = mapInsert lbl f fb + +--- implementation of the constructors (boring) + +dgnil  = GNil +dgnilC = GMany NothingO emptyBody NothingO + +dgSplice = U.splice fzCat +  where fzCat :: DBlock f n e O -> DBlock t n O x -> DBlock f n e x +        fzCat (DBlock f b1) (DBlock _ b2) = DBlock f $! b1 `U.cat` b2 +        -- NB. strictness, this function is hammered. + +---------------------------------------------------------------- +--       Utilities +---------------------------------------------------------------- + +-- Lifting based on shape: +--  - from nodes to blocks +--  - from facts to fact-like things +-- Lowering back: +--  - from fact-like things to facts +-- Note that the latter two functions depend only on the entry shape. +class ShapeLifter e x where + singletonDG   :: f -> n e x -> DG f n e x + fwdEntryFact  :: NonLocal n => n e x -> f -> Fact e f + fwdEntryLabel :: NonLocal n => n e x -> MaybeC e [Label] + ftransfer :: FwdTransfer n f -> n e x -> f -> Fact x f + frewrite  :: FwdRewrite m n f -> n e x +           -> f -> m (Maybe (Graph n e x, FwdRewrite m n f)) +-- @ end node.tex + bwdEntryFact :: NonLocal n => DataflowLattice f -> n e x -> Fact e f -> f + btransfer    :: BwdTransfer n f -> n e x -> Fact x f -> f + brewrite     :: BwdRewrite m n f -> n e x +              -> Fact x f -> m (Maybe (Graph n e x, BwdRewrite m n f)) + +instance ShapeLifter C O where +  singletonDG f n = gUnitCO (DBlock f (BlockCO n BNil)) +  fwdEntryFact     n f  = mapSingleton (entryLabel n) f +  bwdEntryFact lat n fb = getFact lat (entryLabel n) fb +  ftransfer (FwdTransfer3 (ft, _, _)) n f = ft n f +  btransfer (BwdTransfer3 (bt, _, _)) n f = bt n f +  frewrite  (FwdRewrite3  (fr, _, _)) n f = fr n f +  brewrite  (BwdRewrite3  (br, _, _)) n f = br n f +  fwdEntryLabel n = JustC [entryLabel n] + +instance ShapeLifter O O where +  singletonDG f = gUnitOO . DBlock f . BMiddle +  fwdEntryFact   _ f = f +  bwdEntryFact _ _ f = f +  ftransfer (FwdTransfer3 (_, ft, _)) n f = ft n f +  btransfer (BwdTransfer3 (_, bt, _)) n f = bt n f +  frewrite  (FwdRewrite3  (_, fr, _)) n f = fr n f +  brewrite  (BwdRewrite3  (_, br, _)) n f = br n f +  fwdEntryLabel _ = NothingC + +instance ShapeLifter O C where +  singletonDG f n = gUnitOC (DBlock f (BlockOC BNil n)) +  fwdEntryFact   _ f = f +  bwdEntryFact _ _ f = f +  ftransfer (FwdTransfer3 (_, _, ft)) n f = ft n f +  btransfer (BwdTransfer3 (_, _, bt)) n f = bt n f +  frewrite  (FwdRewrite3  (_, _, fr)) n f = fr n f +  brewrite  (BwdRewrite3  (_, _, br)) n f = br n f +  fwdEntryLabel _ = NothingC + +{- +class ShapeLifter e x where +  singletonDG   :: f -> n e x -> DG f n e x + +instance ShapeLifter C O where +  singletonDG f n = gUnitCO (DBlock f (BlockCO n BNil)) + +instance ShapeLifter O O where +  singletonDG f = gUnitOO . DBlock f . BMiddle + +instance ShapeLifter O C where +  singletonDG f n = gUnitOC (DBlock f (BlockOC BNil n)) +-} + +-- Fact lookup: the fact `orelse` bottom +getFact  :: DataflowLattice f -> Label -> FactBase f -> f +getFact lat l fb = case lookupFact l fb of Just  f -> f +                                           Nothing -> fact_bot lat + + + +{-  Note [Respects fuel] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-} +-- $fuel +-- A value of type 'FwdRewrite' or 'BwdRewrite' /respects fuel/ if  +-- any function contained within the value satisfies the following properties: +-- +--   * When fuel is exhausted, it always returns 'Nothing'. +-- +--   * When it returns @Just g rw@, it consumes /exactly/ one unit +--     of fuel, and new rewrite 'rw' also respects fuel. +-- +-- Provided that functions passed to 'mkFRewrite', 'mkFRewrite3',  +-- 'mkBRewrite', and 'mkBRewrite3' are not aware of the fuel supply, +-- the results respect fuel. +-- +-- It is an /unchecked/ run-time error for the argument passed to 'wrapFR', +-- 'wrapFR2', 'wrapBR', or 'warpBR2' to return a function that does not respect fuel. + +-- ----------------------------------------------------------------------------- +-- a Heap of Int + +-- We should really use a proper Heap here, but my attempts to make +-- one have not succeeded in beating the simple ordered list.  Another +-- alternative is IntSet (using deleteFindMin), but that was also +-- slower than the ordered list in my experiments --SDM 25/1/2012 + +type IntHeap = [Int] -- ordered + +insertIntHeap :: Int -> [Int] -> [Int] +insertIntHeap x [] = [x] +insertIntHeap x (y:ys) +  | x < y     = x : y : ys +  | x == y    = x : ys +  | otherwise = y : insertIntHeap x ys diff --git a/compiler/cmm/MkGraph.hs b/compiler/cmm/MkGraph.hs index 04586b1029..797b785de2 100644 --- a/compiler/cmm/MkGraph.hs +++ b/compiler/cmm/MkGraph.hs @@ -1,29 +1,19 @@  {-# LANGUAGE GADTs #-} --- ToDo: remove -fno-warn-warnings-deprecations -{-# OPTIONS_GHC -fno-warn-warnings-deprecations #-} --- ToDo: remove -fno-warn-incomplete-patterns -{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-} - --- Module for building CmmAGraphs. - --- As the CmmAGraph is a wrapper over Graph CmmNode O x, it is different --- from Hoopl's AGraph. The current clients expect functions with the --- same names Hoopl uses, so this module cannot be in the same namespace --- as Compiler.Hoopl. -  module MkGraph -  ( CmmAGraph -  , emptyAGraph, (<*>), catAGraphs, outOfLine -  , mkLabel, mkMiddle, mkLast -  , withFreshLabel, withUnique, lgraphOfAGraph, labelAGraph +  ( CmmAGraph, CgStmt(..) +  , (<*>), catAGraphs +  , mkLabel, mkMiddle, mkLast, outOfLine +  , lgraphOfAGraph, labelAGraph    , stackStubExpr -  , mkNop, mkAssign, mkStore, mkCall, mkCmmCall, mkSafeCall, mkUnsafeCall, mkFinalCall -         , mkJump, mkDirectJump, mkForeignJump, mkJumpGC, mkCbranch, mkSwitch -         , mkReturn, mkReturnSimple, mkComment, mkCallEntry -         , mkBranch, mkCmmIfThenElse, mkCmmIfThen, mkCmmWhileDo -         , copyInOflow, copyInSlot, copyOutOflow, copyOutSlot +  , mkNop, mkAssign, mkStore, mkUnsafeCall, mkFinalCall, mkCallReturnsTo +  , mkJump, mkDirectJump, mkForeignJump, mkForeignJumpExtra, mkJumpGC +  , mkCbranch, mkSwitch +  , mkReturn, mkReturnSimple, mkComment, mkCallEntry, mkBranch +  , copyInOflow, copyOutOflow +  , noExtraStack +  , toCall, Transfer(..)    )  where @@ -31,250 +21,232 @@ import BlockId  import Cmm  import CmmCallConv (assignArgumentsPos, ParamLocation(..)) +  import Compiler.Hoopl hiding (Unique, (<*>), mkFirst, mkMiddle, mkLast, mkLabel, mkBranch, Shape(..)) -import qualified Compiler.Hoopl as H -import Compiler.Hoopl.GHC (uniqueToLbl)  import FastString  import ForeignCall  import Outputable  import Prelude hiding (succ)  import SMRep (ByteOff) -import StaticFlags -import Unique  import UniqSupply -import Util +import OrdList  #include "HsVersions.h" -{- -A 'CmmAGraph' is an abstract version of a 'Graph CmmNode O x' from module -'Cmm'.  The difference is that the 'CmmAGraph' can be eigher open of closed at -exit and it can supply fresh Labels and Uniques. - -It also supports a splicing operation <*>, which is different from the Hoopl's -<*>, because it splices two CmmAGraphs. Specifically, it can splice Graph -O C and Graph O x. In this case, the open beginning of the second graph is -thrown away.  In the debug mode this sequence is checked to be empty or -containing a branch (see note [Branch follows branch]). - -When an CmmAGraph open at exit is being converted to a CmmGraph, the output -exit sequence is considered unreachable. If the graph consist of one block -only, if it not the case and we crash. Otherwise we just throw the exit -sequence away (and in debug mode we test that it really was unreachable). --} - -{- -Node [Branch follows branch] -============================ -Why do we say it's ok for a Branch to follow a Branch? -Because the standard constructor mkLabel has fall-through -semantics. So if you do a mkLabel, you finish the current block, -giving it a label, and start a new one that branches to that label. -Emitting a Branch at this point is fine: -       goto L1; L2: ...stuff... --} - -data CmmGraphOC = Opened (Graph CmmNode O O) -                | Closed (Graph CmmNode O C) -type CmmAGraph = UniqSM CmmGraphOC     -- Graph open at entry - -{- -MS: I began with -  newtype CmmAGraph = forall x. AG (UniqSM (Graph CmmNode O x)) -but that does not work well, because we cannot take the graph -out of the monad -- we do not know the type of what we would take -out and pattern matching does not help, as we cannot pattern match -on a graph inside the monad. --} -data Transfer = Call | Jump | Ret deriving Eq +----------------------------------------------------------------------------- +-- Building Graphs + + +-- | CmmAGraph is a chunk of code consisting of: +-- +--   * ordinary statements (assignments, stores etc.) +--   * jumps +--   * labels +--   * out-of-line labelled blocks +-- +-- The semantics is that control falls through labels and out-of-line +-- blocks.  Everything after a jump up to the next label is by +-- definition unreachable code, and will be discarded. +-- +-- Two CmmAGraphs can be stuck together with <*>, with the meaning that +-- control flows from the first to the second. +-- +-- A 'CmmAGraph' can be turned into a 'CmmGraph' (closed at both ends) +-- by providing a label for the entry point; see 'labelAGraph'. +-- +type CmmAGraph = OrdList CgStmt + +data CgStmt +  = CgLabel BlockId +  | CgStmt  (CmmNode O O) +  | CgLast  (CmmNode O C) +  | CgFork  BlockId CmmAGraph + +flattenCmmAGraph :: BlockId -> CmmAGraph -> CmmGraph +flattenCmmAGraph id stmts = +    CmmGraph { g_entry = id, +               g_graph = GMany NothingO body NothingO } +  where +  (block, blocks) = flatten (fromOL stmts) +  entry = blockJoinHead (CmmEntry id) block +  body = foldr addBlock emptyBody (entry:blocks) + +  flatten :: [CgStmt] -> (Block CmmNode O C, [Block CmmNode C C]) +  flatten [] = panic "flatten []" + +  -- A label at the end of a function or fork: this label must not be reachable, +  -- but it might be referred to from another BB that also isn't reachable. +  -- Eliminating these has to be done with a dead-code analysis.  For now, +  -- we just make it into a well-formed block by adding a recursive jump. +  flatten [CgLabel id] +    = (goto_id, [blockJoinHead (CmmEntry id) goto_id] ) +    where goto_id = blockJoinTail emptyBlock (CmmBranch id) + +  -- A jump/branch: throw away all the code up to the next label, because +  -- it is unreachable.  Be careful to keep forks that we find on the way. +  flatten (CgLast stmt : stmts) +    = case dropWhile isOrdinaryStmt stmts of +        [] -> +            ( sing, [] ) +        [CgLabel id] -> +            ( sing, [blockJoin (CmmEntry id) emptyBlock (CmmBranch id)] ) +        (CgLabel id : stmts) -> +            ( sing, blockJoinHead (CmmEntry id) block : blocks ) +            where (block,blocks) = flatten stmts +        (CgFork fork_id stmts : ss) ->  +            flatten (CgFork fork_id stmts : CgLast stmt : ss) +        _ -> panic "MkGraph.flatten" +    where +      sing = blockJoinTail emptyBlock stmt + +  flatten (s:ss) =  +        case s of +          CgStmt stmt -> (blockCons stmt block, blocks) +          CgLabel id  -> (blockJoinTail emptyBlock (CmmBranch id), +                          blockJoinHead (CmmEntry id) block : blocks) +          CgFork fork_id stmts ->  +                (block, blockJoinHead (CmmEntry fork_id) fork_block : fork_blocks ++ blocks) +                where (fork_block, fork_blocks) = flatten (fromOL stmts) +          _ -> panic "MkGraph.flatten" +    where (block,blocks) = flatten ss + +isOrdinaryStmt :: CgStmt -> Bool +isOrdinaryStmt (CgStmt _) = True +isOrdinaryStmt (CgLast _) = True +isOrdinaryStmt _          = False + +  ---------- AGraph manipulation -emptyAGraph    :: CmmAGraph  (<*>)          :: CmmAGraph -> CmmAGraph -> CmmAGraph +(<*>)           = appOL +  catAGraphs     :: [CmmAGraph] -> CmmAGraph +catAGraphs      = concatOL + +-- | created a sequence "goto id; id:" as an AGraph +mkLabel        :: BlockId -> CmmAGraph +mkLabel bid     = unitOL (CgLabel bid) + +-- | creates an open AGraph from a given node +mkMiddle        :: CmmNode O O -> CmmAGraph +mkMiddle middle = unitOL (CgStmt middle) -mkLabel        :: BlockId     -> CmmAGraph  -- created a sequence "goto id; id:" as an AGraph -mkMiddle       :: CmmNode O O -> CmmAGraph  -- creates an open AGraph from a given node -mkLast         :: CmmNode O C -> CmmAGraph  -- created a closed AGraph from a given node +-- | created a closed AGraph from a given node +mkLast         :: CmmNode O C -> CmmAGraph +mkLast last     = unitOL (CgLast last) -withFreshLabel :: String -> (BlockId -> CmmAGraph) -> CmmAGraph -withUnique     :: (Unique -> CmmAGraph) -> CmmAGraph +-- | A labelled code block; should end in a last node +outOfLine      :: BlockId -> CmmAGraph -> CmmAGraph +outOfLine l g   = unitOL (CgFork l g) +-- | allocate a fresh label for the entry point  lgraphOfAGraph :: CmmAGraph -> UniqSM CmmGraph -  -- ^ allocate a fresh label for the entry point +lgraphOfAGraph g = do u <- getUniqueM +                      return (flattenCmmAGraph (mkBlockId u) g) + +-- | use the given BlockId as the label of the entry point  labelAGraph    :: BlockId -> CmmAGraph -> UniqSM CmmGraph -  -- ^ use the given BlockId as the label of the entry point +labelAGraph lbl ag = return (flattenCmmAGraph lbl ag)  ---------- No-ops  mkNop        :: CmmAGraph +mkNop         = nilOL +  mkComment    :: FastString -> CmmAGraph +#ifdef DEBUG +-- SDM: generating all those comments takes time, this saved about 4% for me +mkComment fs  = mkMiddle $ CmmComment fs +#else +mkComment _   = nilOL +#endif  ---------- Assignment and store  mkAssign     :: CmmReg  -> CmmExpr -> CmmAGraph -mkStore      :: CmmExpr -> CmmExpr -> CmmAGraph +mkAssign l r  = mkMiddle $ CmmAssign l r ----------- Calls -mkCall       :: CmmExpr -> (Convention, Convention) -> [CmmFormal] -> [CmmActual] -> -                  UpdFrameOffset -> CmmAGraph -mkCmmCall    :: CmmExpr ->              [CmmFormal] -> [CmmActual] -> -                  UpdFrameOffset -> CmmAGraph -  -- Native C-- calling convention -mkSafeCall    :: ForeignTarget -> [CmmFormal] -> [CmmActual] -> UpdFrameOffset -> Bool -> CmmAGraph -mkUnsafeCall  :: ForeignTarget -> [CmmFormal] -> [CmmActual] -> CmmAGraph -mkFinalCall   :: CmmExpr -> CCallConv -> [CmmActual] -> UpdFrameOffset -> CmmAGraph -  -- Never returns; like exit() or barf() +mkStore      :: CmmExpr -> CmmExpr -> CmmAGraph +mkStore  l r  = mkMiddle $ CmmStore  l r  ---------- Control transfer -mkJump          ::               CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph -mkDirectJump    ::               CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph -mkJumpGC        ::               CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph -mkForeignJump   :: Convention -> CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph -mkCbranch       :: CmmExpr -> BlockId -> BlockId          -> CmmAGraph -mkSwitch        :: CmmExpr -> [Maybe BlockId]             -> CmmAGraph +mkJump          :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph +mkJump e actuals updfr_off = +  lastWithArgs Jump Old NativeNodeCall actuals updfr_off $ +    toCall e Nothing updfr_off 0 + +mkDirectJump    :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph +mkDirectJump e actuals updfr_off = +  lastWithArgs Jump Old NativeDirectCall actuals updfr_off $ +    toCall e Nothing updfr_off 0 + +mkJumpGC        :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph +mkJumpGC e actuals updfr_off = +  lastWithArgs Jump Old GC actuals updfr_off $ +    toCall e Nothing updfr_off 0 + +mkForeignJump   :: Convention -> CmmExpr -> [CmmActual] -> UpdFrameOffset +                -> CmmAGraph +mkForeignJump conv e actuals updfr_off = +  mkForeignJumpExtra conv e actuals updfr_off noExtraStack + +mkForeignJumpExtra :: Convention -> CmmExpr -> [CmmActual] +                -> UpdFrameOffset -> (ByteOff, [(CmmExpr, ByteOff)]) +                -> CmmAGraph +mkForeignJumpExtra conv e actuals updfr_off extra_stack = +  lastWithArgsAndExtraStack Jump Old conv actuals updfr_off extra_stack $ +    toCall e Nothing updfr_off 0 + +mkCbranch       :: CmmExpr -> BlockId -> BlockId -> CmmAGraph +mkCbranch pred ifso ifnot = mkLast (CmmCondBranch pred ifso ifnot) + +mkSwitch        :: CmmExpr -> [Maybe BlockId] -> CmmAGraph +mkSwitch e tbl   = mkLast $ CmmSwitch e tbl +  mkReturn        :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph +mkReturn e actuals updfr_off = +  lastWithArgs Ret  Old NativeReturn actuals updfr_off $ +    toCall e Nothing updfr_off 0 +  mkReturnSimple  :: [CmmActual] -> UpdFrameOffset -> CmmAGraph +mkReturnSimple actuals updfr_off = +  mkReturn e actuals updfr_off +  where e = CmmLoad (CmmStackSlot Old updfr_off) gcWord  mkBranch        :: BlockId -> CmmAGraph -mkCmmIfThenElse :: CmmExpr -> CmmAGraph -> CmmAGraph -> CmmAGraph -mkCmmIfThen     :: CmmExpr -> CmmAGraph -> CmmAGraph -mkCmmWhileDo    :: CmmExpr -> CmmAGraph -> CmmAGraph - -outOfLine       :: CmmAGraph -> CmmAGraph --- ^ The argument is an CmmAGraph that must have an --- empty entry sequence and be closed at the end. --- The result is a new CmmAGraph that is open at the --- end and goes directly from entry to exit, with the --- original graph sitting to the side out-of-line. --- --- Example:  mkMiddle (x = 3) ---           <*> outOfLine (mkLabel L <*> ...stuff...) ---           <*> mkMiddle (y = x) --- Control will flow directly from x=3 to y=x; --- the block starting with L is "on the side". --- --- N.B. algebraically forall g g' : g <*> outOfLine g' == outOfLine g' <*> g +mkBranch bid     = mkLast (CmmBranch bid) + +mkFinalCall   :: CmmExpr -> CCallConv -> [CmmActual] -> UpdFrameOffset +              -> CmmAGraph +mkFinalCall f _ actuals updfr_off = +  lastWithArgs Call Old NativeDirectCall actuals updfr_off $ +    toCall f Nothing updfr_off 0 + +mkCallReturnsTo :: CmmExpr -> Convention -> [CmmActual] +                -> BlockId +                -> ByteOff +                -> UpdFrameOffset +                -> (ByteOff, [(CmmExpr,ByteOff)]) +                -> CmmAGraph +mkCallReturnsTo f callConv actuals ret_lbl ret_off updfr_off extra_stack = do +  lastWithArgsAndExtraStack Call (Young ret_lbl) callConv actuals +     updfr_off extra_stack $ +       toCall f (Just ret_lbl) updfr_off ret_off + +mkUnsafeCall  :: ForeignTarget -> [CmmFormal] -> [CmmActual] -> CmmAGraph +mkUnsafeCall t fs as = mkMiddle $ CmmUnsafeForeignCall t fs as +  -------------------------------------------------------------------------- --- ================ IMPLEMENTATION ================-- - --------------------------------------------------- --- Raw CmmAGraph handling - -emptyAGraph = return $ Opened emptyGraph -ag <*> ah = do g <- ag -               h <- ah -               return (case (g, h) of -                 (Opened g, Opened h) -> Opened $ g H.<*> h -                 (Opened g, Closed h) -> Closed $ g H.<*> h -                 (Closed g, Opened GNil) -> Closed g -                 (Closed g, Opened (GUnit e)) -> note_unreachable e $ Closed g -                 (Closed g, Opened (GMany (JustO e) b x)) -> note_unreachable e $ Opened $ g H.|*><*| GMany NothingO b x -                 (Closed g, Closed (GMany (JustO e) b x)) -> note_unreachable e $ Closed $ g H.|*><*| GMany NothingO b x -                 :: CmmGraphOC) -catAGraphs = foldl (<*>) emptyAGraph - -outOfLine ag = withFreshLabel "outOfLine" $ \l -> -               do g <- ag -                  return (case g of -                    Closed (GMany (JustO e) b _) -> note_unreachable e $ Opened $ -                                                      GMany (JustO $ BLast $ CmmBranch l) b (JustO $ BFirst $ CmmEntry l) -                    _                            -> panic "outOfLine" -                    :: CmmGraphOC) - -note_unreachable :: Block CmmNode O x -> a -> a -note_unreachable block graph = -  ASSERT (block_is_empty_or_label)  -- Note [Branch follows branch] -  graph -  where block_is_empty_or_label :: Bool -        block_is_empty_or_label = case blockToNodeList block of -                                    (NothingC, [], NothingC)            -> True -                                    (NothingC, [], JustC (CmmBranch _)) -> True -                                    _                                   -> False - -mkLabel bid = return $ Opened $ H.mkLast (CmmBranch bid) |*><*| H.mkFirst (CmmEntry bid) -mkMiddle middle = return $ Opened $ H.mkMiddle middle -mkLast last = return $ Closed $ H.mkLast last - -withUnique f = getUniqueM >>= f -withFreshLabel _name f = getUniqueM >>= f . uniqueToLbl . intToUnique . getKey -lgraphOfAGraph g = do u <- getUniqueM -                      labelAGraph (mkBlockId u) g - -labelAGraph lbl ag = do g <- ag -                        return $ CmmGraph {g_entry=lbl, g_graph=H.mkFirst (CmmEntry lbl) H.<*> closed g} -  where closed :: CmmGraphOC -> Graph CmmNode O C -        closed (Closed g) = g -        closed (Opened g@(GMany entry body (JustO exit))) = -          ASSERT (entryLabel exit `notElem` map entryLabel (postorder_dfs g)) -          GMany entry body NothingO -        closed (Opened _) = panic "labelAGraph" - --------------------------------------------------- --- CmmAGraph constructions - -mkNop                     = emptyAGraph -mkComment fs              = mkMiddle $ CmmComment fs -mkStore  l r              = mkMiddle $ CmmStore  l r - --- NEED A COMPILER-DEBUGGING FLAG HERE --- Sanity check: any value assigned to a pointer must be non-zero. --- If it's 0, cause a crash immediately. -mkAssign l r = if opt_StubDeadValues then assign l r <*> check l else assign l r -  where assign l r = mkMiddle (CmmAssign l r) -        check (CmmGlobal _) = mkNop -        check l@(CmmLocal reg) = -- if a ptr arg is NULL, cause a crash! -          if isGcPtrType ty then -            mkCmmIfThen (CmmMachOp (MO_Eq w) [r, stackStubExpr w]) -                        (assign l (CmmLoad (CmmLit (CmmInt 0 w)) ty)) -          else mkNop -            where ty = localRegType reg -                  w  = typeWidth ty -                  r  = CmmReg l  -- Why are we inserting extra blocks that simply branch to the successors?  -- Because in addition to the branch instruction, @mkBranch@ will insert  -- a necessary adjustment to the stack pointer. -mkCbranch pred ifso ifnot = mkLast (CmmCondBranch pred ifso ifnot) -mkSwitch e tbl            = mkLast $ CmmSwitch e tbl - -mkSafeCall   t fs as upd i = withFreshLabel "safe call" $ body -  where -    body k = -     (    mkStore (CmmStackSlot (CallArea (Young k)) (widthInBytes wordWidth)) -                  (CmmLit (CmmBlock k)) -      <*> mkLast (CmmForeignCall {tgt=t, res=fs, args=as, succ=k, updfr=upd, intrbl=i}) -      <*> mkLabel k) -mkUnsafeCall t fs as = mkMiddle $ CmmUnsafeForeignCall t fs as -mkBranch bid = mkLast (CmmBranch bid) - -mkCmmIfThenElse e tbranch fbranch = -  withFreshLabel "end of if"     $ \endif -> -  withFreshLabel "start of then" $ \tid -> -  withFreshLabel "start of else" $ \fid -> -    mkCbranch e tid fid <*> -    mkLabel tid <*> tbranch <*> mkBranch endif <*> -    mkLabel fid <*> fbranch <*> mkLabel endif - -mkCmmIfThen e tbranch -  = withFreshLabel "end of if"     $ \endif -> -    withFreshLabel "start of then" $ \tid -> -      mkCbranch e tid endif <*> -      mkLabel tid <*> tbranch <*> mkLabel endif - -mkCmmWhileDo e body = -  withFreshLabel "loop test" $ \test -> -  withFreshLabel "loop head" $ \head -> -  withFreshLabel "end while" $ \endwhile -> -    -- Forrest Baskett's while-loop layout -    mkBranch test <*> mkLabel head <*> body -                  <*> mkLabel test <*> mkCbranch e head endwhile -                  <*> mkLabel endwhile  -- For debugging purposes, we can stub out dead stack slots:  stackStubExpr :: Width -> CmmExpr @@ -286,12 +258,9 @@ stackStubExpr w = CmmLit (CmmInt 0 w)  -- Therefore, for copying arguments and results, we provide different  -- functions to pass the arguments in an overflow area and to pass them in spill slots.  copyInOflow  :: Convention -> Area -> [CmmFormal] -> (Int, CmmAGraph) -copyInSlot   :: Convention -> [CmmFormal] -> [CmmNode O O] -copyOutSlot  :: Convention -> [LocalReg] -> [CmmNode O O]  copyInOflow conv area formals = (offset, catAGraphs $ map mkMiddle nodes)    where (offset, nodes) = copyIn oneCopyOflowI conv area formals -copyInSlot c f = snd $ copyIn oneCopySlotI c (panic "no area for copying to slots") f  type SlotCopier = Area -> (LocalReg, ByteOff) -> (ByteOff, [CmmNode O O]) ->                            (ByteOff, [CmmNode O O]) @@ -312,104 +281,86 @@ copyIn oflow conv area formals =                  adjust rst x@(_, RegisterParam _) = x : rst  -- Copy-in one arg, using overflow space if needed. -oneCopyOflowI, oneCopySlotI :: SlotCopier +oneCopyOflowI :: SlotCopier  oneCopyOflowI area (reg, off) (n, ms) =    (max n off, CmmAssign (CmmLocal reg) (CmmLoad (CmmStackSlot area off) ty) : ms)    where ty = localRegType reg --- Copy-in one arg, using spill slots if needed -- used for calling conventions at --- a procpoint that is not a return point. The offset is irrelevant here... -oneCopySlotI _ (reg, _) (n, ms) = -  (n, CmmAssign (CmmLocal reg) (CmmLoad (CmmStackSlot (RegSlot reg) w) ty) : ms) -  where ty = localRegType reg -        w  = widthInBytes (typeWidth ty) - -  -- Factoring out the common parts of the copyout functions yielded something  -- more complicated: -copyOutOflow :: Convention -> Transfer -> Area -> [CmmActual] -> UpdFrameOffset -> -                              (Int, CmmAGraph) +data Transfer = Call | Jump | Ret deriving Eq + +copyOutOflow :: Convention -> Transfer -> Area -> [CmmActual] +             -> UpdFrameOffset +             -> (ByteOff, [(CmmExpr,ByteOff)]) -- extra stack stuff +             -> (Int, CmmAGraph) +  -- Generate code to move the actual parameters into the locations --- required by the calling convention.  This includes a store for the return address. +-- required by the calling convention.  This includes a store for the +-- return address.  -- --- The argument layout function ignores the pointer to the info table, so we slot that --- in here. When copying-out to a young area, we set the info table for return --- and adjust the offsets of the other parameters. --- If this is a call instruction, we adjust the offsets of the other parameters. -copyOutOflow conv transfer area@(CallArea a) actuals updfr_off -  = foldr co (init_offset, emptyAGraph) args' +-- The argument layout function ignores the pointer to the info table, +-- so we slot that in here. When copying-out to a young area, we set +-- the info table for return and adjust the offsets of the other +-- parameters.  If this is a call instruction, we adjust the offsets +-- of the other parameters. +copyOutOflow conv transfer area actuals updfr_off +  (extra_stack_off, extra_stack_stuff) +  = foldr co (init_offset, mkNop) (args' ++ stack_params)    where       co (v, RegisterParam r) (n, ms) = (n, mkAssign (CmmGlobal r) v <*> ms)      co (v, StackParam off)  (n, ms) = (max n off, mkStore (CmmStackSlot area off) v <*> ms) +    stack_params = [ (e, StackParam (off + init_offset)) +                   | (e,off) <- extra_stack_stuff ] +      (setRA, init_offset) = -      case a of Young id -> id `seq` -- Generate a store instruction for -                                     -- the return address if making a call +      case area of +            Young id -> id `seq` -- Generate a store instruction for +           	    	     	     -- the return address if making a call                    if transfer == Call then                      ([(CmmLit (CmmBlock id), StackParam init_offset)],                       widthInBytes wordWidth)                    else ([], 0) -                Old -> ([], updfr_off) +            Old -> ([], updfr_off) + +    arg_offset = init_offset + extra_stack_off      args :: [(CmmExpr, ParamLocation)]   -- The argument and where to put it      args = assignArgumentsPos conv cmmExprType actuals      args' = foldl adjust setRA args -      where adjust rst   (v, StackParam off)  = (v, StackParam (off + init_offset)) : rst +      where adjust rst   (v, StackParam off)  = (v, StackParam (off + arg_offset)) : rst              adjust rst x@(_, RegisterParam _) = x : rst -copyOutOflow _ _ (RegSlot _) _ _ = panic "cannot copy arguments into a register slot" --- Args passed only in registers and stack slots; no overflow space. --- No return address may apply! -copyOutSlot conv actuals = foldr co [] args -  where co (v, RegisterParam r) ms = CmmAssign (CmmGlobal r) (toExp v) : ms -        co (v, StackParam off)  ms = CmmStore  (CmmStackSlot (RegSlot v) off) (toExp v) : ms -        toExp r = CmmReg (CmmLocal r) -        args = assignArgumentsPos conv localRegType actuals  mkCallEntry :: Convention -> [CmmFormal] -> (Int, CmmAGraph) -mkCallEntry conv formals = copyInOflow conv (CallArea Old) formals +mkCallEntry conv formals = copyInOflow conv Old formals -lastWithArgs :: Transfer -> Area -> Convention -> [CmmActual] -> UpdFrameOffset -> -                (ByteOff -> CmmAGraph) -> CmmAGraph +lastWithArgs :: Transfer -> Area -> Convention -> [CmmActual] +             -> UpdFrameOffset +             -> (ByteOff -> CmmAGraph) +             -> CmmAGraph  lastWithArgs transfer area conv actuals updfr_off last = -  let (outArgs, copies) = copyOutOflow conv transfer area actuals updfr_off in +  lastWithArgsAndExtraStack transfer area conv actuals +                            updfr_off noExtraStack last + +lastWithArgsAndExtraStack :: Transfer -> Area -> Convention -> [CmmActual] +             -> UpdFrameOffset -> (ByteOff, [(CmmExpr,ByteOff)]) +             -> (ByteOff -> CmmAGraph) +             -> CmmAGraph +lastWithArgsAndExtraStack transfer area conv actuals updfr_off +                          extra_stack last = +  let (outArgs, copies) = copyOutOflow conv transfer area actuals +                             updfr_off extra_stack in    copies <*> last outArgs --- The area created for the jump and return arguments is the same area as the --- procedure entry. -old :: Area -old = CallArea Old -toCall :: CmmExpr -> Maybe BlockId -> UpdFrameOffset -> ByteOff -> ByteOff -> CmmAGraph +noExtraStack :: (ByteOff, [(CmmExpr,ByteOff)]) +noExtraStack = (0,[]) + +toCall :: CmmExpr -> Maybe BlockId -> UpdFrameOffset -> ByteOff -> ByteOff +       -> CmmAGraph  toCall e cont updfr_off res_space arg_space =    mkLast $ CmmCall e cont arg_space res_space updfr_off -mkJump e actuals updfr_off = -  lastWithArgs Jump old NativeNodeCall actuals updfr_off $ toCall e Nothing updfr_off 0 -mkDirectJump e actuals updfr_off = -  lastWithArgs Jump old NativeDirectCall actuals updfr_off $ toCall e Nothing updfr_off 0 -mkJumpGC e actuals updfr_off = -  lastWithArgs Jump old GC actuals updfr_off $ toCall e Nothing updfr_off 0 -mkForeignJump conv e actuals updfr_off = -  lastWithArgs Jump old conv actuals updfr_off $ toCall e Nothing updfr_off 0 -mkReturn e actuals updfr_off = -  lastWithArgs Ret  old NativeReturn actuals updfr_off $ toCall e Nothing updfr_off 0 -    -- where e = CmmLoad (CmmStackSlot (CallArea Old) updfr_off) gcWord -mkReturnSimple actuals updfr_off = -  lastWithArgs Ret  old NativeReturn actuals updfr_off $ toCall e Nothing updfr_off 0 -    where e = CmmLoad (CmmStackSlot (CallArea Old) updfr_off) gcWord - -mkFinalCall f _ actuals updfr_off = -  lastWithArgs Call old NativeDirectCall actuals updfr_off $ toCall f Nothing updfr_off 0 - -mkCmmCall f results actuals = mkCall f (NativeDirectCall, NativeReturn) results actuals - --- I'm dropping the SRT, but that should be okay: we plan to reconstruct it later. -mkCall f (callConv, retConv) results actuals updfr_off = -  withFreshLabel "call successor" $ \k -> -    let area = CallArea $ Young k -        (off, copyin) = copyInOflow retConv area results -        copyout = lastWithArgs Call area callConv actuals updfr_off  -                               (toCall f (Just k) updfr_off off) -    in (copyout <*> mkLabel k <*> copyin) diff --git a/compiler/cmm/OldCmm.hs b/compiler/cmm/OldCmm.hs index fc4706c8c4..00bbe6d2ee 100644 --- a/compiler/cmm/OldCmm.hs +++ b/compiler/cmm/OldCmm.hs @@ -9,9 +9,7 @@  module OldCmm (          CmmGroup, GenCmmGroup, RawCmmGroup, CmmDecl, RawCmmDecl,          ListGraph(..), - -        CmmInfo(..), CmmInfoTable(..), ClosureTypeInfo(..), UpdateFrame(..), - +        UpdateFrame(..), CmmInfoTable(..), ClosureTypeInfo(..),          CmmStatic(..), CmmStatics(..), CmmFormal, CmmActual,          cmmMapGraph, cmmTopMapGraph, @@ -52,13 +50,6 @@ import ForeignCall  --     Info Tables  ----------------------------------------------------------------------------- -data CmmInfo -  = CmmInfo -        (Maybe BlockId)     -- GC target. Nothing <=> CPS won't do stack check -                            -- JD: NOT USED BY NEW CODE GEN -        (Maybe UpdateFrame) -- Update frame -        CmmInfoTable        -- Info table -  -- | A frame that is to be pushed before entry to the function.  -- Used to handle 'update' frames.  data UpdateFrame @@ -85,8 +76,8 @@ data UpdateFrame  newtype ListGraph i = ListGraph [GenBasicBlock i]  -- | Cmm with the info table as a data type -type CmmGroup = GenCmmGroup CmmStatics CmmInfo (ListGraph CmmStmt) -type CmmDecl = GenCmmDecl CmmStatics CmmInfo (ListGraph CmmStmt) +type CmmGroup = GenCmmGroup CmmStatics CmmInfoTable (ListGraph CmmStmt) +type CmmDecl = GenCmmDecl CmmStatics CmmInfoTable (ListGraph CmmStmt)  -- | Cmm with the info tables converted to a list of 'CmmStatic' along with the info  -- table label. If we are building without tables-next-to-code there will be no statics @@ -225,16 +216,9 @@ instance UserOfLocalRegs CmmCallTarget where      foldRegsUsed f set (CmmCallee e _)    = foldRegsUsed f set e      foldRegsUsed f set (CmmPrim _ mStmts) = foldRegsUsed f set mStmts -instance UserOfSlots CmmCallTarget where -    foldSlotsUsed f set (CmmCallee e _) = foldSlotsUsed f set e -    foldSlotsUsed _ set (CmmPrim {})    = set -  instance UserOfLocalRegs a => UserOfLocalRegs (CmmHinted a) where      foldRegsUsed f set a = foldRegsUsed f set (hintlessCmm a) -instance UserOfSlots a => UserOfSlots (CmmHinted a) where -    foldSlotsUsed f set a = foldSlotsUsed f set (hintlessCmm a) -  instance DefinerOfLocalRegs a => DefinerOfLocalRegs (CmmHinted a) where      foldRegsDefd f set a = foldRegsDefd f set (hintlessCmm a) diff --git a/compiler/cmm/OldCmmLint.hs b/compiler/cmm/OldCmmLint.hs new file mode 100644 index 0000000000..72e40ce4f8 --- /dev/null +++ b/compiler/cmm/OldCmmLint.hs @@ -0,0 +1,209 @@ +----------------------------------------------------------------------------- +-- +-- (c) The University of Glasgow 2004-2006 +-- +-- CmmLint: checking the correctness of Cmm statements and expressions +-- +----------------------------------------------------------------------------- + +{-# OPTIONS -fno-warn-tabs #-} +-- The above warning supression flag is a temporary kludge. +-- While working on this module you are encouraged to remove it and +-- detab the module (please do the detabbing in a separate patch). See +--     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#TabsvsSpaces +-- for details + +module OldCmmLint ( +  cmmLint, cmmLintTop +  ) where + +import BlockId +import OldCmm +import CLabel +import Outputable +import OldPprCmm() +import Constants +import FastString +import Platform + +import Data.Maybe + +-- ----------------------------------------------------------------------------- +-- Exported entry points: + +cmmLint :: (Outputable d, Outputable h) +        => Platform -> GenCmmGroup d h (ListGraph CmmStmt) -> Maybe SDoc +cmmLint platform tops = runCmmLint platform (mapM_ (lintCmmDecl platform)) tops + +cmmLintTop :: (Outputable d, Outputable h) +           => Platform -> GenCmmDecl d h (ListGraph CmmStmt) -> Maybe SDoc +cmmLintTop platform top = runCmmLint platform (lintCmmDecl platform) top + +runCmmLint :: Outputable a +           => Platform -> (a -> CmmLint b) -> a -> Maybe SDoc +runCmmLint _ l p = +   case unCL (l p) of +   Left err -> Just (vcat [ptext $ sLit ("Cmm lint error:"), +                           nest 2 err, +                           ptext $ sLit ("Program was:"), +                           nest 2 (ppr p)]) +   Right _  -> Nothing + +lintCmmDecl :: Platform -> (GenCmmDecl h i (ListGraph CmmStmt)) -> CmmLint () +lintCmmDecl platform (CmmProc _ lbl (ListGraph blocks)) +  = addLintInfo (text "in proc " <> pprCLabel platform lbl) $ +        let labels = foldl (\s b -> setInsert (blockId b) s) setEmpty blocks +	in  mapM_ (lintCmmBlock platform labels) blocks + +lintCmmDecl _ (CmmData {}) +  = return () + +lintCmmBlock :: Platform -> BlockSet -> GenBasicBlock CmmStmt -> CmmLint () +lintCmmBlock platform labels (BasicBlock id stmts) +  = addLintInfo (text "in basic block " <> ppr id) $ +	mapM_ (lintCmmStmt platform labels) stmts + +-- ----------------------------------------------------------------------------- +-- lintCmmExpr + +-- Checks whether a CmmExpr is "type-correct", and check for obvious-looking +-- byte/word mismatches. + +lintCmmExpr :: Platform -> CmmExpr -> CmmLint CmmType +lintCmmExpr platform (CmmLoad expr rep) = do +  _ <- lintCmmExpr platform expr +  -- Disabled, if we have the inlining phase before the lint phase, +  -- we can have funny offsets due to pointer tagging. -- EZY +  -- when (widthInBytes (typeWidth rep) >= wORD_SIZE) $ +  --   cmmCheckWordAddress expr +  return rep +lintCmmExpr platform expr@(CmmMachOp op args) = do +  tys <- mapM (lintCmmExpr platform) args +  if map (typeWidth . cmmExprType) args == machOpArgReps op +  	then cmmCheckMachOp op args tys +	else cmmLintMachOpErr expr (map cmmExprType args) (machOpArgReps op) +lintCmmExpr platform (CmmRegOff reg offset) +  = lintCmmExpr platform (CmmMachOp (MO_Add rep) +		[CmmReg reg, CmmLit (CmmInt (fromIntegral offset) rep)]) +  where rep = typeWidth (cmmRegType reg) +lintCmmExpr _ expr = +  return (cmmExprType expr) + +-- Check for some common byte/word mismatches (eg. Sp + 1) +cmmCheckMachOp   :: MachOp -> [CmmExpr] -> [CmmType] -> CmmLint CmmType +cmmCheckMachOp op [lit@(CmmLit (CmmInt { })), reg@(CmmReg _)] tys +  = cmmCheckMachOp op [reg, lit] tys +cmmCheckMachOp op _ tys +  = return (machOpResultType op tys) + +isOffsetOp :: MachOp -> Bool +isOffsetOp (MO_Add _) = True +isOffsetOp (MO_Sub _) = True +isOffsetOp _ = False + +-- This expression should be an address from which a word can be loaded: +-- check for funny-looking sub-word offsets. +_cmmCheckWordAddress :: CmmExpr -> CmmLint () +_cmmCheckWordAddress e@(CmmMachOp op [arg, CmmLit (CmmInt i _)]) +  | isOffsetOp op && notNodeReg arg && i `rem` fromIntegral wORD_SIZE /= 0 +  = cmmLintDubiousWordOffset e +_cmmCheckWordAddress e@(CmmMachOp op [CmmLit (CmmInt i _), arg]) +  | isOffsetOp op && notNodeReg arg && i `rem` fromIntegral wORD_SIZE /= 0 +  = cmmLintDubiousWordOffset e +_cmmCheckWordAddress _ +  = return () + +-- No warnings for unaligned arithmetic with the node register, +-- which is used to extract fields from tagged constructor closures. +notNodeReg :: CmmExpr -> Bool +notNodeReg (CmmReg reg) | reg == nodeReg = False +notNodeReg _                             = True + +lintCmmStmt :: Platform -> BlockSet -> CmmStmt -> CmmLint () +lintCmmStmt platform labels = lint +    where lint (CmmNop) = return () +          lint (CmmComment {}) = return () +          lint stmt@(CmmAssign reg expr) = do +            erep <- lintCmmExpr platform expr +	    let reg_ty = cmmRegType reg +            if (erep `cmmEqType_ignoring_ptrhood` reg_ty) +                then return () +                else cmmLintAssignErr stmt erep reg_ty +          lint (CmmStore l r) = do +            _ <- lintCmmExpr platform l +            _ <- lintCmmExpr platform r +            return () +          lint (CmmCall target _res args _) = +              do lintTarget platform labels target +                 mapM_ (lintCmmExpr platform . hintlessCmm) args +          lint (CmmCondBranch e id) = checkTarget id >> lintCmmExpr platform e >> checkCond e +          lint (CmmSwitch e branches) = do +            mapM_ checkTarget $ catMaybes branches +            erep <- lintCmmExpr platform e +            if (erep `cmmEqType_ignoring_ptrhood` bWord) +              then return () +              else cmmLintErr (text "switch scrutinee is not a word: " <> ppr e <> +                               text " :: " <> ppr erep) +          lint (CmmJump e _) = lintCmmExpr platform e >> return () +          lint (CmmReturn) = return () +          lint (CmmBranch id)    = checkTarget id +          checkTarget id = if setMember id labels then return () +                           else cmmLintErr (text "Branch to nonexistent id" <+> ppr id) + +lintTarget :: Platform -> BlockSet -> CmmCallTarget -> CmmLint () +lintTarget platform _      (CmmCallee e _) = do _ <- lintCmmExpr platform e +                                                return () +lintTarget _        _      (CmmPrim _ Nothing) = return () +lintTarget platform labels (CmmPrim _ (Just stmts)) +    = mapM_ (lintCmmStmt platform labels) stmts + + +checkCond :: CmmExpr -> CmmLint () +checkCond (CmmMachOp mop _) | isComparisonMachOp mop = return () +checkCond (CmmLit (CmmInt x t)) | x == 0 || x == 1, t == wordWidth = return () -- constant values +checkCond expr +    = cmmLintErr (hang (text "expression is not a conditional:") 2 +                       (ppr expr)) + +-- ----------------------------------------------------------------------------- +-- CmmLint monad + +-- just a basic error monad: + +newtype CmmLint a = CmmLint { unCL :: Either SDoc a } + +instance Monad CmmLint where +  CmmLint m >>= k = CmmLint $ case m of  +				Left e -> Left e +				Right a -> unCL (k a) +  return a = CmmLint (Right a) + +cmmLintErr :: SDoc -> CmmLint a +cmmLintErr msg = CmmLint (Left msg) + +addLintInfo :: SDoc -> CmmLint a -> CmmLint a +addLintInfo info thing = CmmLint $  +   case unCL thing of +	Left err -> Left (hang info 2 err) +	Right a  -> Right a + +cmmLintMachOpErr :: CmmExpr -> [CmmType] -> [Width] -> CmmLint a +cmmLintMachOpErr expr argsRep opExpectsRep +     = cmmLintErr (text "in MachOp application: " $$  +					nest 2 (ppr expr) $$ +				        (text "op is expecting: " <+> ppr opExpectsRep) $$ +					(text "arguments provide: " <+> ppr argsRep)) + +cmmLintAssignErr :: CmmStmt -> CmmType -> CmmType -> CmmLint a +cmmLintAssignErr stmt e_ty r_ty +  = cmmLintErr (text "in assignment: " $$  +		nest 2 (vcat [ppr stmt,  +			      text "Reg ty:" <+> ppr r_ty, +			      text "Rhs ty:" <+> ppr e_ty])) +			  +					 + +cmmLintDubiousWordOffset :: CmmExpr -> CmmLint a +cmmLintDubiousWordOffset expr +   = cmmLintErr (text "offset is not a multiple of words: " $$ +			nest 2 (ppr expr)) diff --git a/compiler/cmm/OldPprCmm.hs b/compiler/cmm/OldPprCmm.hs index d6a12221fb..a30be9c6c7 100644 --- a/compiler/cmm/OldPprCmm.hs +++ b/compiler/cmm/OldPprCmm.hs @@ -63,10 +63,6 @@ instance Outputable instr => Outputable (GenBasicBlock instr) where  instance Outputable CmmStmt where      ppr s = pprStmt s -instance Outputable CmmInfo where -    ppr i = pprInfo i - -  -- --------------------------------------------------------------------------  instance Outputable CmmSafety where    ppr CmmUnsafe = ptext (sLit "_unsafe_call_") @@ -74,21 +70,6 @@ instance Outputable CmmSafety where    ppr (CmmSafe srt) = ppr srt  -- -------------------------------------------------------------------------- --- Info tables. The current pretty printer needs refinement --- but will work for now. --- --- For ideas on how to refine it, they used to be printed in the --- style of C--'s 'stackdata' declaration, just inside the proc body, --- and were labelled with the procedure name ++ "_info". -pprInfo :: CmmInfo -> SDoc -pprInfo (CmmInfo _gc_target update_frame info_table) = -    vcat [{-ptext (sLit "gc_target: ") <> -                maybe (ptext (sLit "<none>")) ppr gc_target,-} -          ptext (sLit "update_frame: ") <> -                maybe (ptext (sLit "<none>")) pprUpdateFrame update_frame, -          ppr info_table] - --- --------------------------------------------------------------------------  -- Basic blocks look like assembly blocks.  --      lbl: stmt ; stmt ; ..  pprBBlock :: Outputable stmt => GenBasicBlock stmt -> SDoc diff --git a/compiler/cmm/OptimizationFuel.hs b/compiler/cmm/OptimizationFuel.hs index a85b11bcc6..6e968c0b1d 100644 --- a/compiler/cmm/OptimizationFuel.hs +++ b/compiler/cmm/OptimizationFuel.hs @@ -8,10 +8,8 @@  module OptimizationFuel      ( OptimizationFuel, amountOfFuel, tankFilledTo, unlimitedFuel, anyFuelLeft, oneLessFuel      , OptFuelState, initOptFuelState -    , FuelConsumer, FuelUsingMonad, FuelState -    , fuelGet, fuelSet, lastFuelPass, setFuelPass -    , fuelExhausted, fuelDec1, tryWithFuel -    , runFuelIO, runInfiniteFuelIO, fuelConsumingPass +    , FuelConsumer, FuelState +    , runFuelIO, runInfiniteFuelIO      , FuelUniqSM      , liftUniq      ) @@ -62,25 +60,20 @@ anyFuelLeft (OptimizationFuel f) = f > 0  oneLessFuel (OptimizationFuel f) = ASSERT (f > 0) (OptimizationFuel (f - 1))  unlimitedFuel = OptimizationFuel infiniteFuel -data FuelState = FuelState { fs_fuel :: OptimizationFuel, fs_lastpass :: String } -newtype FuelUniqSM a = FUSM { unFUSM :: FuelState -> UniqSM (a, FuelState) } - -fuelConsumingPass :: String -> FuelConsumer a -> FuelUniqSM a -fuelConsumingPass name f = do setFuelPass name -                              fuel <- fuelGet -                              let (a, fuel') = f fuel -                              fuelSet fuel' -                              return a +data FuelState = FuelState { fs_fuel :: {-# UNPACK #-} !OptimizationFuel, +                             fs_lastpass :: String } +newtype FuelUniqSM a = FUSM { unFUSM :: UniqSupply -> FuelState -> (# a, UniqSupply, FuelState #) }  runFuelIO :: OptFuelState -> FuelUniqSM a -> IO a  runFuelIO fs (FUSM f) =      do pass <- readIORef (pass_ref fs)         fuel <- readIORef (fuel_ref fs)         u    <- mkSplitUniqSupply 'u' -       let (a, FuelState fuel' pass') = initUs_ u $ f (FuelState fuel pass) -       writeIORef (pass_ref fs) pass' -       writeIORef (fuel_ref fs) fuel' -       return a +       case f u (FuelState fuel pass) of +          (# a, _, FuelState fuel' pass' #) -> do +            writeIORef (pass_ref fs) pass' +            writeIORef (fuel_ref fs) fuel' +            return a  -- ToDo: Do we need the pass_ref when we are doing infinite fueld  -- transformations? @@ -88,21 +81,32 @@ runInfiniteFuelIO :: OptFuelState -> FuelUniqSM a -> IO a  runInfiniteFuelIO fs (FUSM f) =      do pass <- readIORef (pass_ref fs)         u <- mkSplitUniqSupply 'u' -       let (a, FuelState _ pass') = initUs_ u $ f (FuelState unlimitedFuel pass) -       writeIORef (pass_ref fs) pass' -       return a +       case f u (FuelState unlimitedFuel pass) of +          (# a, _, FuelState _fuel pass' #) -> do +            writeIORef (pass_ref fs) pass' +            return a  instance Monad FuelUniqSM where -  FUSM f >>= k = FUSM (\s -> f s >>= \(a, s') -> unFUSM (k a) s') -  return a     = FUSM (\s -> return (a, s)) +  FUSM f >>= k = FUSM (\u s -> case f u s of (# a, u', s' #) -> +                                                unFUSM (k a) u' s') +  return a     = FUSM (\u s -> (# a, u, s #))  instance MonadUnique FuelUniqSM where -    getUniqueSupplyM = liftUniq getUniqueSupplyM -    getUniqueM       = liftUniq getUniqueM -    getUniquesM      = liftUniq getUniquesM +    getUniqueSupplyM = +       FUSM $ \us f -> case splitUniqSupply us of +                         (us1,us2) -> (# us1, us2, f #) + +    getUniqueM = +       FUSM $ \us f -> case splitUniqSupply us of +                         (us1,us2) -> (# uniqFromSupply us1, us2, f #) + +    getUniquesM = +       FUSM $ \us f -> case splitUniqSupply us of +                         (us1,us2) -> (# uniqsFromSupply us1, us2, f #) +  liftUniq :: UniqSM x -> FuelUniqSM x -liftUniq x = FUSM (\s -> x >>= (\u -> return (u, s))) +liftUniq x = FUSM (\u s -> case initUs u x of (a,u') -> (# a, u', s #))  class Monad m => FuelUsingMonad m where    fuelGet      :: m OptimizationFuel @@ -110,25 +114,14 @@ class Monad m => FuelUsingMonad m where    lastFuelPass :: m String    setFuelPass  :: String -> m () -fuelExhausted :: FuelUsingMonad m => m Bool -fuelExhausted = fuelGet >>= return . anyFuelLeft - -fuelDec1 :: FuelUsingMonad m => m () -fuelDec1 = fuelGet >>= fuelSet . oneLessFuel - -tryWithFuel :: FuelUsingMonad m => a -> m (Maybe a) -tryWithFuel r = do f <- fuelGet -                   if anyFuelLeft f then fuelSet (oneLessFuel f) >> return (Just r) -                                    else return Nothing -  instance FuelUsingMonad FuelUniqSM where    fuelGet          = extract fs_fuel    lastFuelPass     = extract fs_lastpass -  fuelSet fuel     = FUSM (\s -> return ((), s { fs_fuel     = fuel })) -  setFuelPass pass = FUSM (\s -> return ((), s { fs_lastpass = pass })) +  fuelSet fuel     = FUSM (\u s -> (# (), u, s { fs_fuel     = fuel } #)) +  setFuelPass pass = FUSM (\u s -> (# (), u, s { fs_lastpass = pass } #))  extract :: (FuelState -> a) -> FuelUniqSM a -extract f = FUSM (\s -> return (f s, s)) +extract f = FUSM (\u s -> (# f s, u, s #))  instance FuelMonad FuelUniqSM where    getFuel = liftM amountOfFuel fuelGet @@ -137,6 +130,6 @@ instance FuelMonad FuelUniqSM where  -- Don't bother to checkpoint the unique supply; it doesn't matter  instance CheckpointMonad FuelUniqSM where      type Checkpoint FuelUniqSM = FuelState -    checkpoint = FUSM $ \fuel -> return (fuel, fuel)  -    restart fuel = FUSM $ \_ -> return ((), fuel) +    checkpoint = FUSM $ \u fuel -> (# fuel, u, fuel #) +    restart fuel = FUSM $ \u _ -> (# (), u, fuel #) diff --git a/compiler/cmm/PprCmm.hs b/compiler/cmm/PprCmm.hs index 183708c08e..dee6ee881e 100644 --- a/compiler/cmm/PprCmm.hs +++ b/compiler/cmm/PprCmm.hs @@ -146,8 +146,6 @@ pprConvention  Slow                 = text "<slow-convention>"  pprConvention  GC                   = text "<gc-convention>"  pprConvention  PrimOpCall           = text "<primop-call-convention>"  pprConvention  PrimOpReturn         = text "<primop-ret-convention>" -pprConvention (Foreign c)           = ppr c -pprConvention (Private {})          = text "<private-convention>"  pprForeignConvention :: ForeignConvention -> SDoc  pprForeignConvention (ForeignConvention c as rs) = ppr c <> ppr as <> ppr rs diff --git a/compiler/cmm/PprCmmExpr.hs b/compiler/cmm/PprCmmExpr.hs index 7503127555..119f2b7239 100644 --- a/compiler/cmm/PprCmmExpr.hs +++ b/compiler/cmm/PprCmmExpr.hs @@ -237,12 +237,8 @@ pprLocalReg (LocalReg uniq rep)  -- Stack areas  pprArea :: Area -> SDoc -pprArea (RegSlot r)   = hcat [ text "slot<", ppr r, text ">" ] -pprArea (CallArea id) = pprAreaId id - -pprAreaId :: AreaId -> SDoc -pprAreaId Old        = text "old" -pprAreaId (Young id) = hcat [ text "young<", ppr id, text ">" ] +pprArea Old        = text "old" +pprArea (Young id) = hcat [ text "young<", ppr id, text ">" ]  -- needs to be kept in syn with CmmExpr.hs.GlobalReg  -- diff --git a/compiler/cmm/SMRep.lhs b/compiler/cmm/SMRep.lhs index ce30799bf6..8b3308ef97 100644 --- a/compiler/cmm/SMRep.lhs +++ b/compiler/cmm/SMRep.lhs @@ -21,6 +21,7 @@ module SMRep (  	StgWord, StgHalfWord,   	hALF_WORD_SIZE, hALF_WORD_SIZE_IN_BITS,  	WordOff, ByteOff, +        roundUpToWords,          -- * Closure repesentation          SMRep(..),	-- CmmInfo sees the rep; no one else does @@ -57,6 +58,7 @@ import FastString  import Data.Char( ord )  import Data.Word +import Data.Bits  \end{code} @@ -69,6 +71,9 @@ import Data.Word  \begin{code}  type WordOff = Int	-- Word offset, or word count  type ByteOff = Int	-- Byte offset, or byte count + +roundUpToWords :: ByteOff -> ByteOff +roundUpToWords n = (n + (wORD_SIZE - 1)) .&. (complement (wORD_SIZE - 1))  \end{code}  StgWord is a type representing an StgWord on the target platform. @@ -93,6 +98,7 @@ hALF_WORD_SIZE_IN_BITS = 32  #endif  \end{code} +  %************************************************************************  %*									*  \subsubsection[SMRep-datatype]{@SMRep@---storage manager representation} diff --git a/compiler/cmm/cmm-notes b/compiler/cmm/cmm-notes index 81882c8c0e..0e6a2341f2 100644 --- a/compiler/cmm/cmm-notes +++ b/compiler/cmm/cmm-notes @@ -24,27 +24,10 @@ More notes (June 11)  * Check in ClosureInfo:
       -- NB: Results here should line up with the results of SMRep.rtsClosureType
 -* Possible refactoring: Nuke AGraph in favour of 
 -      mkIfThenElse :: Expr -> Graph -> Graph -> FCode Graph
 -  or even
 -      mkIfThenElse :: HasUniques m => Expr -> Graph -> Graph -> m Graph
 -  (Remmber that the .cmm file parser must use this function)
 -
 -  or parameterise FCode over its envt; the CgState part seem useful for both
 -
  * "Remove redundant reloads" in CmmSpillReload should be redundant; since
    insertLateReloads is now gone, every reload is reloading a live variable.
    Test and nuke.
 -* Stack layout is very like register assignment: find non-conflicting assigments.
 -  In particular we can use colouring or linear scan (etc).
 -
 -  We'd fine-grain interference (on a word by word basis) to get maximum overlap.
 -  But that may make very big interference graphs.  So linear scan might be
 -  more attactive.
 -
 -  NB: linear scan does on-the-fly live range splitting.
 -
  * When stubbing dead slots be careful not to write into an area that
    overlaps with an area that's in use.  So stubbing needs to *follow* 
    stack layout.
 @@ -81,10 +64,6 @@ Things to do:     Old.Cmm. We should abstract it to work on both representations, it needs only to
     convert a CmmInfoTable to [CmmStatic].
 - - The MkGraph currenty uses a different semantics for <*> than Hoopl. Maybe
 -   we could convert codeGen/StgCmm* clients to the Hoopl's semantics?
 -   It's all deeply unsatisfactory.
 -
   - Improve performance of Hoopl.
     A nofib comparison of -fasm vs -fnewcodegen nofib compilation parameters
 @@ -101,18 +80,12 @@ Things to do:     When compiling nofib, ghc-head + libraries compiled with -fnew-codegen
     is 31.4% slower (http://fox.auryn.cz/msrc/0517_hoopl/32bit.oldghcoldgen.zipghcoldgen.txt).
 -   So we generate a bit better code, but it takes us longer!
 -
     EZY: Also importantly, Hoopl uses dramatically more memory than the
     old code generator.
   - Are all blockToNodeList and blockOfNodeList really needed? Maybe we could
     splice blocks instead?
 -   In the CmmContFlowOpt.blockConcat, using Dataflow seems too clumsy. Still,
 -   a block catenation function would be probably nicer than blockToNodeList
 -   / blockOfNodeList combo.
 -
   - lowerSafeForeignCall seems too lowlevel. Just use Dataflow. After that
     delete splitEntrySeq from HooplUtils.
 @@ -129,10 +102,6 @@ Things to do:   - NB that CmmProcPoint line 283 has a hack that works around a GADT-related
     bug in 6.10.
 - - SDM (2010-02-26) can we remove the Foreign constructor from Convention?
 -   Reason: we never generate code for a function with the Foreign
 -   calling convention, and the code for calling foreign calls is generated
 -
   - AsmCodeGen has a generic Cmm optimiser; move this into new pipeline
     EZY (2011-04-16): The mini-inliner has been generalized and ported,
     but the constant folding and other optimizations need to still be
 @@ -161,16 +130,6 @@ Things to do:   - Top-level SRT threading is a bit ugly
 - - Add type/newtype for CmmModule = [CmmGroup]    -- A module
 -                        CmmGroup  = [CmmTop]      -- A .o file
 -                        CmmTop    = Proc | Data   -- A procedure or data
 -
 - - This is a *change*: currently a CmmGroup is one function's-worth of code
 -   regardless of SplitObjs.   Question: can we *always* generate M.o if there
 -   is just one element in the list (rather than M/M1.o, M/M2.o etc)
 -
 -   One SRT per group.
 -
   - See "CAFs" below; we want to totally refactor the way SRTs are calculated
   - Pull out Areas into its own module
 diff --git a/compiler/codeGen/CgInfoTbls.hs b/compiler/codeGen/CgInfoTbls.hs index 6c77255a62..7cdb1b6f7e 100644 --- a/compiler/codeGen/CgInfoTbls.hs +++ b/compiler/codeGen/CgInfoTbls.hs @@ -36,7 +36,7 @@ import CgBindery  import CgCallConv  import CgUtils  import CgMonad -import CmmBuildInfoTables +import CmmUtils  import OldCmm  import CLabel @@ -66,10 +66,9 @@ emitClosureCodeAndInfoTable cl_info args body  -- Convert from 'ClosureInfo' to 'CmmInfo'.  -- Not used for return points.  (The 'smRepClosureTypeInt' call would panic.) -mkCmmInfo :: ClosureInfo -> FCode CmmInfo +mkCmmInfo :: ClosureInfo -> FCode CmmInfoTable  mkCmmInfo cl_info -  = return (CmmInfo gc_target Nothing $ -            CmmInfoTable { cit_lbl  = infoTableLabelFromCI cl_info, +  = return (CmmInfoTable { cit_lbl  = infoTableLabelFromCI cl_info,                     	   cit_rep  = closureSMRep cl_info,                     	   cit_prof = prof,                     	   cit_srt  = closureSRT cl_info }) @@ -79,14 +78,6 @@ mkCmmInfo cl_info      ty_descr_w8  = stringToWord8s (closureTypeDescr cl_info)      val_descr_w8 = stringToWord8s (closureValDescr cl_info) -    -- The gc_target is to inform the CPS pass when it inserts a stack check. -    -- Since that pass isn't used yet we'll punt for now. -    -- When the CPS pass is fully integrated, this should -    -- be replaced by the label that any heap check jumped to, -    -- so that branch can be shared by both the heap (from codeGen) -    -- and stack checks (from the CPS pass). -    gc_target = panic "TODO: gc_target" -  -------------------------------------------------------------------------  --  --	Generating the info table and code for a return point @@ -105,8 +96,7 @@ emitReturnTarget name stmts  	; blks <- cgStmtsToBlocks stmts          ; frame <- mkStackLayout          ; let smrep    = mkStackRep (mkLiveness frame) -              info     = CmmInfo gc_target Nothing info_tbl -              info_tbl = CmmInfoTable { cit_lbl  = info_lbl +              info     = CmmInfoTable { cit_lbl  = info_lbl                                        , cit_prof = NoProfilingInfo                                        , cit_rep  = smrep                                        , cit_srt  = srt_info } @@ -118,14 +108,6 @@ emitReturnTarget name stmts      info_lbl  = mkReturnInfoLabel uniq      entry_lbl = mkReturnPtLabel uniq -    -- The gc_target is to inform the CPS pass when it inserts a stack check. -    -- Since that pass isn't used yet we'll punt for now. -    -- When the CPS pass is fully integrated, this should -    -- be replaced by the label that any heap check jumped to, -    -- so that branch can be shared by both the heap (from codeGen) -    -- and stack checks (from the CPS pass). -    gc_target = panic "TODO: gc_target" -  -- Build stack layout information from the state of the 'FCode' monad.  -- Should go away once 'codeGen' starts using the CPS conversion  -- pass to handle the stack.  Until then, this is really just @@ -375,8 +357,8 @@ funInfoTable info_ptr  emitInfoTableAndCode   	:: CLabel 		-- Label of entry or ret -	-> CmmInfo 		-- ...the info table -	-> [CmmFormal]	-- ...args +        -> CmmInfoTable         -- ...the info table +        -> [CmmFormal]          -- ...args  	-> [CmmBasicBlock]	-- ...and body  	-> Code diff --git a/compiler/codeGen/CgMonad.lhs b/compiler/codeGen/CgMonad.lhs index b96898f591..71da9e9ae0 100644 --- a/compiler/codeGen/CgMonad.lhs +++ b/compiler/codeGen/CgMonad.lhs @@ -13,8 +13,8 @@ stuff fits into the Big Picture.  module CgMonad (          Code, FCode, -        initC, thenC, thenFC, listCs, listFCs, mapCs, mapFCs, -        returnFC, fixC, fixC_, checkedAbsC, +        initC, runC, thenC, thenFC, listCs, listFCs, mapCs, mapFCs, +        returnFC, fixC, fixC_, checkedAbsC,           stmtC, stmtsC, labelC, emitStmts, nopC, whenC, newLabelC,          newUnique, newUniqSupply, @@ -386,11 +386,12 @@ instance Monad FCode where  {-# INLINE thenFC #-}  {-# INLINE returnFC #-} -initC :: DynFlags -> Module -> FCode a -> IO a -initC dflags mod (FCode code) = do -    uniqs <- mkSplitUniqSupply 'c' -    case code (initCgInfoDown dflags mod) (initCgState uniqs) of -        (res, _) -> return res +initC :: IO CgState +initC  = do { uniqs <- mkSplitUniqSupply 'c' +            ; return (initCgState uniqs) } + +runC :: DynFlags -> Module -> CgState -> FCode a -> (a,CgState) +runC dflags mod st (FCode code) = code (initCgInfoDown dflags mod) st  returnFC :: a -> FCode a  returnFC val = FCode $ \_ state -> (val, state) @@ -708,7 +709,7 @@ emitDecl decl = do      state <- getState      setState $ state { cgs_tops = cgs_tops state `snocOL` decl } -emitProc :: CmmInfo -> CLabel -> [CmmFormal] -> [CmmBasicBlock] -> Code +emitProc :: CmmInfoTable -> CLabel -> [CmmFormal] -> [CmmBasicBlock] -> Code  emitProc info lbl [] blocks = do      let proc_block = CmmProc info lbl (ListGraph blocks)      state <- getState @@ -720,7 +721,7 @@ emitSimpleProc :: CLabel -> Code -> Code  emitSimpleProc lbl code = do      stmts <- getCgStmts code      blks <- cgStmtsToBlocks stmts -    emitProc (CmmInfo Nothing Nothing CmmNonInfoTable) lbl [] blks +    emitProc CmmNonInfoTable lbl [] blks  -- Get all the CmmTops (there should be no stmts)  -- Return a single Cmm which may be split from other Cmms by diff --git a/compiler/codeGen/CodeGen.lhs b/compiler/codeGen/CodeGen.lhs index ce12d43bbf..9c936d3281 100644 --- a/compiler/codeGen/CodeGen.lhs +++ b/compiler/codeGen/CodeGen.lhs @@ -45,7 +45,13 @@ import TyCon  import Module  import ErrUtils  import Panic -import Util +import Outputable + +import OrdList +import Stream (Stream, liftIO) +import qualified Stream + +import Data.IORef  codeGen :: DynFlags          -> Module                     -- Module we are compiling @@ -53,32 +59,37 @@ codeGen :: DynFlags          -> CollectedCCs               -- (Local/global) cost-centres needing declaring/registering.          -> [(StgBinding,[(Id,[Id])])] -- Bindings to convert, with SRTs          -> HpcInfo                    -- Profiling info -        -> IO [CmmGroup] +        -> Stream IO CmmGroup ()                -- N.B. returning '[Cmm]' and not 'Cmm' here makes it                -- possible for object splitting to split up the                -- pieces later. -codeGen dflags this_mod data_tycons cost_centre_info stg_binds hpc_info = do -    showPass dflags "CodeGen" -    code_stuff <- -        initC dflags this_mod $ do -            cmm_binds  <- mapM (getCmm . cgTopBinding dflags) stg_binds -            cmm_tycons <- mapM cgTyCon data_tycons -            cmm_init   <- getCmm (mkModuleInit dflags cost_centre_info this_mod hpc_info) -            return (cmm_init : cmm_binds ++ cmm_tycons) -                -- Put datatype_stuff after code_stuff, because the -                -- datatype closure table (for enumeration types) to -                -- (say) PrelBase_True_closure, which is defined in -                -- code_stuff - -                -- Note [codegen-split-init] the cmm_init block must -                -- come FIRST.  This is because when -split-objs is on -                -- we need to combine this block with its -                -- initialisation routines; see Note -                -- [pipeline-split-init]. - -    dumpIfSet_dyn dflags Opt_D_dump_cmm "Cmm" (pprCmms code_stuff) -    return code_stuff +codeGen dflags this_mod data_tycons cost_centre_info stg_binds hpc_info + +   = do { liftIO $ showPass dflags "CodeGen" + +        ; cgref <- liftIO $ newIORef =<< initC +        ; let cg :: FCode CmmGroup -> Stream IO CmmGroup () +              cg fcode = do +                cmm <- liftIO $ do +                         st <- readIORef cgref +                         let (a,st') = runC dflags this_mod st fcode + +                         dumpIfSet_dyn dflags Opt_D_dump_cmm "Cmm" $ ppr a + +                         -- NB. stub-out cgs_tops and cgs_stmts.  This fixes +                         -- a big space leak.  DO NOT REMOVE! +                         writeIORef cgref $! st'{ cgs_tops = nilOL, +                                                  cgs_stmts = nilOL } +                         return a +                Stream.yield cmm + +        ; cg (getCmm $ mkModuleInit dflags cost_centre_info this_mod hpc_info) + +        ; mapM_ (cg . getCmm . cgTopBinding dflags) stg_binds + +        ; mapM_ (cg . cgTyCon) data_tycons +        }  mkModuleInit          :: DynFlags diff --git a/compiler/codeGen/StgCmm.hs b/compiler/codeGen/StgCmm.hs index 17a7062559..696af8107e 100644 --- a/compiler/codeGen/StgCmm.hs +++ b/compiler/codeGen/StgCmm.hs @@ -46,6 +46,13 @@ import TyCon  import Module  import ErrUtils  import Outputable +import Stream + +import OrdList +import MkGraph + +import Data.IORef +import Control.Monad (when)  import Util  codeGen :: DynFlags @@ -54,39 +61,51 @@ codeGen :: DynFlags           -> CollectedCCs                -- (Local/global) cost-centres needing declaring/registering.  	 -> [(StgBinding,[(Id,[Id])])]	-- Bindings to convert, with SRTs  	 -> HpcInfo -         -> IO [CmmGroup]         -- Output +         -> Stream IO CmmGroup ()       -- Output as a stream, so codegen can +                                        -- be interleaved with output  codeGen dflags this_mod data_tycons          cost_centre_info stg_binds hpc_info -  = do  { showPass dflags "New CodeGen" - --- Why? ---   ; mapM_ (\x -> seq x (return ())) data_tycons - -        ; code_stuff <- initC dflags this_mod $ do  -                { cmm_binds  <- mapM (getCmm . cgTopBinding dflags) stg_binds -                ; cmm_tycons <- mapM cgTyCon data_tycons -                ; cmm_init   <- getCmm (mkModuleInit cost_centre_info -                                             this_mod hpc_info) -                ; return (cmm_init : cmm_binds ++ cmm_tycons) -                } +  = do  { liftIO $ showPass dflags "New CodeGen" + +              -- cg: run the code generator, and yield the resulting CmmGroup +              -- Using an IORef to store the state is a bit crude, but otherwise +              -- we would need to add a state monad layer. +        ; cgref <- liftIO $ newIORef =<< initC +        ; let cg :: FCode () -> Stream IO CmmGroup () +              cg fcode = do +                cmm <- liftIO $ do +                         st <- readIORef cgref +                         let (a,st') = runC dflags this_mod st (getCmm fcode) + +                         -- NB. stub-out cgs_tops and cgs_stmts.  This fixes +                         -- a big space leak.  DO NOT REMOVE! +                         writeIORef cgref $! st'{ cgs_tops = nilOL, +                                                  cgs_stmts = mkNop } +                         return a +                yield cmm + +               -- Note [codegen-split-init] the cmm_init block must come +               -- FIRST.  This is because when -split-objs is on we need to +               -- combine this block with its initialisation routines; see +               -- Note [pipeline-split-init]. +        ; cg (mkModuleInit cost_centre_info this_mod hpc_info) + +        ; mapM_ (cg . cgTopBinding dflags) stg_binds +                  -- Put datatype_stuff after code_stuff, because the                  -- datatype closure table (for enumeration types) to                  -- (say) PrelBase_True_closure, which is defined in                  -- code_stuff - -                -- N.B. returning '[Cmm]' and not 'Cmm' here makes it -                -- possible for object splitting to split up the -                -- pieces later. - -                -- Note [codegen-split-init] the cmm_init block must -                -- come FIRST.  This is because when -split-objs is on -                -- we need to combine this block with its -                -- initialisation routines; see Note -                -- [pipeline-split-init]. - -        ; return code_stuff } - +        ; let do_tycon tycon = do +                -- Generate a table of static closures for an +                -- enumeration type Note that the closure pointers are +                -- tagged. +                 when (isEnumerationTyCon tycon) $ cg (cgEnumerationTyCon tycon) +                 mapM_ (cg . cgDataCon) (tyConDataCons tycon) + +        ; mapM_ do_tycon data_tycons +        }  ---------------------------------------------------------------  --	Top-level bindings @@ -108,7 +127,7 @@ cgTopBinding dflags (StgNonRec id rhs, _srts)  	; info <- cgTopRhs id' rhs  	; addBindC (cg_id info) info -- Add the *un-externalised* Id to the envt,  				     -- so we find it when we look up occurrences -	} +        }  cgTopBinding dflags (StgRec pairs, _srts)    = do	{ let (bndrs, rhss) = unzip pairs @@ -117,7 +136,7 @@ cgTopBinding dflags (StgRec pairs, _srts)  	; fixC_(\ new_binds -> do   		{ addBindsC new_binds  		; mapFCs ( \ (b,e) -> cgTopRhs b e ) pairs' }) -	; return () } +        ; return () }  -- Urgh!  I tried moving the forkStatics call from the rhss of cgTopRhs  -- to enclose the listFCs in cgTopBinding, but that tickled the @@ -187,65 +206,19 @@ mkModuleInit cost_centre_info this_mod hpc_info          ; emitDecl (CmmData Data (Statics (mkPlainModuleInitLabel this_mod) []))          } +  ---------------------------------------------------------------  --	Generating static stuff for algebraic data types  --------------------------------------------------------------- -{-	[These comments are rather out of date] - -Macro  		    	     Kind of constructor -CONST_INFO_TABLE@	Zero arity (no info -- compiler uses static closure) -CHARLIKE_INFO_TABLE	Charlike   (no info -- compiler indexes fixed array) -INTLIKE_INFO_TABLE	Intlike; the one macro generates both info tbls -SPEC_INFO_TABLE		SPECish, and bigger than or equal to MIN_UPD_SIZE -GEN_INFO_TABLE		GENish (hence bigger than or equal to MIN_UPD_SIZE@) -Possible info tables for constructor con: - -* _con_info: -  Used for dynamically let(rec)-bound occurrences of -  the constructor, and for updates.  For constructors -  which are int-like, char-like or nullary, when GC occurs, -  the closure tries to get rid of itself. - -* _static_info: -  Static occurrences of the constructor macro: STATIC_INFO_TABLE. - -For zero-arity constructors, \tr{con}, we NO LONGER generate a static closure; -it's place is taken by the top level defn of the constructor. - -For charlike and intlike closures there is a fixed array of static -closures predeclared. --} - -cgTyCon :: TyCon -> FCode CmmGroup  -- All constructors merged together -cgTyCon tycon -  = do	{ constrs <- mapM (getCmm . cgDataCon) (tyConDataCons tycon) - -	    -- Generate a table of static closures for an enumeration type -	    -- Put the table after the data constructor decls, because the -	    -- datatype closure table (for enumeration types) -	    -- to (say) PrelBase_$wTrue_closure, which is defined in code_stuff -            -- Note that the closure pointers are tagged. - -            -- N.B. comment says to put table after constructor decls, but -            -- code puts it before --- NR 16 Aug 2007 -	; extra <- cgEnumerationTyCon tycon - -        ; return (concat (extra ++ constrs)) -        } - -cgEnumerationTyCon :: TyCon -> FCode [CmmGroup] +cgEnumerationTyCon :: TyCon -> FCode ()  cgEnumerationTyCon tycon -  | isEnumerationTyCon tycon -  = do	{ tbl <- getCmm $  -		 emitRODataLits (mkLocalClosureTableLabel (tyConName tycon) NoCafRefs) -	      	   [ CmmLabelOff (mkLocalClosureLabel (dataConName con) NoCafRefs)  -				 (tagForCon con) -    	      	   | con <- tyConDataCons tycon] -	; return [tbl] } -  | otherwise -  = return [] +  = emitRODataLits (mkLocalClosureTableLabel (tyConName tycon) NoCafRefs) +             [ CmmLabelOff (mkLocalClosureLabel (dataConName con) NoCafRefs) +                           (tagForCon con) +             | con <- tyConDataCons tycon] +  cgDataCon :: DataCon -> FCode ()  -- Generate the entry code, info tables, and (for niladic constructor) diff --git a/compiler/codeGen/StgCmmBind.hs b/compiler/codeGen/StgCmmBind.hs index 3b166e3b6a..f98283f737 100644 --- a/compiler/codeGen/StgCmmBind.hs +++ b/compiler/codeGen/StgCmmBind.hs @@ -76,17 +76,17 @@ cgTopRhsClosure :: Id  cgTopRhsClosure id ccs _ upd_flag srt args body = do    {	-- LAY OUT THE OBJECT      let name = idName id -  ; lf_info  <- mkClosureLFInfo id TopLevel [] upd_flag args -  ; srt_info <- getSRTInfo srt +  ; lf_info <- mkClosureLFInfo id TopLevel [] upd_flag args +  ; has_srt <- getSRTInfo srt    ; mod_name <- getModuleName    ; dflags   <- getDynFlags    ; let descr         = closureDescription dflags mod_name name -	closure_info  = mkClosureInfo True id lf_info 0 0 srt_info descr -	closure_label = mkLocalClosureLabel name (idCafInfo id) +        closure_info  = mkClosureInfo True id lf_info 0 0 descr +        closure_label = mkLocalClosureLabel name (idCafInfo id)      	cg_id_info    = litIdInfo id lf_info (CmmLabel closure_label)          caffy         = idCafInfo id          info_tbl      = mkCmmInfo closure_info -- XXX short-cut -        closure_rep   = mkStaticClosureFields info_tbl ccs caffy [] +        closure_rep   = mkStaticClosureFields info_tbl ccs caffy has_srt []    	 -- BUILD THE OBJECT, AND GENERATE INFO TABLE (IF NECESSARY)    ; emitDataLits closure_label closure_rep @@ -110,7 +110,7 @@ cgBind (StgNonRec name rhs)          ; emit (init <*> body) }  cgBind (StgRec pairs) -  = do	{ ((new_binds, inits), body) <- getCodeR $ fixC (\ new_binds_inits -> +  = do  { ((new_binds, inits), body) <- getCodeR $ fixC (\ new_binds_inits ->                 do { addBindsC $ fst new_binds_inits -- avoid premature deconstruction                    ; liftM unzip $ listFCs [ cgRhs b e | (b,e) <- pairs ] })         ; addBindsC new_binds @@ -162,8 +162,8 @@ cgRhs :: Id -> StgRhs -> FCode (CgIdInfo, CmmAGraph)  cgRhs name (StgRhsCon cc con args)    = buildDynCon name cc con args -cgRhs name (StgRhsClosure cc bi fvs upd_flag srt args body) -  = mkRhsClosure name cc bi (nonVoidIds fvs) upd_flag srt args body +cgRhs name (StgRhsClosure cc bi fvs upd_flag _srt args body) +  = mkRhsClosure name cc bi (nonVoidIds fvs) upd_flag args body  ------------------------------------------------------------------------  --		Non-constructor right hand sides @@ -171,7 +171,7 @@ cgRhs name (StgRhsClosure cc bi fvs upd_flag srt args body)  mkRhsClosure :: Id -> CostCentreStack -> StgBinderInfo  	     -> [NonVoid Id]			-- Free vars -	     -> UpdateFlag -> SRT +             -> UpdateFlag  	     -> [Id]			        -- Args  	     -> StgExpr  	     -> FCode (CgIdInfo, CmmAGraph) @@ -215,8 +215,7 @@ for semi-obvious reasons.  mkRhsClosure	bndr cc bi  		[NonVoid the_fv]   		-- Just one free var  		upd_flag		-- Updatable thunk -		_srt -		[]			-- A thunk +                []                      -- A thunk  		body@(StgCase (StgApp scrutinee [{-no args-}])  		      _ _ _ _   -- ignore uniq, etc.  		      (AlgAlt _) @@ -247,8 +246,7 @@ mkRhsClosure	bndr cc bi  mkRhsClosure    bndr cc bi  		fvs  		upd_flag -		_srt -		[]			-- No args; a thunk +                []                      -- No args; a thunk  		body@(StgApp fun_id args)    | args `lengthIs` (arity-1) @@ -269,7 +267,7 @@ mkRhsClosure    bndr cc bi  	arity 	= length fvs  ---------- Default case ------------------ -mkRhsClosure bndr cc _ fvs upd_flag srt args body +mkRhsClosure bndr cc _ fvs upd_flag args body    = do	{ 	-- LAY OUT THE OBJECT  	-- If the binder is itself a free variable, then don't store  	-- it in the closure.  Instead, just bind it to Node on entry. @@ -288,17 +286,16 @@ mkRhsClosure bndr cc _ fvs upd_flag srt args body  	-- MAKE CLOSURE INFO FOR THIS CLOSURE  	; lf_info <- mkClosureLFInfo bndr NotTopLevel fvs upd_flag args  	; mod_name <- getModuleName -	; c_srt <- getSRTInfo srt -	; dflags <- getDynFlags -	; let	name  = idName bndr -		descr = closureDescription dflags mod_name name -		fv_details :: [(NonVoid Id, VirtualHpOffset)] +        ; dflags <- getDynFlags +        ; let   name  = idName bndr +                descr = closureDescription dflags mod_name name +                fv_details :: [(NonVoid Id, VirtualHpOffset)]  		(tot_wds, ptr_wds, fv_details)  		   = mkVirtHeapOffsets (isLFThunk lf_info)  				       (addIdReps (map stripNV reduced_fvs))  		closure_info = mkClosureInfo False	-- Not static  					     bndr lf_info tot_wds ptr_wds -					     c_srt descr +                                             descr  	-- BUILD ITS INFO TABLE AND CODE  	; forkClosureBody $ @@ -345,8 +342,7 @@ cgStdThunk bndr _cc _bndr_info _body lf_info payload  	descr = closureDescription dflags mod_name (idName bndr)  	closure_info = mkClosureInfo False 	-- Not static  				     bndr lf_info tot_wds ptr_wds -				     NoC_SRT	-- No SRT for a std-form closure -				     descr +                                     descr  --  ; (use_cc, blame_cc) <- chooseDynCostCentres cc [{- no args-}] body    ; let use_cc = curCCS; blame_cc = curCCS @@ -546,10 +542,10 @@ emitBlackHoleCode is_single_entry = do    whenC eager_blackholing $ do      tickyBlackHole (not is_single_entry) -    emit (mkStore (cmmOffsetW (CmmReg nodeReg) fixedHdrSize) -                  (CmmReg (CmmGlobal CurrentTSO))) +    emitStore (cmmOffsetW (CmmReg nodeReg) fixedHdrSize) +                  (CmmReg (CmmGlobal CurrentTSO))      emitPrimCall [] MO_WriteBarrier [] -    emit (mkStore (CmmReg nodeReg) (CmmReg (CmmGlobal EagerBlackholeInfo))) +    emitStore (CmmReg nodeReg) (CmmReg (CmmGlobal EagerBlackholeInfo))  setupUpdate :: ClosureInfo -> LocalReg -> FCode () -> FCode ()  	-- Nota Bene: this function does not change Node (even if it's a CAF), @@ -598,7 +594,7 @@ pushUpdateFrame es body         offset <- foldM push updfr es         withUpdFrameOff offset body       where push off e = -             do emit (mkStore (CmmStackSlot (CallArea Old) base) e) +             do emitStore (CmmStackSlot Old base) e                  return base               where base = off + widthInBytes (cmmExprWidth e) @@ -666,13 +662,14 @@ link_caf _is_upd = do          -- node is live, so save it.    -- see Note [atomic CAF entry] in rts/sm/Storage.c -  ; emit $ mkCmmIfThen -      (CmmMachOp mo_wordEq [ CmmReg (CmmLocal ret), CmmLit zeroCLit]) $ +  ; updfr  <- getUpdFrameOff +  ; emit =<< mkCmmIfThen +      (CmmMachOp mo_wordEq [ CmmReg (CmmLocal ret), CmmLit zeroCLit])          -- re-enter R1.  Doing this directly is slightly dodgy; we're          -- assuming lots of things, like the stack pointer hasn't          -- moved since we entered the CAF. -        let target = entryCode (closureInfoPtr (CmmReg nodeReg)) in -        mkJump target [] 0 +       (let target = entryCode (closureInfoPtr (CmmReg nodeReg)) in +        mkJump target [] updfr)    ; return hp_rel } diff --git a/compiler/codeGen/StgCmmClosure.hs b/compiler/codeGen/StgCmmClosure.hs index 483a67c1fa..8023abddec 100644 --- a/compiler/codeGen/StgCmmClosure.hs +++ b/compiler/codeGen/StgCmmClosure.hs @@ -657,7 +657,6 @@ data ClosureInfo            -- the rest is just an unpacked CmmInfoTable.          closureInfoLabel :: !CLabel,          closureSMRep     :: !SMRep,          -- representation used by storage mgr -        closureSRT       :: !C_SRT,          -- What SRT applies to this closure          closureProf      :: !ProfilingInfo      } @@ -667,7 +666,7 @@ mkCmmInfo ClosureInfo {..}    = CmmInfoTable { cit_lbl  = closureInfoLabel                   , cit_rep  = closureSMRep                   , cit_prof = closureProf -                 , cit_srt  = closureSRT } +                 , cit_srt  = NoC_SRT }  -------------------------------------- @@ -678,16 +677,14 @@ mkClosureInfo :: Bool		-- Is static  	      -> Id  	      -> LambdaFormInfo   	      -> Int -> Int	-- Total and pointer words -	      -> C_SRT -	      -> String		-- String descriptor +              -> String         -- String descriptor  	      -> ClosureInfo -mkClosureInfo is_static id lf_info tot_wds ptr_wds srt_info val_descr +mkClosureInfo is_static id lf_info tot_wds ptr_wds val_descr    = ClosureInfo { closureName      = name,                    closureLFInfo    = lf_info, -                  closureInfoLabel = info_lbl, -                  closureSMRep     = sm_rep,    -- These four fields are a -                  closureSRT       = srt_info,  --        CmmInfoTable -                  closureProf      = prof }     -- --- +                  closureInfoLabel = info_lbl,  -- These three fields are +                  closureSMRep     = sm_rep,    -- (almost) an info table +                  closureProf      = prof }     -- (we don't have an SRT yet)    where      name       = idName id      sm_rep     = mkHeapRep is_static ptr_wds nonptr_wds (lfClosureType lf_info) @@ -920,15 +917,21 @@ cafBlackHoleInfoTable                   , cit_prof = NoProfilingInfo                   , cit_srt  = NoC_SRT } -staticClosureNeedsLink :: CmmInfoTable -> Bool +staticClosureNeedsLink :: Bool -> CmmInfoTable -> Bool  -- A static closure needs a link field to aid the GC when traversing  -- the static closure graph.  But it only needs such a field if either  -- 	a) it has an SRT  --	b) it's a constructor with one or more pointer fields  -- In case (b), the constructor's fields themselves play the role  -- of the SRT. -staticClosureNeedsLink info_tbl@CmmInfoTable{ cit_rep = smrep } +-- +-- At this point, the cit_srt field has not been calculated (that +-- happens right at the end of the Cmm pipeline), but we do have the +-- VarSet of CAFs that CoreToStg attached, and if that is empty there +-- will definitely not be an SRT. +-- +staticClosureNeedsLink has_srt CmmInfoTable{ cit_rep = smrep }    | isConRep smrep         = not (isStaticNoCafCon smrep) -  | otherwise              = needsSRT (cit_srt info_tbl) -staticClosureNeedsLink _ = False +  | otherwise              = has_srt -- needsSRT (cit_srt info_tbl) +staticClosureNeedsLink _ _ = False diff --git a/compiler/codeGen/StgCmmCon.hs b/compiler/codeGen/StgCmmCon.hs index a7af5662e9..c348570a54 100644 --- a/compiler/codeGen/StgCmmCon.hs +++ b/compiler/codeGen/StgCmmCon.hs @@ -92,6 +92,7 @@ cgTopRhsCon id con args                               info_tbl                               dontCareCCS                -- Because it's static data                               caffy                      -- Has CAF refs +                             False                      -- no SRT                               payload                  -- BUILD THE OBJECT diff --git a/compiler/codeGen/StgCmmEnv.hs b/compiler/codeGen/StgCmmEnv.hs index f128e3ad60..2edd09da12 100644 --- a/compiler/codeGen/StgCmmEnv.hs +++ b/compiler/codeGen/StgCmmEnv.hs @@ -27,7 +27,7 @@ module StgCmmEnv (  	bindArgsToRegs, bindToReg, rebindToReg,  	bindArgToReg, idToReg, -	getArgAmode, getNonVoidArgAmodes,  +        getArgAmode, getNonVoidArgAmodes,  	getCgIdInfo,   	maybeLetNoEscape,       ) where @@ -212,7 +212,6 @@ getNonVoidArgAmodes (arg:args)  	 	   ; amodes <- getNonVoidArgAmodes args  	 	   ; return ( amode : amodes ) } -  ------------------------------------------------------------------------  --	Interface functions for binding and re-binding names  ------------------------------------------------------------------------ diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs index 9faad02f46..4db1dffdfc 100644 --- a/compiler/codeGen/StgCmmExpr.hs +++ b/compiler/codeGen/StgCmmExpr.hs @@ -77,7 +77,7 @@ cgExpr (StgLetNoEscape _ _ binds expr) =       ; let join_id = mkBlockId (uniqFromSupply us)       ; cgLneBinds join_id binds       ; cgExpr expr  -     ; emit $ mkLabel join_id} +     ; emitLabel join_id}  cgExpr (StgCase expr _live_vars _save_vars bndr srt alt_type alts) =    cgCase expr bndr srt alt_type alts @@ -130,7 +130,7 @@ cgLetNoEscapeRhs  cgLetNoEscapeRhs join_id local_cc bndr rhs =    do { (info, rhs_body) <- getCodeR $ cgLetNoEscapeRhsBody local_cc bndr rhs        ; let (bid, _) = expectJust "cgLetNoEscapeRhs" $ maybeLetNoEscape info -     ; emit (outOfLine $ mkLabel bid <*> rhs_body <*> mkBranch join_id) +     ; emitOutOfLine bid $ rhs_body <*> mkBranch join_id       ; return info       } @@ -278,21 +278,69 @@ Hence: two basic plans for  data GcPlan    = GcInAlts 		-- Put a GC check at the start the case alternatives,  	[LocalReg] 	-- which binds these registers -	SRT		-- using this SRT -  | NoGcInAlts		-- The scrutinee is a primitive value, or a call to a +  | NoGcInAlts          -- The scrutinee is a primitive value, or a call to a  			-- primitive op which does no GC.  Absorb the allocation  			-- of the case alternative(s) into the upstream check  ------------------------------------- --- See Note [case on Bool]  cgCase :: StgExpr -> Id -> SRT -> AltType -> [StgAlt] -> FCode () + +cgCase (StgOpApp (StgPrimOp op) args _) bndr _srt (AlgAlt tycon) alts +  | isEnumerationTyCon tycon -- Note [case on bool] +  = do { tag_expr <- do_enum_primop op args + +       -- If the binder is not dead, convert the tag to a constructor +       -- and assign it. +       ; when (not (isDeadBinder bndr)) $ do +            { tmp_reg <- bindArgToReg (NonVoid bndr) +            ; emitAssign (CmmLocal tmp_reg) +                         (tagToClosure tycon tag_expr) } + +       ; (mb_deflt, branches) <- cgAlgAltRhss NoGcInAlts Nothing +                                              (NonVoid bndr) alts +       ; emitSwitch tag_expr branches mb_deflt 0 (tyConFamilySize tycon - 1) +       } +  where +    do_enum_primop :: PrimOp -> [StgArg] -> FCode CmmExpr +    do_enum_primop TagToEnumOp [arg]  -- No code! +      = getArgAmode (NonVoid arg) +    do_enum_primop primop args +      = do tmp <- newTemp bWord +           cgPrimOp [tmp] primop args +           return (CmmReg (CmmLocal tmp)) +  {- -cgCase (OpApp ) bndr srt AlgAlt [(DataAlt flase, a2] -  | isBoolTy (idType bndr) -  , isDeadBndr bndr -  =  +Note [case on bool] + +This special case handles code like + +  case a <# b of +    True -> +    False -> + +If we let the ordinary case code handle it, we'll get something like + + tmp1 = a < b + tmp2 = Bool_closure_tbl[tmp1] + if (tmp2 & 7 != 0) then ... // normal tagged case + +but this junk won't optimise away.  What we really want is just an +inline comparison: + + if (a < b) then ... + +So we add a special case to generate + + tmp1 = a < b + if (tmp1 == 0) then ... + +and later optimisations will further improve this. + +We should really change all these primops to return Int# instead, that +would make this special case go away.  -} +    -- Note [ticket #3132]: we might be looking at a case of a lifted Id    -- that was cast to an unlifted type.  The Id will always be bottom,    -- but we don't want the code generator to fall over here.  If we @@ -319,7 +367,7 @@ cgCase (StgApp v []) bndr _ alt_type@(PrimAlt _) alts      do { when (not reps_compatible) $             panic "cgCase: reps do not match, perhaps a dodgy unsafeCoerce?"         ; v_info <- getCgIdInfo v -       ; emit (mkAssign (CmmLocal (idToReg (NonVoid bndr))) (idInfoToAmode v_info)) +       ; emitAssign (CmmLocal (idToReg (NonVoid bndr))) (idInfoToAmode v_info)         ; _ <- bindArgsToRegs [NonVoid bndr]         ; cgAlts NoGcInAlts (NonVoid bndr) alt_type alts }    where @@ -330,8 +378,11 @@ cgCase scrut@(StgApp v []) _ _ (PrimAlt _) _      do { mb_cc <- maybeSaveCostCentre True         ; withSequel (AssignTo [idToReg (NonVoid v)] False) (cgExpr scrut)         ; restoreCurrentCostCentre mb_cc -       ; emit $ mkComment $ mkFastString "should be unreachable code" -       ; emit $ withFreshLabel "l" (\l -> mkLabel l <*> mkBranch l)} +       ; emitComment $ mkFastString "should be unreachable code" +       ; l <- newLabelC +       ; emitLabel l +       ; emit (mkBranch l) +       }  {-  case seq# a s of v @@ -349,7 +400,7 @@ cgCase (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _) bndr srt alt_type alts    = -- handle seq#, same return convention as vanilla 'a'.      cgCase (StgApp a []) bndr srt alt_type alts -cgCase scrut bndr srt alt_type alts  +cgCase scrut bndr _srt alt_type alts    = -- the general case      do { up_hp_usg <- getVirtHp        -- Upstream heap usage         ; let ret_bndrs = chooseReturnBndrs bndr alt_type alts @@ -359,7 +410,7 @@ cgCase scrut bndr srt alt_type alts                        | isSingleton alts = False                        | up_hp_usg > 0    = False                        | otherwise        = True -             gc_plan = if gcInAlts then GcInAlts alt_regs srt else NoGcInAlts +             gc_plan = if gcInAlts then GcInAlts alt_regs else NoGcInAlts         ; mb_cc <- maybeSaveCostCentre simple_scrut         ; withSequel (AssignTo alt_regs gcInAlts) (cgExpr scrut) @@ -417,14 +468,14 @@ chooseReturnBndrs _ _ _ = panic "chooseReturnBndrs"  cgAlts :: GcPlan -> NonVoid Id -> AltType -> [StgAlt] -> FCode ()  -- At this point the result of the case are in the binders  cgAlts gc_plan _bndr PolyAlt [(_, _, _, rhs)] -  = maybeAltHeapCheck gc_plan (cgExpr rhs) +  = maybeAltHeapCheck gc_plan Nothing (cgExpr rhs)  cgAlts gc_plan _bndr (UbxTupAlt _) [(_, _, _, rhs)] -  = maybeAltHeapCheck gc_plan (cgExpr rhs) +  = maybeAltHeapCheck gc_plan Nothing (cgExpr rhs)  	-- Here bndrs are *already* in scope, so don't rebind them  cgAlts gc_plan bndr (PrimAlt _) alts -  = do	{ tagged_cmms <- cgAltRhss gc_plan bndr alts +  = do  { tagged_cmms <- cgAltRhss gc_plan Nothing bndr alts  	; let bndr_reg = CmmLocal (idToReg bndr)  	      (DEFAULT,deflt) = head tagged_cmms @@ -433,20 +484,17 @@ cgAlts gc_plan bndr (PrimAlt _) alts  	      tagged_cmms' = [(lit,code)   			     | (LitAlt lit, code) <- tagged_cmms] -	; emit (mkCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' deflt) } +        ; emitCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' deflt }  cgAlts gc_plan bndr (AlgAlt tycon) alts -  = do	{ tagged_cmms <- cgAltRhss gc_plan bndr alts -	 +  = do  { retry_lbl <- newLabelC +        ; emitLabel retry_lbl -- Note [alg-alt heap checks] + +        ; (mb_deflt, branches) <- cgAlgAltRhss gc_plan (Just retry_lbl) +                                               bndr alts +  	; let fam_sz   = tyConFamilySize tycon  	      bndr_reg = CmmLocal (idToReg bndr) -	      mb_deflt = case tagged_cmms of -			   ((DEFAULT,rhs) : _) -> Just rhs -			   _other	       -> Nothing -		-- DEFAULT is always first, if present - -	      branches = [ (dataConTagZ con, cmm)  -	   	         | (DataAlt con, cmm) <- tagged_cmms ]                      -- Is the constructor tag in the node reg?          ; if isSmallFamily fam_sz @@ -467,23 +515,68 @@ cgAlts gc_plan bndr (AlgAlt tycon) alts  cgAlts _ _ _ _ = panic "cgAlts"  	-- UbxTupAlt and PolyAlt have only one alternative + +-- Note [alg-alt heap check] +-- +-- In an algebraic case with more than one alternative, we will have +-- code like +-- +-- L0: +--   x = R1 +--   goto L1 +-- L1: +--   if (x & 7 >= 2) then goto L2 else goto L3 +-- L2: +--   Hp = Hp + 16 +--   if (Hp > HpLim) then goto L4 +--   ... +-- L4: +--   call gc() returns to L5 +-- L5: +--   x = R1 +--   goto L1 +  ------------------- -cgAltRhss :: GcPlan -> NonVoid Id -> [StgAlt] -> FCode [(AltCon, CmmAGraph)] -cgAltRhss gc_plan bndr alts +cgAlgAltRhss :: GcPlan -> Maybe BlockId -> NonVoid Id -> [StgAlt] +             -> FCode ( Maybe CmmAGraph +                      , [(ConTagZ, CmmAGraph)] ) +cgAlgAltRhss gc_plan retry_lbl bndr alts +  = do { tagged_cmms <- cgAltRhss gc_plan retry_lbl bndr alts + +       ; let { mb_deflt = case tagged_cmms of +                           ((DEFAULT,rhs) : _) -> Just rhs +                           _other              -> Nothing +                            -- DEFAULT is always first, if present + +              ; branches = [ (dataConTagZ con, cmm) +                           | (DataAlt con, cmm) <- tagged_cmms ] +              } + +       ; return (mb_deflt, branches) +       } + + +------------------- +cgAltRhss :: GcPlan -> Maybe BlockId -> NonVoid Id -> [StgAlt] +          -> FCode [(AltCon, CmmAGraph)] +cgAltRhss gc_plan retry_lbl bndr alts    = forkAlts (map cg_alt alts)    where      base_reg = idToReg bndr      cg_alt :: StgAlt -> FCode (AltCon, CmmAGraph)      cg_alt (con, bndrs, _uses, rhs)        = getCodeR		  $ -	maybeAltHeapCheck gc_plan $ +        maybeAltHeapCheck gc_plan retry_lbl $  	do { _ <- bindConArgs con base_reg bndrs  	   ; cgExpr rhs  	   ; return con } -maybeAltHeapCheck :: GcPlan -> FCode a -> FCode a -maybeAltHeapCheck NoGcInAlts        code = code -maybeAltHeapCheck (GcInAlts regs _) code = altHeapCheck regs code +maybeAltHeapCheck :: GcPlan -> Maybe BlockId -> FCode a -> FCode a +maybeAltHeapCheck NoGcInAlts      mlbl code = code +maybeAltHeapCheck (GcInAlts regs) mlbl code = +  case mlbl of +     Nothing -> altHeapCheck regs code +     Just retry_lbl -> altHeapCheckReturnsTo regs retry_lbl code  -----------------------------------------------------------------------------  -- 	Tail calls @@ -517,8 +610,8 @@ cgIdApp fun_id args  cgLneJump :: BlockId -> [LocalReg] -> [StgArg] -> FCode ()  cgLneJump blk_id lne_regs args	-- Join point; discard sequel    = do	{ cmm_args <- getNonVoidArgAmodes args -      	; emit (mkMultiAssign lne_regs cmm_args -		<*> mkBranch blk_id) } +        ; emitMultiAssign lne_regs cmm_args +        ; emit (mkBranch blk_id) }  cgTailCall :: Id -> CgIdInfo -> [StgArg] -> FCode ()  cgTailCall fun_id fun_info args = do @@ -529,27 +622,21 @@ cgTailCall fun_id fun_info args = do        	ReturnIt -> emitReturn [fun]	-- ToDo: does ReturnIt guarantee tagged?        	EnterIt -> ASSERT( null args )	-- Discarding arguments -      		do { let fun' = CmmLoad fun (cmmExprType fun) -                   ; [ret,call] <- forkAlts [ -      			getCode $ emitReturn [fun],	-- Is tagged; no need to untag -      			getCode $ do -- emit (mkAssign nodeReg fun) -                         emitCall (NativeNodeCall, NativeReturn) -                                  (entryCode fun') [fun]]  -- Not tagged -      		   ; emit (mkCmmIfThenElse (cmmIsTagged fun) ret call) } - -      	SlowCall -> do 	    -- A slow function call via the RTS apply routines +                   emitEnter fun + +        SlowCall -> do      -- A slow function call via the RTS apply routines        		{ tickySlowCall lf_info args -                ; emit $ mkComment $ mkFastString "slowCall" +                ; emitComment $ mkFastString "slowCall"        		; slowCall fun args }        	-- A direct function call (possibly with some left-over arguments)        	DirectEntry lbl arity -> do  		{ tickyDirectCall arity args   		; if node_points then -                    do emit $ mkComment $ mkFastString "directEntry" -                       emit (mkAssign nodeReg fun) +                    do emitComment $ mkFastString "directEntry" +                       emitAssign nodeReg fun                         directCall lbl arity args -		  else do emit $ mkComment $ mkFastString "directEntry else" +                  else do emitComment $ mkFastString "directEntry else"                            directCall lbl arity args }  	JumpToIt {} -> panic "cgTailCall"	-- ??? @@ -561,33 +648,67 @@ cgTailCall fun_id fun_info args = do      node_points = nodeMustPointToIt lf_info -{- Note [case on Bool] -   ~~~~~~~~~~~~~~~~~~~ -A case on a Boolean value does two things: -  1. It looks up the Boolean in a closure table and assigns the -     result to the binder. -  2. It branches to the True or False case through analysis -     of the closure assigned to the binder. -But the indirection through the closure table is unnecessary -if the assignment to the binder will be dead code (use isDeadBndr). +emitEnter :: CmmExpr -> FCode () +emitEnter fun = do +  { adjustHpBackwards +  ; sequel <- getSequel +  ; updfr_off <- getUpdFrameOff +  ; case sequel of +      -- For a return, we have the option of generating a tag-test or +      -- not.  If the value is tagged, we can return directly, which +      -- is quicker than entering the value.  This is a code +      -- size/speed trade-off: when optimising for speed rather than +      -- size we could generate the tag test. +      -- +      -- Right now, we do what the old codegen did, and omit the tag +      -- test, just generating an enter. +      Return _ -> do +        { let entry = entryCode $ closureInfoPtr $ CmmReg nodeReg +        ; emit $ mkForeignJump NativeNodeCall entry +                    [cmmUntag fun] updfr_off +        } + +      -- The result will be scrutinised in the sequel.  This is where +      -- we generate a tag-test to avoid entering the closure if +      -- possible. +      -- +      -- The generated code will be something like this: +      -- +      --    R1 = fun  -- copyout +      --    if (fun & 7 != 0) goto Lcall else goto Lret +      --  Lcall: +      --    call [fun] returns to Lret +      --  Lret: +      --    fun' = R1  -- copyin +      --    ... +      -- +      -- Note in particular that the label Lret is used as a +      -- destination by both the tag-test and the call.  This is +      -- becase Lret will necessarily be a proc-point, and we want to +      -- ensure that we generate only one proc-point for this +      -- sequence. +      -- +      AssignTo res_regs _ -> do +       { lret <- newLabelC +       ; lcall <- newLabelC +       ; let area = Young lret +       ; let (off, copyin) = copyInOflow NativeReturn area res_regs +             (outArgs, copyout) = copyOutOflow NativeNodeCall Call area +                                          [fun] updfr_off (0,[]) +         -- refer to fun via nodeReg after the copyout, to avoid having +         -- both live simultaneously; this sometimes enables fun to be +         -- inlined in the RHS of the R1 assignment. +       ; let entry = entryCode (closureInfoPtr (CmmReg nodeReg)) +             the_call = toCall entry (Just lret) updfr_off off outArgs +       ; emit $ +           copyout <*> +           mkCbranch (cmmIsTagged (CmmReg nodeReg)) lret lcall <*> +           outOfLine lcall the_call <*> +           mkLabel lret <*> +           copyin +       } +  } -The following example illustrates how badly the code turns out: -  STG: -    case <=## [ww_s7Hx y_s7HD] of wild2_sbH8 { -      GHC.Types.False -> <true  code> // sbH8 dead -      GHC.Types.True  -> <false code> // sbH8 dead -    }; -  Cmm: -    _s7HD::F64 = F64[_sbH7::I64 + 7];  // MidAssign -    _ccsW::I64 = %MO_F_Le_W64(_s7Hx::F64, _s7HD::F64);  // MidAssign -    // emitReturn  // MidComment -    _sbH8::I64 = I64[ghczmprim_GHCziBool_Bool_closure_tbl + (_ccsW::I64 << 3)];  // MidAssign -    _ccsX::I64 = _sbH8::I64 & 7;  // MidAssign -    if (_ccsX::I64 >= 2) goto ccsH; else goto ccsI;  // LastCondBranch - -The assignments to _sbH8 and _ccsX are completely unnecessary. -Instead, we should branch based on the value of _ccsW. --}  {- Note [Better Alt Heap Checks]  If two function calls can share a return point, then they will also diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs index 5bc0f7af4e..c67e0e0c95 100644 --- a/compiler/codeGen/StgCmmForeign.hs +++ b/compiler/codeGen/StgCmmForeign.hs @@ -22,6 +22,7 @@ import StgCmmEnv  import StgCmmMonad  import StgCmmUtils  import StgCmmClosure +import StgCmmLayout  import BlockId  import Cmm @@ -45,15 +46,16 @@ import Control.Monad  -- Code generation for Foreign Calls  ----------------------------------------------------------------------------- -cgForeignCall :: [LocalReg]             -- r1,r2  where to put the results -              -> [ForeignHint] -              -> ForeignCall            -- the op +-- | emit code for a foreign call, and return the results to the sequel. +-- +cgForeignCall :: ForeignCall            -- the op                -> [StgArg]               -- x,y    arguments +              -> Type                   -- result type                -> FCode () --- Emits code for an unsafe foreign call:      r1, r2 = foo( x, y, z ) -cgForeignCall results result_hints (CCall (CCallSpec target cconv safety)) stg_args +cgForeignCall (CCall (CCallSpec target cconv safety)) stg_args res_ty    = do  { cmm_args <- getFCallArgs stg_args +        ; (res_regs, res_hints) <- newUnboxedTupleRegs res_ty          ; let ((call_args, arg_hints), cmm_target)                  = case target of                     StaticTarget _   _      False -> @@ -63,7 +65,7 @@ cgForeignCall results result_hints (CCall (CCallSpec target cconv safety)) stg_a                                  = case mPkgId of                                          Nothing         -> ForeignLabelInThisPackage                                          Just pkgId      -> ForeignLabelInPackage pkgId -                            size        = call_size cmm_args +                            size = call_size cmm_args                          in  ( unzip cmm_args                              , CmmLit (CmmLabel                                          (mkForeignLabel lbl size labelSource IsFunction))) @@ -71,13 +73,31 @@ cgForeignCall results result_hints (CCall (CCallSpec target cconv safety)) stg_a                     DynamicTarget    ->  case cmm_args of                                             (fn,_):rest -> (unzip rest, fn)                                             [] -> panic "cgForeignCall []" -              fc = ForeignConvention cconv arg_hints result_hints +              fc = ForeignConvention cconv arg_hints res_hints                call_target = ForeignTarget cmm_target fc -        ; srt <- getSRTInfo NoSRT        -- SLPJ: Not sure what SRT -                                        -- is right here -                                        -- JD: Does it matter in the new codegen? -        ; emitForeignCall safety results call_target call_args srt CmmMayReturn } +        -- we want to emit code for the call, and then emitReturn. +        -- However, if the sequel is AssignTo, we shortcut a little +        -- and generate a foreign call that assigns the results +        -- directly.  Otherwise we end up generating a bunch of +        -- useless "r = r" assignments, which are not merely annoying: +        -- they prevent the common block elimination from working correctly +        -- in the case of a safe foreign call. +        -- See Note [safe foreign call convention] +        -- +        ; sequel <- getSequel +        ; case sequel of +            AssignTo assign_to_these _ -> +                do { emitForeignCall safety assign_to_these call_target +                                     call_args CmmMayReturn +                   } + +            _something_else -> +                do { emitForeignCall safety res_regs call_target +                                     call_args CmmMayReturn +                   ; emitReturn (map (CmmReg . CmmLocal) res_regs) +                   } +         }    where          -- in the stdcall calling convention, the symbol needs @size appended          -- to it, where size is the total number of bytes of arguments.  We @@ -88,16 +108,83 @@ cgForeignCall results result_hints (CCall (CCallSpec target cconv safety)) stg_a          | otherwise            = Nothing          -- ToDo: this might not be correct for 64-bit API -      arg_size (arg, _) = max (widthInBytes $ typeWidth $ cmmExprType arg) wORD_SIZE +      arg_size (arg, _) = max (widthInBytes $ typeWidth $ cmmExprType arg) +                               wORD_SIZE + +{- Note [safe foreign call convention] + +The simple thing to do for a safe foreign call would be the same as an +unsafe one: just + +    emitForeignCall ... +    emitReturn ... + +but consider what happens in this case + +   case foo x y z of +     (# s, r #) -> ... + +The sequel is AssignTo [r].  The call to newUnboxedTupleRegs picks [r] +as the result reg, and we generate + +  r = foo(x,y,z) returns to L1  -- emitForeignCall + L1: +  r = r  -- emitReturn +  goto L2 +L2: +  ... + +Now L1 is a proc point (by definition, it is the continuation of the +safe foreign call).  If L2 does a heap check, then L2 will also be a +proc point. + +Furthermore, the stack layout algorithm has to arrange to save r +somewhere between the call and the jump to L1, which is annoying: we +would have to treat r differently from the other live variables, which +have to be saved *before* the call. + +So we adopt a special convention for safe foreign calls: the results +are copied out according to the NativeReturn convention by the call, +and the continuation of the call should copyIn the results.  (The +copyOut code is actually inserted when the safe foreign call is +lowered later).  The result regs attached to the safe foreign call are +only used temporarily to hold the results before they are copied out. + +We will now generate this: + +  r = foo(x,y,z) returns to L1 + L1: +  r = R1  -- copyIn, inserted by mkSafeCall +  goto L2 + L2: +  ... r ... + +And when the safe foreign call is lowered later (see Note [lower safe +foreign calls]) we get this: + +  suspendThread() +  r = foo(x,y,z) +  resumeThread() +  R1 = r  -- copyOut, inserted by lowerSafeForeignCall +  jump L1 + L1: +  r = R1  -- copyIn, inserted by mkSafeCall +  goto L2 + L2: +  ... r ... + +Now consider what happens if L2 does a heap check: the Adams +optimisation kicks in and commons up L1 with the heap-check +continuation, resulting in just one proc point instead of two. Yay! +-} +  emitCCall :: [(CmmFormal,ForeignHint)]            -> CmmExpr            -> [(CmmActual,ForeignHint)]            -> FCode ()  emitCCall hinted_results fn hinted_args -  = emitForeignCall PlayRisky results target args -                    NoC_SRT -- No SRT b/c we PlayRisky -                    CmmMayReturn +  = emitForeignCall PlayRisky results target args CmmMayReturn    where      (args, arg_hints) = unzip hinted_args      (results, result_hints) = unzip hinted_results @@ -107,7 +194,7 @@ emitCCall hinted_results fn hinted_args  emitPrimCall :: [CmmFormal] -> CallishMachOp -> [CmmActual] -> FCode ()  emitPrimCall res op args -  = emitForeignCall PlayRisky res (PrimTarget op) args NoC_SRT CmmMayReturn +  = emitForeignCall PlayRisky res (PrimTarget op) args CmmMayReturn  -- alternative entry point, used by CmmParse  emitForeignCall @@ -115,11 +202,10 @@ emitForeignCall          -> [CmmFormal]          -- where to put the results          -> ForeignTarget        -- the op          -> [CmmActual]          -- arguments -        -> C_SRT                -- the SRT of the calls continuation          -> CmmReturnInfo        -- This can say "never returns"                                  --   only RTS procedures do this          -> FCode () -emitForeignCall safety results target args _srt _ret +emitForeignCall safety results target args _ret    | not (playSafe safety) = do      let (caller_save, caller_load) = callerSaveVolatileRegs      emit caller_save @@ -129,7 +215,9 @@ emitForeignCall safety results target args _srt _ret    | otherwise = do      updfr_off <- getUpdFrameOff      temp_target <- load_target_into_temp target -    emit $ mkSafeCall temp_target results args updfr_off (playInterruptible safety) +    emit =<< mkSafeCall temp_target results args updfr_off +                (playInterruptible safety) +  {- @@ -162,7 +250,7 @@ maybe_assign_temp e          -- expressions, which are wrong here.          -- this is a NonPtr because it only duplicates an existing          reg <- newTemp (cmmExprType e) --TODO FIXME NOW -        emit (mkAssign (CmmLocal reg) e) +        emitAssign (CmmLocal reg) e          return (CmmReg (CmmLocal reg))  -- ----------------------------------------------------------------------------- @@ -184,12 +272,12 @@ saveThreadState =  emitSaveThreadState :: BlockId -> FCode ()  emitSaveThreadState bid = do    -- CurrentTSO->stackobj->sp = Sp; -  emit $ mkStore (cmmOffset (CmmLoad (cmmOffset stgCurrentTSO tso_stackobj) bWord) stack_SP) -                 (CmmStackSlot (CallArea (Young bid)) (widthInBytes (typeWidth gcWord))) +  emitStore (cmmOffset (CmmLoad (cmmOffset stgCurrentTSO tso_stackobj) bWord) stack_SP) +                 (CmmStackSlot (Young bid) (widthInBytes (typeWidth gcWord)))    emit closeNursery    -- and save the current cost centre stack in the TSO when profiling:    when opt_SccProfilingOn $ -        emit (mkStore (cmmOffset stgCurrentTSO tso_CCCS) curCCS) +        emitStore (cmmOffset stgCurrentTSO tso_CCCS) curCCS     -- CurrentNursery->free = Hp+1;  closeNursery :: CmmAGraph diff --git a/compiler/codeGen/StgCmmHeap.hs b/compiler/codeGen/StgCmmHeap.hs index 25161722f7..856b04367d 100644 --- a/compiler/codeGen/StgCmmHeap.hs +++ b/compiler/codeGen/StgCmmHeap.hs @@ -10,7 +10,7 @@ module StgCmmHeap (          getVirtHp, setVirtHp, setRealHp,          getHpRelOffset, hpRel, -        entryHeapCheck, altHeapCheck, +        entryHeapCheck, altHeapCheck, altHeapCheckReturnsTo,          mkVirtHeapOffsets, mkVirtConstrOffsets,          mkStaticClosureFields, mkStaticClosure, @@ -20,7 +20,6 @@ module StgCmmHeap (  #include "HsVersions.h" -import CmmType  import StgSyn  import CLabel  import StgCmmLayout @@ -34,6 +33,7 @@ import StgCmmEnv  import MkGraph +import Hoopl hiding ((<*>), mkBranch)  import SMRep  import Cmm  import CmmUtils @@ -109,7 +109,7 @@ allocDynClosureCmm info_tbl lf_info use_cc _blame_cc amodes_w_offsets          -- ALLOCATE THE OBJECT          ; base <- getHpRelOffset info_offset -        ; emit (mkComment $ mkFastString "allocDynClosure") +        ; emitComment $ mkFastString "allocDynClosure"          ; emitSetDynHdr base info_ptr  use_cc          ; let (cmm_args, offsets) = unzip amodes_w_offsets          ; hpStore base cmm_args offsets @@ -151,9 +151,10 @@ mkStaticClosureFields          :: CmmInfoTable          -> CostCentreStack          -> CafInfo +        -> Bool                 -- SRT is non-empty?          -> [CmmLit]             -- Payload          -> [CmmLit]             -- The full closure -mkStaticClosureFields info_tbl ccs caf_refs payload +mkStaticClosureFields info_tbl ccs caf_refs has_srt payload    = mkStaticClosure info_lbl ccs payload padding          static_link_field saved_info_field    where @@ -178,8 +179,10 @@ mkStaticClosureFields info_tbl ccs caf_refs payload          | otherwise  = ASSERT(null payload) [mkIntCLit 0]      static_link_field -        | is_caf || staticClosureNeedsLink info_tbl = [static_link_value] -        | otherwise                                 = [] +        | is_caf || staticClosureNeedsLink has_srt info_tbl +        = [static_link_value] +        | otherwise +        = []      saved_info_field          | is_caf     = [mkIntCLit 0] @@ -335,11 +338,12 @@ entryHeapCheck cl_info offset nodeSet arity args code             args' = map (CmmReg . CmmLocal) args             setN = case nodeSet of -                          Just n  -> mkAssign nodeReg (CmmReg $ CmmLocal n) +                          Just n  -> mkNop -- No need to assign R1, it already +                                           -- points to the closure                            Nothing -> mkAssign nodeReg $                                CmmLit (CmmLabel $ staticClosureLabel cl_info) -           {- Thunks:          Set R1 = node, jump GCEnter1 +           {- Thunks:          jump GCEnter1                Function (fast): Set R1 = node, jump GCFun                Function (slow): Set R1 = node, call generic_gc -}             gc_call upd = setN <*> gc_lbl upd @@ -354,7 +358,10 @@ entryHeapCheck cl_info offset nodeSet arity args code              - GC calls, but until then this fishy code works -}         updfr_sz <- getUpdFrameOff -       heapCheck True (gc_call updfr_sz) code + +       loop_id <- newLabelC +       emitLabel loop_id +       heapCheck True (gc_call updfr_sz <*> mkBranch loop_id) code  {-      -- This code is slightly outdated now and we could easily keep the above @@ -400,21 +407,29 @@ entryHeapCheck cl_info offset nodeSet arity args code  -} --------------------------------------------------------------- --- A heap/stack check at in a case alternative +-- ------------------------------------------------------------ +-- A heap/stack check in a case alternative  altHeapCheck :: [LocalReg] -> FCode a -> FCode a  altHeapCheck regs code +  = do loop_id <- newLabelC +       emitLabel loop_id +       altHeapCheckReturnsTo regs loop_id code + +altHeapCheckReturnsTo :: [LocalReg] -> Label -> FCode a -> FCode a +altHeapCheckReturnsTo regs retry_lbl code    = do updfr_sz <- getUpdFrameOff -       heapCheck False (gc_call updfr_sz) code +       gc_call_code <- gc_call updfr_sz +       heapCheck False (gc_call_code <*> mkBranch retry_lbl) code    where      reg_exprs = map (CmmReg . CmmLocal) regs +      -- Note [stg_gc arguments]      gc_call sp =          case rts_label regs of -             Just gc -> mkCall (CmmLit gc) (GC, GC) regs reg_exprs sp -             Nothing -> mkCall generic_gc (GC, GC) [] [] sp +             Just gc -> mkCall (CmmLit gc) (GC, GC) regs reg_exprs sp (0,[]) +             Nothing -> mkCall generic_gc (GC, GC) [] [] sp (0,[])      rts_label [reg]          | isGcPtrType ty = Just (mkGcLabel "stg_gc_unpt_r1") @@ -432,6 +447,23 @@ altHeapCheck regs code      rts_label _ = Nothing +-- Note [stg_gc arguments] +-- It might seem that we could avoid passing the arguments to the +-- stg_gc function, because they are already in the right registers. +-- While this is usually the case, it isn't always.  Sometimes the +-- code generator has cleverly avoided the eval in a case, e.g. in +-- ffi/should_run/4221.hs we found +-- +--   case a_r1mb of z +--     FunPtr x y -> ... +-- +-- where a_r1mb is bound a top-level constructor, and is known to be +-- evaluated.  The codegen just assigns x, y and z, and continues; +-- R1 is never assigned. +-- +-- So we'll have to rely on optimisations to eliminatethese +-- assignments where possible. +  -- | The generic GC procedure; no params, no results  generic_gc :: CmmExpr @@ -447,7 +479,7 @@ heapCheck checkStack do_gc code    = getHeapUsage $ \ hpHw ->      -- Emit heap checks, but be sure to do it lazily so      -- that the conditionals on hpHw don't cause a black hole -    do  { emit $ do_checks checkStack hpHw do_gc +    do  { codeOnly $ do_checks checkStack hpHw do_gc          ; tickyAllocHeap hpHw          ; doGranAllocate hpHw          ; setRealHp hpHw @@ -456,22 +488,25 @@ heapCheck checkStack do_gc code  do_checks :: Bool       -- Should we check the stack?            -> WordOff    -- Heap headroom            -> CmmAGraph  -- What to do on failure -          -> CmmAGraph -do_checks checkStack alloc do_gc -  = withFreshLabel "gc" $ \ loop_id -> -    withFreshLabel "gc" $ \ gc_id   -> -      mkLabel loop_id -      <*> (let hpCheck = if alloc == 0 then mkNop -                         else mkAssign hpReg bump_hp <*> -                              mkCmmIfThen hp_oflo (alloc_n <*> mkBranch gc_id) -           in if checkStack -                 then mkCmmIfThenElse sp_oflo (mkBranch gc_id) hpCheck -                 else hpCheck) -      <*> mkComment (mkFastString "outOfLine should follow:") -      <*> outOfLine (mkLabel gc_id -                     <*> mkComment (mkFastString "outOfLine here") -                     <*> do_gc -                     <*> mkBranch loop_id) +          -> FCode () +do_checks checkStack alloc do_gc = do +  gc_id <- newLabelC +  hp_check <- if alloc == 0 +                 then return mkNop +                 else do +                   ifthen <- mkCmmIfThen hp_oflo (alloc_n <*> mkBranch gc_id) +                   return (mkAssign hpReg bump_hp <*> ifthen) + +  if checkStack +     then emit =<< mkCmmIfThenElse sp_oflo (mkBranch gc_id) hp_check +     else emit hp_check + +  emit $ mkComment (mkFastString "outOfLine should follow:") + +  emitOutOfLine gc_id $ +     mkComment (mkFastString "outOfLine here") <*> +     do_gc -- this is expected to jump back somewhere +                  -- Test for stack pointer exhaustion, then                  -- bump heap pointer, and test for heap exhaustion                  -- Note that we don't move the heap pointer unless the diff --git a/compiler/codeGen/StgCmmLayout.hs b/compiler/codeGen/StgCmmLayout.hs index 86986efdfa..9593af1f50 100644 --- a/compiler/codeGen/StgCmmLayout.hs +++ b/compiler/codeGen/StgCmmLayout.hs @@ -15,7 +15,7 @@  module StgCmmLayout (  	mkArgDescr,  -	emitCall, emitReturn, +        emitCall, emitReturn, adjustHpBackwards,  	emitClosureProcAndInfoTable,  	emitClosureAndInfoTable, @@ -41,10 +41,12 @@ import StgCmmEnv  import StgCmmTicky  import StgCmmMonad  import StgCmmUtils +import StgCmmProf  import MkGraph  import SMRep  import Cmm +import CmmUtils  import CLabel  import StgSyn  import Id @@ -52,6 +54,7 @@ import Name  import TyCon		( PrimRep(..) )  import BasicTypes	( RepArity )  import StaticFlags +import Module  import Constants  import Util @@ -63,38 +66,60 @@ import FastString  --		Call and return sequences  ------------------------------------------------------------------------ -emitReturn :: [CmmExpr] -> FCode () --- Return multiple values to the sequel +-- | Return multiple values to the sequel +-- +-- If the sequel is @Return@ +-- +-- >     return (x,y) +-- +-- If the sequel is @AssignTo [p,q]@ +-- +-- >    p=x; q=y;  -- --- If the sequel is Return ---	return (x,y) --- If the sequel is AssignTo [p,q] ---	p=x; q=y;  +emitReturn :: [CmmExpr] -> FCode ()  emitReturn results    = do { sequel    <- getSequel;         ; updfr_off <- getUpdFrameOff -       ; emit $ mkComment $ mkFastString ("emitReturn: " ++ show sequel) +       ; emitComment $ mkFastString ("emitReturn: " ++ show sequel)         ; case sequel of             Return _ ->               do { adjustHpBackwards                  ; emit (mkReturnSimple results updfr_off) }             AssignTo regs adjust ->               do { if adjust then adjustHpBackwards else return () -                ; emit (mkMultiAssign  regs results) } +                ; emitMultiAssign  regs results }         } + +-- | @emitCall conv fun args@ makes a call to the entry-code of @fun@, +-- using the call/return convention @conv@, passing @args@, and +-- returning the results to the current sequel. +--  emitCall :: (Convention, Convention) -> CmmExpr -> [CmmExpr] -> FCode () --- (cgCall fun args) makes a call to the entry-code of 'fun',  --- passing 'args', and returning the results to the current sequel -emitCall convs@(callConv, _) fun args +emitCall convs fun args +  = emitCallWithExtraStack convs fun args noExtraStack + + +-- | @emitCallWithExtraStack conv fun args stack@ makes a call to the +-- entry-code of @fun@, using the call/return convention @conv@, +-- passing @args@, pushing some extra stack frames described by +-- @stack@, and returning the results to the current sequel. +-- +emitCallWithExtraStack +   :: (Convention, Convention) -> CmmExpr -> [CmmExpr] +   -> (ByteOff, [(CmmExpr,ByteOff)]) -> FCode () +emitCallWithExtraStack convs@(callConv, _) fun args extra_stack    = do	{ adjustHpBackwards  	; sequel <- getSequel  	; updfr_off <- getUpdFrameOff -        ; emit $ mkComment $ mkFastString ("emitCall: " ++ show sequel) -	; case sequel of -	    Return _            -> emit (mkForeignJump callConv fun args updfr_off) -	    AssignTo res_regs _ -> emit (mkCall fun convs res_regs args updfr_off) -    } +        ; emitComment $ mkFastString ("emitCallWithExtraStack: " ++ show sequel) +        ; case sequel of +            Return _ -> +              emit $ mkForeignJumpExtra callConv fun args updfr_off extra_stack +            AssignTo res_regs _ -> do +              emit =<< mkCall fun convs res_regs args updfr_off extra_stack +      } +  adjustHpBackwards :: FCode ()  -- This function adjusts and heap pointers just before a tail call or @@ -127,59 +152,137 @@ adjustHpBackwards  --	Making calls: directCall and slowCall  ------------------------------------------------------------------------- +-- General plan is: +--   - we'll make *one* fast call, either to the function itself +--     (directCall) or to stg_ap_<pat>_fast (slowCall) +--     Any left-over arguments will be pushed on the stack, +-- +--     e.g. Sp[old+8]  = arg1 +--          Sp[old+16] = arg2 +--          Sp[old+32] = stg_ap_pp_info +--          R2 = arg3 +--          R3 = arg4 +--          call f() return to Nothing updfr_off: 32 + +  directCall :: CLabel -> RepArity -> [StgArg] -> FCode ()  -- (directCall f n args)  -- calls f(arg1, ..., argn), and applies the result to the remaining args  -- The function f has arity n, and there are guaranteed at least n args  -- Both arity and args include void args  directCall lbl arity stg_args  -  = do	{ cmm_args <- getNonVoidArgAmodes stg_args -	; direct_call "directCall" lbl arity cmm_args (argsReps stg_args) } +  = do  { argreps <- getArgRepsAmodes stg_args +        ; direct_call "directCall" lbl arity argreps } +  slowCall :: CmmExpr -> [StgArg] -> FCode ()  -- (slowCall fun args) applies fun to args, returning the results to Sequel  slowCall fun stg_args  -  = do	{ cmm_args <- getNonVoidArgAmodes stg_args -	; slow_call fun cmm_args (argsReps stg_args) } +  = do  { dflags <- getDynFlags +        ; argsreps <- getArgRepsAmodes stg_args +        ; let (rts_fun, arity) = slowCallPattern (map fst argsreps) +        ; call <- getCode $ direct_call "slow_call" +                       (mkRtsApFastLabel rts_fun) arity argsreps +        ; emitComment $ mkFastString ("slow_call for " ++ +                                      showSDoc dflags (ppr fun) ++ +                                      " with pat " ++ unpackFS rts_fun) +        ; emit (mkAssign nodeReg fun <*> call) +        } +  -------------- -direct_call :: String -> CLabel -> RepArity -> [CmmExpr] -> [ArgRep] -> FCode () --- NB1: (length args) may be less than (length reps), because ---     the args exclude the void ones --- NB2: 'arity' refers to the *reps*  -direct_call caller lbl arity args reps -  | debugIsOn && arity > length reps	-- Too few args +direct_call :: String -> CLabel -> RepArity -> [(ArgRep,Maybe CmmExpr)] -> FCode () +direct_call caller lbl arity args +  | debugIsOn && arity > length args  -- Too few args    = do -- Caller should ensure that there enough args! -       pprPanic "direct_call" (text caller <+> ppr arity -                           <+> ppr lbl <+> ppr (length reps) -                           <+> ppr args <+> ppr reps ) - -  | null rest_reps     -- Precisely the right number of arguments -  = emitCall (NativeDirectCall, NativeReturn) target args - -  | otherwise		-- Over-saturated call -  = ASSERT( arity == length initial_reps ) -    do	{ pap_id <- newTemp gcWord -	; withSequel (AssignTo [pap_id] True) -		     (emitCall (NativeDirectCall, NativeReturn) target fast_args) -	; slow_call (CmmReg (CmmLocal pap_id))  -		    rest_args rest_reps } +       pprPanic "direct_call" $ +            text caller <+> ppr arity <+> +            ppr lbl <+> ppr (length args) <+> +            ppr (map snd args) <+> ppr (map fst args) + +  | null rest_args  -- Precisely the right number of arguments +  = emitCall (NativeDirectCall, NativeReturn) target (nonVArgs args) + +  | otherwise       -- Note [over-saturated calls] +  = emitCallWithExtraStack (NativeDirectCall, NativeReturn) +                           target (nonVArgs fast_args) (mkStkOffsets stack_args)    where      target = CmmLit (CmmLabel lbl) -    (initial_reps, rest_reps) = splitAt arity reps -    arg_arity = count isNonV initial_reps -    (fast_args, rest_args) = splitAt arg_arity args +    (fast_args, rest_args) = splitAt arity args +    stack_args = slowArgs rest_args --------------- -slow_call :: CmmExpr -> [CmmExpr] -> [ArgRep] -> FCode () -slow_call fun args reps -  = do dflags <- getDynFlags -       call <- getCode $ direct_call "slow_call" (mkRtsApFastLabel rts_fun) arity args reps -       emit $ mkComment $ mkFastString ("slow_call for " ++ showSDoc dflags (ppr fun) ++ -                                        " with pat " ++ unpackFS rts_fun) -       emit (mkAssign nodeReg fun <*> call) + +-- When constructing calls, it is easier to keep the ArgReps and the +-- CmmExprs zipped together.  However, a void argument has no +-- representation, so we need to use Maybe CmmExpr (the alternative of +-- using zeroCLit or even undefined would work, but would be ugly). +-- +getArgRepsAmodes :: [StgArg] -> FCode [(ArgRep, Maybe CmmExpr)] +getArgRepsAmodes = mapM getArgRepAmode +  where getArgRepAmode arg +           | V <- rep  = return (V, Nothing) +           | otherwise = do expr <- getArgAmode (NonVoid arg) +                            return (rep, Just expr) +           where rep = toArgRep (argPrimRep arg) + +nonVArgs :: [(ArgRep, Maybe CmmExpr)] -> [CmmExpr] +nonVArgs [] = [] +nonVArgs ((_,Nothing)  : args) = nonVArgs args +nonVArgs ((_,Just arg) : args) = arg : nonVArgs args + +{- +Note [over-saturated calls] + +The natural thing to do for an over-saturated call would be to call +the function with the correct number of arguments, and then apply the +remaining arguments to the value returned, e.g. + +  f a b c d   (where f has arity 2) +  --> +  r = call f(a,b) +  call r(c,d) + +but this entails +  - saving c and d on the stack +  - making a continuation info table +  - at the continuation, loading c and d off the stack into regs +  - finally, call r + +Note that since there are a fixed number of different r's +(e.g.  stg_ap_pp_fast), we can also pre-compile continuations +that correspond to each of them, rather than generating a fresh +one for each over-saturated call. + +Not only does this generate much less code, it is faster too.  We will +generate something like: + +Sp[old+16] = c +Sp[old+24] = d +Sp[old+32] = stg_ap_pp_info +call f(a,b) -- usual calling convention + +For the purposes of the CmmCall node, we count this extra stack as +just more arguments that we are passing on the stack (cml_args). +-} + +-- | 'slowArgs' takes a list of function arguments and prepares them for +-- pushing on the stack for "extra" arguments to a function which requires +-- fewer arguments than we currently have. +slowArgs :: [(ArgRep, Maybe CmmExpr)] -> [(ArgRep, Maybe CmmExpr)] +slowArgs [] = [] +slowArgs args -- careful: reps contains voids (V), but args does not +  | opt_SccProfilingOn = save_cccs ++ this_pat ++ slowArgs rest_args +  | otherwise          =              this_pat ++ slowArgs rest_args    where -    (rts_fun, arity) = slowCallPattern reps +    (arg_pat, n)            = slowCallPattern (map fst args) +    (call_args, rest_args)  = splitAt n args + +    stg_ap_pat = mkCmmRetInfoLabel rtsPackageId arg_pat +    this_pat   = (N, Just (mkLblExpr stg_ap_pat)) : call_args +    save_cccs  = [(N, Just (mkLblExpr save_cccs_lbl)), (N, Just curCCS)] +    save_cccs_lbl = mkCmmRetInfoLabel rtsPackageId (fsLit "stg_restore_cccs") + +  -- These cases were found to cover about 99% of all slow calls:  slowCallPattern :: [ArgRep] -> (FastString, RepArity) @@ -202,6 +305,30 @@ slowCallPattern []		      = (fsLit "stg_ap_0", 0)  ------------------------------------------------------------------------- +-- Fix the byte-offsets of a bunch of things to push on the stack + +-- This is used for pushing slow-call continuations. +-- See Note [over-saturated calls]. + +mkStkOffsets +  :: [(ArgRep, Maybe CmmExpr)]    -- things to make offsets for +  -> ( ByteOff                    -- OUTPUTS: Topmost allocated word +     , [(CmmExpr, ByteOff)] )     -- things with offsets (voids filtered out) +mkStkOffsets things +    = loop 0 [] (reverse things) +  where +    loop offset offs [] = (offset,offs) +    loop offset offs ((_,Nothing):things) = loop offset offs things +	-- ignore Void arguments +    loop offset offs ((rep,Just thing):things) +        = loop thing_off ((thing, thing_off):offs) things +	where +          thing_off = offset + argRepSizeW rep * wORD_SIZE +	    -- offset of thing is offset+size, because we're  +	    -- growing the stack *downwards* as the offsets increase. + + +-------------------------------------------------------------------------  --	Classifying arguments: ArgRep  ------------------------------------------------------------------------- @@ -237,10 +364,7 @@ isNonV :: ArgRep -> Bool  isNonV V = False  isNonV _ = True -argsReps :: [StgArg] -> [ArgRep] -argsReps = map (toArgRep . argPrimRep) - -argRepSizeW :: ArgRep -> WordOff		-- Size in words +argRepSizeW :: ArgRep -> WordOff                -- Size in words  argRepSizeW N = 1  argRepSizeW P = 1  argRepSizeW F = 1 diff --git a/compiler/codeGen/StgCmmMonad.hs b/compiler/codeGen/StgCmmMonad.hs index 4eea38e22c..cc9919a4a0 100644 --- a/compiler/codeGen/StgCmmMonad.hs +++ b/compiler/codeGen/StgCmmMonad.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE GADTs #-}  -----------------------------------------------------------------------------  --  -- Monad for Stg to C-- code generation @@ -16,16 +17,21 @@  module StgCmmMonad (  	FCode,	-- type -	initC, thenC, thenFC, listCs, listFCs, mapCs, mapFCs, +        initC, runC, thenC, thenFC, listCs, listFCs, mapCs, mapFCs,  	returnFC, fixC, fixC_, nopC, whenC,   	newUnique, newUniqSupply,  +        newLabelC, emitLabel, +  	emit, emitDecl, emitProc, emitProcWithConvention, emitSimpleProc, +        emitOutOfLine, emitAssign, emitStore, emitComment,  	getCmm, cgStmtsToBlocks,  	getCodeR, getCode, getHeapUsage, -	forkClosureBody, forkStatics, forkAlts, forkProc, codeOnly, +        mkCmmIfThenElse, mkCmmIfThen, mkCall, mkCmmCall, mkSafeCall, + +        forkClosureBody, forkStatics, forkAlts, forkProc, codeOnly,  	ConTagZ, @@ -69,12 +75,12 @@ import VarEnv  import OrdList  import Unique  import UniqSupply -import FastString(sLit) +import FastString  import Outputable  import Control.Monad  import Data.List -import Prelude hiding( sequence ) +import Prelude hiding( sequence, succ )  import qualified Prelude( sequence )  infixr 9 `thenC`	-- Right-associative! @@ -95,12 +101,12 @@ instance Monad FCode where  {-# INLINE thenFC #-}  {-# INLINE returnFC #-} -initC :: DynFlags -> Module -> FCode a -> IO a -initC dflags mod (FCode code) -  = do	{ uniqs <- mkSplitUniqSupply 'c' -	; case code (initCgInfoDown dflags mod) (initCgState uniqs) of -	      (res, _) -> return res -	} +initC :: IO CgState +initC  = do { uniqs <- mkSplitUniqSupply 'c' +            ; return (initCgState uniqs) } + +runC :: DynFlags -> Module -> CgState -> FCode a -> (a,CgState) +runC dflags mod st (FCode code) = code (initCgInfoDown dflags mod) st  returnFC :: a -> FCode a  returnFC val = FCode (\_info_down state -> (val, state)) @@ -270,6 +276,8 @@ data HeapUsage =  type VirtualHpOffset = WordOff + +  initCgState :: UniqSupply -> CgState  initCgState uniqs    = MkCgState { cgs_stmts      = mkNop, cgs_tops = nilOL, @@ -308,7 +316,6 @@ initHpUsage = HeapUsage { virtHp = 0, realHp = 0 }  maxHpHw :: HeapUsage -> VirtualHpOffset -> HeapUsage  hp_usg `maxHpHw` hw = hp_usg { virtHp = virtHp hp_usg `max` hw } -  --------------------------------------------------------  -- Operators for getting and setting the state and "info_down".  -------------------------------------------------------- @@ -591,6 +598,33 @@ getHeapUsage fcode  -- ----------------------------------------------------------------------------  -- Combinators for emitting code +emitCgStmt :: CgStmt -> FCode () +emitCgStmt stmt +  = do  { state <- getState +        ; setState $ state { cgs_stmts = cgs_stmts state `snocOL` stmt } +        } + +emitLabel :: BlockId -> FCode () +emitLabel id = emitCgStmt (CgLabel id) + +emitComment :: FastString -> FCode () +#if 0 /* def DEBUG */ +emitComment s = emitCgStmt (CgStmt (CmmComment s)) +#else +emitComment _ = return () +#endif + +emitAssign :: CmmReg  -> CmmExpr -> FCode () +emitAssign l r = emitCgStmt (CgStmt (CmmAssign l r)) + +emitStore :: CmmExpr  -> CmmExpr -> FCode () +emitStore l r = emitCgStmt (CgStmt (CmmStore l r)) + + +newLabelC :: FCode BlockId +newLabelC = do { u <- newUnique +               ; return $ mkBlockId u } +  emit :: CmmAGraph -> FCode ()  emit ag    = do	{ state <- getState @@ -601,6 +635,9 @@ emitDecl decl    = do 	{ state <- getState  	; setState $ state { cgs_tops = cgs_tops state `snocOL` decl } } +emitOutOfLine :: BlockId -> CmmAGraph -> FCode () +emitOutOfLine l stmts = emitCgStmt (CgFork l stmts) +  emitProcWithConvention :: Convention -> CmmInfoTable -> CLabel -> [CmmFormal] ->                            CmmAGraph -> FCode ()  emitProcWithConvention conv info lbl args blocks @@ -629,6 +666,55 @@ getCmm code  	; setState $ state2 { cgs_tops = cgs_tops state1 }           ; return (fromOL (cgs_tops state2)) } + +mkCmmIfThenElse :: CmmExpr -> CmmAGraph -> CmmAGraph -> FCode CmmAGraph +mkCmmIfThenElse e tbranch fbranch = do +  endif <- newLabelC +  tid   <- newLabelC +  fid   <- newLabelC +  return $ mkCbranch e tid fid <*> +            mkLabel tid <*> tbranch <*> mkBranch endif <*> +            mkLabel fid <*> fbranch <*> mkLabel endif + +mkCmmIfThen :: CmmExpr -> CmmAGraph -> FCode CmmAGraph +mkCmmIfThen e tbranch = do +  endif <- newLabelC +  tid <- newLabelC +  return $ mkCbranch e tid endif <*> +         mkLabel tid <*> tbranch <*> mkLabel endif + + +mkCall :: CmmExpr -> (Convention, Convention) -> [CmmFormal] -> [CmmActual] +       -> UpdFrameOffset -> (ByteOff,[(CmmExpr,ByteOff)]) -> FCode CmmAGraph +mkCall f (callConv, retConv) results actuals updfr_off extra_stack = do +  k <- newLabelC +  let area = Young k +      (off, copyin) = copyInOflow retConv area results +      copyout = mkCallReturnsTo f callConv actuals k off updfr_off extra_stack +  return (copyout <*> mkLabel k <*> copyin) + +mkCmmCall :: CmmExpr -> [CmmFormal] -> [CmmActual] -> UpdFrameOffset +          -> FCode CmmAGraph +mkCmmCall f results actuals updfr_off +   = mkCall f (NativeDirectCall, NativeReturn) results actuals updfr_off (0,[]) + + +mkSafeCall :: ForeignTarget -> [CmmFormal] -> [CmmActual] +           -> UpdFrameOffset -> Bool +           -> FCode CmmAGraph +mkSafeCall   t fs as upd i = do +  k <- newLabelC +  let (_off, copyout) = copyInOflow NativeReturn (Young k) fs +    -- see Note [safe foreign call convention] +  return +     (    mkStore (CmmStackSlot (Young k) (widthInBytes wordWidth)) +                  (CmmLit (CmmBlock k)) +      <*> mkLast (CmmForeignCall { tgt=t, res=fs, args=as, succ=k +                                 , updfr=upd, intrbl=i }) +      <*> mkLabel k +      <*> copyout +     ) +  -- ----------------------------------------------------------------------------  -- CgStmts @@ -640,4 +726,3 @@ cgStmtsToBlocks :: CmmAGraph -> FCode CmmGraph  cgStmtsToBlocks stmts    = do  { us <- newUniqSupply  	; return (initUs_ us (lgraphOfAGraph stmts)) }	 - diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs index efa234b5a6..bd783a3b30 100644 --- a/compiler/codeGen/StgCmmPrim.hs +++ b/compiler/codeGen/StgCmmPrim.hs @@ -14,7 +14,9 @@  -- for details  module StgCmmPrim ( -   cgOpApp +   cgOpApp, +   cgPrimOp -- internal(ish), used by cgCase to get code for a +            -- comparison without also turning it into a Bool.   ) where  #include "HsVersions.h" @@ -67,14 +69,9 @@ cgOpApp :: StgOp	-- The op  -- Foreign calls   cgOpApp (StgFCallOp fcall _) stg_args res_ty  -  = do	{ (res_regs, res_hints) <- newUnboxedTupleRegs res_ty -	 	-- Choose result regs r1, r2 -		-- Note [Foreign call results] -	; cgForeignCall res_regs res_hints fcall stg_args -		-- r1, r2 = foo( x, y ) -	; emitReturn (map (CmmReg . CmmLocal) res_regs) } -		-- return (r1, r2)  -       +  = cgForeignCall fcall stg_args res_ty +      -- Note [Foreign call results] +  -- tagToEnum# is special: we need to pull the constructor   -- out of the table, and perform an appropriate return. @@ -229,23 +226,23 @@ emitPrimOp [res] SparkOp [arg]              [(tmp2,NoHint)]              (CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit "newSpark"))))              [(CmmReg (CmmGlobal BaseReg), AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)] -        emit (mkAssign (CmmLocal res) (CmmReg (CmmLocal tmp))) +        emitAssign (CmmLocal res) (CmmReg (CmmLocal tmp))  emitPrimOp [res] GetCCSOfOp [arg] -  = emit (mkAssign (CmmLocal res) val) +  = emitAssign (CmmLocal res) val    where      val | opt_SccProfilingOn = costCentreFrom (cmmUntag arg)          | otherwise          = CmmLit zeroCLit  emitPrimOp [res] GetCurrentCCSOp [_dummy_arg] -   = emit (mkAssign (CmmLocal res) curCCS) +   = emitAssign (CmmLocal res) curCCS  emitPrimOp [res] ReadMutVarOp [mutv] -   = emit (mkAssign (CmmLocal res) (cmmLoadIndexW mutv fixedHdrSize gcWord)) +   = emitAssign (CmmLocal res) (cmmLoadIndexW mutv fixedHdrSize gcWord)  emitPrimOp [] WriteMutVarOp [mutv,var]     = do -	emit (mkStore (cmmOffsetW mutv fixedHdrSize) var) +        emitStore (cmmOffsetW mutv fixedHdrSize) var  	emitCCall  		[{-no results-}]  		(CmmLit (CmmLabel mkDirty_MUT_VAR_Label)) @@ -269,32 +266,32 @@ emitPrimOp res@[] TouchOp args@[_arg]  --  #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a)  emitPrimOp [res] ByteArrayContents_Char [arg] -   = emit (mkAssign (CmmLocal res) (cmmOffsetB arg arrWordsHdrSize)) +   = emitAssign (CmmLocal res) (cmmOffsetB arg arrWordsHdrSize)  --  #define stableNameToIntzh(r,s)   (r = ((StgStableName *)s)->sn)  emitPrimOp [res] StableNameToIntOp [arg] -   = emit (mkAssign (CmmLocal res) (cmmLoadIndexW arg fixedHdrSize bWord)) +   = emitAssign (CmmLocal res) (cmmLoadIndexW arg fixedHdrSize bWord)  --  #define eqStableNamezh(r,sn1,sn2)					\  --    (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn))  emitPrimOp [res] EqStableNameOp [arg1,arg2] -   = emit (mkAssign (CmmLocal res) (CmmMachOp mo_wordEq [ +   = emitAssign (CmmLocal res) (CmmMachOp mo_wordEq [  				cmmLoadIndexW arg1 fixedHdrSize bWord,  				cmmLoadIndexW arg2 fixedHdrSize bWord -			 ])) +                         ])  emitPrimOp [res] ReallyUnsafePtrEqualityOp [arg1,arg2] -   = emit (mkAssign (CmmLocal res) (CmmMachOp mo_wordEq [arg1,arg2])) +   = emitAssign (CmmLocal res) (CmmMachOp mo_wordEq [arg1,arg2])  --  #define addrToHValuezh(r,a) r=(P_)a  emitPrimOp [res] AddrToAnyOp [arg] -   = emit (mkAssign (CmmLocal res) arg) +   = emitAssign (CmmLocal res) arg  --  #define dataToTagzh(r,a)  r=(GET_TAG(((StgClosure *)a)->header.info))  --  Note: argument may be tagged!  emitPrimOp [res] DataToTagOp [arg] -   = emit (mkAssign (CmmLocal res) (getConstrTag (cmmUntag arg))) +   = emitAssign (CmmLocal res) (getConstrTag (cmmUntag arg))  {- Freezing arrays-of-ptrs requires changing an info table, for the     benefit of the generational collector.  It needs to scavenge mutable @@ -317,7 +314,7 @@ emitPrimOp [res] UnsafeFreezeArrayArrayOp [arg]  --  #define unsafeFreezzeByteArrayzh(r,a)	r=(a)  emitPrimOp [res] UnsafeFreezeByteArrayOp [arg] -   = emit (mkAssign (CmmLocal res) arg) +   = emitAssign (CmmLocal res) arg  -- Copying pointer arrays @@ -497,11 +494,11 @@ emitPrimOp [res] PopCntOp [w] = emitPopCntCall res w wordWidth  -- The rest just translate straightforwardly  emitPrimOp [res] op [arg]     | nopOp op -   = emit (mkAssign (CmmLocal res) arg) +   = emitAssign (CmmLocal res) arg     | Just (mop,rep) <- narrowOp op -   = emit (mkAssign (CmmLocal res) $ -	   CmmMachOp (mop rep wordWidth) [CmmMachOp (mop wordWidth rep) [arg]]) +   = emitAssign (CmmLocal res) $ +           CmmMachOp (mop rep wordWidth) [CmmMachOp (mop wordWidth rep) [arg]]  emitPrimOp r@[res] op args     | Just prim <- callishOp op @@ -746,15 +743,15 @@ loadArrPtrsSize addr = CmmLoad (cmmOffsetB addr off) bWord  mkBasicIndexedRead :: ByteOff -> Maybe MachOp -> CmmType  		   -> LocalReg -> CmmExpr -> CmmExpr -> FCode ()  mkBasicIndexedRead off Nothing read_rep res base idx -   = emit (mkAssign (CmmLocal res) (cmmLoadIndexOffExpr off read_rep base idx)) +   = emitAssign (CmmLocal res) (cmmLoadIndexOffExpr off read_rep base idx)  mkBasicIndexedRead off (Just cast) read_rep res base idx -   = emit (mkAssign (CmmLocal res) (CmmMachOp cast [ -				cmmLoadIndexOffExpr off read_rep base idx])) +   = emitAssign (CmmLocal res) (CmmMachOp cast [ +                                cmmLoadIndexOffExpr off read_rep base idx])  mkBasicIndexedWrite :: ByteOff -> Maybe MachOp  		   -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()  mkBasicIndexedWrite off Nothing base idx val -   = emit (mkStore (cmmIndexOffExpr off (typeWidth (cmmExprType val)) base idx) val) +   = emitStore (cmmIndexOffExpr off (typeWidth (cmmExprType val)) base idx) val  mkBasicIndexedWrite off (Just cast) base idx val     = mkBasicIndexedWrite off Nothing base idx (CmmMachOp cast [val]) @@ -805,7 +802,7 @@ doCopyMutableByteArrayOp = emitCopyByteArray copy              getCode $ emitMemmoveCall dst_p src_p bytes (CmmLit (mkIntCLit 1)),              getCode $ emitMemcpyCall  dst_p src_p bytes (CmmLit (mkIntCLit 1))              ] -        emit $ mkCmmIfThenElse (cmmEqWord src dst) moveCall cpyCall +        emit =<< mkCmmIfThenElse (cmmEqWord src dst) moveCall cpyCall  emitCopyByteArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr                        -> FCode ()) @@ -875,7 +872,7 @@ doCopyMutableArrayOp = emitCopyArray copy              getCode $ emitMemmoveCall dst_p src_p bytes (CmmLit (mkIntCLit wORD_SIZE)),              getCode $ emitMemcpyCall  dst_p src_p bytes (CmmLit (mkIntCLit wORD_SIZE))              ] -        emit $ mkCmmIfThenElse (cmmEqWord src dst) moveCall cpyCall +        emit =<< mkCmmIfThenElse (cmmEqWord src dst) moveCall cpyCall  emitCopyArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr                    -> FCode ()) diff --git a/compiler/codeGen/StgCmmProf.hs b/compiler/codeGen/StgCmmProf.hs index 6a53317385..9ff4d0be07 100644 --- a/compiler/codeGen/StgCmmProf.hs +++ b/compiler/codeGen/StgCmmProf.hs @@ -103,7 +103,7 @@ initUpdFrameProf :: CmmExpr -> FCode ()  -- Initialise the profiling field of an update frame  initUpdFrameProf frame_amode     = ifProfiling $	-- frame->header.prof.ccs = CCCS -    emit (mkStore (cmmOffsetB frame_amode oFFSET_StgHeader_ccs) curCCS) +    emitStore (cmmOffsetB frame_amode oFFSET_StgHeader_ccs) curCCS  	-- frame->header.prof.hp.rs = NULL (or frame-header.prof.hp.ldvw = 0)   	-- is unnecessary because it is not used anyhow. @@ -143,7 +143,7 @@ saveCurrentCostCentre    = return Nothing    | otherwise    = do	{ local_cc <- newTemp ccType -	; emit (mkAssign (CmmLocal local_cc) curCCS) +        ; emitAssign (CmmLocal local_cc) curCCS  	; return (Just local_cc) }  restoreCurrentCostCentre :: Maybe LocalReg -> FCode () @@ -338,9 +338,9 @@ ldvEnter cl_ptr       -- if (era > 0) {       --    LDVW((c)) = (LDVW((c)) & LDV_CREATE_MASK) |       --                era | LDV_STATE_USE } -    emit (mkCmmIfThenElse (CmmMachOp mo_wordUGt [loadEra, CmmLit zeroCLit]) +    emit =<< mkCmmIfThenElse (CmmMachOp mo_wordUGt [loadEra, CmmLit zeroCLit])  		(mkStore ldv_wd new_ldv_wd) -		mkNop) +                mkNop    where          -- don't forget to substract node's tag      ldv_wd = ldvWord cl_ptr diff --git a/compiler/codeGen/StgCmmTicky.hs b/compiler/codeGen/StgCmmTicky.hs index d0432315ab..698bf32709 100644 --- a/compiler/codeGen/StgCmmTicky.hs +++ b/compiler/codeGen/StgCmmTicky.hs @@ -180,7 +180,7 @@ registerTickyCtr :: CLabel -> FCode ()  --	    ticky_entry_ctrs = & (f_ct);	/* mark it as "registered" */  --	    f_ct.registeredp = 1 }  registerTickyCtr ctr_lbl -  = emit (mkCmmIfThen test (catAGraphs register_stmts)) +  = emit =<< mkCmmIfThen test (catAGraphs register_stmts)    where      -- krc: code generator doesn't handle Not, so we test for Eq 0 instead      test = CmmMachOp (MO_Eq wordWidth) @@ -352,7 +352,7 @@ bumpHistogram _lbl _n  bumpHistogramE :: LitString -> CmmExpr -> FCode ()  bumpHistogramE lbl n     = do  t <- newTemp cLong -	emit (mkAssign (CmmLocal t) n) +        emitAssign (CmmLocal t) n  	emit (mkCmmIfThen (CmmMachOp (MO_U_Le cLongWidth) [CmmReg (CmmLocal t), eight])  	  		  (mkAssign (CmmLocal t) eight))  	emit (addToMem cLong diff --git a/compiler/codeGen/StgCmmUtils.hs b/compiler/codeGen/StgCmmUtils.hs index bb4a653c05..7609cfe38d 100644 --- a/compiler/codeGen/StgCmmUtils.hs +++ b/compiler/codeGen/StgCmmUtils.hs @@ -18,12 +18,11 @@ module StgCmmUtils (  	emitDataLits, mkDataLits,          emitRODataLits, mkRODataLits,          emitRtsCall, emitRtsCallWithVols, emitRtsCallWithResult, emitRtsCallGen, -        assignTemp, newTemp, withTemp, +        assignTemp, newTemp,  	newUnboxedTupleRegs, -	mkMultiAssign, mkCmmSwitch, mkCmmLitSwitch, -	emitSwitch, +        emitMultiAssign, emitCmmLitSwitch, emitSwitch,  	tagToClosure, mkTaggedObjectLoad, @@ -72,6 +71,7 @@ import Module  import Literal  import Digraph  import ListSetOps +import VarSet  import Util  import Unique  import DynFlags @@ -204,14 +204,14 @@ emitRtsCallGen  emitRtsCallGen res pkg fun args _vols safe    = do { updfr_off <- getUpdFrameOff         ; emit caller_save -       ; emit $ call updfr_off +       ; call updfr_off         ; emit caller_load }    where      call updfr_off =        if safe then -        mkCmmCall fun_expr res' args' updfr_off +        emit =<< mkCmmCall fun_expr res' args' updfr_off        else -        mkUnsafeCall (ForeignTarget fun_expr +        emit $ mkUnsafeCall (ForeignTarget fun_expr                           (ForeignConvention CCallConv arg_hints res_hints)) res' args'      (args', arg_hints) = unzip args      (res',  res_hints) = unzip res @@ -441,7 +441,7 @@ assignTemp :: CmmExpr -> FCode LocalReg  assignTemp (CmmReg (CmmLocal reg)) = return reg  assignTemp e = do { uniq <- newUnique  		  ; let reg = LocalReg uniq (cmmExprType e) -		  ; emit (mkAssign (CmmLocal reg) e) +                  ; emitAssign (CmmLocal reg) e  		  ; return reg }  newTemp :: CmmType -> FCode LocalReg @@ -471,10 +471,10 @@ newUnboxedTupleRegs res_ty  ------------------------------------------------------------------------- ---	mkMultiAssign +--      emitMultiAssign  ------------------------------------------------------------------------- -mkMultiAssign :: [LocalReg] -> [CmmExpr] -> CmmAGraph +emitMultiAssign :: [LocalReg] -> [CmmExpr] -> FCode ()  -- Emit code to perform the assignments in the  -- input simultaneously, using temporary variables when necessary. @@ -489,14 +489,13 @@ type Stmt = (LocalReg, CmmExpr)	-- r := e  --		s1 assigns to something s2 uses  --	  that is, if s1 should *follow* s2 in the final order -mkMultiAssign []    []    = mkNop -mkMultiAssign [reg] [rhs] = mkAssign (CmmLocal reg) rhs -mkMultiAssign regs  rhss  = ASSERT( equalLength regs rhss ) -			    unscramble ([1..] `zip` (regs `zip` rhss)) +emitMultiAssign []    []    = return () +emitMultiAssign [reg] [rhs] = emitAssign (CmmLocal reg) rhs +emitMultiAssign regs  rhss  = ASSERT( equalLength regs rhss ) +                              unscramble ([1..] `zip` (regs `zip` rhss)) -unscramble :: [Vrtx] -> CmmAGraph -unscramble vertices -  = catAGraphs (map do_component components) +unscramble :: [Vrtx] -> FCode () +unscramble vertices = mapM_ do_component components    where  	edges :: [ (Vrtx, Key, [Key]) ]  	edges = [ (vertex, key1, edges_from stmt1) @@ -511,19 +510,19 @@ unscramble vertices  	-- do_components deal with one strongly-connected component  	-- Not cyclic, or singleton?  Just do it -	do_component :: SCC Vrtx -> CmmAGraph -	do_component (AcyclicSCC (_,stmt))  = mk_graph stmt +        do_component :: SCC Vrtx -> FCode () +        do_component (AcyclicSCC (_,stmt))  = mk_graph stmt  	do_component (CyclicSCC []) 	    = panic "do_component"  	do_component (CyclicSCC [(_,stmt)]) = mk_graph stmt  		-- Cyclic?  Then go via temporaries.  Pick one to  		-- break the loop and try again with the rest. -	do_component (CyclicSCC ((_,first_stmt) : rest)) -	  = withUnique 		$ \u ->  +        do_component (CyclicSCC ((_,first_stmt) : rest)) = do +            u <- newUnique  	    let (to_tmp, from_tmp) = split u first_stmt -	    in mk_graph to_tmp -	       <*> unscramble rest -	       <*> mk_graph from_tmp +            mk_graph to_tmp +            unscramble rest +            mk_graph from_tmp  	split :: Unique -> Stmt -> (Stmt, Stmt)  	split uniq (reg, rhs) @@ -532,8 +531,8 @@ unscramble vertices  	    rep = cmmExprType rhs  	    tmp = LocalReg uniq rep -	mk_graph :: Stmt -> CmmAGraph -	mk_graph (reg, rhs) = mkAssign (CmmLocal reg) rhs +        mk_graph :: Stmt -> FCode () +        mk_graph (reg, rhs) = emitAssign (CmmLocal reg) rhs  mustFollow :: Stmt -> Stmt -> Bool  (reg, _) `mustFollow` (_, rhs) = CmmLocal reg `regUsedIn` rhs @@ -551,7 +550,7 @@ emitSwitch :: CmmExpr  		-- Tag to switch on  	   -> FCode ()  emitSwitch tag_expr branches mb_deflt lo_tag hi_tag    = do	{ dflags <- getDynFlags -	; emit (mkCmmSwitch (via_C dflags) tag_expr branches mb_deflt lo_tag hi_tag) } +        ; mkCmmSwitch (via_C dflags) tag_expr branches mb_deflt lo_tag hi_tag }    where      via_C dflags | HscC <- hscTarget dflags = True  		 | otherwise                = False @@ -563,38 +562,40 @@ mkCmmSwitch :: Bool			-- True <=> never generate a conditional tree  	    -> Maybe CmmAGraph	    	-- Default branch (if any)  	    -> ConTagZ -> ConTagZ	-- Min and Max possible values; behaviour  	    			        -- 	outside this range is undefined -	    -> CmmAGraph +            -> FCode ()  -- First, two rather common cases in which there is no work to do -mkCmmSwitch _ _ []         (Just code) _ _ = code -mkCmmSwitch _ _ [(_,code)] Nothing     _ _ = code +mkCmmSwitch _ _ []         (Just code) _ _ = emit code +mkCmmSwitch _ _ [(_,code)] Nothing     _ _ = emit code  -- Right, off we go -mkCmmSwitch via_C tag_expr branches mb_deflt lo_tag hi_tag -  = withFreshLabel "switch join" 	$ \ join_lbl -> -    label_default join_lbl mb_deflt	$ \ mb_deflt -> -    label_branches join_lbl branches	$ \ branches -> -    assignTemp' tag_expr		$ \tag_expr' ->  +mkCmmSwitch via_C tag_expr branches mb_deflt lo_tag hi_tag = do +    join_lbl      <- newLabelC +    mb_deflt_lbl  <- label_default join_lbl mb_deflt +    branches_lbls <- label_branches join_lbl branches +    tag_expr'     <- assignTemp' tag_expr -    mk_switch tag_expr' (sortBy (comparing fst) branches) mb_deflt  -	      lo_tag hi_tag via_C -	  -- Sort the branches before calling mk_switch -    <*> mkLabel join_lbl +    emit =<< mk_switch tag_expr' (sortBy (comparing fst) branches) mb_deflt_lbl +                lo_tag hi_tag via_C + +          -- Sort the branches before calling mk_switch + +    emitLabel join_lbl  mk_switch :: CmmExpr -> [(ConTagZ, BlockId)]  	  -> Maybe BlockId   	  -> ConTagZ -> ConTagZ -> Bool -	  -> CmmAGraph +          -> FCode CmmAGraph  -- SINGLETON TAG RANGE: no case analysis to do  mk_switch _tag_expr [(tag, lbl)] _ lo_tag hi_tag _via_C    | lo_tag == hi_tag    = ASSERT( tag == lo_tag ) -    mkBranch lbl +    return (mkBranch lbl)  -- SINGLETON BRANCH, NO DEFAULT: no case analysis to do  mk_switch _tag_expr [(_tag,lbl)] Nothing _ _ _ -  = mkBranch lbl +  = return (mkBranch lbl)  	-- The simplifier might have eliminated a case  	-- 	 so we may have e.g. case xs of   	--				 [] -> e @@ -603,7 +604,7 @@ mk_switch _tag_expr [(_tag,lbl)] Nothing _ _ _  -- SINGLETON BRANCH: one equality check to do  mk_switch tag_expr [(tag,lbl)] (Just deflt) _ _ _ -  = mkCbranch cond deflt lbl +  = return (mkCbranch cond deflt lbl)    where      cond =  cmmNeWord tag_expr (CmmLit (mkIntCLit tag))  	-- We have lo_tag < hi_tag, but there's only one branch,  @@ -636,30 +637,34 @@ mk_switch tag_expr branches mb_deflt lo_tag hi_tag via_C  	arms :: [Maybe BlockId]  	arms = [ find_branch i | i <- [real_lo_tag..real_hi_tag]]      in -    mkSwitch (cmmOffset tag_expr (- real_lo_tag)) arms +    return (mkSwitch (cmmOffset tag_expr (- real_lo_tag)) arms)    -- if we can knock off a bunch of default cases with one if, then do so    | Just deflt <- mb_deflt, (lowest_branch - lo_tag) >= n_branches -  = mkCmmIfThenElse  +  = do stmts <- mk_switch tag_expr branches mb_deflt +                        lowest_branch hi_tag via_C +       mkCmmIfThenElse  	(cmmULtWord tag_expr (CmmLit (mkIntCLit lowest_branch)))  	(mkBranch deflt) -	(mk_switch tag_expr branches mb_deflt  -			lowest_branch hi_tag via_C) +        stmts    | Just deflt <- mb_deflt, (hi_tag - highest_branch) >= n_branches -  = mkCmmIfThenElse  +  = do stmts <- mk_switch tag_expr branches mb_deflt +                        lo_tag highest_branch via_C +       mkCmmIfThenElse  	(cmmUGtWord tag_expr (CmmLit (mkIntCLit highest_branch)))  	(mkBranch deflt) -	(mk_switch tag_expr branches mb_deflt  -			lo_tag highest_branch via_C) +        stmts    | otherwise	-- Use an if-tree -  = mkCmmIfThenElse  +  = do lo_stmts <- mk_switch tag_expr lo_branches mb_deflt +                             lo_tag (mid_tag-1) via_C +       hi_stmts <- mk_switch tag_expr hi_branches mb_deflt +                             mid_tag hi_tag via_C +       mkCmmIfThenElse  	(cmmUGeWord tag_expr (CmmLit (mkIntCLit mid_tag))) -	(mk_switch tag_expr hi_branches mb_deflt  -			     mid_tag hi_tag via_C) -	(mk_switch tag_expr lo_branches mb_deflt  -			     lo_tag (mid_tag-1) via_C) +        hi_stmts +        lo_stmts  	-- we test (e >= mid_tag) rather than (e < mid_tag), because  	-- the former works better when e is a comparison, and there  	-- are two tags 0 & 1 (mid_tag == 1).  In this case, the code @@ -714,30 +719,31 @@ mk_switch tag_expr branches mb_deflt lo_tag hi_tag via_C      is_lo (t,_) = t < mid_tag  -------------- -mkCmmLitSwitch :: CmmExpr		  -- Tag to switch on +emitCmmLitSwitch :: CmmExpr               -- Tag to switch on  	       -> [(Literal, CmmAGraph)]  -- Tagged branches  	       -> CmmAGraph		  -- Default branch (always) -	       -> CmmAGraph		  -- Emit the code +               -> FCode ()                -- Emit the code  -- Used for general literals, whose size might not be a word,   -- where there is always a default case, and where we don't know  -- the range of values for certain.  For simplicity we always generate a tree.  --  -- ToDo: for integers we could do better here, perhaps by generalising  -- mk_switch and using that.  --SDM 15/09/2004 -mkCmmLitSwitch _scrut []       deflt = deflt -mkCmmLitSwitch scrut  branches deflt -  = assignTemp' scrut		$ \ scrut' -> -    withFreshLabel "switch join" 	$ \ join_lbl -> -    label_code join_lbl deflt		$ \ deflt -> -    label_branches join_lbl branches	$ \ branches -> -    mk_lit_switch scrut' deflt (sortBy (comparing fst) branches) -    <*> mkLabel join_lbl +emitCmmLitSwitch _scrut []       deflt = emit deflt +emitCmmLitSwitch scrut  branches deflt = do +    scrut' <- assignTemp' scrut +    join_lbl <- newLabelC +    deflt_lbl <- label_code join_lbl deflt +    branches_lbls <- label_branches join_lbl branches +    emit =<< mk_lit_switch scrut' deflt_lbl +               (sortBy (comparing fst) branches_lbls) +    emitLabel join_lbl  mk_lit_switch :: CmmExpr -> BlockId    	      -> [(Literal,BlockId)] -	      -> CmmAGraph +              -> FCode CmmAGraph  mk_lit_switch scrut deflt [(lit,blk)]  -  = mkCbranch (CmmMachOp ne [scrut, CmmLit cmm_lit]) deflt blk +  = return (mkCbranch (CmmMachOp ne [scrut, CmmLit cmm_lit]) deflt blk)    where      cmm_lit = mkSimpleLit lit      cmm_ty  = cmmLitType cmm_lit @@ -745,9 +751,9 @@ mk_lit_switch scrut deflt [(lit,blk)]      ne      = if isFloatType cmm_ty then MO_F_Ne rep else MO_Ne rep  mk_lit_switch scrut deflt_blk_id branches -  = mkCmmIfThenElse cond -	(mk_lit_switch scrut deflt_blk_id lo_branches) -	(mk_lit_switch scrut deflt_blk_id hi_branches) +  = do lo_blk <- mk_lit_switch scrut deflt_blk_id lo_branches +       hi_blk <- mk_lit_switch scrut deflt_blk_id hi_branches +       mkCmmIfThenElse cond lo_blk hi_blk    where      n_branches = length branches      (mid_lit,_) = branches !! (n_branches `div` 2) @@ -761,49 +767,42 @@ mk_lit_switch scrut deflt_blk_id branches  -------------- -label_default :: BlockId -> Maybe CmmAGraph -	      -> (Maybe BlockId -> CmmAGraph) -	      -> CmmAGraph -label_default _ Nothing thing_inside  -  = thing_inside Nothing -label_default join_lbl (Just code) thing_inside  -  = label_code join_lbl code 	$ \ lbl -> -    thing_inside (Just lbl) +label_default :: BlockId -> Maybe CmmAGraph -> FCode (Maybe BlockId) +label_default _ Nothing +  = return  Nothing +label_default join_lbl (Just code) +  = do lbl <- label_code join_lbl code +       return (Just lbl)  -------------- -label_branches :: BlockId -> [(a,CmmAGraph)] -	       -> ([(a,BlockId)] -> CmmAGraph)  -	       -> CmmAGraph -label_branches _join_lbl [] thing_inside  -  = thing_inside [] -label_branches join_lbl ((tag,code):branches) thing_inside -  = label_code join_lbl code		$ \ lbl -> -    label_branches join_lbl branches 	$ \ branches' -> -    thing_inside ((tag,lbl):branches') +label_branches :: BlockId -> [(a,CmmAGraph)] -> FCode [(a,BlockId)] +label_branches _join_lbl [] +  = return [] +label_branches join_lbl ((tag,code):branches) +  = do lbl <- label_code join_lbl code +       branches' <- label_branches join_lbl branches +       return ((tag,lbl):branches')  -------------- -label_code :: BlockId -> CmmAGraph -> (BlockId -> CmmAGraph) -> CmmAGraph --- (label_code J code fun) +label_code :: BlockId -> CmmAGraph -> FCode BlockId +--  label_code J code  --	generates ---  [L: code; goto J] fun L -label_code join_lbl code thing_inside -  = withFreshLabel "switch" 	$ \lbl ->  -    outOfLine (mkLabel lbl <*> code <*> mkBranch join_lbl) -    <*> thing_inside lbl -  +--  [L: code; goto J] +-- and returns L +label_code join_lbl code = do +    lbl <- newLabelC +    emitOutOfLine lbl (code <*> mkBranch join_lbl) +    return lbl  -------------- -assignTemp' :: CmmExpr -> (CmmExpr -> CmmAGraph) -> CmmAGraph -assignTemp' e thing_inside -  | isTrivialCmmExpr e = thing_inside e -  | otherwise          = withTemp (cmmExprType e)	$ \ lreg -> -			 let reg = CmmLocal lreg in  -			 mkAssign reg e <*> thing_inside (CmmReg reg) - -withTemp :: CmmType -> (LocalReg -> CmmAGraph) -> CmmAGraph -withTemp rep thing_inside -  = withUnique $ \uniq -> thing_inside (LocalReg uniq rep) - +assignTemp' :: CmmExpr -> FCode CmmExpr +assignTemp' e +  | isTrivialCmmExpr e = return e +  | otherwise = do +       lreg <- newTemp (cmmExprType e) +       let reg = CmmLocal lreg +       emitAssign reg e +       return (CmmReg reg)  -------------------------------------------------------------------------  -- @@ -811,36 +810,13 @@ withTemp rep thing_inside  --  ------------------------------------------------------------------------- --- There is just one SRT for each top level binding; all the nested --- bindings use sub-sections of this SRT.  The label is passed down to --- the nested bindings via the monad. - -getSRTInfo :: SRT -> FCode C_SRT -getSRTInfo (SRTEntries {}) = panic "getSRTInfo" - -getSRTInfo (SRT off len bmp) -  | len > hALF_WORD_SIZE_IN_BITS || bmp == [fromIntegral srt_escape] -  = do 	{ id <- newUnique -	-- ; top_srt <- getSRTLabel -        ; let srt_desc_lbl = mkLargeSRTLabel id -        -- JD: We're not constructing and emitting SRTs in the back end, -        -- which renders this code wrong (it now names a now-non-existent label). -	-- ; emitRODataLits srt_desc_lbl -        --      ( cmmLabelOffW top_srt off -	--        : mkWordCLit (fromIntegral len) -	--        : map mkWordCLit bmp) -	; return (C_SRT srt_desc_lbl 0 srt_escape) } - -  | otherwise -  = do	{ top_srt <- getSRTLabel -	; return (C_SRT top_srt off (fromIntegral (head bmp))) } -	-- The fromIntegral converts to StgHalfWord - -getSRTInfo NoSRT  -  = -- TODO: Should we panic in this case? -    -- Someone obviously thinks there should be an SRT -    return NoC_SRT - +-- | Returns 'True' if there is a non-empty SRT, or 'False' otherwise +-- NB. the SRT attached to an StgBind is still used in the new codegen +-- to decide whether we need a static link field on a static closure +-- or not. +getSRTInfo :: SRT -> FCode Bool +getSRTInfo (SRTEntries vs) = return (not (isEmptyVarSet vs)) +getSRTInfo _               = return False  srt_escape :: StgHalfWord  srt_escape = -1 diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in index 4371bca95e..0969f5b078 100644 --- a/compiler/ghc.cabal.in +++ b/compiler/ghc.cabal.in @@ -185,13 +185,13 @@ Library          CmmOpt          CmmParse          CmmProcPoint -        CmmSpillReload          CmmRewriteAssignments -        CmmStackLayout          CmmType          CmmUtils +        CmmLayoutStack          MkGraph          OldCmm +        OldCmmLint          OldCmmUtils          OldPprCmm          OptimizationFuel @@ -440,6 +440,7 @@ Library          Pretty          Serialized          State +        Stream          StringBuffer          UniqFM          UniqSet @@ -473,6 +474,8 @@ Library          Vectorise.Env          Vectorise.Exp          Vectorise +        Hoopl.Dataflow +        Hoopl      Exposed-Modules:              AsmCodeGen diff --git a/compiler/ghc.mk b/compiler/ghc.mk index f541841046..1ea6159812 100644 --- a/compiler/ghc.mk +++ b/compiler/ghc.mk @@ -355,7 +355,7 @@ ifeq "$(GhcProfiled)" "YES"  # parts of the compiler of interest, and then add further cost centres  # as necessary.  Turn on -auto-all for individual modules like this: -compiler/main/DriverPipeline_HC_OPTS += -auto-all +# compiler/main/DriverPipeline_HC_OPTS += -auto-all  compiler/main/GhcMake_HC_OPTS        += -auto-all  compiler/main/GHC_HC_OPTS            += -auto-all diff --git a/compiler/main/CodeOutput.lhs b/compiler/main/CodeOutput.lhs index 24906671cd..b4d6371a5d 100644 --- a/compiler/main/CodeOutput.lhs +++ b/compiler/main/CodeOutput.lhs @@ -14,14 +14,16 @@ import LlvmCodeGen ( llvmCodeGen )  import UniqSupply       ( mkSplitUniqSupply )  import Finder           ( mkStubPaths ) -import PprC             ( writeCs ) -import CmmLint          ( cmmLint ) +import PprC		( writeCs ) +import OldCmmLint       ( cmmLint )  import Packages  import OldCmm           ( RawCmmGroup )  import HscTypes  import DynFlags  import Config  import SysTools +import Stream           (Stream) +import qualified Stream  import ErrUtils  import Outputable @@ -45,32 +47,39 @@ import System.IO  \begin{code}  codeOutput :: DynFlags             -> Module -           -> ModLocation -           -> ForeignStubs -           -> [PackageId] -           -> [RawCmmGroup]                       -- Compiled C-- +	   -> ModLocation +	   -> ForeignStubs +	   -> [PackageId] +           -> Stream IO RawCmmGroup ()                       -- Compiled C--             -> IO (Bool{-stub_h_exists-}, Maybe FilePath{-stub_c_exists-}) -codeOutput dflags this_mod location foreign_stubs pkg_deps flat_abstractC +codeOutput dflags this_mod location foreign_stubs pkg_deps cmm_stream    =  -    do  { when (dopt Opt_DoCmmLinting dflags) $ do +    do  { +        -- Lint each CmmGroup as it goes past +        ; let linted_cmm_stream = +                 if dopt Opt_DoCmmLinting dflags +                    then Stream.mapM do_lint cmm_stream +                    else cmm_stream + +              do_lint cmm = do                  { showPass dflags "CmmLint" -                ; let lints = map (cmmLint (targetPlatform dflags)) flat_abstractC -                ; case firstJusts lints of +                ; case cmmLint (targetPlatform dflags) cmm of                          Just err -> do { log_action dflags dflags SevDump noSrcSpan defaultDumpStyle err -                                       ; ghcExit dflags 1 -                                       } -                        Nothing  -> return () +				       ; ghcExit dflags 1 +				       } +			Nothing  -> return () +                ; return cmm                  } -        ; showPass dflags "CodeOutput" -        ; let filenm = hscOutName dflags  -        ; stubs_exist <- outputForeignStubs dflags this_mod location foreign_stubs -        ; case hscTarget dflags of { +	; showPass dflags "CodeOutput" +	; let filenm = hscOutName dflags  +	; stubs_exist <- outputForeignStubs dflags this_mod location foreign_stubs +	; case hscTarget dflags of {               HscInterpreted -> return (); -             HscAsm         -> outputAsm dflags filenm flat_abstractC; -             HscC           -> outputC dflags filenm flat_abstractC pkg_deps; -             HscLlvm        -> outputLlvm dflags filenm flat_abstractC; +             HscAsm         -> outputAsm dflags filenm linted_cmm_stream; +             HscC           -> outputC dflags filenm linted_cmm_stream pkg_deps; +             HscLlvm        -> outputLlvm dflags filenm linted_cmm_stream;               HscNothing     -> panic "codeOutput: HscNothing"            }          ; return stubs_exist @@ -90,12 +99,16 @@ doOutput filenm io_action = bracket (openFile filenm WriteMode) hClose io_action  \begin{code}  outputC :: DynFlags          -> FilePath -        -> [RawCmmGroup] +        -> Stream IO RawCmmGroup ()          -> [PackageId]          -> IO () -outputC dflags filenm flat_absC packages +outputC dflags filenm cmm_stream packages    = do  +       -- ToDo: make the C backend consume the C-- incrementally, by +       -- pushing the cmm_stream inside (c.f. nativeCodeGen) +       rawcmms <- Stream.collect cmm_stream +         -- figure out which header files to #include in the generated .hc file:         --         --   * extra_includes from packages @@ -116,8 +129,8 @@ outputC dflags filenm flat_absC packages         doOutput filenm $ \ h -> do            hPutStr h ("/* GHC_PACKAGES " ++ unwords pkg_names ++ "\n*/\n") -          hPutStr h cc_injects -          writeCs dflags h flat_absC +	  hPutStr h cc_injects +          writeCs dflags h rawcmms  \end{code} @@ -128,14 +141,14 @@ outputC dflags filenm flat_absC packages  %************************************************************************  \begin{code} -outputAsm :: DynFlags -> FilePath -> [RawCmmGroup] -> IO () -outputAsm dflags filenm flat_absC +outputAsm :: DynFlags -> FilePath -> Stream IO RawCmmGroup () -> IO () +outputAsm dflags filenm cmm_stream   | cGhcWithNativeCodeGen == "YES"    = do ncg_uniqs <- mkSplitUniqSupply 'n'         {-# SCC "OutputAsm" #-} doOutput filenm $             \f -> {-# SCC "NativeCodeGen" #-} -                 nativeCodeGen dflags f ncg_uniqs flat_absC +                 nativeCodeGen dflags f ncg_uniqs cmm_stream   | otherwise    = panic "This compiler was built without a native code generator" @@ -149,12 +162,17 @@ outputAsm dflags filenm flat_absC  %************************************************************************  \begin{code} -outputLlvm :: DynFlags -> FilePath -> [RawCmmGroup] -> IO () -outputLlvm dflags filenm flat_absC +outputLlvm :: DynFlags -> FilePath -> Stream IO RawCmmGroup () -> IO () +outputLlvm dflags filenm cmm_stream    = do ncg_uniqs <- mkSplitUniqSupply 'n' + +       -- ToDo: make the LLVM backend consume the C-- incrementally, +       -- by pushing the cmm_stream inside (c.f. nativeCodeGen) +       rawcmms <- Stream.collect cmm_stream +         {-# SCC "llvm_output" #-} doOutput filenm $             \f -> {-# SCC "llvm_CodeGen" #-} -                 llvmCodeGen dflags f ncg_uniqs flat_absC +                 llvmCodeGen dflags f ncg_uniqs rawcmms  \end{code} diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs index 53aa39f04e..60b6e82bb7 100644 --- a/compiler/main/DynFlags.hs +++ b/compiler/main/DynFlags.hs @@ -164,9 +164,9 @@ data DynFlag     = Opt_D_dump_cmm     | Opt_D_dump_raw_cmm     | Opt_D_dump_cmmz -   | Opt_D_dump_cmmz_pretty     -- All of the cmmz subflags (there are a lot!)  Automatically     -- enabled if you run -ddump-cmmz +   | Opt_D_dump_cmmz_cfg     | Opt_D_dump_cmmz_cbe     | Opt_D_dump_cmmz_proc     | Opt_D_dump_cmmz_spills @@ -1675,7 +1675,7 @@ dynamic_flags = [    , Flag "ddump-cmm"               (setDumpFlag Opt_D_dump_cmm)    , Flag "ddump-raw-cmm"           (setDumpFlag Opt_D_dump_raw_cmm)    , Flag "ddump-cmmz"              (setDumpFlag Opt_D_dump_cmmz) -  , Flag "ddump-cmmz-pretty"       (setDumpFlag Opt_D_dump_cmmz_pretty) +  , Flag "ddump-cmmz-cfg"          (setDumpFlag Opt_D_dump_cmmz_cbe)    , Flag "ddump-cmmz-cbe"          (setDumpFlag Opt_D_dump_cmmz_cbe)    , Flag "ddump-cmmz-spills"       (setDumpFlag Opt_D_dump_cmmz_spills)    , Flag "ddump-cmmz-proc"         (setDumpFlag Opt_D_dump_cmmz_proc) diff --git a/compiler/main/HscMain.hs b/compiler/main/HscMain.hs index 562332d52a..000c9ead31 100644 --- a/compiler/main/HscMain.hs +++ b/compiler/main/HscMain.hs @@ -119,7 +119,8 @@ import TyCon  import Name  import SimplStg         ( stg2stg )  import CodeGen          ( codeGen ) -import OldCmm as Old    ( CmmGroup ) +import qualified OldCmm as Old +import qualified Cmm as New  import PprCmm           ( pprCmms )  import CmmParse         ( parseCmmFile )  import CmmBuildInfoTables @@ -147,6 +148,10 @@ import UniqFM           ( emptyUFM )  import UniqSupply       ( initUs_ )  import Bag  import Exception +import qualified Stream +import Stream (Stream) + +import CLabel  import Util  import Data.List @@ -1276,20 +1281,27 @@ hscGenHardCode cgguts mod_summary = do                               cost_centre_info                               stg_binds hpc_info                      else {-# SCC "CodeGen" #-} -                         codeGen dflags this_mod data_tycons -                             cost_centre_info -                             stg_binds hpc_info +                         return (codeGen dflags this_mod data_tycons +                               cost_centre_info +                               stg_binds hpc_info) +          ------------------  Code output ----------------------- -        rawcmms <- {-# SCC "cmmToRawCmm" #-} +        rawcmms0 <- {-# SCC "cmmToRawCmm" #-}                     cmmToRawCmm platform cmms -        dumpIfSet_dyn dflags Opt_D_dump_raw_cmm "Raw Cmm" (ppr rawcmms) + +        let dump a = do dumpIfSet_dyn dflags Opt_D_dump_raw_cmm "Raw Cmm" +                           (ppr a) +                        return a +            rawcmms1 = Stream.mapM dump rawcmms0 +          (_stub_h_exists, stub_c_exists)              <- {-# SCC "codeOutput" #-}                 codeOutput dflags this_mod location foreign_stubs -               dependencies rawcmms +               dependencies rawcmms1          return stub_c_exists +  hscInteractive :: (ModIface, ModDetails, CgGuts)                 -> ModSummary                 -> Hsc (InteractiveStatus, ModIface, ModDetails) @@ -1335,7 +1347,7 @@ hscCompileCmmFile hsc_env filename = runHsc hsc_env $ do      let dflags = hsc_dflags hsc_env      cmm <- ioMsgMaybe $ parseCmmFile dflags filename      liftIO $ do -        rawCmms <- cmmToRawCmm (targetPlatform dflags) [cmm] +        rawCmms <- cmmToRawCmm (targetPlatform dflags) (Stream.yield cmm)          _ <- codeOutput dflags no_mod no_loc NoStubs [] rawCmms          return ()    where @@ -1350,24 +1362,52 @@ tryNewCodeGen   :: HscEnv -> Module -> [TyCon]                  -> CollectedCCs                  -> [(StgBinding,[(Id,[Id])])]                  -> HpcInfo -                -> IO [Old.CmmGroup] +                -> IO (Stream IO Old.CmmGroup ()) +         -- Note we produce a 'Stream' of CmmGroups, so that the +         -- backend can be run incrementally.  Otherwise it generates all +         -- the C-- up front, which has a significant space cost.  tryNewCodeGen hsc_env this_mod data_tycons                cost_centre_info stg_binds hpc_info = do      let dflags = hsc_dflags hsc_env -    prog <- StgCmm.codeGen dflags this_mod data_tycons + +    let cmm_stream :: Stream IO New.CmmGroup () +        cmm_stream = {-# SCC "StgCmm" #-} +            StgCmm.codeGen dflags this_mod data_tycons                             cost_centre_info stg_binds hpc_info -    dumpIfSet_dyn dflags Opt_D_dump_cmmz "Cmm produced by new codegen" -                  (pprCmms prog) + +        -- codegen consumes a stream of CmmGroup, and produces a new +        -- stream of CmmGroup (not necessarily synchronised: one +        -- CmmGroup on input may produce many CmmGroups on output due +        -- to proc-point splitting). + +    let dump1 a = do dumpIfSet_dyn dflags Opt_D_dump_cmmz +                       "Cmm produced by new codegen" (ppr a) +                     return a + +        ppr_stream1 = Stream.mapM dump1 cmm_stream      -- We are building a single SRT for the entire module, so      -- we must thread it through all the procedures as we cps-convert them.      us <- mkSplitUniqSupply 'S'      let initTopSRT = initUs_ us emptySRT -    (topSRT, prog) <- foldM (cmmPipeline hsc_env) (initTopSRT, []) prog -    let prog' = map cmmOfZgraph (srtToData topSRT : prog) -    dumpIfSet_dyn dflags Opt_D_dump_cmmz "Output Cmm" (ppr prog') -    return prog' +    let run_pipeline topSRT cmmgroup = do +           (topSRT, cmmgroup) <- cmmPipeline hsc_env topSRT cmmgroup +           return (topSRT,cmmOfZgraph cmmgroup) + +    let pipeline_stream = {-# SCC "cmmPipeline" #-} do +          topSRT <- Stream.mapAccumL run_pipeline initTopSRT ppr_stream1 +          Stream.yield (cmmOfZgraph (srtToData topSRT)) + +    let +        dump2 a = do dumpIfSet_dyn dflags Opt_D_dump_cmmz "Output Cmm" $ ppr a +                     return a + +        ppr_stream2 = Stream.mapM dump2 pipeline_stream + +    return ppr_stream2 + +  myCoreToStg :: DynFlags -> Module -> CoreProgram              -> IO ( [(StgBinding,[(Id,[Id])])] -- output program diff --git a/compiler/nativeGen/AsmCodeGen.lhs b/compiler/nativeGen/AsmCodeGen.lhs index 51adf46005..732508bffc 100644 --- a/compiler/nativeGen/AsmCodeGen.lhs +++ b/compiler/nativeGen/AsmCodeGen.lhs @@ -71,6 +71,8 @@ import FastString  import UniqSet  import ErrUtils  import Module +import Stream (Stream) +import qualified Stream  -- DEBUGGING ONLY  --import OrdList @@ -147,7 +149,7 @@ data NcgImpl statics instr jumpDest = NcgImpl {      }  -------------------- -nativeCodeGen :: DynFlags -> Handle -> UniqSupply -> [RawCmmGroup] -> IO () +nativeCodeGen :: DynFlags -> Handle -> UniqSupply -> Stream IO RawCmmGroup () -> IO ()  nativeCodeGen dflags h us cmms   = let platform = targetPlatform dflags         nCG' :: (Outputable statics, Outputable instr, Instruction instr) => NcgImpl statics instr jumpDest -> IO () @@ -209,16 +211,16 @@ nativeCodeGen dflags h us cmms  nativeCodeGen' :: (Outputable statics, Outputable instr, Instruction instr)                 => DynFlags                 -> NcgImpl statics instr jumpDest -               -> Handle -> UniqSupply -> [RawCmmGroup] -> IO () +               -> Handle -> UniqSupply -> Stream IO RawCmmGroup () -> IO ()  nativeCodeGen' dflags ncgImpl h us cmms   = do          let platform = targetPlatform dflags -            split_cmms  = concat $ map add_split cmms +            split_cmms  = Stream.map add_split cmms          -- BufHandle is a performance hack.  We could hide it inside          -- Pretty if it weren't for the fact that we do lots of little          -- printDocs here (in order to do codegen in constant space).          bufh <- newBufHandle h -        (imports, prof) <- cmmNativeGens dflags ncgImpl bufh us split_cmms [] [] 0 +        (imports, prof) <- cmmNativeGenStream dflags ncgImpl bufh us split_cmms [] [] 0          bFlush bufh          let (native, colorStats, linearStats) @@ -272,6 +274,34 @@ nativeCodeGen' dflags ncgImpl h us cmms          split_marker = CmmProc Nothing mkSplitMarkerLabel (ListGraph []) +cmmNativeGenStream :: (PlatformOutputable statics, PlatformOutputable instr, Instruction instr) +              => DynFlags +              -> NcgImpl statics instr jumpDest +              -> BufHandle +              -> UniqSupply +              -> Stream IO RawCmmGroup () +              -> [[CLabel]] +              -> [ ([NatCmmDecl statics instr], +                   Maybe [Color.RegAllocStats statics instr], +                   Maybe [Linear.RegAllocStats]) ] +              -> Int +              -> IO ( [[CLabel]], +                      [([NatCmmDecl statics instr], +                      Maybe [Color.RegAllocStats statics instr], +                      Maybe [Linear.RegAllocStats])] ) + +cmmNativeGenStream dflags ncgImpl h us cmm_stream impAcc profAcc count + = do +        r <- Stream.runStream cmm_stream +        case r of +          Left () -> return (reverse impAcc, reverse profAcc) +          Right (cmms, cmm_stream') -> do +            (impAcc,profAcc,us') <- cmmNativeGens dflags ncgImpl h us cmms +                                              impAcc profAcc count +            cmmNativeGenStream dflags ncgImpl h us' cmm_stream' +                                              impAcc profAcc count + +  -- | Do native code generation on all these cmms.  --  cmmNativeGens :: (Outputable statics, Outputable instr, Instruction instr) @@ -287,11 +317,12 @@ cmmNativeGens :: (Outputable statics, Outputable instr, Instruction instr)                -> Int                -> IO ( [[CLabel]],                        [([NatCmmDecl statics instr], -                      Maybe [Color.RegAllocStats statics instr], -                      Maybe [Linear.RegAllocStats])] ) +                       Maybe [Color.RegAllocStats statics instr], +                       Maybe [Linear.RegAllocStats])], +                      UniqSupply ) -cmmNativeGens _ _ _ _ [] impAcc profAcc _ -        = return (reverse impAcc, reverse profAcc) +cmmNativeGens _ _ _ us [] impAcc profAcc _ +        = return (impAcc,profAcc,us)  cmmNativeGens dflags ncgImpl h us (cmm : cmms) impAcc profAcc count   = do @@ -817,7 +848,11 @@ Ideas for other things we could do (put these in Hoopl please!):  cmmToCmm :: DynFlags -> RawCmmDecl -> (RawCmmDecl, [CLabel])  cmmToCmm _ top@(CmmData _ _) = (top, [])  cmmToCmm dflags (CmmProc info lbl (ListGraph blocks)) = runCmmOpt dflags $ do -  blocks' <- mapM cmmBlockConFold (cmmMiniInline dflags (cmmEliminateDeadBlocks blocks)) +  let reachable_blocks | dopt Opt_TryNewCodeGen dflags = blocks +                       | otherwise = cmmEliminateDeadBlocks blocks +      -- The new codegen path has already eliminated unreachable blocks by now + +  blocks' <- mapM cmmBlockConFold (cmmMiniInline dflags reachable_blocks)    return $ CmmProc info lbl (ListGraph blocks')  newtype CmmOptM a = CmmOptM (([CLabel], DynFlags) -> (# a, [CLabel] #)) @@ -911,7 +946,8 @@ cmmExprConFold referenceKind expr = do      dflags <- getDynFlags      -- Skip constant folding if new code generator is running      -- (this optimization is done in Hoopl) -    let expr' = if dopt Opt_TryNewCodeGen dflags +    -- SDM: re-enabled for now, while cmmRewriteAssignments is turned off +    let expr' = if False -- dopt Opt_TryNewCodeGen dflags                      then expr                      else cmmExprCon (targetPlatform dflags) expr      cmmExprNative referenceKind expr' diff --git a/compiler/simplStg/SimplStg.lhs b/compiler/simplStg/SimplStg.lhs index d5024ab2e0..635df3ce41 100644 --- a/compiler/simplStg/SimplStg.lhs +++ b/compiler/simplStg/SimplStg.lhs @@ -55,7 +55,10 @@ stg2stg dflags module_name binds  	; (processed_binds, _, cost_centres)   		<- foldl_mn do_stg_pass (binds', us0, ccs) (getStgToDo dflags) -	; let srt_binds = computeSRTs (unarise us1 processed_binds) +        ; let un_binds = unarise us1 processed_binds +        ; let srt_binds +               | dopt Opt_TryNewCodeGen dflags = zip un_binds (repeat []) +               | otherwise = computeSRTs un_binds  	; dumpIfSet_dyn dflags Opt_D_dump_stg "STG syntax:"   	     		(pprStgBindingsWithSRTs srt_binds) diff --git a/compiler/utils/OrdList.lhs b/compiler/utils/OrdList.lhs index a56cdf3f58..7e3b24a5da 100644 --- a/compiler/utils/OrdList.lhs +++ b/compiler/utils/OrdList.lhs @@ -27,11 +27,14 @@ infixl 5  `snocOL`  infixr 5  `consOL`  data OrdList a -  = Many [a]        -- Invariant: non-empty +  = None +  | One a +  | Many [a]          -- Invariant: non-empty +  | Cons a (OrdList a) +  | Snoc (OrdList a) a    | Two (OrdList a) -- Invariant: non-empty          (OrdList a) -- Invariant: non-empty -  | One  a -  | None +  nilOL    :: OrdList a  isNilOL  :: OrdList a -> Bool @@ -44,22 +47,33 @@ concatOL :: [OrdList a] -> OrdList a  nilOL        = None  unitOL as    = One as -snocOL None b    = One b -snocOL as   b    = Two as (One b) -consOL a    None = One a -consOL a    bs   = Two (One a) bs +snocOL as   b    = Snoc as b +consOL a    bs   = Cons a bs  concatOL aas = foldr appOL None aas  isNilOL None = True  isNilOL _    = False -appOL None bs   = bs -appOL as   None = as -appOL as   bs   = Two as bs +None  `appOL` b     = b +a     `appOL` None  = a +One a `appOL` b     = Cons a b +a     `appOL` One b = Snoc a b +a     `appOL` b     = Two a b + +fromOL :: OrdList a -> [a] +fromOL a = go a [] +  where go None       acc = acc +        go (One a)    acc = a : acc +        go (Cons a b) acc = a : go b acc +        go (Snoc a b) acc = go a (b:acc) +        go (Two a b)  acc = go a (go b acc) +        go (Many xs)  acc = xs ++ acc  mapOL :: (a -> b) -> OrdList a -> OrdList b  mapOL _ None = None  mapOL f (One x) = One (f x) +mapOL f (Cons x xs) = Cons (f x) (mapOL f xs) +mapOL f (Snoc xs x) = Snoc (mapOL f xs) (f x)  mapOL f (Two x y) = Two (mapOL f x) (mapOL f y)  mapOL f (Many xs) = Many (map f xs) @@ -69,24 +83,19 @@ instance Functor OrdList where  foldrOL :: (a->b->b) -> b -> OrdList a -> b  foldrOL _ z None        = z  foldrOL k z (One x)     = k x z +foldrOL k z (Cons x xs) = k x (foldrOL k z xs) +foldrOL k z (Snoc xs x) = foldrOL k (k x z) xs  foldrOL k z (Two b1 b2) = foldrOL k (foldrOL k z b2) b1  foldrOL k z (Many xs)   = foldr k z xs  foldlOL :: (b->a->b) -> b -> OrdList a -> b  foldlOL _ z None        = z  foldlOL k z (One x)     = k z x +foldlOL k z (Cons x xs) = foldlOL k (k z x) xs +foldlOL k z (Snoc xs x) = k (foldlOL k z xs) x  foldlOL k z (Two b1 b2) = foldlOL k (foldlOL k z b1) b2  foldlOL k z (Many xs)   = foldl k z xs -fromOL :: OrdList a -> [a] -fromOL ol  -   = flat ol [] -     where -        flat None      rest = rest -        flat (One x)   rest = x:rest -        flat (Two a b) rest = flat a (flat b rest) -        flat (Many xs) rest = xs ++ rest -  toOL :: [a] -> OrdList a  toOL [] = None  toOL xs = Many xs diff --git a/compiler/utils/Outputable.lhs b/compiler/utils/Outputable.lhs index 93dfd33606..7ffce77a47 100644 --- a/compiler/utils/Outputable.lhs +++ b/compiler/utils/Outputable.lhs @@ -699,6 +699,8 @@ instance (Outputable key, Outputable elt) => Outputable (M.Map key elt) where      ppr m = ppr (M.toList m)  instance (Outputable elt) => Outputable (IM.IntMap elt) where      ppr m = ppr (IM.toList m) +instance (PlatformOutputable elt) => PlatformOutputable (Set.Set elt) where +    pprPlatform platform m = pprPlatform platform (Set.toList m)  \end{code}  %************************************************************************ diff --git a/compiler/utils/Stream.hs b/compiler/utils/Stream.hs new file mode 100644 index 0000000000..2fa76d2345 --- /dev/null +++ b/compiler/utils/Stream.hs @@ -0,0 +1,97 @@ +-- ----------------------------------------------------------------------------- +-- +-- (c) The University of Glasgow 2012 +-- +-- Monadic streams +-- +-- ----------------------------------------------------------------------------- + +module Stream ( +    Stream(..), yield, liftIO, +    collect, fromList, +    Stream.map, Stream.mapM, Stream.mapAccumL +  ) where + +-- | +-- @Stream m a b@ is a computation in some Monad @m@ that delivers a sequence +-- of elements of type @a@ followed by a result of type @b@. +-- +-- More concretely, a value of type @Stream m a b@ can be run using @runStream@ +-- in the Monad @m@, and it delivers either +-- +--  * the final result: @Left b@, or +--  * @Right (a,str)@, where @a@ is the next element in the stream, and @str@ +--    is a computation to get the rest of the stream. +-- +-- Stream is itself a Monad, and provides an operation 'yield' that +-- produces a new element of the stream.  This makes it convenient to turn +-- existing monadic computations into streams. +-- +-- The idea is that Stream is useful for making a monadic computation +-- that produces values from time to time.  This can be used for +-- knitting together two complex monadic operations, so that the +-- producer does not have to produce all its values before the +-- consumer starts consuming them.  We make the producer into a +-- Stream, and the consumer pulls on the stream each time it wants a +-- new value. +-- +newtype Stream m a b = Stream { runStream :: m (Either b (a, Stream m a b)) } + +instance Monad m => Monad (Stream m a) where +  return a = Stream (return (Left a)) + +  Stream m >>= k = Stream $ do +                r <- m +                case r of +                  Left b        -> runStream (k b) +                  Right (a,str) -> return (Right (a, str >>= k)) + +yield :: Monad m => a -> Stream m a () +yield a = Stream (return (Right (a, return ()))) + +liftIO :: IO a -> Stream IO b a +liftIO io = Stream $ io >>= return . Left + +-- | Turn a Stream into an ordinary list, by demanding all the elements. +collect :: Monad m => Stream m a () -> m [a] +collect str = go str [] + where +  go str acc = do +    r <- runStream str +    case r of +      Left () -> return (reverse acc) +      Right (a, str') -> go str' (a:acc) + +-- | Turn a list into a 'Stream', by yielding each element in turn. +fromList :: Monad m => [a] -> Stream m a () +fromList = mapM_ yield + +-- | Apply a function to each element of a 'Stream', lazilly +map :: Monad m => (a -> b) -> Stream m a x -> Stream m b x +map f str = Stream $ do +   r <- runStream str +   case r of +     Left x -> return (Left x) +     Right (a, str') -> return (Right (f a, Stream.map f str')) + +-- | Apply a monadic operation to each element of a 'Stream', lazilly +mapM :: Monad m => (a -> m b) -> Stream m a x -> Stream m b x +mapM f str = Stream $ do +   r <- runStream str +   case r of +     Left x -> return (Left x) +     Right (a, str') -> do +        b <- f a +        return (Right (b, Stream.mapM f str')) + +-- | analog of the list-based 'mapAccumL' on Streams.  This is a simple +-- way to map over a Stream while carrying some state around. +mapAccumL :: Monad m => (c -> a -> m (c,b)) -> c -> Stream m a () +          -> Stream m b c +mapAccumL f c str = Stream $ do +  r <- runStream str +  case r of +    Left  () -> return (Left c) +    Right (a, str') -> do +      (c',b) <- f c a +      return (Right (b, mapAccumL f c' str')) | 
