summaryrefslogtreecommitdiff
path: root/compiler
diff options
context:
space:
mode:
Diffstat (limited to 'compiler')
-rw-r--r--compiler/basicTypes/UniqSupply.lhs35
-rw-r--r--compiler/cmm/BlockId.hs8
-rw-r--r--compiler/cmm/Cmm.hs18
-rw-r--r--compiler/cmm/CmmBuildInfoTables.hs276
-rw-r--r--compiler/cmm/CmmCallConv.hs1
-rw-r--r--compiler/cmm/CmmCommonBlockElim.hs137
-rw-r--r--compiler/cmm/CmmContFlowOpt.hs289
-rw-r--r--compiler/cmm/CmmCvt.hs22
-rw-r--r--compiler/cmm/CmmExpr.hs126
-rw-r--r--compiler/cmm/CmmInfo.hs17
-rw-r--r--compiler/cmm/CmmLayoutStack.hs1048
-rw-r--r--compiler/cmm/CmmLint.hs222
-rw-r--r--compiler/cmm/CmmLive.hs57
-rw-r--r--compiler/cmm/CmmNode.hs79
-rw-r--r--compiler/cmm/CmmOpt.hs119
-rw-r--r--compiler/cmm/CmmParse.y42
-rw-r--r--compiler/cmm/CmmPipeline.hs189
-rw-r--r--compiler/cmm/CmmProcPoint.hs343
-rw-r--r--compiler/cmm/CmmRewriteAssignments.hs31
-rw-r--r--compiler/cmm/CmmSpillReload.hs166
-rw-r--r--compiler/cmm/CmmStackLayout.hs1
-rw-r--r--compiler/cmm/CmmUtils.hs126
-rw-r--r--compiler/cmm/Hoopl.hs125
-rw-r--r--compiler/cmm/Hoopl/Dataflow.hs887
-rw-r--r--compiler/cmm/MkGraph.hs531
-rw-r--r--compiler/cmm/OldCmm.hs33
-rw-r--r--compiler/cmm/OldCmmLint.hs209
-rw-r--r--compiler/cmm/OldPprCmm.hs31
-rw-r--r--compiler/cmm/OptimizationFuel.hs142
-rw-r--r--compiler/cmm/PprCmm.hs2
-rw-r--r--compiler/cmm/PprCmmExpr.hs8
-rw-r--r--compiler/cmm/SMRep.lhs6
-rw-r--r--compiler/cmm/cmm-notes41
-rw-r--r--compiler/codeGen/CgInfoTbls.hs30
-rw-r--r--compiler/codeGen/CgMonad.lhs19
-rw-r--r--compiler/codeGen/CodeGen.lhs58
-rw-r--r--compiler/codeGen/StgCmm.hs135
-rw-r--r--compiler/codeGen/StgCmmBind.hs57
-rw-r--r--compiler/codeGen/StgCmmClosure.hs29
-rw-r--r--compiler/codeGen/StgCmmCon.hs1
-rw-r--r--compiler/codeGen/StgCmmEnv.hs3
-rw-r--r--compiler/codeGen/StgCmmExpr.hs269
-rw-r--r--compiler/codeGen/StgCmmForeign.hs134
-rw-r--r--compiler/codeGen/StgCmmHeap.hs97
-rw-r--r--compiler/codeGen/StgCmmLayout.hs238
-rw-r--r--compiler/codeGen/StgCmmMonad.hs109
-rw-r--r--compiler/codeGen/StgCmmPrim.hs59
-rw-r--r--compiler/codeGen/StgCmmProf.hs8
-rw-r--r--compiler/codeGen/StgCmmTicky.hs4
-rw-r--r--compiler/codeGen/StgCmmUtils.hs246
-rw-r--r--compiler/ghc.cabal.in8
-rw-r--r--compiler/ghc.mk2
-rw-r--r--compiler/main/CodeOutput.lhs57
-rw-r--r--compiler/main/DynFlags.hs4
-rw-r--r--compiler/main/HscMain.hs75
-rw-r--r--compiler/main/HscTypes.lhs6
-rw-r--r--compiler/nativeGen/AsmCodeGen.lhs56
-rw-r--r--compiler/simplStg/SimplStg.lhs5
-rw-r--r--compiler/utils/OrdList.lhs47
-rw-r--r--compiler/utils/Stream.hs97
60 files changed, 4610 insertions, 2610 deletions
diff --git a/compiler/basicTypes/UniqSupply.lhs b/compiler/basicTypes/UniqSupply.lhs
index bb40be7ac1..f3fb28ac21 100644
--- a/compiler/basicTypes/UniqSupply.lhs
+++ b/compiler/basicTypes/UniqSupply.lhs
@@ -109,7 +109,7 @@ takeUniqFromSupply (MkSplitUniqSupply n s1 _) = (mkUniqueGrimily (iBox n), s1)
\begin{code}
-- | A monad which just gives the ability to obtain 'Unique's
-newtype UniqSM result = USM { unUSM :: UniqSupply -> (result, UniqSupply) }
+newtype UniqSM result = USM { unUSM :: UniqSupply -> (# result, UniqSupply #) }
instance Monad UniqSM where
return = returnUs
@@ -118,21 +118,21 @@ instance Monad UniqSM where
instance Functor UniqSM where
fmap f (USM x) = USM (\us -> case x us of
- (r, us') -> (f r, us'))
+ (# r, us' #) -> (# f r, us' #))
instance Applicative UniqSM where
pure = returnUs
(USM f) <*> (USM x) = USM $ \us -> case f us of
- (ff, us') -> case x us' of
- (xx, us'') -> (ff xx, us'')
+ (# ff, us' #) -> case x us' of
+ (# xx, us'' #) -> (# ff xx, us'' #)
-- | Run the 'UniqSM' action, returning the final 'UniqSupply'
initUs :: UniqSupply -> UniqSM a -> (a, UniqSupply)
-initUs init_us m = case unUSM m init_us of { (r,us) -> (r,us) }
+initUs init_us m = case unUSM m init_us of { (# r, us #) -> (r,us) }
-- | Run the 'UniqSM' action, discarding the final 'UniqSupply'
initUs_ :: UniqSupply -> UniqSM a -> a
-initUs_ init_us m = case unUSM m init_us of { (r, _) -> r }
+initUs_ init_us m = case unUSM m init_us of { (# r, _ #) -> r }
{-# INLINE thenUs #-}
{-# INLINE lazyThenUs #-}
@@ -142,27 +142,30 @@ initUs_ init_us m = case unUSM m init_us of { (r, _) -> r }
@thenUs@ is where we split the @UniqSupply@.
\begin{code}
+liftUSM :: UniqSM a -> UniqSupply -> (a, UniqSupply)
+liftUSM (USM m) us = case m us of (# a, us' #) -> (a, us')
+
instance MonadFix UniqSM where
- mfix m = USM (\us -> let (r,us') = unUSM (m r) us in (r,us'))
+ mfix m = USM (\us -> let (r,us') = liftUSM (m r) us in (# r,us' #))
thenUs :: UniqSM a -> (a -> UniqSM b) -> UniqSM b
thenUs (USM expr) cont
= USM (\us -> case (expr us) of
- (result, us') -> unUSM (cont result) us')
+ (# result, us' #) -> unUSM (cont result) us')
lazyThenUs :: UniqSM a -> (a -> UniqSM b) -> UniqSM b
-lazyThenUs (USM expr) cont
- = USM (\us -> let (result, us') = expr us in unUSM (cont result) us')
+lazyThenUs expr cont
+ = USM (\us -> let (result, us') = liftUSM expr us in unUSM (cont result) us')
thenUs_ :: UniqSM a -> UniqSM b -> UniqSM b
thenUs_ (USM expr) (USM cont)
- = USM (\us -> case (expr us) of { (_, us') -> cont us' })
+ = USM (\us -> case (expr us) of { (# _, us' #) -> cont us' })
returnUs :: a -> UniqSM a
-returnUs result = USM (\us -> (result, us))
+returnUs result = USM (\us -> (# result, us #))
getUs :: UniqSM UniqSupply
-getUs = USM (\us -> splitUniqSupply us)
+getUs = USM (\us -> case splitUniqSupply us of (us1,us2) -> (# us1, us2 #))
-- | A monad for generating unique identifiers
class Monad m => MonadUnique m where
@@ -177,17 +180,17 @@ class Monad m => MonadUnique m where
getUniquesM = liftM uniqsFromSupply getUniqueSupplyM
instance MonadUnique UniqSM where
- getUniqueSupplyM = USM (\us -> splitUniqSupply us)
+ getUniqueSupplyM = getUs
getUniqueM = getUniqueUs
getUniquesM = getUniquesUs
getUniqueUs :: UniqSM Unique
getUniqueUs = USM (\us -> case splitUniqSupply us of
- (us1,us2) -> (uniqFromSupply us1, us2))
+ (us1,us2) -> (# uniqFromSupply us1, us2 #))
getUniquesUs :: UniqSM [Unique]
getUniquesUs = USM (\us -> case splitUniqSupply us of
- (us1,us2) -> (uniqsFromSupply us1, us2))
+ (us1,us2) -> (# uniqsFromSupply us1, us2 #))
\end{code}
\begin{code}
diff --git a/compiler/cmm/BlockId.hs b/compiler/cmm/BlockId.hs
index feeacb553d..4aedcb7074 100644
--- a/compiler/cmm/BlockId.hs
+++ b/compiler/cmm/BlockId.hs
@@ -15,7 +15,7 @@ import Outputable
import Unique
import Compiler.Hoopl as Hoopl hiding (Unique)
-import Compiler.Hoopl.GHC (uniqueToInt, uniqueToLbl, lblToUnique)
+import Compiler.Hoopl.Internals (uniqueToLbl)
----------------------------------------------------------------
--- Block Ids, their environments, and their sets
@@ -31,15 +31,9 @@ compilation unit in which it appears.
type BlockId = Hoopl.Label
-instance Uniquable BlockId where
- getUnique label = getUnique (uniqueToInt $ lblToUnique label)
-
mkBlockId :: Unique -> BlockId
mkBlockId unique = uniqueToLbl $ intToUnique $ getKey unique
-instance Outputable BlockId where
- ppr label = ppr (getUnique label)
-
retPtLbl :: BlockId -> CLabel
retPtLbl label = mkReturnPtLabel $ getUnique label
diff --git a/compiler/cmm/Cmm.hs b/compiler/cmm/Cmm.hs
index f1318c1dc9..1c77409e49 100644
--- a/compiler/cmm/Cmm.hs
+++ b/compiler/cmm/Cmm.hs
@@ -32,9 +32,9 @@ module Cmm (
import CLabel
import BlockId
import CmmNode
-import OptimizationFuel as F
import SMRep
import CmmExpr
+import UniqSupply
import Compiler.Hoopl
import Data.Word ( Word8 )
@@ -69,8 +69,6 @@ type CmmGroup = GenCmmGroup CmmStatics CmmTopInfo CmmGraph
-- (a) C--, i.e. populated with various C-- constructs
-- (Cmm and RawCmm in OldCmm.hs)
-- (b) Native code, populated with data/instructions
---
--- A second family of instances based on Hoopl is in Cmm.hs.
-- | A top-level chunk, abstracted over the type of the contents of
-- the basic blocks (Cmm or instructions are the likely instantiations).
@@ -95,19 +93,23 @@ data GenCmmGraph n = CmmGraph { g_entry :: BlockId, g_graph :: Graph n C C }
type CmmBlock = Block CmmNode C C
type CmmReplGraph e x = GenCmmReplGraph CmmNode e x
-type GenCmmReplGraph n e x = FuelUniqSM (Maybe (Graph n e x))
-type CmmFwdRewrite f = FwdRewrite FuelUniqSM CmmNode f
-type CmmBwdRewrite f = BwdRewrite FuelUniqSM CmmNode f
+type GenCmmReplGraph n e x = UniqSM (Maybe (Graph n e x))
+type CmmFwdRewrite f = FwdRewrite UniqSM CmmNode f
+type CmmBwdRewrite f = BwdRewrite UniqSM CmmNode f
-----------------------------------------------------------------------------
-- Info Tables
-----------------------------------------------------------------------------
-data CmmTopInfo = TopInfo {info_tbl :: CmmInfoTable, stack_info :: CmmStackInfo}
+data CmmTopInfo = TopInfo { info_tbl :: CmmInfoTable
+ , stack_info :: CmmStackInfo }
data CmmStackInfo
= StackInfo {
- arg_space :: ByteOff, -- XXX: comment?
+ arg_space :: ByteOff,
+ -- number of bytes of arguments on the stack on entry to the
+ -- the proc. This is filled in by StgCmm.codeGen, and used
+ -- by the stack allocator later.
updfr_space :: Maybe ByteOff -- XXX: comment?
}
diff --git a/compiler/cmm/CmmBuildInfoTables.hs b/compiler/cmm/CmmBuildInfoTables.hs
index 011947f55d..ebe755219b 100644
--- a/compiler/cmm/CmmBuildInfoTables.hs
+++ b/compiler/cmm/CmmBuildInfoTables.hs
@@ -14,169 +14,53 @@
{-# OPTIONS_GHC -fno-warn-warnings-deprecations #-}
module CmmBuildInfoTables
( CAFSet, CAFEnv, cafAnal, localCAFInfo, mkTopCAFInfo
- , setInfoTableSRT, setInfoTableStackMap
+ , setInfoTableSRT
, TopSRT, emptySRT, srtToData
, bundleCAFs
- , lowerSafeForeignCalls
- , cafTransfers, liveSlotTransfers
- , mkLiveness )
+ , cafTransfers )
where
#include "HsVersions.h"
-- These should not be imported here!
-import StgCmmForeign
import StgCmmUtils
-import Constants
import Digraph
import qualified Prelude as P
import Prelude hiding (succ)
-import Util
import BlockId
import Bitmap
import CLabel
import Cmm
import CmmUtils
-import CmmStackLayout
-import Module
-import FastString
-import ForeignCall
import IdInfo
import Data.List
import Maybes
-import MkGraph as M
-import Control.Monad
import Name
-import OptimizationFuel
import Outputable
import SMRep
import UniqSupply
-import Compiler.Hoopl
+import Hoopl
import Data.Map (Map)
import qualified Data.Map as Map
-import qualified FiniteMap as Map
+import Data.Set (Set)
+import qualified Data.Set as Set
+
+foldSet :: (a -> b -> b) -> b -> Set a -> b
+#if __GLASGOW_HASKELL__ < 704
+foldSet = Set.fold
+#else
+foldSet = Set.foldr
+#endif
----------------------------------------------------------------
-- Building InfoTables
-----------------------------------------------------------------------
--- Stack Maps
-
--- Given a block ID, we return a representation of the layout of the stack,
--- as suspended before entering that block.
--- (For a return site to a function call, the layout does not include the
--- parameter passing area (or the "return address" on the stack)).
--- If the element is `Nothing`, then it represents a word of the stack that
--- does not contain a live pointer.
--- If the element is `Just` a register, then it represents a live spill slot
--- for a pointer; we assume that a pointer is the size of a word.
--- The head of the list represents the young end of the stack where the infotable
--- pointer for the block `Bid` is stored.
--- The infotable pointer itself is not included in the list.
--- Call areas are also excluded from the list: besides the stuff in the update
--- frame (and the return infotable), call areas should never be live across
--- function calls.
-
--- RTS Invariant: All pointers must be word-aligned because each bit in the bitmap
--- represents a word. Consequently, we have to be careful when we see a live slot
--- on the stack: if we have packed multiple sub-word values into a word,
--- we have to make sure that we only mark the entire word as a non-pointer.
-
--- Also, don't forget to stop at the old end of the stack (oldByte),
--- which may differ depending on whether there is an update frame.
-
-type RegSlotInfo
- = ( Int -- Offset from oldest byte of Old area
- , LocalReg -- The register
- , Int) -- Width of the register
-
-live_ptrs :: ByteOff -> BlockEnv SubAreaSet -> AreaMap -> BlockId -> StackLayout
-live_ptrs oldByte slotEnv areaMap bid =
- -- pprTrace "live_ptrs for" (ppr bid <+> text (show oldByte ++ "-" ++ show youngByte) <+>
- -- ppr liveSlots) $
- -- pprTrace ("stack layout for " ++ show bid ++ ": ") (ppr res) $ res
- res
- where
- res = mkLiveness (reverse $ slotsToList youngByte liveSlots [])
-
- slotsToList :: Int -> [RegSlotInfo] -> [Maybe LocalReg] -> [Maybe LocalReg]
- -- n starts at youngByte and is decremented down to oldByte
- -- Returns a list, one element per word, with
- -- (Just r) meaning 'pointer register r is saved here',
- -- Nothing meaning 'non-pointer or empty'
-
- slotsToList n [] results | n == oldByte = results -- at old end of stack frame
-
- slotsToList n (s : _) _ | n == oldByte =
- pprPanic "slot left off live_ptrs" (ppr s <+> ppr oldByte <+>
- ppr n <+> ppr liveSlots <+> ppr youngByte)
-
- slotsToList n _ _ | n < oldByte =
- panic "stack slots not allocated on word boundaries?"
-
- slotsToList n l@((n', r, w) : rst) results =
- if n == (n' + w) then -- slot's young byte is at n
- ASSERT (not (isPtr r) ||
- (n `mod` wORD_SIZE == 0 && w == wORD_SIZE)) -- ptrs must be aligned
- slotsToList next (dropWhile (non_ptr_younger_than next) rst)
- (stack_rep : results)
- else slotsToList next (dropWhile (non_ptr_younger_than next) l)
- (Nothing : results)
- where next = n - wORD_SIZE
- stack_rep = if isPtr r then Just r else Nothing
-
- slotsToList n [] results = slotsToList (n - wORD_SIZE) [] (Nothing : results)
-
- non_ptr_younger_than next (n', r, w) =
- n' + w > next &&
- ASSERT (not (isPtr r))
- True
- isPtr = isGcPtrType . localRegType
-
- liveSlots :: [RegSlotInfo]
- liveSlots = sortBy (\ (off,_,_) (off',_,_) -> compare off' off)
- (Map.foldRightWithKey (\_ -> flip $ foldl add_slot) [] slots)
-
- add_slot :: [RegSlotInfo] -> SubArea -> [RegSlotInfo]
- add_slot rst (a@(RegSlot r@(LocalReg _ ty)), off, w) =
- if off == w && widthInBytes (typeWidth ty) == w then
- (expectJust "add_slot" (Map.lookup a areaMap), r, w) : rst
- else panic "live_ptrs: only part of a variable live at a proc point"
- add_slot rst (CallArea Old, _, _) =
- rst -- the update frame (or return infotable) should be live
- -- would be nice to check that only that part of the callarea is live...
- add_slot rst ((CallArea _), _, _) =
- rst
- -- JD: THIS ISN'T CURRENTLY A CORRECTNESS PROBLEM, BUT WE SHOULD REALLY
- -- MAKE LIVENESS INFO AROUND CALLS MORE PRECISE -- FOR NOW, A 32-BIT
- -- FLOAT PADS OUT TO 64 BITS, BUT WE ASSUME THE WHOLE PARAMETER-PASSING
- -- AREA IS LIVE (WHICH IT ISN'T...). WE SHOULD JUST PUT THE LIVE AREAS
- -- IN THE CALL NODES, WHICH SHOULD EVENTUALLY HAVE LIVE REGISTER AS WELL,
- -- SO IT'S ALL GOING IN THE SAME DIRECTION.
- -- pprPanic "CallAreas must not be live across function calls" (ppr bid <+> ppr c)
-
- slots :: SubAreaSet -- The SubAreaSet for 'bid'
- slots = expectJust "live_ptrs slots" $ mapLookup bid slotEnv
- youngByte = expectJust "live_ptrs bid_pos" $ Map.lookup (CallArea (Young bid)) areaMap
-
--- Construct the stack maps for a procedure _if_ it needs an infotable.
--- When wouldn't a procedure need an infotable? If it is a procpoint that
--- is not the successor of a call.
-setInfoTableStackMap :: SlotEnv -> AreaMap -> CmmDecl -> CmmDecl
-setInfoTableStackMap slotEnv areaMap
- t@(CmmProc (TopInfo {stack_info=StackInfo {updfr_space = Just updfr_off}}) _
- (CmmGraph {g_entry = eid}))
- = updInfo (const (live_ptrs updfr_off slotEnv areaMap eid)) id t
-setInfoTableStackMap _ _ t = t
-
-
-
------------------------------------------------------------------------
-- SRTs
-- WE NEED AN EXAMPLE HERE.
@@ -191,14 +75,14 @@ setInfoTableStackMap _ _ t = t
-----------------------------------------------------------------------
-- Finding the CAFs used by a procedure
-type CAFSet = Map CLabel ()
+type CAFSet = Set CLabel
type CAFEnv = BlockEnv CAFSet
-- First, an analysis to find live CAFs.
cafLattice :: DataflowLattice CAFSet
-cafLattice = DataflowLattice "live cafs" Map.empty add
- where add _ (OldFact old) (NewFact new) = case old `Map.union` new of
- new' -> (changeIf $ Map.size new' > Map.size old, new')
+cafLattice = DataflowLattice "live cafs" Set.empty add
+ where add _ (OldFact old) (NewFact new) = case old `Set.union` new of
+ new' -> (changeIf $ Set.size new' > Set.size old, new')
cafTransfers :: BwdTransfer CmmNode CAFSet
cafTransfers = mkBTransfer3 first middle last
@@ -210,11 +94,11 @@ cafTransfers = mkBTransfer3 first middle last
CmmLit (CmmLabelOff c _) -> add c set
CmmLit (CmmLabelDiffOff c1 c2 _) -> add c1 $ add c2 set
_ -> set
- add l s = if hasCAF l then Map.insert (toClosureLbl l) () s
+ add l s = if hasCAF l then Set.insert (toClosureLbl l) s
else s
-cafAnal :: CmmGraph -> FuelUniqSM CAFEnv
-cafAnal g = liftM snd $ dataflowPassBwd g [] $ analBwd cafLattice cafTransfers
+cafAnal :: CmmGraph -> CAFEnv
+cafAnal g = dataflowAnalBwd g [] $ analBwd cafLattice cafTransfers
-----------------------------------------------------------------------
-- Building the SRTs
@@ -264,15 +148,15 @@ srtToData srt = [CmmData RelocatableReadOnlyData (Statics (lbl srt) tbl)]
-- we make sure they're all close enough to the bottom of the table that the
-- bitmap will be able to cover all of them.
buildSRTs :: TopSRT -> Map CLabel CAFSet -> CAFSet ->
- FuelUniqSM (TopSRT, Maybe CmmDecl, C_SRT)
+ UniqSM (TopSRT, Maybe CmmDecl, C_SRT)
buildSRTs topSRT topCAFMap cafs =
- do let liftCAF lbl () z = -- get CAFs for functions without static closures
- case Map.lookup lbl topCAFMap of Just cafs -> z `Map.union` cafs
- Nothing -> Map.insert lbl () z
+ do let liftCAF lbl z = -- get CAFs for functions without static closures
+ case Map.lookup lbl topCAFMap of Just cafs -> z `Set.union` cafs
+ Nothing -> Set.insert lbl z
-- For each label referring to a function f without a static closure,
-- replace it with the CAFs that are reachable from f.
sub_srt topSRT localCafs =
- let cafs = Map.keys (Map.foldRightWithKey liftCAF Map.empty localCafs)
+ let cafs = Set.elems (foldSet liftCAF Set.empty localCafs)
mkSRT topSRT =
do localSRTs <- procpointSRT (lbl topSRT) (elt_map topSRT) cafs
return (topSRT, localSRTs)
@@ -307,7 +191,7 @@ buildSRTs topSRT topCAFMap cafs =
-- Construct an SRT bitmap.
-- Adapted from simpleStg/SRT.lhs, which expects Id's.
procpointSRT :: CLabel -> Map CLabel Int -> [CLabel] ->
- FuelUniqSM (Maybe CmmDecl, C_SRT)
+ UniqSM (Maybe CmmDecl, C_SRT)
procpointSRT _ _ [] =
return (Nothing, NoC_SRT)
procpointSRT top_srt top_table entries =
@@ -325,7 +209,7 @@ maxBmpSize :: Int
maxBmpSize = widthInBits wordWidth `div` 2
-- Adapted from codeGen/StgCmmUtils, which converts from SRT to C_SRT.
-to_SRT :: CLabel -> Int -> Int -> Bitmap -> FuelUniqSM (Maybe CmmDecl, C_SRT)
+to_SRT :: CLabel -> Int -> Int -> Bitmap -> UniqSM (Maybe CmmDecl, C_SRT)
to_SRT top_srt off len bmp
| len > maxBmpSize || bmp == [fromIntegral srt_escape]
= do id <- getUniqueM
@@ -373,30 +257,30 @@ mkTopCAFInfo localCAFs = foldl addToTop Map.empty g
Map.insert l (flatten env cafset) env
addToTop env (CyclicSCC nodes) =
let (lbls, cafsets) = unzip nodes
- cafset = lbls `Map.deleteList` foldl Map.union Map.empty cafsets
+ cafset = foldr Set.delete (foldl Set.union Set.empty cafsets) lbls
in foldl (\env l -> Map.insert l (flatten env cafset) env) env lbls
- flatten env cafset = Map.foldRightWithKey (lookup env) Map.empty cafset
- lookup env caf () cafset' =
- case Map.lookup caf env of Just cafs -> Map.foldRightWithKey add cafset' cafs
- Nothing -> add caf () cafset'
- add caf () cafset' = Map.insert caf () cafset'
+ flatten env cafset = foldSet (lookup env) Set.empty cafset
+ lookup env caf cafset' =
+ case Map.lookup caf env of Just cafs -> foldSet add cafset' cafs
+ Nothing -> add caf cafset'
+ add caf cafset' = Set.insert caf cafset'
g = stronglyConnCompFromEdgedVertices
- (map (\n@(l, cafs) -> (n, l, Map.keys cafs)) localCAFs)
+ (map (\n@(l, cafs) -> (n, l, Set.elems cafs)) localCAFs)
-- Bundle the CAFs used at a procpoint.
bundleCAFs :: CAFEnv -> CmmDecl -> (CAFSet, CmmDecl)
bundleCAFs cafEnv t@(CmmProc _ _ (CmmGraph {g_entry=entry})) =
(expectJust "bundleCAFs" (mapLookup entry cafEnv), t)
-bundleCAFs _ t = (Map.empty, t)
+bundleCAFs _ t = (Set.empty, t)
-- Construct the SRTs for the given procedure.
setInfoTableSRT :: Map CLabel CAFSet -> TopSRT -> (CAFSet, CmmDecl) ->
- FuelUniqSM (TopSRT, [CmmDecl])
+ UniqSM (TopSRT, [CmmDecl])
setInfoTableSRT topCAFMap topSRT (cafs, t) =
setSRT cafs topCAFMap topSRT t
setSRT :: CAFSet -> Map CLabel CAFSet -> TopSRT ->
- CmmDecl -> FuelUniqSM (TopSRT, [CmmDecl])
+ CmmDecl -> UniqSM (TopSRT, [CmmDecl])
setSRT cafs topCAFMap topSRT t =
do (topSRT, cafTable, srt) <- buildSRTs topSRT topCAFMap cafs
let t' = updInfo id (const srt) t
@@ -418,91 +302,3 @@ updInfoTbl toVars toSrt info_tbl@(CmmInfoTable {})
StackRep ls -> StackRep (toVars ls)
other -> other }
updInfoTbl _ _ t@CmmNonInfoTable = t
-
-----------------------------------------------------------------
--- Safe foreign calls: We need to insert the code that suspends and resumes
--- the thread before and after a safe foreign call.
--- Why do we do this so late in the pipeline?
--- Because we need this code to appear without interrruption: you can't rely on the
--- value of the stack pointer between the call and resetting the thread state;
--- you need to have an infotable on the young end of the stack both when
--- suspending the thread and making the foreign call.
--- All of this is much easier if we insert the suspend and resume calls here.
-
--- At the same time, we prepare for the stages of the compiler that
--- build the proc points. We have to do this at the same time because
--- the safe foreign calls need special treatment with respect to infotables.
--- A safe foreign call needs an infotable even though it isn't
--- a procpoint. The following datatype captures the information
--- needed to generate the infotables along with the Cmm data and procedures.
-
--- JD: Why not do this while splitting procedures?
-lowerSafeForeignCalls :: AreaMap -> CmmDecl -> FuelUniqSM CmmDecl
-lowerSafeForeignCalls _ t@(CmmData _ _) = return t
-lowerSafeForeignCalls areaMap (CmmProc info l g@(CmmGraph {g_entry=entry})) = do
- let block b mblocks = mblocks >>= lowerSafeCallBlock entry areaMap b
- blocks <- foldGraphBlocks block (return mapEmpty) g
- return $ CmmProc info l (ofBlockMap entry blocks)
-
--- If the block ends with a safe call in the block, lower it to an unsafe
--- call (with appropriate saves and restores before and after).
-lowerSafeCallBlock :: BlockId -> AreaMap -> CmmBlock -> BlockEnv CmmBlock
- -> FuelUniqSM (BlockEnv CmmBlock)
-lowerSafeCallBlock entry areaMap b blocks =
- case blockToNodeList b of
- (JustC (CmmEntry id), m, JustC l@(CmmForeignCall {})) -> lowerSafeForeignCall entry areaMap blocks id m l
- _ -> return $ insertBlock b blocks
-
--- Late in the code generator, we want to insert the code necessary
--- to lower a safe foreign call to a sequence of unsafe calls.
-lowerSafeForeignCall :: BlockId -> AreaMap -> BlockEnv CmmBlock -> BlockId -> [CmmNode O O] -> CmmNode O C
- -> FuelUniqSM (BlockEnv CmmBlock)
-lowerSafeForeignCall entry areaMap blocks bid m
- (CmmForeignCall {tgt=tgt, res=rs, args=as, succ=succ, updfr = updfr_off, intrbl = intrbl}) =
- do let newTemp rep = getUniqueM >>= \u -> return (LocalReg u rep)
- -- Both 'id' and 'new_base' are KindNonPtr because they're
- -- RTS-only objects and are not subject to garbage collection
- id <- newTemp bWord
- new_base <- newTemp (cmmRegType (CmmGlobal BaseReg))
- let (caller_save, caller_load) = callerSaveVolatileRegs
- load_tso <- newTemp gcWord -- TODO FIXME NOW
- load_stack <- newTemp gcWord -- TODO FIXME NOW
- let (<**>) = (M.<*>)
- let suspendThread = foreignLbl "suspendThread"
- resumeThread = foreignLbl "resumeThread"
- foreignLbl name = CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit name)))
- suspend = saveThreadState <**>
- caller_save <**>
- mkUnsafeCall (ForeignTarget suspendThread
- (ForeignConvention CCallConv [AddrHint, NoHint] [AddrHint]))
- [id] [CmmReg (CmmGlobal BaseReg), CmmLit (CmmInt (fromIntegral (fromEnum intrbl)) wordWidth)]
- midCall = mkUnsafeCall tgt rs as
- resume = mkUnsafeCall (ForeignTarget resumeThread
- (ForeignConvention CCallConv [AddrHint] [AddrHint]))
- [new_base] [CmmReg (CmmLocal id)] <**>
- -- Assign the result to BaseReg: we
- -- might now have a different Capability!
- mkAssign (CmmGlobal BaseReg) (CmmReg (CmmLocal new_base)) <**>
- caller_load <**>
- loadThreadState load_tso load_stack
- -- We have to save the return value on the stack because its next use
- -- may appear in a different procedure due to procpoint splitting...
- saveRetVals = foldl (<**>) emptyAGraph $ map (M.mkMiddle . spill) rs
- spill r = CmmStore (regSlot r) (CmmReg $ CmmLocal r)
- regSlot r@(LocalReg _ _) = CmmRegOff (CmmGlobal Sp) (sp_off - offset)
- where offset = w + expectJust "lowerForeign" (Map.lookup (RegSlot r) areaMap)
- sp_off = wORD_SIZE + expectJust "lowerForeign" (Map.lookup (CallArea area) areaMap)
- area = if succ == entry then Old else Young succ
- w = widthInBytes $ typeWidth $ localRegType r
- -- Note: The successor must be a procpoint, and we have already split,
- -- so we use a jump, not a branch.
- succLbl = CmmLit (CmmLabel (infoTblLbl succ))
- jump = CmmCall { cml_target = succLbl, cml_cont = Nothing
- , cml_args = widthInBytes wordWidth ,cml_ret_args = 0
- , cml_ret_off = updfr_off}
- graph' <- liftUniq $ labelAGraph bid $ catAGraphs (map M.mkMiddle m) <**>
- suspend <**> midCall <**>
- resume <**> saveRetVals <**> M.mkLast jump
- return $ blocks `mapUnion` toBlockMap graph'
-lowerSafeForeignCall _ _ _ _ _ _ = panic "lowerSafeForeignCall was passed something else"
-
diff --git a/compiler/cmm/CmmCallConv.hs b/compiler/cmm/CmmCallConv.hs
index d3d9ba4b41..484e89cd9b 100644
--- a/compiler/cmm/CmmCallConv.hs
+++ b/compiler/cmm/CmmCallConv.hs
@@ -53,7 +53,6 @@ assignArgumentsPos conv arg_ty reps = assignments
([_], PrimOpReturn) -> allRegs
(_, PrimOpReturn) -> getRegsWithNode
(_, Slow) -> noRegs
- _ -> pprPanic "Unknown calling convention" (ppr conv)
-- The calling conventions first assign arguments to registers,
-- then switch to the stack when we first run out of registers
-- (even if there are still available registers for args of a different type).
diff --git a/compiler/cmm/CmmCommonBlockElim.hs b/compiler/cmm/CmmCommonBlockElim.hs
index abbfd01156..eafa2a00f3 100644
--- a/compiler/cmm/CmmCommonBlockElim.hs
+++ b/compiler/cmm/CmmCommonBlockElim.hs
@@ -13,22 +13,22 @@ where
import BlockId
import Cmm
import CmmUtils
+import CmmContFlowOpt
import Prelude hiding (iterate, succ, unzip, zip)
-import Compiler.Hoopl
+import Hoopl hiding (ChangeFlag)
import Data.Bits
import qualified Data.List as List
import Data.Word
-import FastString
-import Control.Monad
import Outputable
import UniqFM
-import Unique
my_trace :: String -> SDoc -> a -> a
my_trace = if False then pprTrace else \_ _ a -> a
--- Eliminate common blocks:
+-- -----------------------------------------------------------------------------
+-- Eliminate common blocks
+
-- If two blocks are identical except for the label on the first node,
-- then we can eliminate one of the blocks. To ensure that the semantics
-- of the program are preserved, we have to rewrite each predecessor of the
@@ -42,59 +42,50 @@ my_trace = if False then pprTrace else \_ _ a -> a
-- TODO: Use optimization fuel
elimCommonBlocks :: CmmGraph -> CmmGraph
-elimCommonBlocks g =
- upd_graph g . snd $ iterate common_block reset hashed_blocks
- (emptyUFM, mapEmpty)
- where hashed_blocks = map (\b -> (hash_block b, b)) (reverse (postorderDfs g))
- reset (_, subst) = (emptyUFM, subst)
+elimCommonBlocks g = replaceLabels env g
+ where
+ env = iterate hashed_blocks mapEmpty
+ hashed_blocks = map (\b -> (hash_block b, b)) $ postorderDfs g
-- Iterate over the blocks until convergence
-iterate :: (t -> a -> (Bool, t)) -> (t -> t) -> [a] -> t -> t
-iterate upd reset blocks state =
- case foldl upd' (False, state) blocks of
- (True, state') -> iterate upd reset blocks (reset state')
- (False, state') -> state'
- where upd' (b, s) a = let (b', s') = upd s a in (b || b', s') -- lift to track changes
+iterate :: [(HashCode,CmmBlock)] -> BlockEnv BlockId -> BlockEnv BlockId
+iterate blocks subst =
+ case foldl common_block (False, emptyUFM, subst) blocks of
+ (changed, _, subst)
+ | changed -> iterate blocks subst
+ | otherwise -> subst
+
+type State = (ChangeFlag, UniqFM [CmmBlock], BlockEnv BlockId)
+
+type ChangeFlag = Bool
+type HashCode = Int
-- Try to find a block that is equal (or ``common'') to b.
-type BidMap = BlockEnv BlockId
-type State = (UniqFM [CmmBlock], BidMap)
-common_block :: (Outputable h, Uniquable h) => State -> (h, CmmBlock) -> (Bool, State)
-common_block (bmap, subst) (hash, b) =
+common_block :: State -> (HashCode, CmmBlock) -> State
+common_block (old_change, bmap, subst) (hash, b) =
case lookupUFM bmap hash of
Just bs -> case (List.find (eqBlockBodyWith (eqBid subst) b) bs,
mapLookup bid subst) of
(Just b', Nothing) -> addSubst b'
(Just b', Just b'') | entryLabel b' /= b'' -> addSubst b'
- _ -> (False, (addToUFM bmap hash (b : bs), subst))
- Nothing -> (False, (addToUFM bmap hash [b], subst))
+ | otherwise -> (old_change, bmap, subst)
+ _ -> (old_change, addToUFM bmap hash (b : bs), subst)
+ Nothing -> (old_change, addToUFM bmap hash [b], subst)
where bid = entryLabel b
- addSubst b' = my_trace "found new common block" (ppr (entryLabel b')) $
- (True, (bmap, mapInsert bid (entryLabel b') subst))
-
--- Given the map ``subst'' from BlockId -> BlockId, we rewrite the graph.
-upd_graph :: CmmGraph -> BidMap -> CmmGraph
-upd_graph g subst = mapGraphNodes (id, middle, last) g
- where middle = mapExpDeep exp
- last l = last' (mapExpDeep exp l)
- last' :: CmmNode O C -> CmmNode O C
- last' (CmmBranch bid) = CmmBranch $ sub bid
- last' (CmmCondBranch p t f) = cond p (sub t) (sub f)
- last' (CmmCall t (Just bid) a r o) = CmmCall t (Just $ sub bid) a r o
- last' l@(CmmCall _ Nothing _ _ _) = l
- last' (CmmForeignCall t r a bid u i) = CmmForeignCall t r a (sub bid) u i
- last' (CmmSwitch e bs) = CmmSwitch e $ map (liftM sub) bs
- cond p t f = if t == f then CmmBranch t else CmmCondBranch p t f
- exp (CmmStackSlot (CallArea (Young id)) off) =
- CmmStackSlot (CallArea (Young (sub id))) off
- exp (CmmLit (CmmBlock id)) = CmmLit (CmmBlock (sub id))
- exp e = e
- sub = lookupBid subst
+ addSubst b' = my_trace "found new common block" (ppr bid <> char '=' <> ppr (entryLabel b')) $
+ (True, bmap, mapInsert bid (entryLabel b') subst)
+
+
+-- -----------------------------------------------------------------------------
+-- Hashing and equality on blocks
+
+-- Below here is mostly boilerplate: hashing blocks ignoring labels,
+-- and comparing blocks modulo a label mapping.
-- To speed up comparisons, we hash each basic block modulo labels.
-- The hashing is a bit arbitrary (the numbers are completely arbitrary),
-- but it should be fast and good enough.
-hash_block :: CmmBlock -> Int
+hash_block :: CmmBlock -> HashCode
hash_block block =
fromIntegral (foldBlockNodesB3 (hash_fst, hash_mid, hash_lst) block (0 :: Word32) .&. (0x7fffffff :: Word32))
-- UniqFM doesn't like negative Ints
@@ -103,11 +94,11 @@ hash_block block =
hash_lst m h = hash_node m + h `shiftL` 1
hash_node :: CmmNode O x -> Word32
- hash_node (CmmComment (FastString u _ _ _ _)) = cvt u
+ hash_node (CmmComment _) = 0 -- don't care
hash_node (CmmAssign r e) = hash_reg r + hash_e e
hash_node (CmmStore e e') = hash_e e + hash_e e'
hash_node (CmmUnsafeForeignCall t _ as) = hash_tgt t + hash_list hash_e as
- hash_node (CmmBranch _) = 23 -- would be great to hash these properly
+ hash_node (CmmBranch _) = 23 -- NB. ignore the label
hash_node (CmmCondBranch p _ _) = hash_e p
hash_node (CmmCall e _ _ _ _) = hash_e e
hash_node (CmmForeignCall t _ _ _ _ _) = hash_tgt t
@@ -143,18 +134,60 @@ hash_block block =
-- Utilities: equality and substitution on the graph.
-- Given a map ``subst'' from BlockID -> BlockID, we define equality.
-eqBid :: BidMap -> BlockId -> BlockId -> Bool
+eqBid :: BlockEnv BlockId -> BlockId -> BlockId -> Bool
eqBid subst bid bid' = lookupBid subst bid == lookupBid subst bid'
-lookupBid :: BidMap -> BlockId -> BlockId
+lookupBid :: BlockEnv BlockId -> BlockId -> BlockId
lookupBid subst bid = case mapLookup bid subst of
Just bid -> lookupBid subst bid
Nothing -> bid
--- Equality on the body of a block, modulo a function mapping block IDs to block IDs.
+-- Middle nodes and expressions can contain BlockIds, in particular in
+-- CmmStackSlot and CmmBlock, so we have to use a special equality for
+-- these.
+--
+eqMiddleWith :: (BlockId -> BlockId -> Bool)
+ -> CmmNode O O -> CmmNode O O -> Bool
+eqMiddleWith _ (CmmComment _) (CmmComment _) = True
+eqMiddleWith eqBid (CmmAssign r1 e1) (CmmAssign r2 e2)
+ = r1 == r2 && eqExprWith eqBid e1 e2
+eqMiddleWith eqBid (CmmStore l1 r1) (CmmStore l2 r2)
+ = eqExprWith eqBid l1 l2 && eqExprWith eqBid r1 r2
+eqMiddleWith eqBid (CmmUnsafeForeignCall t1 r1 a1)
+ (CmmUnsafeForeignCall t2 r2 a2)
+ = t1 == t2 && r1 == r2 && and (zipWith (eqExprWith eqBid) a1 a2)
+eqMiddleWith _ _ _ = False
+
+eqExprWith :: (BlockId -> BlockId -> Bool)
+ -> CmmExpr -> CmmExpr -> Bool
+eqExprWith eqBid = eq
+ where
+ CmmLit l1 `eq` CmmLit l2 = eqLit l1 l2
+ CmmLoad e1 _ `eq` CmmLoad e2 _ = e1 `eq` e2
+ CmmReg r1 `eq` CmmReg r2 = r1==r2
+ CmmRegOff r1 i1 `eq` CmmRegOff r2 i2 = r1==r2 && i1==i2
+ CmmMachOp op1 es1 `eq` CmmMachOp op2 es2 = op1==op2 && es1 `eqs` es2
+ CmmStackSlot a1 i1 `eq` CmmStackSlot a2 i2 = eqArea a1 a2 && i1==i2
+ _e1 `eq` _e2 = False
+
+ xs `eqs` ys = and (zipWith eq xs ys)
+
+ eqLit (CmmBlock id1) (CmmBlock id2) = eqBid id1 id2
+ eqLit l1 l2 = l1 == l2
+
+ eqArea Old Old = True
+ eqArea (Young id1) (Young id2) = eqBid id1 id2
+ eqArea _ _ = False
+
+-- Equality on the body of a block, modulo a function mapping block
+-- IDs to block IDs.
eqBlockBodyWith :: (BlockId -> BlockId -> Bool) -> CmmBlock -> CmmBlock -> Bool
-eqBlockBodyWith eqBid block block' = middles == middles' && eqLastWith eqBid last last'
- where (_, middles , JustC last :: MaybeC C (CmmNode O C)) = blockToNodeList block
- (_, middles', JustC last' :: MaybeC C (CmmNode O C)) = blockToNodeList block'
+eqBlockBodyWith eqBid block block'
+ = and (zipWith (eqMiddleWith eqBid) (blockToList m) (blockToList m')) &&
+ eqLastWith eqBid l l'
+ where (_,m,l) = blockSplit block
+ (_,m',l') = blockSplit block'
+
+
eqLastWith :: (BlockId -> BlockId -> Bool) -> CmmNode O C -> CmmNode O C -> Bool
eqLastWith eqBid (CmmBranch bid1) (CmmBranch bid2) = eqBid bid1 bid2
diff --git a/compiler/cmm/CmmContFlowOpt.hs b/compiler/cmm/CmmContFlowOpt.hs
index 73ce57e93f..3fabf33f97 100644
--- a/compiler/cmm/CmmContFlowOpt.hs
+++ b/compiler/cmm/CmmContFlowOpt.hs
@@ -2,19 +2,19 @@
{-# OPTIONS_GHC -fno-warn-warnings-deprecations -fno-warn-incomplete-patterns #-}
module CmmContFlowOpt
- ( runCmmContFlowOpts
- , removeUnreachableBlocks, replaceBranches
+ ( cmmCfgOpts
+ , cmmCfgOptsProc
+ , removeUnreachableBlocks
+ , replaceLabels
)
where
import BlockId
import Cmm
import CmmUtils
-import Digraph
import Maybes
-import Outputable
-import Compiler.Hoopl
+import Hoopl
import Control.Monad
import Prelude hiding (succ, unzip, zip)
@@ -24,104 +24,158 @@ import Prelude hiding (succ, unzip, zip)
--
-----------------------------------------------------------------------------
-runCmmContFlowOpts :: CmmGroup -> CmmGroup
-runCmmContFlowOpts = map (optProc cmmCfgOpts)
-
cmmCfgOpts :: CmmGraph -> CmmGraph
-cmmCfgOpts = removeUnreachableBlocks . blockConcat . branchChainElim
- -- Here branchChainElim can ultimately be replaced
- -- with a more exciting combination of optimisations
+cmmCfgOpts = removeUnreachableBlocks . blockConcat
+
+cmmCfgOptsProc :: CmmDecl -> CmmDecl
+cmmCfgOptsProc = optProc cmmCfgOpts
optProc :: (g -> g) -> GenCmmDecl d h g -> GenCmmDecl d h g
optProc opt (CmmProc info lbl g) = CmmProc info lbl (opt g)
optProc _ top = top
+
-----------------------------------------------------------------------------
--
--- Branch Chain Elimination
+-- Block concatenation
--
-----------------------------------------------------------------------------
--- | Remove any basic block of the form L: goto L', and replace L with
--- L' everywhere else, unless L is the successor of a call instruction
--- and L' is the entry block. You don't want to set the successor of a
--- function call to the entry block because there is no good way to
--- store both the infotables for the call and from the callee, while
--- putting the stack pointer in a consistent place.
+-- This optimisation does two things:
+-- - If a block finishes with an unconditional branch, then we may
+-- be able to concatenate the block it points to and remove the
+-- branch. We do this either if the destination block is small
+-- (e.g. just another branch), or if this is the only jump to
+-- this particular destination block.
+--
+-- - If a block finishes in a call whose continuation block is a
+-- goto, then we can shortcut the destination, making the
+-- continuation block the destination of the goto.
+--
+-- Both transformations are improved by working from the end of the
+-- graph towards the beginning, because we may be able to perform many
+-- shortcuts in one go.
+
+
+-- We need to walk over the blocks from the end back to the
+-- beginning. We are going to maintain the "current" graph
+-- (BlockEnv CmmBlock) as we go, and also a mapping from BlockId
+-- to BlockId, representing continuation labels that we have
+-- renamed. This latter mapping is important because we might
+-- shortcut a CmmCall continuation. For example:
+--
+-- Sp[0] = L
+-- call g returns to L
+--
+-- L: goto M
--
--- JD isn't quite sure when it's safe to share continuations for different
--- function calls -- have to think about where the SP will be,
--- so we'll table that problem for now by leaving all call successors alone.
-
-branchChainElim :: CmmGraph -> CmmGraph
-branchChainElim g
- | null lone_branch_blocks = g -- No blocks to remove
- | otherwise = {- pprTrace "branchChainElim" (ppr forest) $ -}
- replaceLabels (mapFromList edges) g
+-- M: ...
+--
+-- So when we shortcut the L block, we need to replace not only
+-- the continuation of the call, but also references to L in the
+-- code (e.g. the assignment Sp[0] = L). So we keep track of
+-- which labels we have renamed and apply the mapping at the end
+-- with replaceLabels.
+
+blockConcat :: CmmGraph -> CmmGraph
+blockConcat g@CmmGraph { g_entry = entry_id }
+ = replaceLabels shortcut_map $ ofBlockMap new_entry new_blocks
where
- blocks = toBlockList g
-
- lone_branch_blocks :: [(BlockId, BlockId)]
- -- each (L,K) is a block of the form
- -- L : goto K
- lone_branch_blocks = mapCatMaybes isLoneBranch blocks
-
- call_succs = foldl add emptyBlockSet blocks
- where add :: BlockSet -> CmmBlock -> BlockSet
- add succs b =
- case lastNode b of
- (CmmCall _ (Just k) _ _ _) -> setInsert k succs
- (CmmForeignCall {succ=k}) -> setInsert k succs
- _ -> succs
-
- isLoneBranch :: CmmBlock -> Maybe (BlockId, BlockId)
- isLoneBranch block
- | (JustC (CmmEntry id), [], JustC (CmmBranch target)) <- blockToNodeList block
- , not (setMember id call_succs)
- = Just (id,target)
- | otherwise
- = Nothing
-
- -- We build a graph from lone_branch_blocks (every node has only
- -- one out edge). Then we
- -- - topologically sort the graph: if from A we can reach B,
- -- then A occurs before B in the result list.
- -- - depth-first search starting from the nodes in this list.
- -- This gives us a [[node]], in which each list is a dependency
- -- chain.
- -- - for each list [a1,a2,...an] replace branches to ai with an.
- --
- -- This approach nicely deals with cycles by ignoring them.
- -- Branches in a cycle will be redirected to somewhere in the
- -- cycle, but we don't really care where. A cycle should be dead code,
- -- and so will be eliminated by removeUnreachableBlocks.
- --
- fromNode (b,_) = b
- toNode a = (a,a)
-
- all_block_ids :: LabelSet
- all_block_ids = setFromList (map fst lone_branch_blocks)
- `setUnion`
- setFromList (map snd lone_branch_blocks)
-
- forest = dfsTopSortG $ graphFromVerticesAndAdjacency nodes lone_branch_blocks
- where nodes = map toNode $ setElems $ all_block_ids
-
- edges = [ (fromNode y, fromNode x)
- | (x:xs) <- map reverse forest, y <- xs ]
+ -- we might be able to shortcut the entry BlockId itself
+ new_entry
+ | Just entry_blk <- mapLookup entry_id new_blocks
+ , Just dest <- canShortcut entry_blk
+ = dest
+ | otherwise
+ = entry_id
-----------------------------------------------------------------
+ blocks = postorderDfs g
+
+ (new_blocks, shortcut_map) =
+ foldr maybe_concat (toBlockMap g, mapEmpty) blocks
+
+ maybe_concat :: CmmBlock
+ -> (BlockEnv CmmBlock, BlockEnv BlockId)
+ -> (BlockEnv CmmBlock, BlockEnv BlockId)
+ maybe_concat block (blocks, shortcut_map)
+ | CmmBranch b' <- last
+ , Just blk' <- mapLookup b' blocks
+ , shouldConcatWith b' blk'
+ = (mapInsert bid (splice head blk') blocks, shortcut_map)
+
+ -- calls: if we can shortcut the continuation label, then
+ -- we must *also* remember to substitute for the label in the
+ -- code, because we will push it somewhere.
+ | Just b' <- callContinuation_maybe last
+ , Just blk' <- mapLookup b' blocks
+ , Just dest <- canShortcut blk'
+ = (blocks, mapInsert b' dest shortcut_map)
+ -- replaceLabels will substitute dest for b' everywhere, later
+
+ -- non-calls: see if we can shortcut any of the successors.
+ | Nothing <- callContinuation_maybe last
+ = ( mapInsert bid (blockJoinTail head shortcut_last) blocks
+ , shortcut_map )
+
+ | otherwise
+ = (blocks, shortcut_map)
+ where
+ (head, last) = blockSplitTail block
+ bid = entryLabel block
+ shortcut_last = mapSuccessors shortcut last
+ shortcut l =
+ case mapLookup l blocks of
+ Just b | Just dest <- canShortcut b -> dest
+ _otherwise -> l
+
+ shouldConcatWith b block
+ | num_preds b == 1 = True -- only one predecessor: go for it
+ | okToDuplicate block = True -- short enough to duplicate
+ | otherwise = False
+ where num_preds bid = mapLookup bid backEdges `orElse` 0
+
+ canShortcut :: CmmBlock -> Maybe BlockId
+ canShortcut block
+ | (_, middle, CmmBranch dest) <- blockSplit block
+ , isEmptyBlock middle
+ = Just dest
+ | otherwise
+ = Nothing
+
+ backEdges :: BlockEnv Int -- number of predecessors for each block
+ backEdges = mapInsertWith (+) entry_id 1 $ -- add 1 for the entry id
+ mapMap setSize $ predMap blocks
+
+ splice :: Block CmmNode C O -> CmmBlock -> CmmBlock
+ splice head rest = head `blockAppend` snd (blockSplitHead rest)
+
+
+callContinuation_maybe :: CmmNode O C -> Maybe BlockId
+callContinuation_maybe (CmmCall { cml_cont = Just b }) = Just b
+callContinuation_maybe (CmmForeignCall { succ = b }) = Just b
+callContinuation_maybe _ = Nothing
+
+okToDuplicate :: CmmBlock -> Bool
+okToDuplicate block
+ = case blockSplit block of (_, m, _) -> isEmptyBlock m
+ -- cheap and cheerful; we might expand this in the future to
+ -- e.g. spot blocks that represent a single instruction or two
+
+------------------------------------------------------------------------
+-- Map over the CmmGraph, replacing each label with its mapping in the
+-- supplied BlockEnv.
replaceLabels :: BlockEnv BlockId -> CmmGraph -> CmmGraph
-replaceLabels env =
- replace_eid . mapGraphNodes1 txnode
+replaceLabels env g
+ | mapNull env = g
+ | otherwise = replace_eid $ mapGraphNodes1 txnode g
where
replace_eid g = g {g_entry = lookup (g_entry g)}
lookup id = mapLookup id env `orElse` id
txnode :: CmmNode e x -> CmmNode e x
txnode (CmmBranch bid) = CmmBranch (lookup bid)
- txnode (CmmCondBranch p t f) = CmmCondBranch (exp p) (lookup t) (lookup f)
+ txnode (CmmCondBranch p t f) = mkCmmCondBranch (exp p) (lookup t) (lookup f)
txnode (CmmSwitch e arms) = CmmSwitch (exp e) (map (liftM lookup) arms)
txnode (CmmCall t k a res r) = CmmCall (exp t) (liftM lookup k) a res r
txnode fc@CmmForeignCall{} = fc{ args = map exp (args fc)
@@ -130,90 +184,25 @@ replaceLabels env =
exp :: CmmExpr -> CmmExpr
exp (CmmLit (CmmBlock bid)) = CmmLit (CmmBlock (lookup bid))
- exp (CmmStackSlot (CallArea (Young id)) i) = CmmStackSlot (CallArea (Young (lookup id))) i
+ exp (CmmStackSlot (Young id) i) = CmmStackSlot (Young (lookup id)) i
exp e = e
-
-replaceBranches :: BlockEnv BlockId -> CmmGraph -> CmmGraph
-replaceBranches env g = mapGraphNodes (id, id, last) g
- where
- last :: CmmNode O C -> CmmNode O C
- last (CmmBranch id) = CmmBranch (lookup id)
- last (CmmCondBranch e ti fi) = CmmCondBranch e (lookup ti) (lookup fi)
- last (CmmSwitch e tbl) = CmmSwitch e (map (fmap lookup) tbl)
- last l@(CmmCall {}) = l
- last l@(CmmForeignCall {}) = l
- lookup id = fmap lookup (mapLookup id env) `orElse` id
- -- XXX: this is a recursive lookup, it follows chains until the lookup
- -- returns Nothing, at which point we return the last BlockId
+mkCmmCondBranch :: CmmExpr -> Label -> Label -> CmmNode O C
+mkCmmCondBranch p t f = if t == f then CmmBranch t else CmmCondBranch p t f
----------------------------------------------------------------
-- Build a map from a block to its set of predecessors. Very useful.
+
predMap :: [CmmBlock] -> BlockEnv BlockSet
predMap blocks = foldr add_preds mapEmpty blocks -- find the back edges
where add_preds block env = foldl (add (entryLabel block)) env (successors block)
add bid env b' =
mapInsert b' (setInsert bid (mapLookup b' env `orElse` setEmpty)) env
------------------------------------------------------------------------------
---
--- Block concatenation
---
------------------------------------------------------------------------------
-
--- If a block B branches to a label L, L is not the entry block,
--- and L has no other predecessors,
--- then we can splice the block starting with L onto the end of B.
--- Order matters, so we work bottom up (reverse postorder DFS).
--- This optimization can be inhibited by unreachable blocks, but
--- the reverse postorder DFS returns only reachable blocks.
---
--- To ensure correctness, we have to make sure that the BlockId of the block
--- we are about to eliminate is not named in another instruction.
---
--- Note: This optimization does _not_ subsume branch chain elimination.
-
-blockConcat :: CmmGraph -> CmmGraph
-blockConcat g@(CmmGraph {g_entry=eid}) =
- replaceLabels concatMap $ ofBlockMap (g_entry g) blocks'
- where
- blocks = postorderDfs g
-
- (blocks', concatMap) =
- foldr maybe_concat (toBlockMap g, mapEmpty) $ blocks
-
- maybe_concat :: CmmBlock -> (LabelMap CmmBlock, LabelMap Label) -> (LabelMap CmmBlock, LabelMap Label)
- maybe_concat b unchanged@(blocks', concatMap) =
- let bid = entryLabel b
- in case blockToNodeList b of
- (JustC h, m, JustC (CmmBranch b')) ->
- if canConcatWith b' then
- (mapInsert bid (splice blocks' h m b') blocks',
- mapInsert b' bid concatMap)
- else unchanged
- _ -> unchanged
-
- num_preds bid = liftM setSize (mapLookup bid backEdges) `orElse` 0
-
- canConcatWith b' = b' /= eid && num_preds b' == 1
-
- backEdges = predMap blocks
-
- splice :: forall map n e x.
- IsMap map =>
- map (Block n e x) -> n C O -> [n O O] -> KeyOf map -> Block n C x
- splice blocks' h m bid' =
- case mapLookup bid' blocks' of
- Nothing -> panic "unknown successor block"
- Just block | (_, m', l') <- blockToNodeList block
- -> blockOfNodeList (JustC h, (m ++ m'), l')
-
-----------------------------------------------------------------------------
--
-- Removing unreachable blocks
---
------------------------------------------------------------------------------
removeUnreachableBlocks :: CmmGraph -> CmmGraph
removeUnreachableBlocks g
diff --git a/compiler/cmm/CmmCvt.hs b/compiler/cmm/CmmCvt.hs
index 80c6079aac..e72eee041c 100644
--- a/compiler/cmm/CmmCvt.hs
+++ b/compiler/cmm/CmmCvt.hs
@@ -12,29 +12,25 @@ import CmmUtils
import qualified OldCmm as Old
import OldPprCmm ()
-import Compiler.Hoopl hiding ((<*>), mkLabel, mkBranch)
+import Hoopl hiding ((<*>), mkLabel, mkBranch)
import Data.Maybe
import Maybes
import Outputable
cmmOfZgraph :: CmmGroup -> Old.CmmGroup
cmmOfZgraph tops = map mapTop tops
- where mapTop (CmmProc h l g) = CmmProc (Old.CmmInfo Nothing Nothing (info_tbl h)) l (ofZgraph g)
+ where mapTop (CmmProc h l g) = CmmProc (info_tbl h) l (ofZgraph g)
mapTop (CmmData s ds) = CmmData s ds
data ValueDirection = Arguments | Results
-add_hints :: Convention -> ValueDirection -> [a] -> [Old.CmmHinted a]
+add_hints :: ForeignTarget -> ValueDirection -> [a] -> [Old.CmmHinted a]
add_hints conv vd args = zipWith Old.CmmHinted args (get_hints conv vd)
-get_hints :: Convention -> ValueDirection -> [ForeignHint]
-get_hints (Foreign (ForeignConvention _ hints _)) Arguments = hints
-get_hints (Foreign (ForeignConvention _ _ hints)) Results = hints
-get_hints _other_conv _vd = repeat NoHint
-
-get_conv :: ForeignTarget -> Convention
-get_conv (PrimTarget _) = NativeNodeCall -- JD: SUSPICIOUS
-get_conv (ForeignTarget _ fc) = Foreign fc
+get_hints :: ForeignTarget -> ValueDirection -> [ForeignHint]
+get_hints (ForeignTarget _ (ForeignConvention _ hints _)) Arguments = hints
+get_hints (ForeignTarget _ (ForeignConvention _ _ hints)) Results = hints
+get_hints (PrimTarget _) _vd = repeat NoHint
cmm_target :: ForeignTarget -> Old.CmmCallTarget
cmm_target (PrimTarget op) = Old.CmmPrim op Nothing
@@ -89,8 +85,8 @@ ofZgraph g = Old.ListGraph $ mapMaybe convert_block $ postorderDfs g
CmmUnsafeForeignCall (PrimTarget MO_Touch) _ _ -> Old.CmmNop
CmmUnsafeForeignCall target ress args ->
Old.CmmCall (cmm_target target)
- (add_hints (get_conv target) Results ress)
- (add_hints (get_conv target) Arguments args)
+ (add_hints target Results ress)
+ (add_hints target Arguments args)
Old.CmmMayReturn
last :: CmmNode O C -> () -> [Old.CmmStmt]
diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs
index 6eb91e89ba..646ecb5c67 100644
--- a/compiler/cmm/CmmExpr.hs
+++ b/compiler/cmm/CmmExpr.hs
@@ -14,11 +14,11 @@ module CmmExpr
, GlobalReg(..), globalRegType, spReg, hpReg, spLimReg, nodeReg, node, baseReg
, VGcPtr(..), vgcFlag -- Temporary!
, DefinerOfLocalRegs, UserOfLocalRegs, foldRegsDefd, foldRegsUsed, filterRegsUsed
- , DefinerOfSlots, UserOfSlots, foldSlotsDefd, foldSlotsUsed
, RegSet, emptyRegSet, elemRegSet, extendRegSet, deleteFromRegSet, mkRegSet
- , plusRegSet, minusRegSet, timesRegSet
- , regUsedIn, regSlot
- , Area(..), AreaId(..), SubArea, SubAreaSet, AreaMap, isStackSlotOf
+ , plusRegSet, minusRegSet, timesRegSet, sizeRegSet, nullRegSet
+ , regSetToList
+ , regUsedIn
+ , Area(..)
, module CmmMachOp
, module CmmType
)
@@ -31,9 +31,9 @@ import CmmMachOp
import BlockId
import CLabel
import Unique
-import UniqSet
-import Data.Map (Map)
+import Data.Set (Set)
+import qualified Data.Set as Set
-----------------------------------------------------------------------------
-- CmmExpr
@@ -42,11 +42,12 @@ import Data.Map (Map)
data CmmExpr
= CmmLit CmmLit -- Literal
- | CmmLoad CmmExpr CmmType -- Read memory location
- | CmmReg CmmReg -- Contents of register
+ | CmmLoad !CmmExpr !CmmType -- Read memory location
+ | CmmReg !CmmReg -- Contents of register
| CmmMachOp MachOp [CmmExpr] -- Machine operation (+, -, *, etc.)
- | CmmStackSlot Area Int -- addressing expression of a stack slot
- | CmmRegOff CmmReg Int
+ | CmmStackSlot Area {-# UNPACK #-} !Int
+ -- addressing expression of a stack slot
+ | CmmRegOff !CmmReg Int
-- CmmRegOff reg i
-- ** is shorthand only, meaning **
-- CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt (fromIntegral i) rep)]
@@ -62,20 +63,16 @@ instance Eq CmmExpr where -- Equality ignores the types
_e1 == _e2 = False
data CmmReg
- = CmmLocal LocalReg
+ = CmmLocal {-# UNPACK #-} !LocalReg
| CmmGlobal GlobalReg
deriving( Eq, Ord )
-- | A stack area is either the stack slot where a variable is spilled
-- or the stack space where function arguments and results are passed.
data Area
- = RegSlot LocalReg
- | CallArea AreaId
- deriving (Eq, Ord)
-
-data AreaId
= Old -- See Note [Old Area]
- | Young BlockId
+ | Young {-# UNPACK #-} !BlockId -- Invariant: must be a continuation BlockId
+ -- See Note [Continuation BlockId] in CmmNode.
deriving (Eq, Ord)
{- Note [Old Area]
@@ -94,15 +91,8 @@ necessarily at the young end of the Old area.
End of note -}
-type SubArea = (Area, Int, Int) -- area, offset, width
-type SubAreaSet = Map Area [SubArea]
-
-type AreaMap = Map Area Int
- -- Byte offset of the oldest byte of the Area,
- -- relative to the oldest byte of the Old Area
-
data CmmLit
- = CmmInt Integer Width
+ = CmmInt !Integer Width
-- Interpretation: the 2's complement representation of the value
-- is truncated to the specified size. This is easier than trying
-- to keep the value within range, because we don't know whether
@@ -120,7 +110,11 @@ data CmmLit
-- It is also used inside the NCG during when generating
-- position-independent code.
| CmmLabelDiffOff CLabel CLabel Int -- label1 - label2 + offset
- | CmmBlock BlockId -- Code label
+
+ | CmmBlock {-# UNPACK #-} !BlockId -- Code label
+ -- Invariant: must be a continuation BlockId
+ -- See Note [Continuation BlockId] in CmmNode.
+
| CmmHighStackMark -- stands for the max stack space used during a procedure
deriving Eq
@@ -163,7 +157,7 @@ maybeInvertCmmExpr _ = Nothing
-----------------------------------------------------------------------------
data LocalReg
- = LocalReg !Unique CmmType
+ = LocalReg {-# UNPACK #-} !Unique CmmType
-- ^ Parameters:
-- 1. Identifier
-- 2. Type
@@ -189,22 +183,35 @@ localRegType (LocalReg _ rep) = rep
-----------------------------------------------------------------------------
-- | Sets of local registers
-type RegSet = UniqSet LocalReg
+
+-- These are used for dataflow facts, and a common operation is taking
+-- the union of two RegSets and then asking whether the union is the
+-- same as one of the inputs. UniqSet isn't good here, because
+-- sizeUniqSet is O(n) whereas Set.size is O(1), so we use ordinary
+-- Sets.
+
+type RegSet = Set LocalReg
emptyRegSet :: RegSet
+nullRegSet :: RegSet -> Bool
elemRegSet :: LocalReg -> RegSet -> Bool
extendRegSet :: RegSet -> LocalReg -> RegSet
deleteFromRegSet :: RegSet -> LocalReg -> RegSet
mkRegSet :: [LocalReg] -> RegSet
minusRegSet, plusRegSet, timesRegSet :: RegSet -> RegSet -> RegSet
-
-emptyRegSet = emptyUniqSet
-elemRegSet = elementOfUniqSet
-extendRegSet = addOneToUniqSet
-deleteFromRegSet = delOneFromUniqSet
-mkRegSet = mkUniqSet
-minusRegSet = minusUniqSet
-plusRegSet = unionUniqSets
-timesRegSet = intersectUniqSets
+sizeRegSet :: RegSet -> Int
+regSetToList :: RegSet -> [LocalReg]
+
+emptyRegSet = Set.empty
+nullRegSet = Set.null
+elemRegSet = Set.member
+extendRegSet = flip Set.insert
+deleteFromRegSet = flip Set.delete
+mkRegSet = Set.fromList
+minusRegSet = Set.difference
+plusRegSet = Set.union
+timesRegSet = Set.intersection
+sizeRegSet = Set.size
+regSetToList = Set.toList
class UserOfLocalRegs a where
foldRegsUsed :: (b -> LocalReg -> b) -> b -> a -> b
@@ -236,7 +243,7 @@ instance DefinerOfLocalRegs LocalReg where
foldRegsDefd f z r = f z r
instance UserOfLocalRegs RegSet where
- foldRegsUsed f = foldUniqSet (flip f)
+ foldRegsUsed f = Set.fold (flip f)
instance UserOfLocalRegs CmmExpr where
foldRegsUsed f z e = expr z e
@@ -271,49 +278,6 @@ reg `regUsedIn` CmmMachOp _ es = any (reg `regUsedIn`) es
_ `regUsedIn` CmmStackSlot _ _ = False
-----------------------------------------------------------------------------
--- Stack slots
------------------------------------------------------------------------------
-
-isStackSlotOf :: CmmExpr -> LocalReg -> Bool
-isStackSlotOf (CmmStackSlot (RegSlot r) _) r' = r == r'
-isStackSlotOf _ _ = False
-
-regSlot :: LocalReg -> CmmExpr
-regSlot r = CmmStackSlot (RegSlot r) (widthInBytes $ typeWidth $ localRegType r)
-
------------------------------------------------------------------------------
--- Stack slot use information for expressions and other types [_$_]
------------------------------------------------------------------------------
-
--- Fold over the area, the offset into the area, and the width of the subarea.
-class UserOfSlots a where
- foldSlotsUsed :: (b -> SubArea -> b) -> b -> a -> b
-
-class DefinerOfSlots a where
- foldSlotsDefd :: (b -> SubArea -> b) -> b -> a -> b
-
-instance UserOfSlots CmmExpr where
- foldSlotsUsed f z e = expr z e
- where expr z (CmmLit _) = z
- expr z (CmmLoad (CmmStackSlot a i) ty) = f z (a, i, widthInBytes $ typeWidth ty)
- expr z (CmmLoad addr _) = foldSlotsUsed f z addr
- expr z (CmmReg _) = z
- expr z (CmmMachOp _ exprs) = foldSlotsUsed f z exprs
- expr z (CmmRegOff _ _) = z
- expr z (CmmStackSlot _ _) = z
-
-instance UserOfSlots a => UserOfSlots [a] where
- foldSlotsUsed _ set [] = set
- foldSlotsUsed f set (x:xs) = foldSlotsUsed f (foldSlotsUsed f set x) xs
-
-instance DefinerOfSlots a => DefinerOfSlots [a] where
- foldSlotsDefd _ set [] = set
- foldSlotsDefd f set (x:xs) = foldSlotsDefd f (foldSlotsDefd f set x) xs
-
-instance DefinerOfSlots SubArea where
- foldSlotsDefd f z a = f z a
-
------------------------------------------------------------------------------
-- Global STG registers
-----------------------------------------------------------------------------
diff --git a/compiler/cmm/CmmInfo.hs b/compiler/cmm/CmmInfo.hs
index fe0c104d1c..a171faa057 100644
--- a/compiler/cmm/CmmInfo.hs
+++ b/compiler/cmm/CmmInfo.hs
@@ -19,6 +19,8 @@ import CmmUtils
import CLabel
import SMRep
import Bitmap
+import Stream (Stream)
+import qualified Stream
import Maybes
import Constants
@@ -40,10 +42,16 @@ mkEmptyContInfoTable info_lbl
, cit_prof = NoProfilingInfo
, cit_srt = NoC_SRT }
-cmmToRawCmm :: Platform -> [Old.CmmGroup] -> IO [Old.RawCmmGroup]
+cmmToRawCmm :: Platform -> Stream IO Old.CmmGroup ()
+ -> IO (Stream IO Old.RawCmmGroup ())
cmmToRawCmm platform cmms
= do { uniqs <- mkSplitUniqSupply 'i'
- ; return (initUs_ uniqs (mapM (concatMapM (mkInfoTable platform)) cmms)) }
+ ; let do_one uniqs cmm = do
+ case initUs uniqs $ concatMapM (mkInfoTable platform) cmm of
+ (b,uniqs') -> return (uniqs',b)
+ -- NB. strictness fixes a space leak. DO NOT REMOVE.
+ ; return (Stream.mapAccumL do_one uniqs cmms >> return ())
+ }
-- Make a concrete info table, represented as a list of CmmStatic
-- (it can't be simply a list of Word, because the SRT field is
@@ -82,7 +90,7 @@ mkInfoTable :: Platform -> CmmDecl -> UniqSM [RawCmmDecl]
mkInfoTable _ (CmmData sec dat)
= return [CmmData sec dat]
-mkInfoTable platform (CmmProc (CmmInfo _ _ info) entry_label blocks)
+mkInfoTable platform (CmmProc info entry_label blocks)
| CmmNonInfoTable <- info -- Code without an info table. Easy.
= return [CmmProc Nothing entry_label blocks]
@@ -91,7 +99,8 @@ mkInfoTable platform (CmmProc (CmmInfo _ _ info) entry_label blocks)
; return (top_decls ++
mkInfoTableAndCode info_lbl info_cts
entry_label blocks) }
- | otherwise = panic "mkInfoTable" -- Patern match overlap check not clever enough
+ | otherwise = panic "mkInfoTable"
+ -- Patern match overlap check not clever enough
-----------------------------------------------------
type InfoTableContents = ( [CmmLit] -- The standard part
diff --git a/compiler/cmm/CmmLayoutStack.hs b/compiler/cmm/CmmLayoutStack.hs
new file mode 100644
index 0000000000..f0dce4a6a1
--- /dev/null
+++ b/compiler/cmm/CmmLayoutStack.hs
@@ -0,0 +1,1048 @@
+{-# LANGUAGE RecordWildCards, GADTs #-}
+module CmmLayoutStack (
+ cmmLayoutStack, setInfoTableStackMap, cmmSink
+ ) where
+
+import StgCmmUtils ( callerSaveVolatileRegs ) -- XXX
+import StgCmmForeign ( saveThreadState, loadThreadState ) -- XXX
+
+import Cmm
+import BlockId
+import CLabel
+import CmmUtils
+import MkGraph
+import Module
+import ForeignCall
+import CmmLive
+import CmmProcPoint
+import SMRep
+import Hoopl hiding ((<*>), mkLast, mkMiddle)
+import Constants
+import UniqSupply
+import Maybes
+import UniqFM
+import Util
+
+import FastString
+import Outputable
+import Data.Map (Map)
+import qualified Data.Map as Map
+import qualified Data.Set as Set
+import Control.Monad.Fix
+import Data.Array as Array
+import Data.Bits
+import Data.List (nub, partition)
+import Control.Monad (liftM)
+
+#include "HsVersions.h"
+
+
+data StackSlot = Occupied | Empty
+ -- Occupied: a return address or part of an update frame
+
+instance Outputable StackSlot where
+ ppr Occupied = ptext (sLit "XXX")
+ ppr Empty = ptext (sLit "---")
+
+-- All stack locations are expressed as positive byte offsets from the
+-- "base", which is defined to be the address above the return address
+-- on the stack on entry to this CmmProc.
+--
+-- Lower addresses have higher StackLocs.
+--
+type StackLoc = ByteOff
+
+{-
+ A StackMap describes the stack at any given point. At a continuation
+ it has a particular layout, like this:
+
+ | | <- base
+ |-------------|
+ | ret0 | <- base + 8
+ |-------------|
+ . upd frame . <- base + sm_ret_off
+ |-------------|
+ | |
+ . vars .
+ . (live/dead) .
+ | | <- base + sm_sp - sm_args
+ |-------------|
+ | ret1 |
+ . ret vals . <- base + sm_sp (<--- Sp points here)
+ |-------------|
+
+Why do we include the final return address (ret0) in our stack map? I
+have absolutely no idea, but it seems to be done that way consistently
+in the rest of the code generator, so I played along here. --SDM
+
+Note that we will be constructing an info table for the continuation
+(ret1), which needs to describe the stack down to, but not including,
+the update frame (or ret0, if there is no update frame).
+-}
+
+data StackMap = StackMap
+ { sm_sp :: StackLoc
+ -- ^ the offset of Sp relative to the base on entry
+ -- to this block.
+ , sm_args :: ByteOff
+ -- ^ the number of bytes of arguments in the area for this block
+ -- Defn: the offset of young(L) relative to the base is given by
+ -- (sm_sp - sm_args) of the StackMap for block L.
+ , sm_ret_off :: ByteOff
+ -- ^ Number of words of stack that we do not describe with an info
+ -- table, because it contains an update frame.
+ , sm_regs :: UniqFM (LocalReg,StackLoc)
+ -- ^ regs on the stack
+ }
+
+instance Outputable StackMap where
+ ppr StackMap{..} =
+ text "Sp = " <> int sm_sp $$
+ text "sm_args = " <> int sm_args $$
+ text "sm_ret_off = " <> int sm_ret_off $$
+ text "sm_regs = " <> ppr (eltsUFM sm_regs)
+
+
+cmmLayoutStack :: ProcPointSet -> ByteOff -> CmmGraph
+ -> UniqSM (CmmGraph, BlockEnv StackMap)
+cmmLayoutStack procpoints entry_args
+ graph0@(CmmGraph { g_entry = entry })
+ = do
+ pprTrace "cmmLayoutStack" (ppr entry_args) $ return ()
+ (graph, liveness) <- removeDeadAssignments graph0
+ pprTrace "liveness" (ppr liveness) $ return ()
+ let blocks = postorderDfs graph
+
+ (final_stackmaps, final_high_sp, new_blocks) <-
+ mfix $ \ ~(rec_stackmaps, rec_high_sp, _new_blocks) ->
+ layout procpoints liveness entry entry_args
+ rec_stackmaps rec_high_sp blocks
+
+ new_blocks' <- mapM lowerSafeForeignCall new_blocks
+
+ pprTrace ("Sp HWM") (ppr final_high_sp) $
+ return (ofBlockList entry new_blocks', final_stackmaps)
+
+
+
+layout :: BlockSet -- proc points
+ -> BlockEnv CmmLive -- liveness
+ -> BlockId -- entry
+ -> ByteOff -- stack args on entry
+
+ -> BlockEnv StackMap -- [final] stack maps
+ -> ByteOff -- [final] Sp high water mark
+
+ -> [CmmBlock] -- [in] blocks
+
+ -> UniqSM
+ ( BlockEnv StackMap -- [out] stack maps
+ , ByteOff -- [out] Sp high water mark
+ , [CmmBlock] -- [out] new blocks
+ )
+
+layout procpoints liveness entry entry_args final_stackmaps final_hwm blocks
+ = go blocks init_stackmap entry_args []
+ where
+ (updfr, cont_info) = collectContInfo blocks
+
+ init_stackmap = mapSingleton entry StackMap{ sm_sp = entry_args
+ , sm_args = entry_args
+ , sm_ret_off = updfr
+ , sm_regs = emptyUFM
+ }
+
+ go [] acc_stackmaps acc_hwm acc_blocks
+ = return (acc_stackmaps, acc_hwm, acc_blocks)
+
+ go (b0 : bs) acc_stackmaps acc_hwm acc_blocks
+ = do
+ let (entry0@(CmmEntry entry_lbl), middle0, last0) = blockSplit b0
+
+ let stack0@StackMap { sm_sp = sp0 }
+ = mapFindWithDefault
+ (pprPanic "no stack map for" (ppr entry_lbl))
+ entry_lbl acc_stackmaps
+
+ pprTrace "layout" (ppr entry_lbl <+> ppr stack0) $ return ()
+
+ -- (a) Update the stack map to include the effects of
+ -- assignments in this block
+ let stack1 = foldBlockNodesF (procMiddle acc_stackmaps) middle0 stack0
+
+ -- (b) Insert assignments to reload all the live variables if this
+ -- block is a proc point
+ let middle1 = if entry_lbl `setMember` procpoints
+ then foldr blockCons middle0 (insertReloads stack0)
+ else middle0
+
+ -- (c) Look at the last node and if we are making a call or
+ -- jumping to a proc point, we must save the live
+ -- variables, adjust Sp, and construct the StackMaps for
+ -- each of the successor blocks. See handleLastNode for
+ -- details.
+ (middle2, sp_off, last1, fixup_blocks, out)
+ <- handleLastNode procpoints liveness cont_info
+ acc_stackmaps stack1 middle0 last0
+
+ pprTrace "layout(out)" (ppr out) $ return ()
+
+ -- (d) Manifest Sp: run over the nodes in the block and replace
+ -- CmmStackSlot with CmmLoad from Sp with a concrete offset.
+ --
+ -- our block:
+ -- middle1 -- the original middle nodes
+ -- middle2 -- live variable saves from handleLastNode
+ -- Sp = Sp + sp_off -- Sp adjustment goes here
+ -- last1 -- the last node
+ --
+ let middle_pre = blockToList $ foldl blockSnoc middle1 middle2
+
+ sp_high = final_hwm - entry_args
+ -- The stack check value is adjusted by the Sp offset on
+ -- entry to the proc, which is entry_args. We are
+ -- assuming that we only do a stack check at the
+ -- beginning of a proc, and we don't modify Sp before the
+ -- check.
+
+ final_blocks = manifestSp final_stackmaps stack0 sp0 sp_high entry0
+ middle_pre sp_off last1 fixup_blocks
+
+ acc_stackmaps' = mapUnion acc_stackmaps out
+
+ hwm' = maximum (acc_hwm : (sp0 - sp_off) : map sm_sp (mapElems out))
+
+ go bs acc_stackmaps' hwm' (final_blocks ++ acc_blocks)
+
+
+-- -----------------------------------------------------------------------------
+
+-- This doesn't seem right somehow. We need to find out whether this
+-- proc will push some update frame material at some point, so that we
+-- can avoid using that area of the stack for spilling. The
+-- updfr_space field of the CmmProc *should* tell us, but it doesn't
+-- (I think maybe it gets filled in later when we do proc-point
+-- splitting).
+--
+-- So we'll just take the max of all the cml_ret_offs. This could be
+-- unnecessarily pessimistic, but probably not in the code we
+-- generate.
+
+collectContInfo :: [CmmBlock] -> (ByteOff, BlockEnv ByteOff)
+collectContInfo blocks
+ = (maximum ret_offs, mapFromList (catMaybes mb_argss))
+ where
+ (mb_argss, ret_offs) = mapAndUnzip get_cont blocks
+
+ get_cont b =
+ case lastNode b of
+ CmmCall { cml_cont = Just l, .. }
+ -> (Just (l, cml_ret_args), cml_ret_off)
+ CmmForeignCall { .. }
+ -> (Just (succ, 0), updfr) -- ??
+ _other -> (Nothing, 0)
+
+
+-- -----------------------------------------------------------------------------
+-- Updating the StackMap from middle nodes
+
+-- Look for loads from stack slots, and update the StackMap. This is
+-- purely for optimisation reasons, so that we can avoid saving a
+-- variable back to a different stack slot if it is already on the
+-- stack.
+--
+-- This happens a lot: for example when function arguments are passed
+-- on the stack and need to be immediately saved across a call, we
+-- want to just leave them where they are on the stack.
+--
+procMiddle :: BlockEnv StackMap -> CmmNode e x -> StackMap -> StackMap
+procMiddle stackmaps node sm
+ = case node of
+ CmmAssign (CmmLocal r) (CmmLoad (CmmStackSlot area off) _)
+ -> sm { sm_regs = addToUFM (sm_regs sm) r (r,loc) }
+ where loc = getStackLoc area off stackmaps
+ CmmAssign (CmmLocal r) _other
+ -> sm { sm_regs = delFromUFM (sm_regs sm) r }
+ _other
+ -> sm
+
+getStackLoc :: Area -> ByteOff -> BlockEnv StackMap -> StackLoc
+getStackLoc Old n _ = n
+getStackLoc (Young l) n stackmaps =
+ case mapLookup l stackmaps of
+ Nothing -> pprPanic "getStackLoc" (ppr l)
+ Just sm -> sm_sp sm - sm_args sm + n
+
+
+-- -----------------------------------------------------------------------------
+-- Handling stack allocation for a last node
+
+-- We take a single last node and turn it into:
+--
+-- C1 (some statements)
+-- Sp = Sp + N
+-- C2 (some more statements)
+-- call f() -- the actual last node
+--
+-- plus possibly some more blocks (we may have to add some fixup code
+-- between the last node and the continuation).
+--
+-- C1: is the code for saving the variables across this last node onto
+-- the stack, if the continuation is a call or jumps to a proc point.
+--
+-- C2: if the last node is a safe foreign call, we have to inject some
+-- extra code that goes *after* the Sp adjustment.
+
+handleLastNode
+ :: ProcPointSet -> BlockEnv CmmLive -> BlockEnv ByteOff
+ -> BlockEnv StackMap -> StackMap
+ -> Block CmmNode O O
+ -> CmmNode O C
+ -> UniqSM
+ ( [CmmNode O O] -- nodes to go *before* the Sp adjustment
+ , ByteOff -- amount to adjust Sp
+ , CmmNode O C -- new last node
+ , [CmmBlock] -- new blocks
+ , BlockEnv StackMap -- stackmaps for the continuations
+ )
+
+handleLastNode procpoints liveness cont_info stackmaps
+ stack0@StackMap { sm_sp = sp0 } middle last
+ = case last of
+ -- At each return / tail call,
+ -- adjust Sp to point to the last argument pushed, which
+ -- is cml_args, after popping any other junk from the stack.
+ CmmCall{ cml_cont = Nothing, .. } -> do
+ let sp_off = sp0 - cml_args
+ return ([], sp_off, last, [], mapEmpty)
+
+ -- At each CmmCall with a continuation:
+ CmmCall{ cml_cont = Just cont_lbl, .. } ->
+ return $ lastCall cont_lbl cml_args cml_ret_args cml_ret_off
+
+ CmmForeignCall{ succ = cont_lbl, .. } -> do
+ return $ lastCall cont_lbl wORD_SIZE wORD_SIZE (sm_ret_off stack0)
+ -- one word each for args and results: the return address
+
+ CmmBranch{..} -> handleProcPoints
+ CmmCondBranch{..} -> handleProcPoints
+ CmmSwitch{..} -> handleProcPoints
+
+ where
+ -- Calls and ForeignCalls are handled the same way:
+ lastCall :: BlockId -> ByteOff -> ByteOff -> ByteOff
+ -> ( [CmmNode O O]
+ , ByteOff
+ , CmmNode O C
+ , [CmmBlock]
+ , BlockEnv StackMap
+ )
+ lastCall lbl cml_args cml_ret_args cml_ret_off
+ = ( assignments
+ , spOffsetForCall sp0 cont_stack cml_args
+ , last
+ , [] -- no new blocks
+ , mapSingleton lbl cont_stack )
+ where
+ (assignments, cont_stack) = prepareStack lbl cml_ret_args cml_ret_off
+
+
+ prepareStack lbl cml_ret_args cml_ret_off
+ | Just cont_stack <- mapLookup lbl stackmaps
+ -- If we have already seen this continuation before, then
+ -- we just have to make the stack look the same:
+ = (fixupStack stack0 cont_stack, cont_stack)
+ -- Otherwise, we have to allocate the stack frame
+ | otherwise
+ = (save_assignments, new_cont_stack)
+ where
+ (new_cont_stack, save_assignments)
+ = setupStackFrame lbl liveness cml_ret_off cml_ret_args stack0
+
+
+ -- For other last nodes (branches), if any of the targets is a
+ -- proc point, we have to set up the stack to match what the proc
+ -- point is expecting.
+ --
+ handleProcPoints :: UniqSM ( [CmmNode O O]
+ , ByteOff
+ , CmmNode O C
+ , [CmmBlock]
+ , BlockEnv StackMap )
+
+ handleProcPoints
+ -- Note [diamond proc point]
+ | Just l <- futureContinuation middle
+ , (nub $ filter (`setMember` procpoints) $ successors last) == [l]
+ = do
+ let cont_args = mapFindWithDefault 0 l cont_info
+ (assigs, cont_stack) = prepareStack l cont_args (sm_ret_off stack0)
+ out = mapFromList [ (l', cont_stack)
+ | l' <- successors last ]
+ return ( assigs
+ , spOffsetForCall sp0 cont_stack wORD_SIZE
+ , last
+ , []
+ , out)
+
+ | otherwise = do
+ pps <- mapM handleProcPoint (successors last)
+ let lbl_map :: LabelMap Label
+ lbl_map = mapFromList [ (l,tmp) | (l,tmp,_,_) <- pps ]
+ fix_lbl l = mapLookup l lbl_map `orElse` l
+ return ( []
+ , 0
+ , mapSuccessors fix_lbl last
+ , concat [ blk | (_,_,_,blk) <- pps ]
+ , mapFromList [ (l, sm) | (l,_,sm,_) <- pps ] )
+
+ -- For each proc point that is a successor of this block
+ -- (a) if the proc point already has a stackmap, we need to
+ -- shuffle the current stack to make it look the same.
+ -- We have to insert a new block to make this happen.
+ -- (b) otherwise, call "allocate live stack0" to make the
+ -- stack map for the proc point
+ handleProcPoint :: BlockId
+ -> UniqSM (BlockId, BlockId, StackMap, [CmmBlock])
+ handleProcPoint l
+ | not (l `setMember` procpoints) = return (l, l, stack0, [])
+ | otherwise = do
+ tmp_lbl <- liftM mkBlockId $ getUniqueM
+ let
+ (stack2, assigs) =
+ case mapLookup l stackmaps of
+ Just pp_sm -> (pp_sm, fixupStack stack0 pp_sm)
+ Nothing ->
+ pprTrace "first visit to proc point"
+ (ppr l <+> ppr stack1) $
+ (stack1, assigs)
+ where
+ cont_args = mapFindWithDefault 0 l cont_info
+ (stack1, assigs) =
+ setupStackFrame l liveness (sm_ret_off stack0)
+ cont_args stack0
+
+ sp_off = sp0 - sm_sp stack2
+
+ block = blockJoin (CmmEntry tmp_lbl)
+ (maybeAddSpAdj sp_off (blockFromList assigs))
+ (CmmBranch l)
+ --
+ return (l, tmp_lbl, stack2, [block])
+
+
+
+-- Sp is currently pointing to current_sp,
+-- we want it to point to
+-- (sm_sp cont_stack - sm_args cont_stack + args)
+-- so the difference is
+-- sp0 - (sm_sp cont_stack - sm_args cont_stack + args)
+spOffsetForCall :: ByteOff -> StackMap -> ByteOff -> ByteOff
+spOffsetForCall current_sp cont_stack args
+ = current_sp - (sm_sp cont_stack - sm_args cont_stack + args)
+
+
+-- | create a sequence of assignments to establish the new StackMap,
+-- given the old StackMap.
+fixupStack :: StackMap -> StackMap -> [CmmNode O O]
+fixupStack old_stack new_stack = concatMap move new_locs
+ where
+ old_map :: Map LocalReg ByteOff
+ old_map = Map.fromList (stackSlotRegs old_stack)
+ new_locs = stackSlotRegs new_stack
+
+ move (r,n)
+ | Just m <- Map.lookup r old_map, n == m = []
+ | otherwise = [CmmStore (CmmStackSlot Old n)
+ (CmmReg (CmmLocal r))]
+
+
+
+setupStackFrame
+ :: BlockId -- label of continuation
+ -> BlockEnv CmmLive -- liveness
+ -> ByteOff -- updfr
+ -> ByteOff -- bytes of return values on stack
+ -> StackMap -- current StackMap
+ -> (StackMap, [CmmNode O O])
+
+setupStackFrame lbl liveness updfr_off ret_args stack0
+ = (cont_stack, assignments)
+ where
+ -- get the set of LocalRegs live in the continuation
+ live = mapFindWithDefault Set.empty lbl liveness
+
+ -- the stack from the base to updfr_off is off-limits.
+ -- our new stack frame contains:
+ -- * saved live variables
+ -- * the return address [young(C) + 8]
+ -- * the args for the call,
+ -- which are replaced by the return values at the return
+ -- point.
+
+ -- everything up to updfr_off is off-limits
+ -- stack1 contains updfr_off, plus everything we need to save
+ (stack1, assignments) = allocate updfr_off live stack0
+
+ -- And the Sp at the continuation is:
+ -- sm_sp stack1 + ret_args
+ cont_stack = stack1{ sm_sp = sm_sp stack1 + ret_args
+ , sm_args = ret_args
+ , sm_ret_off = updfr_off
+ }
+
+
+-- -----------------------------------------------------------------------------
+-- Note [diamond proc point]
+--
+-- This special case looks for the pattern we get from a typical
+-- tagged case expression:
+--
+-- Sp[young(L1)] = L1
+-- if (R1 & 7) != 0 goto L1 else goto L2
+-- L2:
+-- call [R1] returns to L1
+-- L1: live: {y}
+-- x = R1
+--
+-- If we let the generic case handle this, we get
+--
+-- Sp[-16] = L1
+-- if (R1 & 7) != 0 goto L1a else goto L2
+-- L2:
+-- Sp[-8] = y
+-- Sp = Sp - 16
+-- call [R1] returns to L1
+-- L1a:
+-- Sp[-8] = y
+-- Sp = Sp - 16
+-- goto L1
+-- L1:
+-- x = R1
+--
+-- The code for saving the live vars is duplicated in each branch, and
+-- furthermore there is an extra jump in the fast path (assuming L1 is
+-- a proc point, which it probably is if there is a heap check).
+--
+-- So to fix this we want to set up the stack frame before the
+-- conditional jump. How do we know when to do this, and when it is
+-- safe? The basic idea is, when we see the assignment
+--
+-- Sp[young(L)] = L
+--
+-- we know that
+-- * we are definitely heading for L
+-- * there can be no more reads from another stack area, because young(L)
+-- overlaps with it.
+--
+-- We don't necessarily know that everything live at L is live now
+-- (some might be assigned between here and the jump to L). So we
+-- simplify and only do the optimisation when we see
+--
+-- (1) a block containing an assignment of a return address L
+-- (2) ending in a branch where one (and only) continuation goes to L,
+-- and no other continuations go to proc points.
+--
+-- then we allocate the stack frame for L at the end of the block,
+-- before the branch.
+--
+-- We could generalise (2), but that would make it a bit more
+-- complicated to handle, and this currently catches the common case.
+
+futureContinuation :: Block CmmNode O O -> Maybe BlockId
+futureContinuation middle = foldBlockNodesB f middle Nothing
+ where f :: CmmNode a b -> Maybe BlockId -> Maybe BlockId
+ f (CmmStore (CmmStackSlot (Young l) _) (CmmLit (CmmBlock _))) _
+ = Just l
+ f _ r = r
+
+-- -----------------------------------------------------------------------------
+-- Saving live registers
+
+-- | Given a set of live registers and a StackMap, save all the registers
+-- on the stack and return the new StackMap and the assignments to do
+-- the saving.
+--
+allocate :: ByteOff -> RegSet -> StackMap -> (StackMap, [CmmNode O O])
+allocate ret_off live stackmap@StackMap{ sm_sp = sp0
+ , sm_regs = regs0 }
+ =
+ pprTrace "allocate" (ppr live $$ ppr stackmap) $
+
+ -- we only have to save regs that are not already in a slot
+ let to_save = filter (not . (`elemUFM` regs0)) (Set.elems live)
+ regs1 = filterUFM (\(r,_) -> elemRegSet r live) regs0
+ in
+
+ -- make a map of the stack
+ let stack = reverse $ Array.elems $
+ accumArray (\_ x -> x) Empty (1, toWords (max sp0 ret_off)) $
+ ret_words ++ live_words
+ where ret_words =
+ [ (x, Occupied)
+ | x <- [ 1 .. toWords ret_off] ]
+ live_words =
+ [ (toWords x, Occupied)
+ | (r,off) <- eltsUFM regs1,
+ let w = localRegBytes r,
+ x <- [ off, off-wORD_SIZE .. off - w + 1] ]
+ in
+
+ -- Pass over the stack: find slots to save all the new live variables,
+ -- choosing the oldest slots first (hence a foldr).
+ let
+ save slot ([], stack, n, assigs, regs) -- no more regs to save
+ = ([], slot:stack, n `plusW` 1, assigs, regs)
+ save slot (to_save, stack, n, assigs, regs)
+ = case slot of
+ Occupied -> (to_save, Occupied:stack, n `plusW` 1, assigs, regs)
+ Empty
+ | Just (stack', r, to_save') <-
+ select_save to_save (slot:stack)
+ -> let assig = CmmStore (CmmStackSlot Old n')
+ (CmmReg (CmmLocal r))
+ n' = n `plusW` 1
+ in
+ (to_save', stack', n', assig : assigs, (r,(r,n')):regs)
+
+ | otherwise
+ -> (to_save, slot:stack, n `plusW` 1, assigs, regs)
+
+ -- we should do better here: right now we'll fit the smallest first,
+ -- but it would make more sense to fit the biggest first.
+ select_save :: [LocalReg] -> [StackSlot]
+ -> Maybe ([StackSlot], LocalReg, [LocalReg])
+ select_save regs stack = go regs []
+ where go [] _no_fit = Nothing
+ go (r:rs) no_fit
+ | Just rest <- dropEmpty words stack
+ = Just (replicate words Occupied ++ rest, r, rs++no_fit)
+ | otherwise
+ = go rs (r:no_fit)
+ where words = localRegWords r
+
+ -- fill in empty slots as much as possible
+ (still_to_save, save_stack, n, save_assigs, save_regs)
+ = foldr save (to_save, [], 0, [], []) stack
+
+ -- push any remaining live vars on the stack
+ (push_sp, push_assigs, push_regs)
+ = foldr push (n, [], []) still_to_save
+ where
+ push r (n, assigs, regs)
+ = (n', assig : assigs, (r,(r,n')) : regs)
+ where
+ n' = n + localRegBytes r
+ assig = CmmStore (CmmStackSlot Old n')
+ (CmmReg (CmmLocal r))
+
+ trim_sp
+ | not (null push_regs) = push_sp
+ | otherwise
+ = n `plusW` (- length (takeWhile isEmpty save_stack))
+
+ final_regs = regs1 `addListToUFM` push_regs
+ `addListToUFM` save_regs
+
+ in
+ -- XXX should be an assert
+ if ( n /= max sp0 ret_off ) then pprPanic "allocate" (ppr n <+> ppr sp0 <+> ppr ret_off) else
+
+ if (trim_sp .&. (wORD_SIZE - 1)) /= 0 then pprPanic "allocate2" (ppr trim_sp <+> ppr final_regs <+> ppr push_sp) else
+
+ ( stackmap { sm_regs = final_regs , sm_sp = trim_sp }
+ , push_assigs ++ save_assigs )
+
+
+-- -----------------------------------------------------------------------------
+-- Manifesting Sp
+
+-- | Manifest Sp: turn all the CmmStackSlots into CmmLoads from Sp. The
+-- block looks like this:
+--
+-- middle_pre -- the middle nodes
+-- Sp = Sp + sp_off -- Sp adjustment goes here
+-- last -- the last node
+--
+-- And we have some extra blocks too (that don't contain Sp adjustments)
+--
+-- The adjustment for middle_pre will be different from that for
+-- middle_post, because the Sp adjustment intervenes.
+--
+manifestSp
+ :: BlockEnv StackMap -- StackMaps for other blocks
+ -> StackMap -- StackMap for this block
+ -> ByteOff -- Sp on entry to the block
+ -> ByteOff -- SpHigh
+ -> CmmNode C O -- first node
+ -> [CmmNode O O] -- middle
+ -> ByteOff -- sp_off
+ -> CmmNode O C -- last node
+ -> [CmmBlock] -- new blocks
+ -> [CmmBlock] -- final blocks with Sp manifest
+
+manifestSp stackmaps stack0 sp0 sp_high
+ first middle_pre sp_off last fixup_blocks
+ = final_block : fixup_blocks'
+ where
+ area_off = getAreaOff stackmaps
+
+ adj_pre_sp, adj_post_sp :: CmmNode e x -> CmmNode e x
+ adj_pre_sp = mapExpDeep (areaToSp sp0 sp_high area_off)
+ adj_post_sp = mapExpDeep (areaToSp (sp0 - sp_off) sp_high area_off)
+
+ final_middle = maybeAddSpAdj sp_off $
+ blockFromList $
+ map adj_pre_sp $
+ elimStackStores stack0 stackmaps area_off $
+ middle_pre
+
+ final_last = optStackCheck (adj_post_sp last)
+
+ final_block = blockJoin first final_middle final_last
+
+ fixup_blocks' = map (mapBlock3' (id, adj_post_sp, id)) fixup_blocks
+
+
+getAreaOff :: BlockEnv StackMap -> (Area -> StackLoc)
+getAreaOff _ Old = 0
+getAreaOff stackmaps (Young l) =
+ case mapLookup l stackmaps of
+ Just sm -> sm_sp sm - sm_args sm
+ Nothing -> pprPanic "getAreaOff" (ppr l)
+
+
+maybeAddSpAdj :: ByteOff -> Block CmmNode O O -> Block CmmNode O O
+maybeAddSpAdj 0 block = block
+maybeAddSpAdj sp_off block
+ = block `blockSnoc` CmmAssign spReg (cmmOffset (CmmReg spReg) sp_off)
+
+
+{-
+Sp(L) is the Sp offset on entry to block L relative to the base of the
+OLD area.
+
+SpArgs(L) is the size of the young area for L, i.e. the number of
+arguments.
+
+ - in block L, each reference to [old + N] turns into
+ [Sp + Sp(L) - N]
+
+ - in block L, each reference to [young(L') + N] turns into
+ [Sp + Sp(L) - Sp(L') + SpArgs(L') - N]
+
+ - be careful with the last node of each block: Sp has already been adjusted
+ to be Sp + Sp(L) - Sp(L')
+-}
+
+areaToSp :: ByteOff -> ByteOff -> (Area -> StackLoc) -> CmmExpr -> CmmExpr
+areaToSp sp_old _sp_hwm area_off (CmmStackSlot area n) =
+ cmmOffset (CmmReg spReg) (sp_old - area_off area - n)
+areaToSp _ sp_hwm _ (CmmLit CmmHighStackMark) = CmmLit (mkIntCLit sp_hwm)
+areaToSp _ _ _ (CmmMachOp (MO_U_Lt _) -- Note [null stack check]
+ [CmmMachOp (MO_Sub _)
+ [ CmmReg (CmmGlobal Sp)
+ , CmmLit (CmmInt 0 _)],
+ CmmReg (CmmGlobal SpLim)]) = CmmLit (CmmInt 0 wordWidth)
+areaToSp _ _ _ other = other
+
+-- -----------------------------------------------------------------------------
+-- Note [null stack check]
+--
+-- If the high-water Sp is zero, then we end up with
+--
+-- if (Sp - 0 < SpLim) then .. else ..
+--
+-- and possibly some dead code for the failure case. Optimising this
+-- away depends on knowing that SpLim <= Sp, so it is really the job
+-- of the stack layout algorithm, hence we do it now. This is also
+-- convenient because control-flow optimisation later will drop the
+-- dead code.
+
+optStackCheck :: CmmNode O C -> CmmNode O C
+optStackCheck n = -- Note [null stack check]
+ case n of
+ CmmCondBranch (CmmLit (CmmInt 0 _)) _true false -> CmmBranch false
+ other -> other
+
+
+-- -----------------------------------------------------------------------------
+
+-- | Eliminate stores of the form
+--
+-- Sp[area+n] = r
+--
+-- when we know that r is already in the same slot as Sp[area+n]. We
+-- could do this in a later optimisation pass, but that would involve
+-- a separate analysis and we already have the information to hand
+-- here. It helps clean up some extra stack stores in common cases.
+--
+-- Note that we may have to modify the StackMap as we walk through the
+-- code using procMiddle, since an assignment to a variable in the
+-- StackMap will invalidate its mapping there.
+--
+elimStackStores :: StackMap
+ -> BlockEnv StackMap
+ -> (Area -> ByteOff)
+ -> [CmmNode O O]
+ -> [CmmNode O O]
+elimStackStores stackmap stackmaps area_off nodes
+ = go stackmap nodes
+ where
+ go _stackmap [] = []
+ go stackmap (n:ns)
+ = case n of
+ CmmStore (CmmStackSlot area m) (CmmReg (CmmLocal r))
+ | Just (_,off) <- lookupUFM (sm_regs stackmap) r
+ , area_off area + m == off
+ -> pprTrace "eliminated a node!" (ppr r) $ go stackmap ns
+ _otherwise
+ -> n : go (procMiddle stackmaps n stackmap) ns
+
+
+-- -----------------------------------------------------------------------------
+-- Update info tables to include stack liveness
+
+
+setInfoTableStackMap :: BlockEnv StackMap -> CmmDecl -> CmmDecl
+setInfoTableStackMap stackmaps
+ (CmmProc top_info@TopInfo{..} l g@CmmGraph{g_entry = eid})
+ = CmmProc top_info{ info_tbl = fix_info info_tbl } l g
+ where
+ fix_info info_tbl@CmmInfoTable{ cit_rep = StackRep _ } =
+ info_tbl { cit_rep = StackRep (get_liveness eid) }
+ fix_info other = other
+
+ get_liveness :: BlockId -> Liveness
+ get_liveness lbl
+ = case mapLookup lbl stackmaps of
+ Nothing -> pprPanic "setInfoTableStackMap" (ppr lbl)
+ Just sm -> stackMapToLiveness sm
+
+setInfoTableStackMap _ d = d
+
+
+stackMapToLiveness :: StackMap -> Liveness
+stackMapToLiveness StackMap{..} =
+ reverse $ Array.elems $
+ accumArray (\_ x -> x) True (toWords sm_ret_off + 1,
+ toWords (sm_sp - sm_args)) live_words
+ where
+ live_words = [ (toWords off, False)
+ | (r,off) <- eltsUFM sm_regs, isGcPtrType (localRegType r) ]
+
+
+-- -----------------------------------------------------------------------------
+-- Lowering safe foreign calls
+
+{-
+Note [lower safe foreign calls]
+
+We start with
+
+ Sp[young(L1)] = L1
+ ,-----------------------
+ | r1 = foo(x,y,z) returns to L1
+ '-----------------------
+ L1:
+ R1 = r1 -- copyIn, inserted by mkSafeCall
+ ...
+
+the stack layout algorithm will arrange to save and reload everything
+live across the call. Our job now is to expand the call so we get
+
+ Sp[young(L1)] = L1
+ ,-----------------------
+ | SAVE_THREAD_STATE()
+ | token = suspendThread(BaseReg, interruptible)
+ | r = foo(x,y,z)
+ | BaseReg = resumeThread(token)
+ | LOAD_THREAD_STATE()
+ | R1 = r -- copyOut
+ | jump L1
+ '-----------------------
+ L1:
+ r = R1 -- copyIn, inserted by mkSafeCall
+ ...
+
+Note the copyOut, which saves the results in the places that L1 is
+expecting them (see Note {safe foreign call convention]).
+-}
+
+lowerSafeForeignCall :: CmmBlock -> UniqSM CmmBlock
+lowerSafeForeignCall block
+ | (entry, middle, CmmForeignCall { .. }) <- blockSplit block
+ = do
+ -- Both 'id' and 'new_base' are KindNonPtr because they're
+ -- RTS-only objects and are not subject to garbage collection
+ id <- newTemp bWord
+ new_base <- newTemp (cmmRegType (CmmGlobal BaseReg))
+ let (caller_save, caller_load) = callerSaveVolatileRegs
+ load_tso <- newTemp gcWord
+ load_stack <- newTemp gcWord
+ let suspend = saveThreadState <*>
+ caller_save <*>
+ mkMiddle (callSuspendThread id intrbl)
+ midCall = mkUnsafeCall tgt res args
+ resume = mkMiddle (callResumeThread new_base id) <*>
+ -- Assign the result to BaseReg: we
+ -- might now have a different Capability!
+ mkAssign (CmmGlobal BaseReg) (CmmReg (CmmLocal new_base)) <*>
+ caller_load <*>
+ loadThreadState load_tso load_stack
+ -- Note: The successor must be a procpoint, and we have already split,
+ -- so we use a jump, not a branch.
+ succLbl = CmmLit (CmmLabel (infoTblLbl succ))
+
+ (ret_args, copyout) = copyOutOflow NativeReturn Jump (Young succ)
+ (map (CmmReg . CmmLocal) res)
+ updfr (0, [])
+
+ jump = CmmCall { cml_target = succLbl
+ , cml_cont = Just succ
+ , cml_args = widthInBytes wordWidth
+ , cml_ret_args = ret_args
+ , cml_ret_off = updfr }
+
+ graph' <- lgraphOfAGraph $ suspend <*>
+ midCall <*>
+ resume <*>
+ copyout <*>
+ mkLast jump
+
+ case toBlockList graph' of
+ [one] -> let (_, middle', last) = blockSplit one
+ in return (blockJoin entry (middle `blockAppend` middle') last)
+ _ -> panic "lowerSafeForeignCall0"
+
+ -- Block doesn't end in a safe foreign call:
+ | otherwise = return block
+
+
+foreignLbl :: FastString -> CmmExpr
+foreignLbl name = CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId name))
+
+newTemp :: CmmType -> UniqSM LocalReg
+newTemp rep = getUniqueM >>= \u -> return (LocalReg u rep)
+
+callSuspendThread :: LocalReg -> Bool -> CmmNode O O
+callSuspendThread id intrbl =
+ CmmUnsafeForeignCall
+ (ForeignTarget (foreignLbl (fsLit "suspendThread"))
+ (ForeignConvention CCallConv [AddrHint, NoHint] [AddrHint]))
+ [id] [CmmReg (CmmGlobal BaseReg), CmmLit (mkIntCLit (fromEnum intrbl))]
+
+callResumeThread :: LocalReg -> LocalReg -> CmmNode O O
+callResumeThread new_base id =
+ CmmUnsafeForeignCall
+ (ForeignTarget (foreignLbl (fsLit "resumeThread"))
+ (ForeignConvention CCallConv [AddrHint] [AddrHint]))
+ [new_base] [CmmReg (CmmLocal id)]
+
+-- -----------------------------------------------------------------------------
+
+plusW :: ByteOff -> WordOff -> ByteOff
+plusW b w = b + w * wORD_SIZE
+
+dropEmpty :: WordOff -> [StackSlot] -> Maybe [StackSlot]
+dropEmpty 0 ss = Just ss
+dropEmpty n (Empty : ss) = dropEmpty (n-1) ss
+dropEmpty _ _ = Nothing
+
+isEmpty :: StackSlot -> Bool
+isEmpty Empty = True
+isEmpty _ = False
+
+localRegBytes :: LocalReg -> ByteOff
+localRegBytes r = roundUpToWords (widthInBytes (typeWidth (localRegType r)))
+
+localRegWords :: LocalReg -> WordOff
+localRegWords = toWords . localRegBytes
+
+toWords :: ByteOff -> WordOff
+toWords x = x `quot` wORD_SIZE
+
+
+insertReloads :: StackMap -> [CmmNode O O]
+insertReloads stackmap =
+ [ CmmAssign (CmmLocal r) (CmmLoad (CmmStackSlot Old sp)
+ (localRegType r))
+ | (r,sp) <- stackSlotRegs stackmap
+ ]
+
+
+stackSlotRegs :: StackMap -> [(LocalReg, StackLoc)]
+stackSlotRegs sm = eltsUFM (sm_regs sm)
+
+-- -----------------------------------------------------------------------------
+
+-- If we do this *before* stack layout, we might be able to avoid
+-- saving some things across calls/procpoints.
+--
+-- *but*, that will invalidate the liveness analysis, and we'll have
+-- to re-do it.
+
+cmmSink :: CmmGraph -> UniqSM CmmGraph
+cmmSink graph = do
+ let liveness = cmmLiveness graph
+ return $ cmmSink' liveness graph
+
+cmmSink' :: BlockEnv CmmLive -> CmmGraph -> CmmGraph
+cmmSink' liveness graph
+ = ofBlockList (g_entry graph) $ sink mapEmpty $ postorderDfs graph
+ where
+
+ sink :: BlockEnv [(LocalReg, CmmExpr)] -> [CmmBlock] -> [CmmBlock]
+ sink _ [] = []
+ sink sunk (b:bs) =
+ pprTrace "sink" (ppr l) $
+ blockJoin first final_middle last : sink sunk' bs
+ where
+ l = entryLabel b
+ (first, middle, last) = blockSplit b
+ (middle', assigs) = walk (blockToList middle) emptyBlock
+ (mapFindWithDefault [] l sunk)
+
+ (dropped_last, assigs') = partition (`conflictsWithLast` last) assigs
+
+ final_middle = foldl blockSnoc middle' (toNodes dropped_last)
+
+ sunk' = mapUnion sunk $
+ mapFromList [ (l, filt assigs' (getLive l))
+ | l <- successors last ]
+ where
+ getLive l = mapFindWithDefault Set.empty l liveness
+ filt as live = [ (r,e) | (r,e) <- as, r `Set.member` live ]
+
+
+walk :: [CmmNode O O] -> Block CmmNode O O -> [(LocalReg, CmmExpr)]
+ -> (Block CmmNode O O, [(LocalReg, CmmExpr)])
+
+walk [] acc as = (acc, as)
+walk (n:ns) acc as
+ | Just a <- collect_it = walk ns acc (a:as)
+ | otherwise = walk ns (foldr (flip blockSnoc) acc (n:drop_nodes)) as'
+ where
+ collect_it = case n of
+ CmmAssign (CmmLocal r) e@(CmmReg (CmmGlobal _)) -> Just (r,e)
+-- CmmAssign (CmmLocal r) e@(CmmLoad addr _) |
+-- foldRegsUsed (\b r -> False) True addr -> Just (r,e)
+ _ -> Nothing
+
+ drop_nodes = toNodes dropped
+ (dropped, as') = partition should_drop as
+ where should_drop a = a `conflicts` n
+
+toNodes :: [(LocalReg,CmmExpr)] -> [CmmNode O O]
+toNodes as = [ CmmAssign (CmmLocal r) rhs | (r,rhs) <- as ]
+
+-- We only sink "r = G" assignments right now, so conflicts is very simple:
+conflicts :: (LocalReg,CmmExpr) -> CmmNode O O -> Bool
+(_, rhs) `conflicts` CmmAssign reg _ | reg `regUsedIn` rhs = True
+--(r, CmmLoad _ _) `conflicts` CmmStore _ _ = True
+(r, _) `conflicts` node
+ = foldRegsUsed (\b r' -> r == r' || b) False node
+
+conflictsWithLast :: (LocalReg,CmmExpr) -> CmmNode O C -> Bool
+(r, _) `conflictsWithLast` node
+ = foldRegsUsed (\b r' -> r == r' || b) False node
diff --git a/compiler/cmm/CmmLint.hs b/compiler/cmm/CmmLint.hs
index 01ebac6254..2e24dd7f82 100644
--- a/compiler/cmm/CmmLint.hs
+++ b/compiler/cmm/CmmLint.hs
@@ -1,67 +1,70 @@
-----------------------------------------------------------------------------
--
--- (c) The University of Glasgow 2004-2006
+-- (c) The University of Glasgow 2011
--
-- CmmLint: checking the correctness of Cmm statements and expressions
--
-----------------------------------------------------------------------------
-
-{-# OPTIONS -fno-warn-tabs #-}
--- The above warning supression flag is a temporary kludge.
--- While working on this module you are encouraged to remove it and
--- detab the module (please do the detabbing in a separate patch). See
--- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#TabsvsSpaces
--- for details
-
+{-# LANGUAGE GADTs #-}
module CmmLint (
- cmmLint, cmmLintTop
+ cmmLint, cmmLintGraph
) where
+import Hoopl
+import Cmm
+import CmmUtils
+import PprCmm ()
import BlockId
-import OldCmm
-import CLabel
+import FastString
import Outputable
-import OldPprCmm()
import Constants
-import FastString
-import Platform
import Data.Maybe
+-- Things to check:
+-- - invariant on CmmBlock in CmmExpr (see comment there)
+-- - check for branches to blocks that don't exist
+-- - check types
+
-- -----------------------------------------------------------------------------
-- Exported entry points:
cmmLint :: (Outputable d, Outputable h)
- => Platform -> GenCmmGroup d h (ListGraph CmmStmt) -> Maybe SDoc
-cmmLint platform tops = runCmmLint platform (mapM_ (lintCmmDecl platform)) tops
+ => GenCmmGroup d h CmmGraph -> Maybe SDoc
+cmmLint tops = runCmmLint (mapM_ lintCmmDecl) tops
-cmmLintTop :: (Outputable d, Outputable h)
- => Platform -> GenCmmDecl d h (ListGraph CmmStmt) -> Maybe SDoc
-cmmLintTop platform top = runCmmLint platform (lintCmmDecl platform) top
+cmmLintGraph :: CmmGraph -> Maybe SDoc
+cmmLintGraph g = runCmmLint lintCmmGraph g
-runCmmLint :: Outputable a
- => Platform -> (a -> CmmLint b) -> a -> Maybe SDoc
-runCmmLint _ l p =
+runCmmLint :: Outputable a => (a -> CmmLint b) -> a -> Maybe SDoc
+runCmmLint l p =
case unCL (l p) of
- Left err -> Just (vcat [ptext $ sLit ("Cmm lint error:"),
- nest 2 err,
- ptext $ sLit ("Program was:"),
- nest 2 (ppr p)])
- Right _ -> Nothing
-
-lintCmmDecl :: Platform -> (GenCmmDecl h i (ListGraph CmmStmt)) -> CmmLint ()
-lintCmmDecl platform (CmmProc _ lbl (ListGraph blocks))
- = addLintInfo (text "in proc " <> pprCLabel platform lbl) $
- let labels = foldl (\s b -> setInsert (blockId b) s) setEmpty blocks
- in mapM_ (lintCmmBlock platform labels) blocks
-
-lintCmmDecl _ (CmmData {})
+ Left err -> Just (vcat [ptext $ sLit ("Cmm lint error:"),
+ nest 2 err,
+ ptext $ sLit ("Program was:"),
+ nest 2 (ppr p)])
+ Right _ -> Nothing
+
+lintCmmDecl :: GenCmmDecl h i CmmGraph -> CmmLint ()
+lintCmmDecl (CmmProc _ lbl g)
+ = addLintInfo (text "in proc " <> ppr lbl) $ lintCmmGraph g
+lintCmmDecl (CmmData {})
= return ()
-lintCmmBlock :: Platform -> BlockSet -> GenBasicBlock CmmStmt -> CmmLint ()
-lintCmmBlock platform labels (BasicBlock id stmts)
- = addLintInfo (text "in basic block " <> ppr id) $
- mapM_ (lintCmmStmt platform labels) stmts
+
+lintCmmGraph :: CmmGraph -> CmmLint ()
+lintCmmGraph g = mapM_ (lintCmmBlock labels) blocks
+ where
+ blocks = toBlockList g
+ labels = setFromList (map entryLabel blocks)
+
+
+lintCmmBlock :: BlockSet -> CmmBlock -> CmmLint ()
+lintCmmBlock labels block
+ = addLintInfo (text "in basic block " <> ppr (entryLabel block)) $ do
+ let (_, middle, last) = blockSplit block
+ mapM_ lintCmmMiddle (blockToList middle)
+ lintCmmLast labels last
-- -----------------------------------------------------------------------------
-- lintCmmExpr
@@ -69,24 +72,24 @@ lintCmmBlock platform labels (BasicBlock id stmts)
-- Checks whether a CmmExpr is "type-correct", and check for obvious-looking
-- byte/word mismatches.
-lintCmmExpr :: Platform -> CmmExpr -> CmmLint CmmType
-lintCmmExpr platform (CmmLoad expr rep) = do
- _ <- lintCmmExpr platform expr
+lintCmmExpr :: CmmExpr -> CmmLint CmmType
+lintCmmExpr (CmmLoad expr rep) = do
+ _ <- lintCmmExpr expr
-- Disabled, if we have the inlining phase before the lint phase,
-- we can have funny offsets due to pointer tagging. -- EZY
-- when (widthInBytes (typeWidth rep) >= wORD_SIZE) $
-- cmmCheckWordAddress expr
return rep
-lintCmmExpr platform expr@(CmmMachOp op args) = do
- tys <- mapM (lintCmmExpr platform) args
+lintCmmExpr expr@(CmmMachOp op args) = do
+ tys <- mapM lintCmmExpr args
if map (typeWidth . cmmExprType) args == machOpArgReps op
- then cmmCheckMachOp op args tys
- else cmmLintMachOpErr expr (map cmmExprType args) (machOpArgReps op)
-lintCmmExpr platform (CmmRegOff reg offset)
- = lintCmmExpr platform (CmmMachOp (MO_Add rep)
- [CmmReg reg, CmmLit (CmmInt (fromIntegral offset) rep)])
+ then cmmCheckMachOp op args tys
+ else cmmLintMachOpErr expr (map cmmExprType args) (machOpArgReps op)
+lintCmmExpr (CmmRegOff reg offset)
+ = lintCmmExpr (CmmMachOp (MO_Add rep)
+ [CmmReg reg, CmmLit (CmmInt (fromIntegral offset) rep)])
where rep = typeWidth (cmmRegType reg)
-lintCmmExpr _ expr =
+lintCmmExpr expr =
return (cmmExprType expr)
-- Check for some common byte/word mismatches (eg. Sp + 1)
@@ -119,43 +122,61 @@ notNodeReg :: CmmExpr -> Bool
notNodeReg (CmmReg reg) | reg == nodeReg = False
notNodeReg _ = True
-lintCmmStmt :: Platform -> BlockSet -> CmmStmt -> CmmLint ()
-lintCmmStmt platform labels = lint
- where lint (CmmNop) = return ()
- lint (CmmComment {}) = return ()
- lint stmt@(CmmAssign reg expr) = do
- erep <- lintCmmExpr platform expr
- let reg_ty = cmmRegType reg
+lintCmmMiddle :: CmmNode O O -> CmmLint ()
+lintCmmMiddle node = case node of
+ CmmComment _ -> return ()
+
+ CmmAssign reg expr -> do
+ erep <- lintCmmExpr expr
+ let reg_ty = cmmRegType reg
if (erep `cmmEqType_ignoring_ptrhood` reg_ty)
then return ()
- else cmmLintAssignErr stmt erep reg_ty
- lint (CmmStore l r) = do
- _ <- lintCmmExpr platform l
- _ <- lintCmmExpr platform r
+ else cmmLintAssignErr (CmmAssign reg expr) erep reg_ty
+
+ CmmStore l r -> do
+ _ <- lintCmmExpr l
+ _ <- lintCmmExpr r
return ()
- lint (CmmCall target _res args _) =
- do lintTarget platform labels target
- mapM_ (lintCmmExpr platform . hintlessCmm) args
- lint (CmmCondBranch e id) = checkTarget id >> lintCmmExpr platform e >> checkCond e
- lint (CmmSwitch e branches) = do
+
+ CmmUnsafeForeignCall target _formals actuals -> do
+ lintTarget target
+ mapM_ lintCmmExpr actuals
+
+
+lintCmmLast :: BlockSet -> CmmNode O C -> CmmLint ()
+lintCmmLast labels node = case node of
+ CmmBranch id -> checkTarget id
+
+ CmmCondBranch e t f -> do
+ mapM_ checkTarget [t,f]
+ _ <- lintCmmExpr e
+ checkCond e
+
+ CmmSwitch e branches -> do
mapM_ checkTarget $ catMaybes branches
- erep <- lintCmmExpr platform e
+ erep <- lintCmmExpr e
if (erep `cmmEqType_ignoring_ptrhood` bWord)
then return ()
- else cmmLintErr (text "switch scrutinee is not a word: " <> ppr e <>
- text " :: " <> ppr erep)
- lint (CmmJump e _) = lintCmmExpr platform e >> return ()
- lint (CmmReturn) = return ()
- lint (CmmBranch id) = checkTarget id
- checkTarget id = if setMember id labels then return ()
- else cmmLintErr (text "Branch to nonexistent id" <+> ppr id)
-
-lintTarget :: Platform -> BlockSet -> CmmCallTarget -> CmmLint ()
-lintTarget platform _ (CmmCallee e _) = do _ <- lintCmmExpr platform e
- return ()
-lintTarget _ _ (CmmPrim _ Nothing) = return ()
-lintTarget platform labels (CmmPrim _ (Just stmts))
- = mapM_ (lintCmmStmt platform labels) stmts
+ else cmmLintErr (text "switch scrutinee is not a word: " <>
+ ppr e <> text " :: " <> ppr erep)
+
+ CmmCall { cml_target = target, cml_cont = cont } -> do
+ _ <- lintCmmExpr target
+ maybe (return ()) checkTarget cont
+
+ CmmForeignCall tgt _ args succ _ _ -> do
+ lintTarget tgt
+ mapM_ lintCmmExpr args
+ checkTarget succ
+ where
+ checkTarget id
+ | setMember id labels = return ()
+ | otherwise = cmmLintErr (text "Branch to nonexistent id" <+> ppr id)
+
+
+lintTarget :: ForeignTarget -> CmmLint ()
+lintTarget (ForeignTarget e _) = lintCmmExpr e >> return ()
+lintTarget (PrimTarget {}) = return ()
checkCond :: CmmExpr -> CmmLint ()
@@ -163,7 +184,7 @@ checkCond (CmmMachOp mop _) | isComparisonMachOp mop = return ()
checkCond (CmmLit (CmmInt x t)) | x == 0 || x == 1, t == wordWidth = return () -- constant values
checkCond expr
= cmmLintErr (hang (text "expression is not a conditional:") 2
- (ppr expr))
+ (ppr expr))
-- -----------------------------------------------------------------------------
-- CmmLint monad
@@ -173,37 +194,36 @@ checkCond expr
newtype CmmLint a = CmmLint { unCL :: Either SDoc a }
instance Monad CmmLint where
- CmmLint m >>= k = CmmLint $ case m of
- Left e -> Left e
- Right a -> unCL (k a)
+ CmmLint m >>= k = CmmLint $ case m of
+ Left e -> Left e
+ Right a -> unCL (k a)
return a = CmmLint (Right a)
cmmLintErr :: SDoc -> CmmLint a
cmmLintErr msg = CmmLint (Left msg)
addLintInfo :: SDoc -> CmmLint a -> CmmLint a
-addLintInfo info thing = CmmLint $
+addLintInfo info thing = CmmLint $
case unCL thing of
- Left err -> Left (hang info 2 err)
- Right a -> Right a
+ Left err -> Left (hang info 2 err)
+ Right a -> Right a
cmmLintMachOpErr :: CmmExpr -> [CmmType] -> [Width] -> CmmLint a
cmmLintMachOpErr expr argsRep opExpectsRep
- = cmmLintErr (text "in MachOp application: " $$
- nest 2 (ppr expr) $$
- (text "op is expecting: " <+> ppr opExpectsRep) $$
- (text "arguments provide: " <+> ppr argsRep))
+ = cmmLintErr (text "in MachOp application: " $$
+ nest 2 (ppr expr) $$
+ (text "op is expecting: " <+> ppr opExpectsRep) $$
+ (text "arguments provide: " <+> ppr argsRep))
-cmmLintAssignErr :: CmmStmt -> CmmType -> CmmType -> CmmLint a
+cmmLintAssignErr :: CmmNode e x -> CmmType -> CmmType -> CmmLint a
cmmLintAssignErr stmt e_ty r_ty
- = cmmLintErr (text "in assignment: " $$
- nest 2 (vcat [ppr stmt,
- text "Reg ty:" <+> ppr r_ty,
- text "Rhs ty:" <+> ppr e_ty]))
-
-
+ = cmmLintErr (text "in assignment: " $$
+ nest 2 (vcat [ppr stmt,
+ text "Reg ty:" <+> ppr r_ty,
+ text "Rhs ty:" <+> ppr e_ty]))
+
cmmLintDubiousWordOffset :: CmmExpr -> CmmLint a
cmmLintDubiousWordOffset expr
= cmmLintErr (text "offset is not a multiple of words: " $$
- nest 2 (ppr expr))
+ nest 2 (ppr expr))
diff --git a/compiler/cmm/CmmLive.hs b/compiler/cmm/CmmLive.hs
index 9a5bb2d5ae..f0163fefc4 100644
--- a/compiler/cmm/CmmLive.hs
+++ b/compiler/cmm/CmmLive.hs
@@ -11,17 +11,15 @@ module CmmLive
)
where
+import UniqSupply
import BlockId
import Cmm
import CmmUtils
-import Control.Monad
-import OptimizationFuel
import PprCmmExpr ()
-import Compiler.Hoopl
+import Hoopl
import Maybes
import Outputable
-import UniqSet
-----------------------------------------------------------------------------
-- Calculating what variables are live on entry to a basic block
@@ -33,8 +31,10 @@ type CmmLive = RegSet
-- | The dataflow lattice
liveLattice :: DataflowLattice CmmLive
liveLattice = DataflowLattice "live LocalReg's" emptyRegSet add
- where add _ (OldFact old) (NewFact new) = case unionUniqSets old new of
- join -> (changeIf $ sizeUniqSet join > sizeUniqSet old, join)
+ where add _ (OldFact old) (NewFact new) =
+ (changeIf $ sizeRegSet join > sizeRegSet old, join)
+ where !join = plusRegSet old new
+
-- | A mapping from block labels to the variables live on entry
type BlockEntryLiveness = BlockEnv CmmLive
@@ -43,16 +43,17 @@ type BlockEntryLiveness = BlockEnv CmmLive
-- | Calculated liveness info for a CmmGraph
-----------------------------------------------------------------------------
-cmmLiveness :: CmmGraph -> FuelUniqSM BlockEntryLiveness
+cmmLiveness :: CmmGraph -> BlockEntryLiveness
cmmLiveness graph =
- liftM check $ liftM snd $ dataflowPassBwd graph [] $ analBwd liveLattice xferLive
+ check $ dataflowAnalBwd graph [] $ analBwd liveLattice xferLive
where entry = g_entry graph
- check facts = noLiveOnEntry entry (expectJust "check" $ mapLookup entry facts) facts
+ check facts = noLiveOnEntry entry
+ (expectJust "check" $ mapLookup entry facts) facts
-- | On entry to the procedure, there had better not be any LocalReg's live-in.
noLiveOnEntry :: BlockId -> CmmLive -> a -> a
noLiveOnEntry bid in_fact x =
- if isEmptyUniqSet in_fact then x
+ if nullRegSet in_fact then x
else pprPanic "LocalReg's live-in to graph" (ppr bid <+> ppr in_fact)
-- | The transfer equations use the traditional 'gen' and 'kill'
@@ -60,42 +61,42 @@ noLiveOnEntry bid in_fact x =
gen :: UserOfLocalRegs a => a -> RegSet -> RegSet
gen a live = foldRegsUsed extendRegSet live a
kill :: DefinerOfLocalRegs a => a -> RegSet -> RegSet
-kill a live = foldRegsDefd delOneFromUniqSet live a
+kill a live = foldRegsDefd deleteFromRegSet live a
-gen_kill :: (DefinerOfLocalRegs a, UserOfLocalRegs a) => a -> CmmLive -> CmmLive
+gen_kill :: (DefinerOfLocalRegs a, UserOfLocalRegs a)
+ => a -> CmmLive -> CmmLive
gen_kill a = gen a . kill a
-- | The transfer function
--- EZY: Bits of this analysis are duplicated in CmmSpillReload, though
--- it's not really easy to efficiently reuse all of this. Keep in mind
--- if you need to update this analysis.
xferLive :: BwdTransfer CmmNode CmmLive
xferLive = mkBTransfer3 fst mid lst
where fst _ f = f
mid :: CmmNode O O -> CmmLive -> CmmLive
mid n f = gen_kill n f
lst :: CmmNode O C -> FactBase CmmLive -> CmmLive
- -- slightly inefficient: kill is unnecessary for emptyRegSet
- lst n f = gen_kill n
- $ case n of CmmCall{} -> emptyRegSet
- CmmForeignCall{} -> emptyRegSet
- _ -> joinOutFacts liveLattice n f
+ lst n f = gen_kill n $ joinOutFacts liveLattice n f
-----------------------------------------------------------------------------
-- Removing assignments to dead variables
-----------------------------------------------------------------------------
-removeDeadAssignments :: CmmGraph -> FuelUniqSM CmmGraph
+removeDeadAssignments :: CmmGraph -> UniqSM (CmmGraph, BlockEnv CmmLive)
removeDeadAssignments g =
- liftM fst $ dataflowPassBwd g [] $ analRewBwd liveLattice xferLive rewrites
- where rewrites = deepBwdRw3 nothing middle nothing
- -- Beware: deepBwdRw with one polymorphic function seems more reasonable here,
- -- but GHC panics while compiling, see bug #4045.
+ dataflowPassBwd g [] $ analRewBwd liveLattice xferLive rewrites
+ where rewrites = mkBRewrite3 nothing middle nothing
+ -- SDM: no need for deepBwdRw here, we only rewrite to empty
+ -- Beware: deepBwdRw with one polymorphic function seems more
+ -- reasonable here, but GHC panics while compiling, see bug
+ -- #4045.
middle :: CmmNode O O -> Fact O CmmLive -> CmmReplGraph O O
- middle (CmmAssign (CmmLocal reg') _) live | not (reg' `elemRegSet` live) = return $ Just emptyGraph
+ middle (CmmAssign (CmmLocal reg') _) live
+ | not (reg' `elemRegSet` live)
+ = return $ Just emptyGraph
-- XXX maybe this should be somewhere else...
- middle (CmmAssign lhs (CmmReg rhs)) _ | lhs == rhs = return $ Just emptyGraph
- middle (CmmStore lhs (CmmLoad rhs _)) _ | lhs == rhs = return $ Just emptyGraph
+ middle (CmmAssign lhs (CmmReg rhs)) _ | lhs == rhs
+ = return $ Just emptyGraph
+ middle (CmmStore lhs (CmmLoad rhs _)) _ | lhs == rhs
+ = return $ Just emptyGraph
middle _ _ = return Nothing
nothing :: CmmNode e x -> Fact x CmmLive -> CmmReplGraph e x
diff --git a/compiler/cmm/CmmNode.hs b/compiler/cmm/CmmNode.hs
index 4844af9d9a..9e75387436 100644
--- a/compiler/cmm/CmmNode.hs
+++ b/compiler/cmm/CmmNode.hs
@@ -18,7 +18,7 @@ module CmmNode (
CmmNode(..), ForeignHint(..), CmmFormal, CmmActual,
UpdFrameOffset, Convention(..), ForeignConvention(..), ForeignTarget(..),
mapExp, mapExpDeep, wrapRecExp, foldExp, foldExpDeep, wrapRecExpf,
- mapExpM, mapExpDeepM, wrapRecExpM
+ mapExpM, mapExpDeepM, wrapRecExpM, mapSuccessors
) where
import CmmExpr
@@ -35,15 +35,17 @@ import Prelude hiding (succ)
------------------------
-- CmmNode
+#define ULabel {-# UNPACK #-} !Label
+
data CmmNode e x where
- CmmEntry :: Label -> CmmNode C O
+ CmmEntry :: ULabel -> CmmNode C O
CmmComment :: FastString -> CmmNode O O
- CmmAssign :: CmmReg -> CmmExpr -> CmmNode O O
+ CmmAssign :: !CmmReg -> !CmmExpr -> CmmNode O O
-- Assign to register
- CmmStore :: CmmExpr -> CmmExpr -> CmmNode O O
+ CmmStore :: !CmmExpr -> !CmmExpr -> CmmNode O O
-- Assign to memory location. Size is
-- given by cmmExprType of the rhs.
@@ -60,11 +62,12 @@ data CmmNode e x where
-- bug for what can be put in arguments, see
-- Note [Register Parameter Passing]
- CmmBranch :: Label -> CmmNode O C -- Goto another block in the same procedure
+ CmmBranch :: ULabel -> CmmNode O C
+ -- Goto another block in the same procedure
CmmCondBranch :: { -- conditional branch
cml_pred :: CmmExpr,
- cml_true, cml_false :: Label
+ cml_true, cml_false :: ULabel
} -> CmmNode O C
CmmSwitch :: CmmExpr -> [Maybe Label] -> CmmNode O C -- Table branch
@@ -78,6 +81,11 @@ data CmmNode e x where
cml_cont :: Maybe Label,
-- Label of continuation (Nothing for return or tail call)
+ --
+ -- Note [Continuation BlockId]: these BlockIds are called
+ -- Continuation BlockIds, and are the only BlockIds that can
+ -- occur in CmmExprs, namely as (CmmLit (CmmBlock b)) or
+ -- (CmmStackSlot (Young b) _).
-- ToDO: add this:
-- cml_args_regs :: [GlobalReg],
@@ -117,7 +125,7 @@ data CmmNode e x where
tgt :: ForeignTarget, -- call target and convention
res :: [CmmFormal], -- zero or more results
args :: [CmmActual], -- zero or more arguments; see Note [Register parameter passing]
- succ :: Label, -- Label of continuation
+ succ :: ULabel, -- Label of continuation
updfr :: UpdFrameOffset, -- where the update frame is (for building infotable)
intrbl:: Bool -- whether or not the call is interruptible
} -> CmmNode O C
@@ -218,14 +226,6 @@ data Convention
| GC -- Entry to the garbage collector: uses the node reg!
| PrimOpCall -- Calling prim ops
| PrimOpReturn -- Returning from prim ops
- | Foreign -- Foreign call/return
- ForeignConvention
- | Private
- -- Used for control transfers within a (pre-CPS) procedure All
- -- jump sites known, never pushed on the stack (hence no SRT)
- -- You can choose whatever calling convention you please
- -- (provided you make sure all the call sites agree)!
- -- This data type eventually to be extended to record the convention.
deriving( Eq )
data ForeignConvention
@@ -283,37 +283,6 @@ instance DefinerOfLocalRegs (CmmNode e x) where
fold f z n = foldRegsDefd f z n
-instance UserOfSlots (CmmNode e x) where
- foldSlotsUsed f z n = case n of
- CmmAssign _ expr -> fold f z expr
- CmmStore addr rval -> fold f (fold f z addr) rval
- CmmUnsafeForeignCall _ _ args -> fold f z args
- CmmCondBranch expr _ _ -> fold f z expr
- CmmSwitch expr _ -> fold f z expr
- CmmCall {cml_target=tgt} -> fold f z tgt
- CmmForeignCall {tgt=tgt, args=args} -> fold f (fold f z tgt) args
- _ -> z
- where fold :: forall a b.
- UserOfSlots a =>
- (b -> SubArea -> b) -> b -> a -> b
- fold f z n = foldSlotsUsed f z n
-
-instance UserOfSlots ForeignTarget where
- foldSlotsUsed f z (ForeignTarget e _) = foldSlotsUsed f z e
- foldSlotsUsed _f z (PrimTarget _) = z
-
-instance DefinerOfSlots (CmmNode e x) where
- foldSlotsDefd f z n = case n of
- CmmStore (CmmStackSlot a i) expr -> f z (a, i, widthInBytes $ typeWidth $ cmmExprType expr)
- CmmForeignCall {res=res} -> fold f z $ map foreign_call_slot res
- _ -> z
- where
- fold :: forall a b.
- DefinerOfSlots a =>
- (b -> SubArea -> b) -> b -> a -> b
- fold f z n = foldSlotsDefd f z n
- foreign_call_slot r = case widthInBytes $ typeWidth $ localRegType r of w -> (RegSlot r, w, w)
-
-----------------------------------
-- mapping Expr in CmmNode
@@ -416,4 +385,20 @@ foldExp f (CmmCall {cml_target=tgt}) z = f tgt z
foldExp f (CmmForeignCall {tgt=tgt, args=args}) z = foldr f (foldExpForeignTarget f tgt z) args
foldExpDeep :: (CmmExpr -> z -> z) -> CmmNode e x -> z -> z
-foldExpDeep f = foldExp $ wrapRecExpf f
+foldExpDeep f = foldExp go
+ where -- go :: CmmExpr -> z -> z
+ go e@(CmmMachOp _ es) z = gos es $! f e z
+ go e@(CmmLoad addr _) z = go addr $! f e z
+ go e z = f e z
+
+ gos [] z = z
+ gos (e:es) z = gos es $! f e z
+
+-- -----------------------------------------------------------------------------
+
+mapSuccessors :: (Label -> Label) -> CmmNode O C -> CmmNode O C
+mapSuccessors f (CmmBranch bid) = CmmBranch (f bid)
+mapSuccessors f (CmmCondBranch p y n) = CmmCondBranch p (f y) (f n)
+mapSuccessors f (CmmSwitch e arms) = CmmSwitch e (map (fmap f) arms)
+mapSuccessors _ n = n
+
diff --git a/compiler/cmm/CmmOpt.hs b/compiler/cmm/CmmOpt.hs
index 8cc18fc1ca..8ff04cfa7b 100644
--- a/compiler/cmm/CmmOpt.hs
+++ b/compiler/cmm/CmmOpt.hs
@@ -145,8 +145,7 @@ To inline _smi:
-}
countUses :: UserOfLocalRegs a => a -> UniqFM Int
-countUses a = foldRegsUsed (\m r -> addToUFM m r (count m r + 1)) emptyUFM a
- where count m r = lookupWithDefaultUFM m (0::Int) r
+countUses a = foldRegsUsed (\m r -> addToUFM_C (+) m r 1) emptyUFM a
cmmMiniInline :: DynFlags -> [CmmBasicBlock] -> [CmmBasicBlock]
cmmMiniInline dflags blocks = map do_inline blocks
@@ -157,25 +156,16 @@ cmmMiniInlineStmts :: DynFlags -> UniqFM Int -> [CmmStmt] -> [CmmStmt]
cmmMiniInlineStmts _ _ [] = []
cmmMiniInlineStmts dflags uses (stmt@(CmmAssign (CmmLocal (LocalReg u _)) expr) : stmts)
-- not used: just discard this assignment
- | Nothing <- lookupUFM uses u
+ | 0 <- lookupWithDefaultUFM uses 0 u
= cmmMiniInlineStmts dflags uses stmts
- -- used (literal): try to inline at all the use sites
- | Just n <- lookupUFM uses u, isLit expr
- =
- ncgDebugTrace ("nativeGen: inlining " ++ showSDoc dflags (pprStmt stmt)) $
- case lookForInlineLit u expr stmts of
- (m, stmts')
- | n == m -> cmmMiniInlineStmts dflags (delFromUFM uses u) stmts'
- | otherwise ->
- stmt : cmmMiniInlineStmts dflags (adjustUFM (\x -> x - m) uses u) stmts'
-
- -- used (foldable to literal): try to inline at all the use sites
+ -- used (foldable to small thing): try to inline at all the use sites
| Just n <- lookupUFM uses u,
- e@(CmmLit _) <- wrapRecExp foldExp expr
+ e <- wrapRecExp foldExp expr,
+ isTiny e
=
ncgDebugTrace ("nativeGen: inlining " ++ showSDoc dflags (pprStmt stmt)) $
- case lookForInlineLit u e stmts of
+ case lookForInlineMany u e stmts of
(m, stmts')
| n == m -> cmmMiniInlineStmts dflags (delFromUFM uses u) stmts'
| otherwise ->
@@ -188,6 +178,11 @@ cmmMiniInlineStmts dflags uses (stmt@(CmmAssign (CmmLocal (LocalReg u _)) expr)
ncgDebugTrace ("nativeGen: inlining " ++ showSDoc dflags (pprStmt stmt)) $
cmmMiniInlineStmts dflags uses stmts'
where
+ isTiny (CmmLit _) = True
+ isTiny (CmmReg (CmmGlobal _)) = True
+ -- not CmmLocal: that might invalidate the usage analysis results
+ isTiny _ = False
+
platform = targetPlatform dflags
foldExp (CmmMachOp op args) = cmmMachOpFold platform op args
foldExp e = e
@@ -201,26 +196,28 @@ cmmMiniInlineStmts platform uses (stmt:stmts)
-- register, and a list of statements. Inlines the expression at all
-- use sites of the register. Returns the number of substituations
-- made and the, possibly modified, list of statements.
-lookForInlineLit :: Unique -> CmmExpr -> [CmmStmt] -> (Int, [CmmStmt])
-lookForInlineLit _ _ [] = (0, [])
-lookForInlineLit u expr stmts@(stmt : rest)
- | Just n <- lookupUFM (countUses stmt) u
- = case lookForInlineLit u expr rest of
- (m, stmts) -> let z = n + m
- in z `seq` (z, inlineStmt u expr stmt : stmts)
-
- | ok_to_skip
- = case lookForInlineLit u expr rest of
+lookForInlineMany :: Unique -> CmmExpr -> [CmmStmt] -> (Int, [CmmStmt])
+lookForInlineMany u expr stmts = lookForInlineMany' u expr regset stmts
+ where regset = foldRegsUsed extendRegSet emptyRegSet expr
+
+lookForInlineMany' :: Unique -> CmmExpr -> RegSet -> [CmmStmt] -> (Int, [CmmStmt])
+lookForInlineMany' _ _ _ [] = (0, [])
+lookForInlineMany' u expr regset stmts@(stmt : rest)
+ | Just n <- lookupUFM (countUses stmt) u, okToInline expr stmt
+ = let stmt' = inlineStmt u expr stmt in
+ if okToSkip stmt' u expr regset
+ then case lookForInlineMany' u expr regset rest of
+ (m, stmts) -> let z = n + m
+ in z `seq` (z, stmt' : stmts)
+ else (n, stmt' : rest)
+
+ | okToSkip stmt u expr regset
+ = case lookForInlineMany' u expr regset rest of
(n, stmts) -> (n, stmt : stmts)
| otherwise
= (0, stmts)
- where
- -- We skip over assignments to registers, unless the register
- -- being assigned to is the one we're inlining.
- ok_to_skip = case stmt of
- CmmAssign (CmmLocal (LocalReg u' _)) _ | u' == u -> False
- _other -> True
+
lookForInline :: Unique -> CmmExpr -> [CmmStmt] -> Maybe [CmmStmt]
lookForInline u expr stmts = lookForInline' u expr regset stmts
@@ -229,10 +226,10 @@ lookForInline u expr stmts = lookForInline' u expr regset stmts
lookForInline' :: Unique -> CmmExpr -> RegSet -> [CmmStmt] -> Maybe [CmmStmt]
lookForInline' _ _ _ [] = panic "lookForInline' []"
lookForInline' u expr regset (stmt : rest)
- | Just 1 <- lookupUFM (countUses stmt) u, ok_to_inline
+ | Just 1 <- lookupUFM (countUses stmt) u, okToInline expr stmt
= Just (inlineStmt u expr stmt : rest)
- | ok_to_skip
+ | okToSkip stmt u expr regset
= case lookForInline' u expr regset rest of
Nothing -> Nothing
Just stmts -> Just (stmt:stmts)
@@ -240,31 +237,37 @@ lookForInline' u expr regset (stmt : rest)
| otherwise
= Nothing
- where
- -- we don't inline into CmmCall if the expression refers to global
- -- registers. This is a HACK to avoid global registers clashing with
- -- C argument-passing registers, really the back-end ought to be able
- -- to handle it properly, but currently neither PprC nor the NCG can
- -- do it. See also CgForeignCall:load_args_into_temps.
- ok_to_inline = case stmt of
- CmmCall{} -> hasNoGlobalRegs expr
- _ -> True
-
- -- Expressions aren't side-effecting. Temporaries may or may not
- -- be single-assignment depending on the source (the old code
- -- generator creates single-assignment code, but hand-written Cmm
- -- and Cmm from the new code generator is not single-assignment.)
- -- So we do an extra check to make sure that the register being
- -- changed is not one we were relying on. I don't know how much of a
- -- performance hit this is (we have to create a regset for every
- -- instruction.) -- EZY
- ok_to_skip = case stmt of
- CmmNop -> True
- CmmComment{} -> True
- CmmAssign (CmmLocal r@(LocalReg u' _)) _rhs | u' /= u && not (r `elemRegSet` regset) -> True
- CmmAssign g@(CmmGlobal _) _rhs -> not (g `regUsedIn` expr)
- _other -> False
+-- we don't inline into CmmCall if the expression refers to global
+-- registers. This is a HACK to avoid global registers clashing with
+-- C argument-passing registers, really the back-end ought to be able
+-- to handle it properly, but currently neither PprC nor the NCG can
+-- do it. See also CgForeignCall:load_args_into_temps.
+okToInline :: CmmExpr -> CmmStmt -> Bool
+okToInline expr CmmCall{} = hasNoGlobalRegs expr
+okToInline _ _ = True
+
+-- Expressions aren't side-effecting. Temporaries may or may not
+-- be single-assignment depending on the source (the old code
+-- generator creates single-assignment code, but hand-written Cmm
+-- and Cmm from the new code generator is not single-assignment.)
+-- So we do an extra check to make sure that the register being
+-- changed is not one we were relying on. I don't know how much of a
+-- performance hit this is (we have to create a regset for every
+-- instruction.) -- EZY
+okToSkip :: CmmStmt -> Unique -> CmmExpr -> RegSet -> Bool
+okToSkip stmt u expr regset
+ = case stmt of
+ CmmNop -> True
+ CmmComment{} -> True
+ CmmAssign (CmmLocal r@(LocalReg u' _)) _rhs | u' /= u && not (r `elemRegSet` regset) -> True
+ CmmAssign g@(CmmGlobal _) _rhs -> not (g `regUsedIn` expr)
+ CmmStore _ _ -> not_a_load expr
+ _other -> False
+ where
+ not_a_load (CmmMachOp _ args) = all not_a_load args
+ not_a_load (CmmLoad _ _) = False
+ not_a_load _ = True
inlineStmt :: Unique -> CmmExpr -> CmmStmt -> CmmStmt
inlineStmt u a (CmmAssign r e) = CmmAssign r (inlineExpr u a e)
diff --git a/compiler/cmm/CmmParse.y b/compiler/cmm/CmmParse.y
index 075ed22ea9..f46d49e022 100644
--- a/compiler/cmm/CmmParse.y
+++ b/compiler/cmm/CmmParse.y
@@ -230,35 +230,31 @@ lits :: { [ExtFCode CmmExpr] }
cmmproc :: { ExtCode }
-- TODO: add real SRT/info tables to parsed Cmm
- : info maybe_formals_without_hints maybe_gc_block maybe_frame '{' body '}'
- { do ((entry_ret_label, info, live, formals, gc_block, frame), stmts) <-
+ : info maybe_formals_without_hints '{' body '}'
+ { do ((entry_ret_label, info, live, formals), stmts) <-
getCgStmtsEC' $ loopDecls $ do {
(entry_ret_label, info, live) <- $1;
formals <- sequence $2;
- gc_block <- $3;
- frame <- $4;
- $6;
- return (entry_ret_label, info, live, formals, gc_block, frame) }
+ $4;
+ return (entry_ret_label, info, live, formals) }
blks <- code (cgStmtsToBlocks stmts)
- code (emitInfoTableAndCode entry_ret_label (CmmInfo gc_block frame info) formals blks) }
+ code (emitInfoTableAndCode entry_ret_label info formals blks) }
| info maybe_formals_without_hints ';'
{ do (entry_ret_label, info, live) <- $1;
formals <- sequence $2;
- code (emitInfoTableAndCode entry_ret_label (CmmInfo Nothing Nothing info) formals []) }
+ code (emitInfoTableAndCode entry_ret_label info formals []) }
- | NAME maybe_formals_without_hints maybe_gc_block maybe_frame '{' body '}'
+ | NAME maybe_formals_without_hints '{' body '}'
{% withThisPackage $ \pkg ->
do newFunctionName $1 pkg
- ((formals, gc_block, frame), stmts) <-
+ (formals, stmts) <-
getCgStmtsEC' $ loopDecls $ do {
formals <- sequence $2;
- gc_block <- $3;
- frame <- $4;
- $6;
- return (formals, gc_block, frame) }
+ $4;
+ return formals }
blks <- code (cgStmtsToBlocks stmts)
- code (emitProc (CmmInfo gc_block frame CmmNonInfoTable) (mkCmmCodeLabel pkg $1) formals blks) }
+ code (emitProc CmmNonInfoTable (mkCmmCodeLabel pkg $1) formals blks) }
info :: { ExtFCode (CLabel, CmmInfoTable, [Maybe LocalReg]) }
: 'INFO_TABLE' '(' NAME ',' INT ',' INT ',' INT ',' STRING ',' STRING ')'
@@ -599,18 +595,7 @@ formals_without_hints :: { [ExtFCode LocalReg] }
formal_without_hint :: { ExtFCode LocalReg }
: type NAME { newLocal $1 $2 }
-maybe_frame :: { ExtFCode (Maybe UpdateFrame) }
- : {- empty -} { return Nothing }
- | 'jump' expr '(' exprs0 ')' { do { target <- $2;
- args <- sequence $4;
- return $ Just (UpdateFrame target args) } }
-
-maybe_gc_block :: { ExtFCode (Maybe BlockId) }
- : {- empty -} { return Nothing }
- | 'goto' NAME
- { do l <- lookupLabel $2; return (Just l) }
-
-type :: { CmmType }
+type :: { CmmType }
: 'bits8' { b8 }
| typenot8 { $1 }
@@ -1073,7 +1058,8 @@ parseCmmFile dflags filename = do
let msg = mkPlainErrMsg dflags span err
return ((emptyBag, unitBag msg), Nothing)
POk pst code -> do
- cmm <- initC dflags no_module (getCmm (unEC code initEnv [] >> return ()))
+ st <- initC
+ let (cmm,_) = runC dflags no_module st (getCmm (unEC code initEnv [] >> return ()))
let ms = getMessages pst
if (errorsFound dflags ms)
then return (ms, Nothing)
diff --git a/compiler/cmm/CmmPipeline.hs b/compiler/cmm/CmmPipeline.hs
index 7361bbf385..bb8d5b2f22 100644
--- a/compiler/cmm/CmmPipeline.hs
+++ b/compiler/cmm/CmmPipeline.hs
@@ -11,25 +11,23 @@ module CmmPipeline (
import CLabel
import Cmm
-import CmmLive
+import CmmLint
import CmmBuildInfoTables
import CmmCommonBlockElim
import CmmProcPoint
-import CmmSpillReload
-import CmmRewriteAssignments
-import CmmStackLayout
import CmmContFlowOpt
-import OptimizationFuel
+import CmmLayoutStack
+import UniqSupply
import DynFlags
import ErrUtils
import HscTypes
import Data.Maybe
import Control.Monad
-import Data.Map (Map)
-import qualified Data.Map as Map
import Outputable
-import StaticFlags
+
+import qualified Data.Set as Set
+import Data.Map (Map)
-----------------------------------------------------------------------------
-- | Top level driver for C-- pipeline
@@ -53,32 +51,28 @@ import StaticFlags
-- we actually need to do the initial pass.
cmmPipeline :: HscEnv -- Compilation env including
-- dynamic flags: -dcmm-lint -ddump-cps-cmm
- -> (TopSRT, [CmmGroup]) -- SRT table and accumulating list of compiled procs
+ -> TopSRT -- SRT table and accumulating list of compiled procs
-> CmmGroup -- Input C-- with Procedures
- -> IO (TopSRT, [CmmGroup]) -- Output CPS transformed C--
-cmmPipeline hsc_env (topSRT, rst) prog =
+ -> IO (TopSRT, CmmGroup) -- Output CPS transformed C--
+cmmPipeline hsc_env topSRT prog =
do let dflags = hsc_dflags hsc_env
--
showPass dflags "CPSZ"
- let tops = runCmmContFlowOpts prog
- (cafEnvs, tops) <- liftM unzip $ mapM (cpsTop hsc_env) tops
+ (cafEnvs, tops) <- {-# SCC "tops" #-} liftM unzip $ mapM (cpsTop hsc_env) prog
-- tops :: [[(CmmDecl,CAFSet]] (one list per group)
- let topCAFEnv = mkTopCAFInfo (concat cafEnvs)
+ let topCAFEnv = {-# SCC "topCAFEnv" #-} mkTopCAFInfo (concat cafEnvs)
-- folding over the groups
- (topSRT, tops) <- foldM (toTops hsc_env topCAFEnv) (topSRT, []) tops
+ (topSRT, tops) <- {-# SCC "toTops" #-} foldM (toTops topCAFEnv) (topSRT, []) tops
let cmms :: CmmGroup
cmms = reverse (concat tops)
dumpIfSet_dyn dflags Opt_D_dump_cps_cmm "Post CPS Cmm" (ppr cmms)
- -- SRT is not affected by control flow optimization pass
- let prog' = runCmmContFlowOpts cmms
-
- return (topSRT, prog' : rst)
+ return (topSRT, cmms)
{- [Note global fuel]
~~~~~~~~~~~~~~~~~~~~~
@@ -92,105 +86,110 @@ global to one compiler session.
-- -ddump-cmmz
cpsTop :: HscEnv -> CmmDecl -> IO ([(CLabel, CAFSet)], [(CAFSet, CmmDecl)])
-cpsTop _ p@(CmmData {}) = return ([], [(Map.empty, p)])
+cpsTop _ p@(CmmData {}) = return ([], [(Set.empty, p)])
cpsTop hsc_env (CmmProc h@(TopInfo {stack_info=StackInfo {arg_space=entry_off}}) l g) =
do
- -- Why bother doing these early: dualLivenessWithInsertion,
- -- insertLateReloads, rewriteAssignments?
+ ----------- Control-flow optimisations ---------------
+ g <- {-# SCC "cmmCfgOpts(1)" #-} return $ cmmCfgOpts g
+ dump Opt_D_dump_cmmz_cfg "Post control-flow optimsations" g
----------- Eliminate common blocks -------------------
- g <- return $ elimCommonBlocks g
+ g <- {-# SCC "elimCommonBlocks" #-} return $ elimCommonBlocks g
dump Opt_D_dump_cmmz_cbe "Post common block elimination" g
- -- Any work storing block Labels must be performed _after_ elimCommonBlocks
+ -- Any work storing block Labels must be performed _after_
+ -- elimCommonBlocks
----------- Proc points -------------------
- let callPPs = callProcPoints g
- procPoints <- run $ minimalProcPointSet (targetPlatform dflags) callPPs g
- g <- run $ addProcPointProtocols callPPs procPoints g
- dump Opt_D_dump_cmmz_proc "Post Proc Points Added" g
-
- ----------- Spills and reloads -------------------
- g <- run $ dualLivenessWithInsertion procPoints g
- dump Opt_D_dump_cmmz_spills "Post spills and reloads" g
-
- ----------- Sink and inline assignments -------------------
- g <- runOptimization $ rewriteAssignments platform g
- dump Opt_D_dump_cmmz_rewrite "Post rewrite assignments" g
-
- ----------- Eliminate dead assignments -------------------
- g <- runOptimization $ removeDeadAssignments g
- dump Opt_D_dump_cmmz_dead "Post remove dead assignments" g
-
- ----------- Zero dead stack slots (Debug only) ---------------
- -- Debugging: stubbing slots on death can cause crashes early
- g <- if opt_StubDeadValues
- then run $ stubSlotsOnDeath g
- else return g
- dump Opt_D_dump_cmmz_stub "Post stub dead stack slots" g
-
- --------------- Stack layout ----------------
- slotEnv <- run $ liveSlotAnal g
- let spEntryMap = getSpEntryMap entry_off g
- mbpprTrace "live slot analysis results: " (ppr slotEnv) $ return ()
- let areaMap = layout procPoints spEntryMap slotEnv entry_off g
- mbpprTrace "areaMap" (ppr areaMap) $ return ()
-
- ------------ Manifest the stack pointer --------
- g <- run $ manifestSP spEntryMap areaMap entry_off g
- dump Opt_D_dump_cmmz_sp "Post manifestSP" g
- -- UGH... manifestSP can require updates to the procPointMap.
- -- We can probably do something quicker here for the update...
+ let callPPs = {-# SCC "callProcPoints" #-} callProcPoints g
+ procPoints <- {-# SCC "minimalProcPointSet" #-} runUniqSM $
+ minimalProcPointSet (targetPlatform dflags) callPPs g
+
+ ----------- Layout the stack and manifest Sp ---------------
+ -- (also does: removeDeadAssignments, and lowerSafeForeignCalls)
+ (g, stackmaps) <- {-# SCC "layoutStack" #-}
+ runUniqSM $ cmmLayoutStack procPoints entry_off g
+ dump Opt_D_dump_cmmz_sp "Layout Stack" g
+
+-- g <- {-# SCC "sink" #-} runUniqSM $ cmmSink g
+-- dump Opt_D_dump_cmmz_rewrite "Sink assignments" g
+
+-- ----------- Sink and inline assignments -------------------
+-- g <- {-# SCC "rewriteAssignments" #-} runOptimization $
+-- rewriteAssignments platform g
+-- dump Opt_D_dump_cmmz_rewrite "Post rewrite assignments" g
------------- Split into separate procedures ------------
- procPointMap <- run $ procPointAnalysis procPoints g
- dump Opt_D_dump_cmmz_procmap "procpoint map" procPointMap
- gs <- run $ splitAtProcPoints l callPPs procPoints procPointMap
- (CmmProc h l g)
- mapM_ (dump Opt_D_dump_cmmz_split "Post splitting") gs
-
- ------------- More CAFs and foreign calls ------------
- cafEnv <- run $ cafAnal g
- let localCAFs = catMaybes $ map (localCAFInfo cafEnv) gs
+ procPointMap <- {-# SCC "procPointAnalysis" #-} runUniqSM $
+ procPointAnalysis procPoints g
+ dumpWith dflags Opt_D_dump_cmmz_procmap "procpoint map" procPointMap
+ gs <- {-# SCC "splitAtProcPoints" #-} runUniqSM $
+ splitAtProcPoints l callPPs procPoints procPointMap (CmmProc h l g)
+ dumps Opt_D_dump_cmmz_split "Post splitting" gs
+
+ ------------- More CAFs ------------------------------
+ let cafEnv = {-# SCC "cafAnal" #-} cafAnal g
+ let localCAFs = {-# SCC "localCAFs" #-} catMaybes $ map (localCAFInfo cafEnv) gs
mbpprTrace "localCAFs" (ppr localCAFs) $ return ()
- gs <- run $ mapM (lowerSafeForeignCalls areaMap) gs
- mapM_ (dump Opt_D_dump_cmmz_lower "Post lowerSafeForeignCalls") gs
-
-- NO MORE GRAPH TRANSFORMATION AFTER HERE -- JUST MAKING INFOTABLES
- gs <- return $ map (setInfoTableStackMap slotEnv areaMap) gs
- mapM_ (dump Opt_D_dump_cmmz_info "after setInfoTableStackMap") gs
- gs <- return $ map (bundleCAFs cafEnv) gs
- mapM_ (dump Opt_D_dump_cmmz_cafs "after bundleCAFs") gs
+ gs <- {-# SCC "setInfoTableStackMap" #-}
+ return $ map (setInfoTableStackMap stackmaps) gs
+ dumps Opt_D_dump_cmmz_info "after setInfoTableStackMap" gs
+
+ ----------- Control-flow optimisations ---------------
+ gs <- {-# SCC "cmmCfgOpts(2)" #-} return $ map cmmCfgOptsProc gs
+ dumps Opt_D_dump_cmmz_cfg "Post control-flow optimsations" gs
+
+ gs <- {-# SCC "bundleCAFs" #-} return $ map (bundleCAFs cafEnv) gs
+ dumps Opt_D_dump_cmmz_cafs "after bundleCAFs" gs
+
return (localCAFs, gs)
-- gs :: [ (CAFSet, CmmDecl) ]
-- localCAFs :: [ (CLabel, CAFSet) ] -- statics filtered out(?)
where dflags = hsc_dflags hsc_env
- platform = targetPlatform dflags
- mbpprTrace x y z = if dopt Opt_D_dump_cmmz dflags then pprTrace x y z else z
- dump f = dumpWith ppr f
- dumpWith pprFun f txt g = do
- -- ToDo: No easy way of say "dump all the cmmz, *and* split
- -- them into files." Also, -ddump-cmmz doesn't play nicely
- -- with -ddump-to-file, since the headers get omitted.
- dumpIfSet_dyn dflags f txt (pprFun g)
- when (not (dopt f dflags)) $
- dumpIfSet_dyn dflags Opt_D_dump_cmmz txt (pprFun g)
- -- Runs a required transformation/analysis
- run = runInfiniteFuelIO (hsc_OptFuel hsc_env)
- -- Runs an optional transformation/analysis (and should
- -- thus be subject to optimization fuel)
- runOptimization = runFuelIO (hsc_OptFuel hsc_env)
+ mbpprTrace x y z | dopt Opt_D_dump_cmmz dflags = pprTrace x y z
+ | otherwise = z
+ dump = dumpGraph dflags
+
+ dumps flag name
+ = mapM_ (dumpWith dflags flag name)
+
+runUniqSM :: UniqSM a -> IO a
+runUniqSM m = do
+ us <- mkSplitUniqSupply 'u'
+ return (initUs_ us m)
+
+
+dumpGraph :: DynFlags -> DynFlag -> String -> CmmGraph -> IO ()
+dumpGraph dflags flag name g = do
+ when (dopt Opt_DoCmmLinting dflags) $ do_lint g
+ dumpWith dflags flag name g
+ where
+ do_lint g = case cmmLintGraph g of
+ Just err -> do { fatalErrorMsg dflags err
+ ; ghcExit dflags 1
+ }
+ Nothing -> return ()
+
+dumpWith :: Outputable a => DynFlags -> DynFlag -> String -> a -> IO ()
+dumpWith dflags flag txt g = do
+ -- ToDo: No easy way of say "dump all the cmmz, *and* split
+ -- them into files." Also, -ddump-cmmz doesn't play nicely
+ -- with -ddump-to-file, since the headers get omitted.
+ dumpIfSet_dyn dflags flag txt (ppr g)
+ when (not (dopt flag dflags)) $
+ dumpIfSet_dyn dflags Opt_D_dump_cmmz txt (ppr g)
-- This probably belongs in CmmBuildInfoTables?
-- We're just finishing the job here: once we know what CAFs are defined
-- in non-static closures, we can build the SRTs.
-toTops :: HscEnv -> Map CLabel CAFSet -> (TopSRT, [[CmmDecl]])
- -> [(CAFSet, CmmDecl)] -> IO (TopSRT, [[CmmDecl]])
-toTops hsc_env topCAFEnv (topSRT, tops) gs =
+toTops :: Map CLabel CAFSet -> (TopSRT, [[CmmDecl]])
+ -> [(CAFSet, CmmDecl)] -> IO (TopSRT, [[CmmDecl]])
+toTops topCAFEnv (topSRT, tops) gs =
do let setSRT (topSRT, rst) g =
do (topSRT, gs) <- setInfoTableSRT topCAFEnv topSRT g
return (topSRT, gs : rst)
- (topSRT, gs') <- runFuelIO (hsc_OptFuel hsc_env) $ foldM setSRT (topSRT, []) gs
+ (topSRT, gs') <- runUniqSM $ foldM setSRT (topSRT, []) gs
return (topSRT, concat gs' : tops)
diff --git a/compiler/cmm/CmmProcPoint.hs b/compiler/cmm/CmmProcPoint.hs
index f50d850b3a..6eb92666af 100644
--- a/compiler/cmm/CmmProcPoint.hs
+++ b/compiler/cmm/CmmProcPoint.hs
@@ -4,7 +4,7 @@
module CmmProcPoint
( ProcPointSet, Status(..)
, callProcPoints, minimalProcPointSet
- , addProcPointProtocols, splitAtProcPoints, procPointAnalysis
+ , splitAtProcPoints, procPointAnalysis
)
where
@@ -13,22 +13,17 @@ import Prelude hiding (last, unzip, succ, zip)
import BlockId
import CLabel
import Cmm
+import PprCmm ()
import CmmUtils
-import CmmContFlowOpt
import CmmInfo
-import CmmLive
-import Constants
import Data.List (sortBy)
import Maybes
-import MkGraph
import Control.Monad
-import OptimizationFuel
import Outputable
import Platform
-import UniqSet
import UniqSupply
-import Compiler.Hoopl
+import Hoopl
import qualified Data.Map as Map
@@ -103,34 +98,50 @@ instance Outputable Status where
(hsep $ punctuate comma $ map ppr $ setElems ps)
ppr ProcPoint = text "<procpt>"
-lattice :: DataflowLattice Status
-lattice = DataflowLattice "direct proc-point reachability" unreached add_to
- where unreached = ReachedBy setEmpty
- add_to _ (OldFact ProcPoint) _ = (NoChange, ProcPoint)
- add_to _ _ (NewFact ProcPoint) = (SomeChange, ProcPoint) -- because of previous case
- add_to _ (OldFact (ReachedBy p)) (NewFact (ReachedBy p')) =
- let union = setUnion p' p
- in if setSize union > setSize p then (SomeChange, ReachedBy union)
- else (NoChange, ReachedBy p)
--------------------------------------------------
+-- Proc point analysis
+
+procPointAnalysis :: ProcPointSet -> CmmGraph -> UniqSM (BlockEnv Status)
+-- Once you know what the proc-points are, figure out
+-- what proc-points each block is reachable from
+procPointAnalysis procPoints g =
+ -- pprTrace "procPointAnalysis" (ppr procPoints) $
+ dataflowAnalFwdBlocks g initProcPoints $ analFwd lattice forward
+ where initProcPoints = [(id, ProcPoint) | id <- setElems procPoints]
+
-- transfer equations
forward :: FwdTransfer CmmNode Status
-forward = mkFTransfer3 first middle ((mkFactBase lattice . ) . last)
- where first :: CmmNode C O -> Status -> Status
- first (CmmEntry id) ProcPoint = ReachedBy $ setSingleton id
- first _ x = x
+forward = mkFTransfer3 first middle last
+ where
+ first :: CmmNode C O -> Status -> Status
+ first (CmmEntry id) ProcPoint = ReachedBy $ setSingleton id
+ first _ x = x
- middle _ x = x
+ middle _ x = x
- last :: CmmNode O C -> Status -> [(Label, Status)]
- last (CmmCall {cml_cont = Just k}) _ = [(k, ProcPoint)]
- last (CmmForeignCall {succ = k}) _ = [(k, ProcPoint)]
- last l x = map (\id -> (id, x)) (successors l)
+ last :: CmmNode O C -> Status -> FactBase Status
+ last l x = mkFactBase lattice $ map (\id -> (id, x)) (successors l)
--- It is worth distinguishing two sets of proc points:
--- those that are induced by calls in the original graph
--- and those that are introduced because they're reachable from multiple proc points.
+lattice :: DataflowLattice Status
+lattice = DataflowLattice "direct proc-point reachability" unreached add_to
+ where unreached = ReachedBy setEmpty
+ add_to _ (OldFact ProcPoint) _ = (NoChange, ProcPoint)
+ add_to _ _ (NewFact ProcPoint) = (SomeChange, ProcPoint)
+ -- because of previous case
+ add_to _ (OldFact (ReachedBy p)) (NewFact (ReachedBy p'))
+ | setSize union > setSize p = (SomeChange, ReachedBy union)
+ | otherwise = (NoChange, ReachedBy p)
+ where
+ union = setUnion p' p
+
+----------------------------------------------------------------------
+
+-- It is worth distinguishing two sets of proc points: those that are
+-- induced by calls in the original graph and those that are
+-- introduced because they're reachable from multiple proc points.
+--
+-- Extract the set of Continuation BlockIds, see Note [Continuation BlockIds].
callProcPoints :: CmmGraph -> ProcPointSet
callProcPoints g = foldGraphBlocks add (setSingleton (g_entry g)) g
where add :: CmmBlock -> BlockSet -> BlockSet
@@ -139,21 +150,17 @@ callProcPoints g = foldGraphBlocks add (setSingleton (g_entry g)) g
CmmForeignCall {succ=k} -> setInsert k set
_ -> set
-minimalProcPointSet :: Platform -> ProcPointSet -> CmmGraph -> FuelUniqSM ProcPointSet
+minimalProcPointSet :: Platform -> ProcPointSet -> CmmGraph
+ -> UniqSM ProcPointSet
-- Given the set of successors of calls (which must be proc-points)
-- figure out the minimal set of necessary proc-points
-minimalProcPointSet platform callProcPoints g = extendPPSet platform g (postorderDfs g) callProcPoints
+minimalProcPointSet platform callProcPoints g
+ = extendPPSet platform g (postorderDfs g) callProcPoints
-procPointAnalysis :: ProcPointSet -> CmmGraph -> FuelUniqSM (BlockEnv Status)
--- Once you know what the proc-points are, figure out
--- what proc-points each block is reachable from
-procPointAnalysis procPoints g =
- liftM snd $ dataflowPassFwd g initProcPoints $ analFwd lattice forward
- where initProcPoints = [(id, ProcPoint) | id <- setElems procPoints]
-
-extendPPSet :: Platform -> CmmGraph -> [CmmBlock] -> ProcPointSet -> FuelUniqSM ProcPointSet
+extendPPSet :: Platform -> CmmGraph -> [CmmBlock] -> ProcPointSet -> UniqSM ProcPointSet
extendPPSet platform g blocks procPoints =
do env <- procPointAnalysis procPoints g
+ -- pprTrace "extensPPSet" (ppr env) $ return ()
let add block pps = let id = entryLabel block
in case mapLookup id env of
Just ProcPoint -> setInsert id pps
@@ -179,183 +186,13 @@ extendPPSet platform g blocks procPoints =
pps -> extendPPSet g blocks
(foldl extendBlockSet procPoints' pps)
-}
- case newPoint of Just id ->
- if setMember id procPoints' then panic "added old proc pt"
- else extendPPSet platform g blocks (setInsert id procPoints')
- Nothing -> return procPoints'
-
-
-------------------------------------------------------------------------
--- Computing Proc-Point Protocols --
-------------------------------------------------------------------------
-
-{-
-
-There is one major trick, discovered by Michael Adams, which is that
-we want to choose protocols in a way that enables us to optimize away
-some continuations. The optimization is very much like branch-chain
-elimination, except that it involves passing results as well as
-control. The idea is that if a call's continuation k does nothing but
-CopyIn its results and then goto proc point P, the call's continuation
-may be changed to P, *provided* P's protocol is identical to the
-protocol for the CopyIn. We choose protocols to make this so.
-
-Here's an explanatory example; we begin with the source code (lines
-separate basic blocks):
-
- ..1..;
- x, y = g();
- goto P;
- -------
- P: ..2..;
-
-Zipperization converts this code as follows:
-
- ..1..;
- call g() returns to k;
- -------
- k: CopyIn(x, y);
- goto P;
- -------
- P: ..2..;
-
-What we'd like to do is assign P the same CopyIn protocol as k, so we
-can eliminate k:
-
- ..1..;
- call g() returns to P;
- -------
- P: CopyIn(x, y); ..2..;
-
-Of course, P may be the target of more than one continuation, and
-different continuations may have different protocols. Michael Adams
-implemented a voting mechanism, but he thinks a simple greedy
-algorithm would be just as good, so that's what we do.
-
--}
+ case newPoint of
+ Just id ->
+ if setMember id procPoints'
+ then panic "added old proc pt"
+ else extendPPSet platform g blocks (setInsert id procPoints')
+ Nothing -> return procPoints'
-data Protocol = Protocol Convention [CmmFormal] Area
- deriving Eq
-instance Outputable Protocol where
- ppr (Protocol c fs a) = text "Protocol" <+> ppr c <+> ppr fs <+> ppr a
-
--- | Function 'optimize_calls' chooses protocols only for those proc
--- points that are relevant to the optimization explained above.
--- The others are assigned by 'add_unassigned', which is not yet clever.
-
-addProcPointProtocols :: ProcPointSet -> ProcPointSet -> CmmGraph -> FuelUniqSM CmmGraph
-addProcPointProtocols callPPs procPoints g =
- do liveness <- cmmLiveness g
- (protos, g') <- optimize_calls liveness g
- blocks'' <- add_CopyOuts protos procPoints g'
- return $ ofBlockMap (g_entry g) blocks''
- where optimize_calls liveness g = -- see Note [Separate Adams optimization]
- do let (protos, blocks') =
- foldGraphBlocks maybe_add_call (mapEmpty, mapEmpty) g
- protos' = add_unassigned liveness procPoints protos
- let g' = ofBlockMap (g_entry g) (add_CopyIns callPPs protos' blocks')
- return (protos', removeUnreachableBlocks g')
- maybe_add_call :: CmmBlock -> (BlockEnv Protocol, BlockEnv CmmBlock)
- -> (BlockEnv Protocol, BlockEnv CmmBlock)
- -- ^ If the block is a call whose continuation goes to a proc point
- -- whose protocol either matches the continuation's or is not yet set,
- -- redirect the call (cf 'newblock') and set the protocol if necessary
- maybe_add_call block (protos, blocks) =
- case lastNode block of
- CmmCall tgt (Just k) args res s
- | Just proto <- mapLookup k protos,
- Just pee <- branchesToProcPoint k
- -> let newblock = replaceLastNode block (CmmCall tgt (Just pee)
- args res s)
- changed_blocks = insertBlock newblock blocks
- unchanged_blocks = insertBlock block blocks
- in case mapLookup pee protos of
- Nothing -> (mapInsert pee proto protos, changed_blocks)
- Just proto' ->
- if proto == proto' then (protos, changed_blocks)
- else (protos, unchanged_blocks)
- _ -> (protos, insertBlock block blocks)
-
- branchesToProcPoint :: BlockId -> Maybe BlockId
- -- ^ Tells whether the named block is just a branch to a proc point
- branchesToProcPoint id =
- let block = mapLookup id (toBlockMap g) `orElse`
- panic "branch out of graph"
- in case blockToNodeList block of
- (_, [], JustC (CmmBranch pee)) | setMember pee procPoints -> Just pee
- _ -> Nothing
-
--- | For now, following a suggestion by Ben Lippmeier, we pass all
--- live variables as arguments, hoping that a clever register
--- allocator might help.
-
-add_unassigned :: BlockEnv CmmLive -> ProcPointSet -> BlockEnv Protocol ->
- BlockEnv Protocol
-add_unassigned = pass_live_vars_as_args
-
-pass_live_vars_as_args :: BlockEnv CmmLive -> ProcPointSet ->
- BlockEnv Protocol -> BlockEnv Protocol
-pass_live_vars_as_args _liveness procPoints protos = protos'
- where protos' = setFold addLiveVars protos procPoints
- addLiveVars :: BlockId -> BlockEnv Protocol -> BlockEnv Protocol
- addLiveVars id protos =
- case mapLookup id protos of
- Just _ -> protos
- Nothing -> let live = emptyRegSet
- --lookupBlockEnv _liveness id `orElse`
- --panic ("no liveness at block " ++ show id)
- formals = uniqSetToList live
- prot = Protocol Private formals $ CallArea $ Young id
- in mapInsert id prot protos
-
-
--- | Add copy-in instructions to each proc point that did not arise from a call
--- instruction. (Proc-points that arise from calls already have their copy-in instructions.)
-
-add_CopyIns :: ProcPointSet -> BlockEnv Protocol -> BlockEnv CmmBlock -> BlockEnv CmmBlock
-add_CopyIns callPPs protos blocks = mapFold maybe_insert_CopyIns mapEmpty blocks
- where maybe_insert_CopyIns block blocks
- | not $ setMember bid callPPs
- , Just (Protocol c fs _area) <- mapLookup bid protos
- = let nodes = copyInSlot c fs
- (h, m, l) = blockToNodeList block
- in insertBlock (blockOfNodeList (h, nodes ++ m, l)) blocks
- | otherwise = insertBlock block blocks
- where bid = entryLabel block
-
-
--- | Add a CopyOut node before each procpoint.
--- If the predecessor is a call, then the copy outs should already be done by the callee.
--- Note: If we need to add copy-out instructions, they may require stack space,
--- so we accumulate a map from the successors to the necessary stack space,
--- then update the successors after we have finished inserting the copy-outs.
-
-add_CopyOuts :: BlockEnv Protocol -> ProcPointSet -> CmmGraph ->
- FuelUniqSM (BlockEnv CmmBlock)
-add_CopyOuts protos procPoints g = foldGraphBlocks mb_copy_out (return mapEmpty) g
- where mb_copy_out :: CmmBlock -> FuelUniqSM (BlockEnv CmmBlock) ->
- FuelUniqSM (BlockEnv CmmBlock)
- mb_copy_out b z | entryLabel b == g_entry g = skip b z
- mb_copy_out b z =
- case lastNode b of
- CmmCall {} -> skip b z -- copy out done by callee
- CmmForeignCall {} -> skip b z -- copy out done by callee
- _ -> copy_out b z
- copy_out b z = foldr trySucc init (successors b) >>= finish
- where init = (\bmap -> (b, bmap)) `liftM` z
- trySucc succId z =
- if setMember succId procPoints then
- case mapLookup succId protos of
- Nothing -> z
- Just (Protocol c fs _area) -> insert z succId $ copyOutSlot c fs
- else z
- insert z succId m =
- do (b, bmap) <- z
- (b, bs) <- insertBetween b m succId
- -- pprTrace "insert for succ" (ppr succId <> ppr m) $ do
- return $ (b, foldl (flip insertBlock) bmap bs)
- finish (b, bmap) = return $ insertBlock b bmap
- skip b bs = insertBlock b `liftM` bs
-- At this point, we have found a set of procpoints, each of which should be
-- the entry point of a procedure.
@@ -370,10 +207,9 @@ add_CopyOuts protos procPoints g = foldGraphBlocks mb_copy_out (return mapEmpty)
-- ToDo: use the _ret naming convention that the old code generator
-- used. -- EZY
splitAtProcPoints :: CLabel -> ProcPointSet-> ProcPointSet -> BlockEnv Status ->
- CmmDecl -> FuelUniqSM [CmmDecl]
+ CmmDecl -> UniqSM [CmmDecl]
splitAtProcPoints entry_label callPPs procPoints procMap
- (CmmProc (TopInfo {info_tbl=info_tbl,
- stack_info=stack_info})
+ (CmmProc (TopInfo {info_tbl=info_tbl})
top_l g@(CmmGraph {g_entry=entry})) =
do -- Build a map from procpoints to the blocks they reach
let addBlock b graphEnv =
@@ -384,15 +220,18 @@ splitAtProcPoints entry_label callPPs procPoints procMap
[] -> graphEnv
[id] -> add graphEnv id bid b
_ -> panic "Each block should be reachable from only one ProcPoint"
- Nothing -> pprPanic "block not reached by a proc point?" (ppr bid)
+ Nothing -> graphEnv
where bid = entryLabel b
add graphEnv procId bid b = mapInsert procId graph' graphEnv
where graph = mapLookup procId graphEnv `orElse` mapEmpty
graph' = mapInsert bid b graph
+
graphEnv <- return $ foldGraphBlocks addBlock emptyBlockMap g
+
-- Build a map from proc point BlockId to pairs of:
-- * Labels for their new procedures
- -- * Labels for the info tables of their new procedures (only if the proc point is a callPP)
+ -- * Labels for the info tables of their new procedures (only if
+ -- the proc point is a callPP)
-- Due to common blockification, we may overestimate the set of procpoints.
let add_label map pp = Map.insert pp lbls map
where lbls | pp == entry = (entry_label, Just entry_info_lbl)
@@ -401,30 +240,15 @@ splitAtProcPoints entry_label callPPs procPoints procMap
entry_info_lbl = cit_lbl info_tbl
procLabels = foldl add_label Map.empty
(filter (flip mapMember (toBlockMap g)) (setElems procPoints))
- -- For each procpoint, we need to know the SP offset on entry.
- -- If the procpoint is:
- -- - continuation of a call, the SP offset is in the call
- -- - otherwise, 0 (and left out of the spEntryMap)
- let add_sp_off :: CmmBlock -> BlockEnv CmmStackInfo -> BlockEnv CmmStackInfo
- add_sp_off b env =
- case lastNode b of
- CmmCall {cml_cont = Just succ, cml_ret_args = off, cml_ret_off = updfr_off} ->
- mapInsert succ (StackInfo { arg_space = off, updfr_space = Just updfr_off}) env
- CmmForeignCall {succ = succ, updfr = updfr_off} ->
- mapInsert succ (StackInfo { arg_space = wORD_SIZE, updfr_space = Just updfr_off}) env
- _ -> env
- spEntryMap = foldGraphBlocks add_sp_off (mapInsert entry stack_info emptyBlockMap) g
- getStackInfo id = mapLookup id spEntryMap `orElse` StackInfo {arg_space = 0, updfr_space = Nothing}
-- In each new graph, add blocks jumping off to the new procedures,
-- and replace branches to procpoints with branches to the jump-off blocks
let add_jump_block (env, bs) (pp, l) =
do bid <- liftM mkBlockId getUniqueM
- let b = blockOfNodeList (JustC (CmmEntry bid), [], JustC jump)
- StackInfo {arg_space = argSpace, updfr_space = off} = getStackInfo pp
- jump = CmmCall (CmmLit (CmmLabel l)) Nothing argSpace 0
- (off `orElse` 0) -- Jump's shouldn't need the offset...
+ let b = blockJoin (CmmEntry bid) emptyBlock jump
+ jump = CmmCall (CmmLit (CmmLabel l)) Nothing 0 0 0
return (mapInsert pp bid env, b : bs)
- add_jumps (newGraphEnv) (ppId, blockEnv) =
+
+ add_jumps newGraphEnv (ppId, blockEnv) =
do let needed_jumps = -- find which procpoints we currently branch to
mapFold add_if_branch_to_pp [] blockEnv
add_if_branch_to_pp :: CmmBlock -> [(BlockId, CLabel)] -> [(BlockId, CLabel)]
@@ -441,17 +265,16 @@ splitAtProcPoints entry_label callPPs procPoints procMap
foldM add_jump_block (mapEmpty, []) needed_jumps
-- update the entry block
let b = expectJust "block in env" $ mapLookup ppId blockEnv
- off = getStackInfo ppId
blockEnv' = mapInsert ppId b blockEnv
-- replace branches to procpoints with branches to jumps
blockEnv'' = toBlockMap $ replaceBranches jumpEnv $ ofBlockMap ppId blockEnv'
-- add the jump blocks to the graph
blockEnv''' = foldl (flip insertBlock) blockEnv'' jumpBlocks
- let g' = (off, ofBlockMap ppId blockEnv''')
+ let g' = ofBlockMap ppId blockEnv'''
-- pprTrace "g' pre jumps" (ppr g') $ do
return (mapInsert ppId g' newGraphEnv)
graphEnv <- foldM add_jumps emptyBlockMap $ mapToList graphEnv
- let to_proc (bid, (stack_info, g)) = case expectJust "pp label" $ Map.lookup bid procLabels of
+ let to_proc (bid, g) = case expectJust "pp label" $ Map.lookup bid procLabels of
(lbl, Just info_lbl)
| bid == entry
-> CmmProc (TopInfo {info_tbl=info_tbl, stack_info=stack_info})
@@ -462,15 +285,22 @@ splitAtProcPoints entry_label callPPs procPoints procMap
(lbl, Nothing)
-> CmmProc (TopInfo {info_tbl=CmmNonInfoTable, stack_info=stack_info})
lbl (replacePPIds g)
- -- References to procpoint IDs can now be replaced with the infotable's label
- replacePPIds g = mapGraphNodes (id, mapExp repl, mapExp repl) g
+ where
+ stack_info = panic "No StackInfo"
+
+ -- References to procpoint IDs can now be replaced with the
+ -- infotable's label
+ replacePPIds g = {-# SCC "replacePPIds" #-}
+ mapGraphNodes (id, mapExp repl, mapExp repl) g
where repl e@(CmmLit (CmmBlock bid)) =
case Map.lookup bid procLabels of
Just (_, Just info_lbl) -> CmmLit (CmmLabel info_lbl)
_ -> e
repl e = e
- -- The C back end expects to see return continuations before the call sites.
- -- Here, we sort them in reverse order -- it gets reversed later.
+
+ -- The C back end expects to see return continuations before the
+ -- call sites. Here, we sort them in reverse order -- it gets
+ -- reversed later.
let (_, block_order) = foldl add_block_num (0::Int, emptyBlockMap) (postorderDfs g)
add_block_num (i, map) block = (i+1, mapInsert (entryLabel block) i map)
sort_fn (bid, _) (bid', _) =
@@ -482,6 +312,27 @@ splitAtProcPoints entry_label callPPs procPoints procMap
procs
splitAtProcPoints _ _ _ _ t@(CmmData _ _) = return [t]
+
+-- Only called from CmmProcPoint.splitAtProcPoints. NB. does a
+-- recursive lookup, see comment below.
+replaceBranches :: BlockEnv BlockId -> CmmGraph -> CmmGraph
+replaceBranches env cmmg
+ = {-# SCC "replaceBranches" #-}
+ ofBlockMap (g_entry cmmg) $ mapMap f $ toBlockMap cmmg
+ where
+ f block = replaceLastNode block $ last (lastNode block)
+
+ last :: CmmNode O C -> CmmNode O C
+ last (CmmBranch id) = CmmBranch (lookup id)
+ last (CmmCondBranch e ti fi) = CmmCondBranch e (lookup ti) (lookup fi)
+ last (CmmSwitch e tbl) = CmmSwitch e (map (fmap lookup) tbl)
+ last l@(CmmCall {}) = l
+ last l@(CmmForeignCall {}) = l
+ lookup id = fmap lookup (mapLookup id env) `orElse` id
+ -- XXX: this is a recursive lookup, it follows chains
+ -- until the lookup returns Nothing, at which point we
+ -- return the last BlockId
+
----------------------------------------------------------------
{-
diff --git a/compiler/cmm/CmmRewriteAssignments.hs b/compiler/cmm/CmmRewriteAssignments.hs
index ecf3f7e0c3..cf349a0334 100644
--- a/compiler/cmm/CmmRewriteAssignments.hs
+++ b/compiler/cmm/CmmRewriteAssignments.hs
@@ -18,23 +18,23 @@ module CmmRewriteAssignments
import Cmm
import CmmUtils
import CmmOpt
-import OptimizationFuel
import StgCmmUtils
-import Control.Monad
+import UniqSupply
import Platform
import UniqFM
import Unique
import BlockId
-import Compiler.Hoopl hiding (Unique)
+import Hoopl
import Data.Maybe
+import Control.Monad
import Prelude hiding (succ, zip)
----------------------------------------------------------------
--- Main function
-rewriteAssignments :: Platform -> CmmGraph -> FuelUniqSM CmmGraph
+rewriteAssignments :: Platform -> CmmGraph -> UniqSM CmmGraph
rewriteAssignments platform g = do
-- Because we need to act on forwards and backwards information, we
-- first perform usage analysis and bake this information into the
@@ -213,7 +213,7 @@ usageTransfer = mkBTransfer3 first middle last
increaseUsage f r = addToUFM_C combine f r SingleUse
where combine _ _ = ManyUse
-usageRewrite :: BwdRewrite FuelUniqSM (WithRegUsage CmmNode) UsageMap
+usageRewrite :: BwdRewrite UniqSM (WithRegUsage CmmNode) UsageMap
usageRewrite = mkBRewrite3 first middle last
where first _ _ = return Nothing
middle :: Monad m => WithRegUsage CmmNode O O -> UsageMap -> m (Maybe (Graph (WithRegUsage CmmNode) O O))
@@ -226,7 +226,7 @@ usageRewrite = mkBRewrite3 first middle last
last _ _ = return Nothing
type CmmGraphWithRegUsage = GenCmmGraph (WithRegUsage CmmNode)
-annotateUsage :: CmmGraph -> FuelUniqSM (CmmGraphWithRegUsage)
+annotateUsage :: CmmGraph -> UniqSM (CmmGraphWithRegUsage)
annotateUsage vanilla_g =
let g = modifyGraph liftRegUsage vanilla_g
in liftM fst $ dataflowPassBwd g [(g_entry g, fact_bot usageLattice)] $
@@ -404,8 +404,8 @@ clobbers (CmmReg (CmmGlobal Hp), _) (_, _) = False
-- ToDo: Also catch MachOp case
clobbers (ss@CmmStackSlot{}, CmmReg (CmmLocal r)) (u, CmmLoad (ss'@CmmStackSlot{}) _)
| getUnique r == u, ss == ss' = False -- No-op on the stack slot (XXX: Do we need this special case?)
-clobbers (CmmStackSlot (CallArea a) o, rhs) (_, expr) = f expr
- where f (CmmLoad (CmmStackSlot (CallArea a') o') t)
+clobbers (CmmStackSlot a o, rhs) (_, expr) = f expr
+ where f (CmmLoad (CmmStackSlot a' o') t)
= (a, o, widthInBytes (cmmExprWidth rhs)) `overlaps` (a', o', widthInBytes (typeWidth t))
f (CmmLoad e _) = containsStackSlot e
f (CmmMachOp _ es) = or (map f es)
@@ -416,9 +416,6 @@ clobbers (CmmStackSlot (CallArea a) o, rhs) (_, expr) = f expr
containsStackSlot (CmmMachOp _ es) = or (map containsStackSlot es)
containsStackSlot (CmmStackSlot{}) = True
containsStackSlot _ = False
-clobbers (CmmStackSlot (RegSlot l) _, _) (_, expr) = f expr
- where f (CmmLoad (CmmStackSlot (RegSlot l') _) _) = l == l'
- f _ = False
clobbers _ (_, e) = f e
where f (CmmLoad (CmmStackSlot _ _) _) = False
f (CmmLoad{}) = True -- conservative
@@ -432,7 +429,7 @@ clobbers _ (_, e) = f e
-- [ I32 ]
-- [ F64 ]
-- s' -w'- o'
-type CallSubArea = (AreaId, Int, Int) -- area, offset, width
+type CallSubArea = (Area, Int, Int) -- area, offset, width
overlaps :: CallSubArea -> CallSubArea -> Bool
overlaps (a, _, _) (a', _, _) | a /= a' = False
overlaps (_, o, w) (_, o', w') =
@@ -457,7 +454,7 @@ invalidateVolatile :: BlockId -> AssignmentMap -> AssignmentMap
invalidateVolatile k m = mapUFM p m
where p (AlwaysInline e) = if exp e then AlwaysInline e else NeverOptimize
where exp CmmLit{} = True
- exp (CmmLoad (CmmStackSlot (CallArea (Young k')) _) _)
+ exp (CmmLoad (CmmStackSlot (Young k') _) _)
| k' == k = False
exp (CmmLoad (CmmStackSlot _ _) _) = True
exp (CmmMachOp _ es) = and (map exp es)
@@ -527,7 +524,7 @@ assignmentTransfer = mkFTransfer3 (flip const) middleAssignment ((mkFactBase ass
-- values from the assignment map, due to reassignment of the local
-- register.) This is probably not locally sound.
-assignmentRewrite :: FwdRewrite FuelUniqSM (WithRegUsage CmmNode) AssignmentMap
+assignmentRewrite :: FwdRewrite UniqSM (WithRegUsage CmmNode) AssignmentMap
assignmentRewrite = mkFRewrite3 first middle last
where
first _ _ = return Nothing
@@ -596,10 +593,6 @@ assignmentRewrite = mkFRewrite3 first middle last
where rep = typeWidth (localRegType r)
_ -> old
-- See Note [Soundness of store rewriting]
- inlineExp assign old@(CmmLoad (CmmStackSlot (RegSlot r) _) _)
- = case lookupUFM assign r of
- Just (AlwaysInline x) -> x
- _ -> old
inlineExp _ old = old
inlinable :: CmmNode e x -> Bool
@@ -612,7 +605,7 @@ assignmentRewrite = mkFRewrite3 first middle last
-- in literals, which we can inline more aggressively, and inlining
-- gives us opportunities for more folding. However, we don't need any
-- facts to do MachOp folding.
-machOpFoldRewrite :: Platform -> FwdRewrite FuelUniqSM (WithRegUsage CmmNode) a
+machOpFoldRewrite :: Platform -> FwdRewrite UniqSM (WithRegUsage CmmNode) a
machOpFoldRewrite platform = mkFRewrite3 first middle last
where first _ _ = return Nothing
middle :: WithRegUsage CmmNode O O -> a -> GenCmmReplGraph (WithRegUsage CmmNode) O O
diff --git a/compiler/cmm/CmmSpillReload.hs b/compiler/cmm/CmmSpillReload.hs
deleted file mode 100644
index 9e762fe48a..0000000000
--- a/compiler/cmm/CmmSpillReload.hs
+++ /dev/null
@@ -1,166 +0,0 @@
-{-# LANGUAGE GADTs, NoMonoLocalBinds, FlexibleContexts #-}
--- Norman likes local bindings
--- If this module lives on I'd like to get rid of this flag in due course
-
-{-# OPTIONS_GHC -fno-warn-warnings-deprecations #-}
-
--- TODO: Get rid of this flag:
-{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
-
-module CmmSpillReload
- ( dualLivenessWithInsertion
- )
-where
-
-import BlockId
-import Cmm
-import CmmUtils
-import CmmLive
-import OptimizationFuel
-
-import Control.Monad
-import Outputable hiding (empty)
-import qualified Outputable as PP
-import UniqSet
-
-import Compiler.Hoopl hiding (Unique)
-import Data.Maybe
-import Prelude hiding (succ, zip)
-
-{- Note [Overview of spill/reload]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The point of this module is to insert spills and reloads to establish
-the invariant that at a call or any proc point with an established
-protocol all live variables not expected in registers are sitting on the
-stack. We use a backward dual liveness analysis (both traditional
-register liveness as well as register slot liveness on the stack) to
-insert spills and reloads. It should be followed by a forward
-transformation to sink reloads as deeply as possible, so as to reduce
-register pressure: this transformation is performed by
-CmmRewriteAssignments.
-
-A variable can be expected to be live in a register, live on the
-stack, or both. This analysis ensures that spills and reloads are
-inserted as needed to make sure that every live variable needed
-after a call is available on the stack. Spills are placed immediately
-after their reaching definitions, but reloads are placed immediately
-after a return from a call (the entry point.)
-
-Note that we offer no guarantees about the consistency of the value
-in memory and the value in the register, except that they are
-equal across calls/procpoints. If the variable is changed, this
-mapping breaks: but as the original value of the register may still
-be useful in a different context, the memory location is not updated.
--}
-
-data DualLive = DualLive { on_stack :: RegSet, in_regs :: RegSet }
-
-changeStack, changeRegs :: (RegSet -> RegSet) -> DualLive -> DualLive
-changeStack f live = live { on_stack = f (on_stack live) }
-changeRegs f live = live { in_regs = f (in_regs live) }
-
-dualLiveLattice :: DataflowLattice DualLive
-dualLiveLattice = DataflowLattice "variables live in registers and on stack" empty add
- where empty = DualLive emptyRegSet emptyRegSet
- add _ (OldFact old) (NewFact new) = (changeIf $ change1 || change2, DualLive stack regs)
- where (change1, stack) = add1 (on_stack old) (on_stack new)
- (change2, regs) = add1 (in_regs old) (in_regs new)
- add1 old new = if sizeUniqSet join > sizeUniqSet old then (True, join) else (False, old)
- where join = unionUniqSets old new
-
-dualLivenessWithInsertion :: BlockSet -> CmmGraph -> FuelUniqSM CmmGraph
-dualLivenessWithInsertion procPoints g =
- liftM fst $ dataflowPassBwd g [] $ analRewBwd dualLiveLattice
- (dualLiveTransfers (g_entry g) procPoints)
- (insertSpillsAndReloads g procPoints)
-
--- Note [Live registers on entry to procpoints]
--- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--- Remember that the transfer function is only ever run on the rewritten
--- version of a graph, and the rewrite function for spills and reloads
--- enforces the invariant that no local registers are live on entry to
--- a procpoint. Accordingly, we check for this invariant here. An old
--- version of this code incorrectly claimed that any live registers were
--- live on the stack before entering the function: this is wrong, but
--- didn't cause bugs because it never actually was invoked.
-
-dualLiveTransfers :: BlockId -> BlockSet -> (BwdTransfer CmmNode DualLive)
-dualLiveTransfers entry procPoints = mkBTransfer3 first middle last
- where first :: CmmNode C O -> DualLive -> DualLive
- first (CmmEntry id) live -- See Note [Live registers on entry to procpoints]
- | id == entry || setMember id procPoints = noLiveOnEntry id (in_regs live) live
- | otherwise = live
-
- middle :: CmmNode O O -> DualLive -> DualLive
- middle m = changeStack updSlots
- . changeRegs updRegs
- where -- Reuse middle of liveness analysis from CmmLive
- updRegs = case getBTransfer3 xferLive of (_, middle, _) -> middle m
-
- updSlots live = foldSlotsUsed reload (foldSlotsDefd spill live m) m
- spill live s@(RegSlot r, _, _) = check s $ deleteFromRegSet live r
- spill live _ = live
- reload live s@(RegSlot r, _, _) = check s $ extendRegSet live r
- reload live _ = live
- -- Ensure the assignment refers to the entirety of the
- -- register slot (and not just a slice).
- check (RegSlot (LocalReg _ ty), o, w) x
- | o == w && w == widthInBytes (typeWidth ty) = x
- check _ _ = panic "dualLiveTransfers: slices unsupported"
-
- -- Register analysis is identical to liveness analysis from CmmLive.
- last :: CmmNode O C -> FactBase DualLive -> DualLive
- last l fb = changeRegs (gen_kill l) $ case l of
- CmmCall {cml_cont=Nothing} -> empty
- CmmCall {cml_cont=Just k} -> keep_stack_only k
- CmmForeignCall {succ=k} -> keep_stack_only k
- _ -> joinOutFacts dualLiveLattice l fb
- where empty = fact_bot dualLiveLattice
- lkp k = fromMaybe empty (lookupFact k fb)
- keep_stack_only k = DualLive (on_stack (lkp k)) emptyRegSet
-
-insertSpillsAndReloads :: CmmGraph -> BlockSet -> CmmBwdRewrite DualLive
-insertSpillsAndReloads graph procPoints = deepBwdRw3 first middle nothing
- -- Beware: deepBwdRw with one polymorphic function seems more reasonable here,
- -- but GHC miscompiles it, see bug #4044.
- where first :: CmmNode C O -> Fact O DualLive -> CmmReplGraph C O
- first e@(CmmEntry id) live = return $
- if id /= (g_entry graph) && setMember id procPoints then
- case map reload (uniqSetToList (in_regs live)) of
- [] -> Nothing
- is -> Just $ mkFirst e <*> mkMiddles is
- else Nothing
- -- EZY: There was some dead code for handling the case where
- -- we were not splitting procedures. Check Git history if
- -- you're interested (circa e26ea0f41).
-
- middle :: CmmNode O O -> Fact O DualLive -> CmmReplGraph O O
- -- Don't add spills next to reloads.
- middle (CmmAssign (CmmLocal reg) (CmmLoad (CmmStackSlot (RegSlot reg') _) _)) _ | reg == reg' = return Nothing
- -- Spill if register is live on stack.
- middle m@(CmmAssign (CmmLocal reg) _) live
- | reg `elemRegSet` on_stack live = return (Just (mkMiddles [m, spill reg]))
- middle _ _ = return Nothing
-
- nothing _ _ = return Nothing
-
-spill, reload :: LocalReg -> CmmNode O O
-spill r = CmmStore (regSlot r) (CmmReg $ CmmLocal r)
-reload r = CmmAssign (CmmLocal r) (CmmLoad (regSlot r) $ localRegType r)
-
----------------------
--- prettyprinting
-
-ppr_regs :: String -> RegSet -> SDoc
-ppr_regs s regs = text s <+> commafy (map ppr $ uniqSetToList regs)
- where commafy xs = hsep $ punctuate comma xs
-
-instance Outputable DualLive where
- ppr (DualLive {in_regs = regs, on_stack = stack}) =
- if isEmptyUniqSet regs && isEmptyUniqSet stack then
- text "<nothing-live>"
- else
- nest 2 $ fsep [if isEmptyUniqSet regs then PP.empty
- else (ppr_regs "live in regs =" regs),
- if isEmptyUniqSet stack then PP.empty
- else (ppr_regs "live on stack =" stack)]
diff --git a/compiler/cmm/CmmStackLayout.hs b/compiler/cmm/CmmStackLayout.hs
index c7fedad05b..726f98e8a3 100644
--- a/compiler/cmm/CmmStackLayout.hs
+++ b/compiler/cmm/CmmStackLayout.hs
@@ -35,7 +35,6 @@ import CmmProcPoint
import Maybes
import MkGraph (stackStubExpr)
import Control.Monad
-import OptimizationFuel
import Outputable
import SMRep (ByteOff)
diff --git a/compiler/cmm/CmmUtils.hs b/compiler/cmm/CmmUtils.hs
index 0756c87583..f2e4d8e183 100644
--- a/compiler/cmm/CmmUtils.hs
+++ b/compiler/cmm/CmmUtils.hs
@@ -60,13 +60,14 @@ module CmmUtils(
-- * Operations that probably don't belong here
modifyGraph,
- lastNode, replaceLastNode, insertBetween,
+ lastNode, replaceLastNode,
ofBlockMap, toBlockMap, insertBlock,
ofBlockList, toBlockList, bodyToBlockList,
foldGraphBlocks, mapGraphNodes, postorderDfs, mapGraphNodes1,
analFwd, analBwd, analRewFwd, analRewBwd,
- dataflowPassFwd, dataflowPassBwd
+ dataflowPassFwd, dataflowPassBwd, dataflowAnalFwd, dataflowAnalBwd,
+ dataflowAnalFwdBlocks
) where
#include "HsVersions.h"
@@ -79,7 +80,6 @@ import Cmm
import BlockId
import CLabel
import Outputable
-import OptimizationFuel as F
import Unique
import UniqSupply
import Constants( wORD_SIZE, tAG_MASK )
@@ -88,8 +88,7 @@ import Util
import Data.Word
import Data.Maybe
import Data.Bits
-import Control.Monad
-import Compiler.Hoopl hiding ( Unique )
+import Hoopl
---------------------------------------------------
--
@@ -402,13 +401,13 @@ mkLiveness (reg:regs)
modifyGraph :: (Graph n C C -> Graph n' C C) -> GenCmmGraph n -> GenCmmGraph n'
modifyGraph f g = CmmGraph {g_entry=g_entry g, g_graph=f (g_graph g)}
-toBlockMap :: CmmGraph -> LabelMap CmmBlock
+toBlockMap :: CmmGraph -> BlockEnv CmmBlock
toBlockMap (CmmGraph {g_graph=GMany NothingO body NothingO}) = body
-ofBlockMap :: BlockId -> LabelMap CmmBlock -> CmmGraph
+ofBlockMap :: BlockId -> BlockEnv CmmBlock -> CmmGraph
ofBlockMap entry bodyMap = CmmGraph {g_entry=entry, g_graph=GMany NothingO bodyMap NothingO}
-insertBlock :: CmmBlock -> LabelMap CmmBlock -> LabelMap CmmBlock
+insertBlock :: CmmBlock -> BlockEnv CmmBlock -> BlockEnv CmmBlock
insertBlock block map =
ASSERT (isNothing $ mapLookup id map)
mapInsert id block map
@@ -418,7 +417,8 @@ toBlockList :: CmmGraph -> [CmmBlock]
toBlockList g = mapElems $ toBlockMap g
ofBlockList :: BlockId -> [CmmBlock] -> CmmGraph
-ofBlockList entry blocks = CmmGraph {g_entry=entry, g_graph=GMany NothingO body NothingO}
+ofBlockList entry blocks = CmmGraph { g_entry = entry
+ , g_graph = GMany NothingO body NothingO }
where body = foldr addBlock emptyBody blocks
bodyToBlockList :: Body CmmNode -> [CmmBlock]
@@ -429,97 +429,77 @@ mapGraphNodes :: ( CmmNode C O -> CmmNode C O
, CmmNode O C -> CmmNode O C)
-> CmmGraph -> CmmGraph
mapGraphNodes funs@(mf,_,_) g =
- ofBlockMap (entryLabel $ mf $ CmmEntry $ g_entry g) $ mapMap (blockMapNodes3 funs) $ toBlockMap g
+ ofBlockMap (entryLabel $ mf $ CmmEntry $ g_entry g) $ mapMap (mapBlock3' funs) $ toBlockMap g
mapGraphNodes1 :: (forall e x. CmmNode e x -> CmmNode e x) -> CmmGraph -> CmmGraph
-mapGraphNodes1 f g = modifyGraph (graphMapBlocks (blockMapNodes f)) g
+mapGraphNodes1 f = modifyGraph (mapGraph f)
foldGraphBlocks :: (CmmBlock -> a -> a) -> a -> CmmGraph -> a
foldGraphBlocks k z g = mapFold k z $ toBlockMap g
postorderDfs :: CmmGraph -> [CmmBlock]
-postorderDfs g = postorder_dfs_from (toBlockMap g) (g_entry g)
-
--------------------------------------------------
--- Manipulating CmmBlocks
-
-lastNode :: CmmBlock -> CmmNode O C
-lastNode block = foldBlockNodesF3 (nothing, nothing, const) block ()
- where nothing :: a -> b -> ()
- nothing _ _ = ()
-
-replaceLastNode :: Block CmmNode e C -> CmmNode O C -> Block CmmNode e C
-replaceLastNode block last = blockOfNodeList (first, middle, JustC last)
- where (first, middle, _) = blockToNodeList block
-
-----------------------------------------------------------------------
------ Splicing between blocks
--- Given a middle node, a block, and a successor BlockId,
--- we can insert the middle node between the block and the successor.
--- We return the updated block and a list of new blocks that must be added
--- to the graph.
--- The semantics is a bit tricky. We consider cases on the last node:
--- o For a branch, we can just insert before the branch,
--- but sometimes the optimizer does better if we actually insert
--- a fresh basic block, enabling some common blockification.
--- o For a conditional branch, switch statement, or call, we must insert
--- a new basic block.
--- o For a jump or return, this operation is impossible.
-
-insertBetween :: MonadUnique m => CmmBlock -> [CmmNode O O] -> BlockId -> m (CmmBlock, [CmmBlock])
-insertBetween b ms succId = insert $ lastNode b
- where insert :: MonadUnique m => CmmNode O C -> m (CmmBlock, [CmmBlock])
- insert (CmmBranch bid) =
- if bid == succId then
- do (bid', bs) <- newBlocks
- return (replaceLastNode b (CmmBranch bid'), bs)
- else panic "tried invalid block insertBetween"
- insert (CmmCondBranch c t f) =
- do (t', tbs) <- if t == succId then newBlocks else return $ (t, [])
- (f', fbs) <- if f == succId then newBlocks else return $ (f, [])
- return (replaceLastNode b (CmmCondBranch c t' f'), tbs ++ fbs)
- insert (CmmSwitch e ks) =
- do (ids, bs) <- mapAndUnzipM mbNewBlocks ks
- return (replaceLastNode b (CmmSwitch e ids), join bs)
- insert (CmmCall {}) =
- panic "unimp: insertBetween after a call -- probably not a good idea"
- insert (CmmForeignCall {}) =
- panic "unimp: insertBetween after a foreign call -- probably not a good idea"
-
- newBlocks :: MonadUnique m => m (BlockId, [CmmBlock])
- newBlocks = do id <- liftM mkBlockId $ getUniqueM
- return $ (id, [blockOfNodeList (JustC (CmmEntry id), ms, JustC (CmmBranch succId))])
- mbNewBlocks :: MonadUnique m => Maybe BlockId -> m (Maybe BlockId, [CmmBlock])
- mbNewBlocks (Just k) = if k == succId then liftM fstJust newBlocks
- else return (Just k, [])
- mbNewBlocks Nothing = return (Nothing, [])
- fstJust (id, bs) = (Just id, bs)
+postorderDfs g = {-# SCC "postorderDfs" #-} postorder_dfs_from (toBlockMap g) (g_entry g)
-------------------------------------------------
-- Running dataflow analysis and/or rewrites
-- Constructing forward and backward analysis-only pass
-analFwd :: Monad m => DataflowLattice f -> FwdTransfer n f -> FwdPass m n f
-analBwd :: Monad m => DataflowLattice f -> BwdTransfer n f -> BwdPass m n f
+analFwd :: DataflowLattice f -> FwdTransfer n f -> FwdPass UniqSM n f
+analBwd :: DataflowLattice f -> BwdTransfer n f -> BwdPass UniqSM n f
analFwd lat xfer = analRewFwd lat xfer noFwdRewrite
analBwd lat xfer = analRewBwd lat xfer noBwdRewrite
-- Constructing forward and backward analysis + rewrite pass
-analRewFwd :: Monad m => DataflowLattice f -> FwdTransfer n f -> FwdRewrite m n f -> FwdPass m n f
-analRewBwd :: Monad m => DataflowLattice f -> BwdTransfer n f -> BwdRewrite m n f -> BwdPass m n f
+analRewFwd :: DataflowLattice f -> FwdTransfer n f
+ -> FwdRewrite UniqSM n f
+ -> FwdPass UniqSM n f
+
+analRewBwd :: DataflowLattice f
+ -> BwdTransfer n f
+ -> BwdRewrite UniqSM n f
+ -> BwdPass UniqSM n f
analRewFwd lat xfer rew = FwdPass {fp_lattice = lat, fp_transfer = xfer, fp_rewrite = rew}
analRewBwd lat xfer rew = BwdPass {bp_lattice = lat, bp_transfer = xfer, bp_rewrite = rew}
-- Running forward and backward dataflow analysis + optional rewrite
-dataflowPassFwd :: NonLocal n => GenCmmGraph n -> [(BlockId, f)] -> FwdPass FuelUniqSM n f -> FuelUniqSM (GenCmmGraph n, BlockEnv f)
+dataflowPassFwd :: NonLocal n =>
+ GenCmmGraph n -> [(BlockId, f)]
+ -> FwdPass UniqSM n f
+ -> UniqSM (GenCmmGraph n, BlockEnv f)
dataflowPassFwd (CmmGraph {g_entry=entry, g_graph=graph}) facts fwd = do
(graph, facts, NothingO) <- analyzeAndRewriteFwd fwd (JustC [entry]) graph (mkFactBase (fp_lattice fwd) facts)
return (CmmGraph {g_entry=entry, g_graph=graph}, facts)
-dataflowPassBwd :: NonLocal n => GenCmmGraph n -> [(BlockId, f)] -> BwdPass FuelUniqSM n f -> FuelUniqSM (GenCmmGraph n, BlockEnv f)
+dataflowAnalFwd :: NonLocal n =>
+ GenCmmGraph n -> [(BlockId, f)]
+ -> FwdPass UniqSM n f
+ -> BlockEnv f
+dataflowAnalFwd (CmmGraph {g_entry=entry, g_graph=graph}) facts fwd =
+ analyzeFwd fwd (JustC [entry]) graph (mkFactBase (fp_lattice fwd) facts)
+
+dataflowAnalFwdBlocks :: NonLocal n =>
+ GenCmmGraph n -> [(BlockId, f)]
+ -> FwdPass UniqSM n f
+ -> UniqSM (BlockEnv f)
+dataflowAnalFwdBlocks (CmmGraph {g_entry=entry, g_graph=graph}) facts fwd = do
+-- (graph, facts, NothingO) <- analyzeAndRewriteFwd fwd (JustC [entry]) graph (mkFactBase (fp_lattice fwd) facts)
+-- return facts
+ return (analyzeFwdBlocks fwd (JustC [entry]) graph (mkFactBase (fp_lattice fwd) facts))
+
+dataflowAnalBwd :: NonLocal n =>
+ GenCmmGraph n -> [(BlockId, f)]
+ -> BwdPass UniqSM n f
+ -> BlockEnv f
+dataflowAnalBwd (CmmGraph {g_entry=entry, g_graph=graph}) facts bwd =
+ analyzeBwd bwd (JustC [entry]) graph (mkFactBase (bp_lattice bwd) facts)
+
+dataflowPassBwd :: NonLocal n =>
+ GenCmmGraph n -> [(BlockId, f)]
+ -> BwdPass UniqSM n f
+ -> UniqSM (GenCmmGraph n, BlockEnv f)
dataflowPassBwd (CmmGraph {g_entry=entry, g_graph=graph}) facts bwd = do
(graph, facts, NothingO) <- analyzeAndRewriteBwd bwd (JustC [entry]) graph (mkFactBase (bp_lattice bwd) facts)
return (CmmGraph {g_entry=entry, g_graph=graph}, facts)
diff --git a/compiler/cmm/Hoopl.hs b/compiler/cmm/Hoopl.hs
new file mode 100644
index 0000000000..0eca85cb8a
--- /dev/null
+++ b/compiler/cmm/Hoopl.hs
@@ -0,0 +1,125 @@
+module Hoopl (
+ module Compiler.Hoopl,
+ module Hoopl.Dataflow,
+ deepFwdRw, deepFwdRw3,
+ deepBwdRw, deepBwdRw3,
+ thenFwdRw
+ ) where
+
+import Compiler.Hoopl hiding
+ ( Unique,
+ FwdTransfer(..), FwdRewrite(..), FwdPass(..),
+ BwdTransfer(..), BwdRewrite(..), BwdPass(..),
+ noFwdRewrite, noBwdRewrite,
+ analyzeAndRewriteFwd, analyzeAndRewriteBwd,
+ mkFactBase, Fact,
+ mkBRewrite, mkBRewrite3, mkBTransfer, mkBTransfer3,
+ mkFRewrite, mkFRewrite3, mkFTransfer, mkFTransfer3,
+ deepFwdRw, deepFwdRw3, thenFwdRw, iterFwdRw,
+ deepBwdRw, deepBwdRw3, thenBwdRw, iterBwdRw
+ )
+
+import Hoopl.Dataflow
+import Control.Monad
+import UniqSupply
+
+deepFwdRw3 :: (n C O -> f -> UniqSM (Maybe (Graph n C O)))
+ -> (n O O -> f -> UniqSM (Maybe (Graph n O O)))
+ -> (n O C -> f -> UniqSM (Maybe (Graph n O C)))
+ -> (FwdRewrite UniqSM n f)
+deepFwdRw :: (forall e x . n e x -> f -> UniqSM (Maybe (Graph n e x))) -> FwdRewrite UniqSM n f
+deepFwdRw3 f m l = iterFwdRw $ mkFRewrite3 f m l
+deepFwdRw f = deepFwdRw3 f f f
+
+-- N.B. rw3, rw3', and rw3a are triples of functions.
+-- But rw and rw' are single functions.
+thenFwdRw :: forall n f.
+ FwdRewrite UniqSM n f
+ -> FwdRewrite UniqSM n f
+ -> FwdRewrite UniqSM n f
+thenFwdRw rw3 rw3' = wrapFR2 thenrw rw3 rw3'
+ where
+ thenrw :: forall e x t t1.
+ (t -> t1 -> UniqSM (Maybe (Graph n e x, FwdRewrite UniqSM n f)))
+ -> (t -> t1 -> UniqSM (Maybe (Graph n e x, FwdRewrite UniqSM n f)))
+ -> t
+ -> t1
+ -> UniqSM (Maybe (Graph n e x, FwdRewrite UniqSM n f))
+ thenrw rw rw' n f = rw n f >>= fwdRes
+ where fwdRes Nothing = rw' n f
+ fwdRes (Just gr) = return $ Just $ fadd_rw rw3' gr
+
+iterFwdRw :: forall n f.
+ FwdRewrite UniqSM n f
+ -> FwdRewrite UniqSM n f
+iterFwdRw rw3 = wrapFR iter rw3
+ where iter :: forall a e x t.
+ (t -> a -> UniqSM (Maybe (Graph n e x, FwdRewrite UniqSM n f)))
+ -> t
+ -> a
+ -> UniqSM (Maybe (Graph n e x, FwdRewrite UniqSM n f))
+ iter rw n = (liftM $ liftM $ fadd_rw (iterFwdRw rw3)) . rw n
+
+-- | Function inspired by 'rew' in the paper
+_frewrite_cps :: ((Graph n e x, FwdRewrite UniqSM n f) -> UniqSM a)
+ -> UniqSM a
+ -> (forall e x . n e x -> f -> UniqSM (Maybe (Graph n e x, FwdRewrite UniqSM n f)))
+ -> n e x
+ -> f
+ -> UniqSM a
+_frewrite_cps j n rw node f =
+ do mg <- rw node f
+ case mg of Nothing -> n
+ Just gr -> j gr
+
+
+
+-- | Function inspired by 'add' in the paper
+fadd_rw :: FwdRewrite UniqSM n f
+ -> (Graph n e x, FwdRewrite UniqSM n f)
+ -> (Graph n e x, FwdRewrite UniqSM n f)
+fadd_rw rw2 (g, rw1) = (g, rw1 `thenFwdRw` rw2)
+
+
+
+deepBwdRw3 ::
+ (n C O -> f -> UniqSM (Maybe (Graph n C O)))
+ -> (n O O -> f -> UniqSM (Maybe (Graph n O O)))
+ -> (n O C -> FactBase f -> UniqSM (Maybe (Graph n O C)))
+ -> (BwdRewrite UniqSM n f)
+deepBwdRw :: (forall e x . n e x -> Fact x f -> UniqSM (Maybe (Graph n e x)))
+ -> BwdRewrite UniqSM n f
+deepBwdRw3 f m l = iterBwdRw $ mkBRewrite3 f m l
+deepBwdRw f = deepBwdRw3 f f f
+
+
+thenBwdRw :: forall n f. BwdRewrite UniqSM n f -> BwdRewrite UniqSM n f -> BwdRewrite UniqSM n f
+thenBwdRw rw1 rw2 = wrapBR2 f rw1 rw2
+ where f :: forall t t1 t2 e x.
+ t
+ -> (t1 -> t2 -> UniqSM (Maybe (Graph n e x, BwdRewrite UniqSM n f)))
+ -> (t1 -> t2 -> UniqSM (Maybe (Graph n e x, BwdRewrite UniqSM n f)))
+ -> t1
+ -> t2
+ -> UniqSM (Maybe (Graph n e x, BwdRewrite UniqSM n f))
+ f _ rw1 rw2' n f = do
+ res1 <- rw1 n f
+ case res1 of
+ Nothing -> rw2' n f
+ Just gr -> return $ Just $ badd_rw rw2 gr
+
+iterBwdRw :: forall n f. BwdRewrite UniqSM n f -> BwdRewrite UniqSM n f
+iterBwdRw rw = wrapBR f rw
+ where f :: forall t e x t1 t2.
+ t
+ -> (t1 -> t2 -> UniqSM (Maybe (Graph n e x, BwdRewrite UniqSM n f)))
+ -> t1
+ -> t2
+ -> UniqSM (Maybe (Graph n e x, BwdRewrite UniqSM n f))
+ f _ rw' n f = liftM (liftM (badd_rw (iterBwdRw rw))) (rw' n f)
+
+-- | Function inspired by 'add' in the paper
+badd_rw :: BwdRewrite UniqSM n f
+ -> (Graph n e x, BwdRewrite UniqSM n f)
+ -> (Graph n e x, BwdRewrite UniqSM n f)
+badd_rw rw2 (g, rw1) = (g, rw1 `thenBwdRw` rw2)
diff --git a/compiler/cmm/Hoopl/Dataflow.hs b/compiler/cmm/Hoopl/Dataflow.hs
new file mode 100644
index 0000000000..9745eac9d8
--- /dev/null
+++ b/compiler/cmm/Hoopl/Dataflow.hs
@@ -0,0 +1,887 @@
+--
+-- Copyright (c) 2010, João Dias, Simon Marlow, Simon Peyton Jones,
+-- and Norman Ramsey
+--
+-- Modifications copyright (c) The University of Glasgow 2012
+--
+-- This module is a specialised and optimised version of
+-- Compiler.Hoopl.Dataflow in the hoopl package. In particular it is
+-- specialised to the UniqSM monad.
+--
+
+{-# LANGUAGE RankNTypes, ScopedTypeVariables, GADTs, EmptyDataDecls, PatternGuards, TypeFamilies, MultiParamTypeClasses #-}
+#if __GLASGOW_HASKELL__ >= 703
+{-# OPTIONS_GHC -fprof-auto-top #-}
+#endif
+#if __GLASGOW_HASKELL__ >= 701
+{-# LANGUAGE Trustworthy #-}
+#endif
+
+module Hoopl.Dataflow
+ ( DataflowLattice(..), OldFact(..), NewFact(..), Fact, mkFactBase
+ , ChangeFlag(..)
+ , FwdPass(..), FwdTransfer, mkFTransfer, mkFTransfer3, getFTransfer3
+ -- * Respecting Fuel
+
+ -- $fuel
+ , FwdRewrite, mkFRewrite, mkFRewrite3, getFRewrite3, noFwdRewrite
+ , wrapFR, wrapFR2
+ , BwdPass(..), BwdTransfer, mkBTransfer, mkBTransfer3, getBTransfer3
+ , wrapBR, wrapBR2
+ , BwdRewrite, mkBRewrite, mkBRewrite3, getBRewrite3, noBwdRewrite
+ , analyzeAndRewriteFwd, analyzeAndRewriteBwd
+ , analyzeFwd, analyzeFwdBlocks, analyzeBwd
+ )
+where
+
+import UniqSupply
+
+import Data.Maybe
+import Data.Array
+
+import Compiler.Hoopl hiding
+ ( mkBRewrite3, mkFRewrite3, noFwdRewrite, noBwdRewrite
+ , analyzeAndRewriteBwd, analyzeAndRewriteFwd
+ )
+import Compiler.Hoopl.Internals
+ ( wrapFR, wrapFR2
+ , wrapBR, wrapBR2
+ , splice
+ )
+
+
+-- -----------------------------------------------------------------------------
+
+noRewrite :: a -> b -> UniqSM (Maybe c)
+noRewrite _ _ = return Nothing
+
+noFwdRewrite :: FwdRewrite UniqSM n f
+noFwdRewrite = FwdRewrite3 (noRewrite, noRewrite, noRewrite)
+
+-- | Functions passed to 'mkFRewrite3' should not be aware of the fuel supply.
+-- The result returned by 'mkFRewrite3' respects fuel.
+mkFRewrite3 :: forall n f.
+ (n C O -> f -> UniqSM (Maybe (Graph n C O)))
+ -> (n O O -> f -> UniqSM (Maybe (Graph n O O)))
+ -> (n O C -> f -> UniqSM (Maybe (Graph n O C)))
+ -> FwdRewrite UniqSM n f
+mkFRewrite3 f m l = FwdRewrite3 (lift f, lift m, lift l)
+ where lift :: forall t t1 a. (t -> t1 -> UniqSM (Maybe a))
+ -> t -> t1 -> UniqSM (Maybe (a, FwdRewrite UniqSM n f))
+ {-# INLINE lift #-}
+ lift rw node fact = do
+ a <- rw node fact
+ case a of
+ Nothing -> return Nothing
+ Just a -> return (Just (a,noFwdRewrite))
+
+noBwdRewrite :: BwdRewrite UniqSM n f
+noBwdRewrite = BwdRewrite3 (noRewrite, noRewrite, noRewrite)
+
+mkBRewrite3 :: forall n f.
+ (n C O -> f -> UniqSM (Maybe (Graph n C O)))
+ -> (n O O -> f -> UniqSM (Maybe (Graph n O O)))
+ -> (n O C -> FactBase f -> UniqSM (Maybe (Graph n O C)))
+ -> BwdRewrite UniqSM n f
+mkBRewrite3 f m l = BwdRewrite3 (lift f, lift m, lift l)
+ where lift :: forall t t1 a. (t -> t1 -> UniqSM (Maybe a))
+ -> t -> t1 -> UniqSM (Maybe (a, BwdRewrite UniqSM n f))
+ {-# INLINE lift #-}
+ lift rw node fact = do
+ a <- rw node fact
+ case a of
+ Nothing -> return Nothing
+ Just a -> return (Just (a,noBwdRewrite))
+
+-----------------------------------------------------------------------------
+-- Analyze and rewrite forward: the interface
+-----------------------------------------------------------------------------
+
+-- | if the graph being analyzed is open at the entry, there must
+-- be no other entry point, or all goes horribly wrong...
+analyzeAndRewriteFwd
+ :: forall n f e x . NonLocal n =>
+ FwdPass UniqSM n f
+ -> MaybeC e [Label]
+ -> Graph n e x -> Fact e f
+ -> UniqSM (Graph n e x, FactBase f, MaybeO x f)
+analyzeAndRewriteFwd pass entries g f =
+ do (rg, fout) <- arfGraph pass (fmap targetLabels entries) g f
+ let (g', fb) = normalizeGraph rg
+ return (g', fb, distinguishedExitFact g' fout)
+
+distinguishedExitFact :: forall n e x f . Graph n e x -> Fact x f -> MaybeO x f
+distinguishedExitFact g f = maybe g
+ where maybe :: Graph n e x -> MaybeO x f
+ maybe GNil = JustO f
+ maybe (GUnit {}) = JustO f
+ maybe (GMany _ _ x) = case x of NothingO -> NothingO
+ JustO _ -> JustO f
+
+----------------------------------------------------------------
+-- Forward Implementation
+----------------------------------------------------------------
+
+type Entries e = MaybeC e [Label]
+
+arfGraph :: forall n f e x . NonLocal n =>
+ FwdPass UniqSM n f ->
+ Entries e -> Graph n e x -> Fact e f -> UniqSM (DG f n e x, Fact x f)
+arfGraph pass@FwdPass { fp_lattice = lattice,
+ fp_transfer = transfer,
+ fp_rewrite = rewrite } entries g in_fact = graph g in_fact
+ where
+ {- nested type synonyms would be so lovely here
+ type ARF thing = forall e x . thing e x -> f -> m (DG f n e x, Fact x f)
+ type ARFX thing = forall e x . thing e x -> Fact e f -> m (DG f n e x, Fact x f)
+ -}
+ graph :: Graph n e x -> Fact e f -> UniqSM (DG f n e x, Fact x f)
+ block :: forall e x .
+ Block n e x -> f -> UniqSM (DG f n e x, Fact x f)
+
+ body :: [Label] -> LabelMap (Block n C C)
+ -> Fact C f -> UniqSM (DG f n C C, Fact C f)
+ -- Outgoing factbase is restricted to Labels *not* in
+ -- in the Body; the facts for Labels *in*
+ -- the Body are in the 'DG f n C C'
+
+ cat :: forall e a x f1 f2 f3.
+ (f1 -> UniqSM (DG f n e a, f2))
+ -> (f2 -> UniqSM (DG f n a x, f3))
+ -> (f1 -> UniqSM (DG f n e x, f3))
+
+ graph GNil f = return (dgnil, f)
+ graph (GUnit blk) f = block blk f
+ graph (GMany e bdy x) f = ((e `ebcat` bdy) `cat` exit x) f
+ where
+ ebcat :: MaybeO e (Block n O C) -> Body n -> Fact e f -> UniqSM (DG f n e C, Fact C f)
+ exit :: MaybeO x (Block n C O) -> Fact C f -> UniqSM (DG f n C x, Fact x f)
+ exit (JustO blk) f = arfx block blk f
+ exit NothingO f = return (dgnilC, f)
+ ebcat entry bdy f = c entries entry f
+ where c :: MaybeC e [Label] -> MaybeO e (Block n O C)
+ -> Fact e f -> UniqSM (DG f n e C, Fact C f)
+ c NothingC (JustO entry) f = (block entry `cat` body (successors entry) bdy) f
+ c (JustC entries) NothingO f = body entries bdy f
+ c _ _ _ = error "bogus GADT pattern match failure"
+
+ -- Lift from nodes to blocks
+ block BNil f = return (dgnil, f)
+ block (BlockCO n b) f = (node n `cat` block b) f
+ block (BlockCC l b n) f = (node l `cat` block b `cat` node n) f
+ block (BlockOC b n) f = (block b `cat` node n) f
+
+ block (BMiddle n) f = node n f
+ block (BCat b1 b2) f = (block b1 `cat` block b2) f
+ block (BHead h n) f = (block h `cat` node n) f
+ block (BTail n t) f = (node n `cat` block t) f
+
+ {-# INLINE node #-}
+ node :: forall e x . (ShapeLifter e x)
+ => n e x -> f -> UniqSM (DG f n e x, Fact x f)
+ node n f
+ = do { grw <- frewrite rewrite n f
+ ; case grw of
+ Nothing -> return ( singletonDG f n
+ , ftransfer transfer n f )
+ Just (g, rw) ->
+ let pass' = pass { fp_rewrite = rw }
+ f' = fwdEntryFact n f
+ in arfGraph pass' (fwdEntryLabel n) g f' }
+
+ -- | Compose fact transformers and concatenate the resulting
+ -- rewritten graphs.
+ {-# INLINE cat #-}
+ cat ft1 ft2 f = do { (g1,f1) <- ft1 f
+ ; (g2,f2) <- ft2 f1
+ ; let !g = g1 `dgSplice` g2
+ ; return (g, f2) }
+
+ arfx :: forall x .
+ (Block n C x -> f -> UniqSM (DG f n C x, Fact x f))
+ -> (Block n C x -> Fact C f -> UniqSM (DG f n C x, Fact x f))
+ arfx arf thing fb =
+ arf thing $ fromJust $ lookupFact (entryLabel thing) $ joinInFacts lattice fb
+ -- joinInFacts adds debugging information
+
+
+ -- Outgoing factbase is restricted to Labels *not* in
+ -- in the Body; the facts for Labels *in*
+ -- the Body are in the 'DG f n C C'
+ body entries blockmap init_fbase
+ = fixpoint Fwd lattice do_block entries blockmap init_fbase
+ where
+ lattice = fp_lattice pass
+ do_block :: forall x . Block n C x -> FactBase f
+ -> UniqSM (DG f n C x, Fact x f)
+ do_block b fb = block b entryFact
+ where entryFact = getFact lattice (entryLabel b) fb
+
+
+-- Join all the incoming facts with bottom.
+-- We know the results _shouldn't change_, but the transfer
+-- functions might, for example, generate some debugging traces.
+joinInFacts :: DataflowLattice f -> FactBase f -> FactBase f
+joinInFacts (lattice @ DataflowLattice {fact_bot = bot, fact_join = fj}) fb =
+ mkFactBase lattice $ map botJoin $ mapToList fb
+ where botJoin (l, f) = (l, snd $ fj l (OldFact bot) (NewFact f))
+
+forwardBlockList :: (NonLocal n)
+ => [Label] -> Body n -> [Block n C C]
+-- This produces a list of blocks in order suitable for forward analysis,
+-- along with the list of Labels it may depend on for facts.
+forwardBlockList entries blks = postorder_dfs_from blks entries
+
+----------------------------------------------------------------
+-- Forward Analysis only
+----------------------------------------------------------------
+
+-- | if the graph being analyzed is open at the entry, there must
+-- be no other entry point, or all goes horribly wrong...
+analyzeFwd
+ :: forall n f e . NonLocal n =>
+ FwdPass UniqSM n f
+ -> MaybeC e [Label]
+ -> Graph n e C -> Fact e f
+ -> FactBase f
+analyzeFwd FwdPass { fp_lattice = lattice,
+ fp_transfer = FwdTransfer3 (ftr, mtr, ltr) }
+ entries g in_fact = graph g in_fact
+ where
+ graph :: Graph n e C -> Fact e f -> FactBase f
+ graph (GMany entry blockmap NothingO)
+ = case (entries, entry) of
+ (NothingC, JustO entry) -> block entry `cat` body (successors entry)
+ (JustC entries, NothingO) -> body entries
+ _ -> error "bogus GADT pattern match failure"
+ where
+ body :: [Label] -> Fact C f -> Fact C f
+ body entries f
+ = fixpointAnal Fwd lattice do_block entries blockmap f
+ where
+ do_block :: forall x . Block n C x -> FactBase f -> Fact x f
+ do_block b fb = block b entryFact
+ where entryFact = getFact lattice (entryLabel b) fb
+
+ -- NB. eta-expand block, GHC can't do this by itself. See #5809.
+ block :: forall e x . Block n e x -> f -> Fact x f
+ block BNil f = f
+ block (BlockCO n b) f = (ftr n `cat` block b) f
+ block (BlockCC l b n) f = (ftr l `cat` (block b `cat` ltr n)) f
+ block (BlockOC b n) f = (block b `cat` ltr n) f
+
+ block (BMiddle n) f = mtr n f
+ block (BCat b1 b2) f = (block b1 `cat` block b2) f
+ block (BHead h n) f = (block h `cat` mtr n) f
+ block (BTail n t) f = (mtr n `cat` block t) f
+
+ {-# INLINE cat #-}
+ cat :: forall f1 f2 f3 . (f1 -> f2) -> (f2 -> f3) -> (f1 -> f3)
+ cat ft1 ft2 = \f -> ft2 $! ft1 f
+
+-- | if the graph being analyzed is open at the entry, there must
+-- be no other entry point, or all goes horribly wrong...
+analyzeFwdBlocks
+ :: forall n f e . NonLocal n =>
+ FwdPass UniqSM n f
+ -> MaybeC e [Label]
+ -> Graph n e C -> Fact e f
+ -> FactBase f
+analyzeFwdBlocks FwdPass { fp_lattice = lattice,
+ fp_transfer = FwdTransfer3 (ftr, _, ltr) }
+ entries g in_fact = graph g in_fact
+ where
+ graph :: Graph n e C -> Fact e f -> FactBase f
+ graph (GMany entry blockmap NothingO)
+ = case (entries, entry) of
+ (NothingC, JustO entry) -> block entry `cat` body (successors entry)
+ (JustC entries, NothingO) -> body entries
+ _ -> error "bogus GADT pattern match failure"
+ where
+ body :: [Label] -> Fact C f -> Fact C f
+ body entries f
+ = fixpointAnal Fwd lattice do_block entries blockmap f
+ where
+ do_block :: forall x . Block n C x -> FactBase f -> Fact x f
+ do_block b fb = block b entryFact
+ where entryFact = getFact lattice (entryLabel b) fb
+
+ -- NB. eta-expand block, GHC can't do this by itself. See #5809.
+ block :: forall e x . Block n e x -> f -> Fact x f
+ block BNil f = f
+ block (BlockCO n _) f = ftr n f
+ block (BlockCC l _ n) f = (ftr l `cat` ltr n) f
+ block (BlockOC _ n) f = ltr n f
+ block _ _ = error "analyzeFwdBlocks"
+
+ {-# INLINE cat #-}
+ cat :: forall f1 f2 f3 . (f1 -> f2) -> (f2 -> f3) -> (f1 -> f3)
+ cat ft1 ft2 = \f -> ft2 $! ft1 f
+
+----------------------------------------------------------------
+-- Backward Analysis only
+----------------------------------------------------------------
+
+-- | if the graph being analyzed is open at the entry, there must
+-- be no other entry point, or all goes horribly wrong...
+analyzeBwd
+ :: forall n f e . NonLocal n =>
+ BwdPass UniqSM n f
+ -> MaybeC e [Label]
+ -> Graph n e C -> Fact C f
+ -> FactBase f
+analyzeBwd BwdPass { bp_lattice = lattice,
+ bp_transfer = BwdTransfer3 (ftr, mtr, ltr) }
+ entries g in_fact = graph g in_fact
+ where
+ graph :: Graph n e C -> Fact C f -> FactBase f
+ graph (GMany entry blockmap NothingO)
+ = case (entries, entry) of
+ (NothingC, JustO entry) -> body (successors entry)
+ (JustC entries, NothingO) -> body entries
+ _ -> error "bogus GADT pattern match failure"
+ where
+ body :: [Label] -> Fact C f -> Fact C f
+ body entries f
+ = fixpointAnal Bwd lattice do_block entries blockmap f
+ where
+ do_block :: forall x . Block n C x -> Fact x f -> FactBase f
+ do_block b fb = mapSingleton (entryLabel b) (block b fb)
+
+ -- NB. eta-expand block, GHC can't do this by itself. See #5809.
+ block :: forall e x . Block n e x -> Fact x f -> f
+ block BNil f = f
+ block (BlockCO n b) f = (ftr n `cat` block b) f
+ block (BlockCC l b n) f = ((ftr l `cat` block b) `cat` ltr n) f
+ block (BlockOC b n) f = (block b `cat` ltr n) f
+
+ block (BMiddle n) f = mtr n f
+ block (BCat b1 b2) f = (block b1 `cat` block b2) f
+ block (BHead h n) f = (block h `cat` mtr n) f
+ block (BTail n t) f = (mtr n `cat` block t) f
+
+ {-# INLINE cat #-}
+ cat :: forall f1 f2 f3 . (f2 -> f3) -> (f1 -> f2) -> (f1 -> f3)
+ cat ft1 ft2 = \f -> ft1 $! ft2 f
+
+-----------------------------------------------------------------------------
+-- Backward analysis and rewriting: the interface
+-----------------------------------------------------------------------------
+
+
+-- | if the graph being analyzed is open at the exit, I don't
+-- quite understand the implications of possible other exits
+analyzeAndRewriteBwd
+ :: NonLocal n
+ => BwdPass UniqSM n f
+ -> MaybeC e [Label] -> Graph n e x -> Fact x f
+ -> UniqSM (Graph n e x, FactBase f, MaybeO e f)
+analyzeAndRewriteBwd pass entries g f =
+ do (rg, fout) <- arbGraph pass (fmap targetLabels entries) g f
+ let (g', fb) = normalizeGraph rg
+ return (g', fb, distinguishedEntryFact g' fout)
+
+distinguishedEntryFact :: forall n e x f . Graph n e x -> Fact e f -> MaybeO e f
+distinguishedEntryFact g f = maybe g
+ where maybe :: Graph n e x -> MaybeO e f
+ maybe GNil = JustO f
+ maybe (GUnit {}) = JustO f
+ maybe (GMany e _ _) = case e of NothingO -> NothingO
+ JustO _ -> JustO f
+
+
+-----------------------------------------------------------------------------
+-- Backward implementation
+-----------------------------------------------------------------------------
+
+arbGraph :: forall n f e x .
+ NonLocal n =>
+ BwdPass UniqSM n f ->
+ Entries e -> Graph n e x -> Fact x f -> UniqSM (DG f n e x, Fact e f)
+arbGraph pass@BwdPass { bp_lattice = lattice,
+ bp_transfer = transfer,
+ bp_rewrite = rewrite } entries g in_fact = graph g in_fact
+ where
+ {- nested type synonyms would be so lovely here
+ type ARB thing = forall e x . thing e x -> Fact x f -> m (DG f n e x, f)
+ type ARBX thing = forall e x . thing e x -> Fact x f -> m (DG f n e x, Fact e f)
+ -}
+ graph :: Graph n e x -> Fact x f -> UniqSM (DG f n e x, Fact e f)
+ block :: forall e x . Block n e x -> Fact x f -> UniqSM (DG f n e x, f)
+ body :: [Label] -> Body n -> Fact C f -> UniqSM (DG f n C C, Fact C f)
+ node :: forall e x . (ShapeLifter e x)
+ => n e x -> Fact x f -> UniqSM (DG f n e x, f)
+ cat :: forall e a x info info' info''.
+ (info' -> UniqSM (DG f n e a, info''))
+ -> (info -> UniqSM (DG f n a x, info'))
+ -> (info -> UniqSM (DG f n e x, info''))
+
+ graph GNil f = return (dgnil, f)
+ graph (GUnit blk) f = block blk f
+ graph (GMany e bdy x) f = ((e `ebcat` bdy) `cat` exit x) f
+ where
+ ebcat :: MaybeO e (Block n O C) -> Body n -> Fact C f -> UniqSM (DG f n e C, Fact e f)
+ exit :: MaybeO x (Block n C O) -> Fact x f -> UniqSM (DG f n C x, Fact C f)
+ exit (JustO blk) f = arbx block blk f
+ exit NothingO f = return (dgnilC, f)
+ ebcat entry bdy f = c entries entry f
+ where c :: MaybeC e [Label] -> MaybeO e (Block n O C)
+ -> Fact C f -> UniqSM (DG f n e C, Fact e f)
+ c NothingC (JustO entry) f = (block entry `cat` body (successors entry) bdy) f
+ c (JustC entries) NothingO f = body entries bdy f
+ c _ _ _ = error "bogus GADT pattern match failure"
+
+ -- Lift from nodes to blocks
+ block BNil f = return (dgnil, f)
+ block (BlockCO n b) f = (node n `cat` block b) f
+ block (BlockCC l b n) f = (node l `cat` block b `cat` node n) f
+ block (BlockOC b n) f = (block b `cat` node n) f
+
+ block (BMiddle n) f = node n f
+ block (BCat b1 b2) f = (block b1 `cat` block b2) f
+ block (BHead h n) f = (block h `cat` node n) f
+ block (BTail n t) f = (node n `cat` block t) f
+
+ {-# INLINE node #-}
+ node n f
+ = do { bwdres <- brewrite rewrite n f
+ ; case bwdres of
+ Nothing -> return (singletonDG entry_f n, entry_f)
+ where entry_f = btransfer transfer n f
+ Just (g, rw) ->
+ do { let pass' = pass { bp_rewrite = rw }
+ ; (g, f) <- arbGraph pass' (fwdEntryLabel n) g f
+ ; return (g, bwdEntryFact lattice n f)} }
+
+ -- | Compose fact transformers and concatenate the resulting
+ -- rewritten graphs.
+ {-# INLINE cat #-}
+ cat ft1 ft2 f = do { (g2,f2) <- ft2 f
+ ; (g1,f1) <- ft1 f2
+ ; let !g = g1 `dgSplice` g2
+ ; return (g, f1) }
+
+ arbx :: forall x .
+ (Block n C x -> Fact x f -> UniqSM (DG f n C x, f))
+ -> (Block n C x -> Fact x f -> UniqSM (DG f n C x, Fact C f))
+
+ arbx arb thing f = do { (rg, f) <- arb thing f
+ ; let fb = joinInFacts (bp_lattice pass) $
+ mapSingleton (entryLabel thing) f
+ ; return (rg, fb) }
+ -- joinInFacts adds debugging information
+
+ -- Outgoing factbase is restricted to Labels *not* in
+ -- in the Body; the facts for Labels *in*
+ -- the Body are in the 'DG f n C C'
+ body entries blockmap init_fbase
+ = fixpoint Bwd (bp_lattice pass) do_block entries blockmap init_fbase
+ where
+ do_block :: forall x. Block n C x -> Fact x f -> UniqSM (DG f n C x, LabelMap f)
+ do_block b f = do (g, f) <- block b f
+ return (g, mapSingleton (entryLabel b) f)
+
+
+{-
+
+The forward and backward cases are not dual. In the forward case, the
+entry points are known, and one simply traverses the body blocks from
+those points. In the backward case, something is known about the exit
+points, but this information is essentially useless, because we don't
+actually have a dual graph (that is, one with edges reversed) to
+compute with. (Even if we did have a dual graph, it would not avail
+us---a backward analysis must include reachable blocks that don't
+reach the exit, as in a procedure that loops forever and has side
+effects.)
+
+-}
+
+-----------------------------------------------------------------------------
+-- fixpoint
+-----------------------------------------------------------------------------
+
+data Direction = Fwd | Bwd
+
+-- | fixpointing for analysis-only
+--
+fixpointAnal :: forall n f. NonLocal n
+ => Direction
+ -> DataflowLattice f
+ -> (Block n C C -> Fact C f -> Fact C f)
+ -> [Label]
+ -> LabelMap (Block n C C)
+ -> Fact C f -> FactBase f
+
+fixpointAnal direction DataflowLattice{ fact_bot = _, fact_join = join }
+ do_block entries blockmap init_fbase
+ = loop start init_fbase
+ where
+ blocks = sortBlocks direction entries blockmap
+ n = length blocks
+ block_arr = {-# SCC "block_arr" #-} listArray (0,n-1) blocks
+ start = {-# SCC "start" #-} [0..n-1]
+ dep_blocks = {-# SCC "dep_blocks" #-} mkDepBlocks direction blocks
+
+ loop
+ :: IntHeap -- blocks still to analyse
+ -> FactBase f -- current factbase (increases monotonically)
+ -> FactBase f
+
+ loop [] fbase = fbase
+ loop (ix:todo) fbase =
+ let
+ blk = block_arr ! ix
+
+ out_facts = {-# SCC "do_block" #-} do_block blk fbase
+
+ !(todo', fbase') = {-# SCC "mapFoldWithKey" #-}
+ mapFoldWithKey (updateFact join dep_blocks)
+ (todo,fbase) out_facts
+ in
+ -- trace ("analysing: " ++ show (entryLabel blk)) $
+ -- trace ("fbase': " ++ show (mapKeys fbase')) $ return ()
+ -- trace ("changed: " ++ show changed) $ return ()
+ -- trace ("to analyse: " ++ show to_analyse) $ return ()
+
+ loop todo' fbase'
+
+
+-- | fixpointing for combined analysis/rewriting
+--
+fixpoint :: forall n f. NonLocal n
+ => Direction
+ -> DataflowLattice f
+ -> (Block n C C -> Fact C f -> UniqSM (DG f n C C, Fact C f))
+ -> [Label]
+ -> LabelMap (Block n C C)
+ -> (Fact C f -> UniqSM (DG f n C C, Fact C f))
+
+fixpoint direction DataflowLattice{ fact_bot = _, fact_join = join }
+ do_block entries blockmap init_fbase
+ = do
+ -- trace ("fixpoint: " ++ show (case direction of Fwd -> True; Bwd -> False) ++ " " ++ show (mapKeys blockmap) ++ show entries ++ " " ++ show (mapKeys init_fbase)) $ return()
+ (fbase, newblocks) <- loop start init_fbase mapEmpty
+ -- trace ("fixpoint DONE: " ++ show (mapKeys fbase) ++ show (mapKeys newblocks)) $ return()
+ return (GMany NothingO newblocks NothingO,
+ mapDeleteList (mapKeys blockmap) fbase)
+ -- The successors of the Graph are the the Labels
+ -- for which we have facts and which are *not* in
+ -- the blocks of the graph
+ where
+ blocks = sortBlocks direction entries blockmap
+ n = length blocks
+ block_arr = {-# SCC "block_arr" #-} listArray (0,n-1) blocks
+ start = {-# SCC "start" #-} [0..n-1]
+ dep_blocks = {-# SCC "dep_blocks" #-} mkDepBlocks direction blocks
+
+ loop
+ :: IntHeap
+ -> FactBase f -- current factbase (increases monotonically)
+ -> LabelMap (DBlock f n C C) -- transformed graph
+ -> UniqSM (FactBase f, LabelMap (DBlock f n C C))
+
+ loop [] fbase newblocks = return (fbase, newblocks)
+ loop (ix:todo) fbase !newblocks = do
+ let blk = block_arr ! ix
+
+ -- trace ("analysing: " ++ show (entryLabel blk)) $ return ()
+ (rg, out_facts) <- do_block blk fbase
+ let !(todo', fbase') =
+ mapFoldWithKey (updateFact join dep_blocks)
+ (todo,fbase) out_facts
+
+ -- trace ("fbase': " ++ show (mapKeys fbase')) $ return ()
+ -- trace ("changed: " ++ show changed) $ return ()
+ -- trace ("to analyse: " ++ show to_analyse) $ return ()
+
+ let newblocks' = case rg of
+ GMany _ blks _ -> mapUnion blks newblocks
+
+ loop todo' fbase' newblocks'
+
+
+{- Note [TxFactBase invariants]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The TxFactBase is used only during a fixpoint iteration (or "sweep"),
+and accumulates facts (and the transformed code) during the fixpoint
+iteration.
+
+* tfb_fbase increases monotonically, across all sweeps
+
+* At the beginning of each sweep
+ tfb_cha = NoChange
+ tfb_lbls = {}
+
+* During each sweep we process each block in turn. Processing a block
+ is done thus:
+ 1. Read from tfb_fbase the facts for its entry label (forward)
+ or successors labels (backward)
+ 2. Transform those facts into new facts for its successors (forward)
+ or entry label (backward)
+ 3. Augment tfb_fbase with that info
+ We call the labels read in step (1) the "in-labels" of the sweep
+
+* The field tfb_lbls is the set of in-labels of all blocks that have
+ been processed so far this sweep, including the block that is
+ currently being processed. tfb_lbls is initialised to {}. It is a
+ subset of the Labels of the *original* (not transformed) blocks.
+
+* The tfb_cha field is set to SomeChange iff we decide we need to
+ perform another iteration of the fixpoint loop. It is initialsed to NoChange.
+
+ Specifically, we set tfb_cha to SomeChange in step (3) iff
+ (a) The fact in tfb_fbase for a block L changes
+ (b) L is in tfb_lbls
+ Reason: until a label enters the in-labels its accumuated fact in tfb_fbase
+ has not been read, hence cannot affect the outcome
+
+Note [Unreachable blocks]
+~~~~~~~~~~~~~~~~~~~~~~~~~
+A block that is not in the domain of tfb_fbase is "currently unreachable".
+A currently-unreachable block is not even analyzed. Reason: consider
+constant prop and this graph, with entry point L1:
+ L1: x:=3; goto L4
+ L2: x:=4; goto L4
+ L4: if x>3 goto L2 else goto L5
+Here L2 is actually unreachable, but if we process it with bottom input fact,
+we'll propagate (x=4) to L4, and nuke the otherwise-good rewriting of L4.
+
+* If a currently-unreachable block is not analyzed, then its rewritten
+ graph will not be accumulated in tfb_rg. And that is good:
+ unreachable blocks simply do not appear in the output.
+
+* Note that clients must be careful to provide a fact (even if bottom)
+ for each entry point. Otherwise useful blocks may be garbage collected.
+
+* Note that updateFact must set the change-flag if a label goes from
+ not-in-fbase to in-fbase, even if its fact is bottom. In effect the
+ real fact lattice is
+ UNR
+ bottom
+ the points above bottom
+
+* Even if the fact is going from UNR to bottom, we still call the
+ client's fact_join function because it might give the client
+ some useful debugging information.
+
+* All of this only applies for *forward* ixpoints. For the backward
+ case we must treat every block as reachable; it might finish with a
+ 'return', and therefore have no successors, for example.
+-}
+
+
+-----------------------------------------------------------------------------
+-- Pieces that are shared by fixpoint and fixpoint_anal
+-----------------------------------------------------------------------------
+
+-- | Sort the blocks into the right order for analysis.
+sortBlocks :: NonLocal n => Direction -> [Label] -> LabelMap (Block n C C)
+ -> [Block n C C]
+sortBlocks direction entries blockmap
+ = case direction of Fwd -> fwd
+ Bwd -> reverse fwd
+ where fwd = forwardBlockList entries blockmap
+
+-- | construct a mapping from L -> block indices. If the fact for L
+-- changes, re-analyse the given blocks.
+mkDepBlocks :: NonLocal n => Direction -> [Block n C C] -> LabelMap [Int]
+mkDepBlocks Fwd blocks = go blocks 0 mapEmpty
+ where go [] !_ m = m
+ go (b:bs) !n m = go bs (n+1) $! mapInsert (entryLabel b) [n] m
+mkDepBlocks Bwd blocks = go blocks 0 mapEmpty
+ where go [] !_ m = m
+ go (b:bs) !n m = go bs (n+1) $! go' (successors b) m
+ where go' [] m = m
+ go' (l:ls) m = go' ls (mapInsertWith (++) l [n] m)
+
+
+-- | After some new facts have been generated by analysing a block, we
+-- fold this function over them to generate (a) a list of block
+-- indices to (re-)analyse, and (b) the new FactBase.
+--
+updateFact :: JoinFun f -> LabelMap [Int]
+ -> Label -> f -- out fact
+ -> (IntHeap, FactBase f)
+ -> (IntHeap, FactBase f)
+
+updateFact fact_join dep_blocks lbl new_fact (todo, fbase)
+ = case lookupFact lbl fbase of
+ Nothing -> let !z = mapInsert lbl new_fact fbase in (changed, z)
+ -- Note [no old fact]
+ Just old_fact ->
+ case fact_join lbl (OldFact old_fact) (NewFact new_fact) of
+ (NoChange, _) -> (todo, fbase)
+ (_, f) -> let !z = mapInsert lbl f fbase in (changed, z)
+ where
+ changed = foldr insertIntHeap todo $
+ mapFindWithDefault [] lbl dep_blocks
+
+{-
+Note [no old fact]
+
+We know that the new_fact is >= _|_, so we don't need to join. However,
+if the new fact is also _|_, and we have already analysed its block,
+we don't need to record a change. So there's a tradeoff here. It turns
+out that always recording a change is faster.
+-}
+
+-----------------------------------------------------------------------------
+-- DG: an internal data type for 'decorated graphs'
+-- TOTALLY internal to Hoopl; each block is decorated with a fact
+-----------------------------------------------------------------------------
+
+type DG f = Graph' (DBlock f)
+data DBlock f n e x = DBlock f (Block n e x) -- ^ block decorated with fact
+
+instance NonLocal n => NonLocal (DBlock f n) where
+ entryLabel (DBlock _ b) = entryLabel b
+ successors (DBlock _ b) = successors b
+
+--- constructors
+
+dgnil :: DG f n O O
+dgnilC :: DG f n C C
+dgSplice :: NonLocal n => DG f n e a -> DG f n a x -> DG f n e x
+
+---- observers
+
+normalizeGraph :: forall n f e x .
+ NonLocal n => DG f n e x
+ -> (Graph n e x, FactBase f)
+ -- A Graph together with the facts for that graph
+ -- The domains of the two maps should be identical
+
+normalizeGraph g = (mapGraphBlocks dropFact g, facts g)
+ where dropFact :: DBlock t t1 t2 t3 -> Block t1 t2 t3
+ dropFact (DBlock _ b) = b
+ facts :: DG f n e x -> FactBase f
+ facts GNil = noFacts
+ facts (GUnit _) = noFacts
+ facts (GMany _ body exit) = bodyFacts body `mapUnion` exitFacts exit
+ exitFacts :: MaybeO x (DBlock f n C O) -> FactBase f
+ exitFacts NothingO = noFacts
+ exitFacts (JustO (DBlock f b)) = mapSingleton (entryLabel b) f
+ bodyFacts :: LabelMap (DBlock f n C C) -> FactBase f
+ bodyFacts body = mapFoldWithKey f noFacts body
+ where f :: forall t a x. (NonLocal t) => Label -> DBlock a t C x -> LabelMap a -> LabelMap a
+ f lbl (DBlock f _) fb = mapInsert lbl f fb
+
+--- implementation of the constructors (boring)
+
+dgnil = GNil
+dgnilC = GMany NothingO emptyBody NothingO
+
+dgSplice = splice fzCat
+ where fzCat :: DBlock f n e O -> DBlock t n O x -> DBlock f n e x
+ fzCat (DBlock f b1) (DBlock _ b2) = DBlock f $! b1 `blockAppend` b2
+ -- NB. strictness, this function is hammered.
+
+----------------------------------------------------------------
+-- Utilities
+----------------------------------------------------------------
+
+-- Lifting based on shape:
+-- - from nodes to blocks
+-- - from facts to fact-like things
+-- Lowering back:
+-- - from fact-like things to facts
+-- Note that the latter two functions depend only on the entry shape.
+class ShapeLifter e x where
+ singletonDG :: f -> n e x -> DG f n e x
+ fwdEntryFact :: NonLocal n => n e x -> f -> Fact e f
+ fwdEntryLabel :: NonLocal n => n e x -> MaybeC e [Label]
+ ftransfer :: FwdTransfer n f -> n e x -> f -> Fact x f
+ frewrite :: FwdRewrite m n f -> n e x
+ -> f -> m (Maybe (Graph n e x, FwdRewrite m n f))
+-- @ end node.tex
+ bwdEntryFact :: NonLocal n => DataflowLattice f -> n e x -> Fact e f -> f
+ btransfer :: BwdTransfer n f -> n e x -> Fact x f -> f
+ brewrite :: BwdRewrite m n f -> n e x
+ -> Fact x f -> m (Maybe (Graph n e x, BwdRewrite m n f))
+
+instance ShapeLifter C O where
+ singletonDG f n = gUnitCO (DBlock f (BlockCO n BNil))
+ fwdEntryFact n f = mapSingleton (entryLabel n) f
+ bwdEntryFact lat n fb = getFact lat (entryLabel n) fb
+ ftransfer (FwdTransfer3 (ft, _, _)) n f = ft n f
+ btransfer (BwdTransfer3 (bt, _, _)) n f = bt n f
+ frewrite (FwdRewrite3 (fr, _, _)) n f = fr n f
+ brewrite (BwdRewrite3 (br, _, _)) n f = br n f
+ fwdEntryLabel n = JustC [entryLabel n]
+
+instance ShapeLifter O O where
+ singletonDG f = gUnitOO . DBlock f . BMiddle
+ fwdEntryFact _ f = f
+ bwdEntryFact _ _ f = f
+ ftransfer (FwdTransfer3 (_, ft, _)) n f = ft n f
+ btransfer (BwdTransfer3 (_, bt, _)) n f = bt n f
+ frewrite (FwdRewrite3 (_, fr, _)) n f = fr n f
+ brewrite (BwdRewrite3 (_, br, _)) n f = br n f
+ fwdEntryLabel _ = NothingC
+
+instance ShapeLifter O C where
+ singletonDG f n = gUnitOC (DBlock f (BlockOC BNil n))
+ fwdEntryFact _ f = f
+ bwdEntryFact _ _ f = f
+ ftransfer (FwdTransfer3 (_, _, ft)) n f = ft n f
+ btransfer (BwdTransfer3 (_, _, bt)) n f = bt n f
+ frewrite (FwdRewrite3 (_, _, fr)) n f = fr n f
+ brewrite (BwdRewrite3 (_, _, br)) n f = br n f
+ fwdEntryLabel _ = NothingC
+
+{-
+class ShapeLifter e x where
+ singletonDG :: f -> n e x -> DG f n e x
+
+instance ShapeLifter C O where
+ singletonDG f n = gUnitCO (DBlock f (BlockCO n BNil))
+
+instance ShapeLifter O O where
+ singletonDG f = gUnitOO . DBlock f . BMiddle
+
+instance ShapeLifter O C where
+ singletonDG f n = gUnitOC (DBlock f (BlockOC BNil n))
+-}
+
+-- Fact lookup: the fact `orelse` bottom
+getFact :: DataflowLattice f -> Label -> FactBase f -> f
+getFact lat l fb = case lookupFact l fb of Just f -> f
+ Nothing -> fact_bot lat
+
+
+
+{- Note [Respects fuel]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-}
+-- $fuel
+-- A value of type 'FwdRewrite' or 'BwdRewrite' /respects fuel/ if
+-- any function contained within the value satisfies the following properties:
+--
+-- * When fuel is exhausted, it always returns 'Nothing'.
+--
+-- * When it returns @Just g rw@, it consumes /exactly/ one unit
+-- of fuel, and new rewrite 'rw' also respects fuel.
+--
+-- Provided that functions passed to 'mkFRewrite', 'mkFRewrite3',
+-- 'mkBRewrite', and 'mkBRewrite3' are not aware of the fuel supply,
+-- the results respect fuel.
+--
+-- It is an /unchecked/ run-time error for the argument passed to 'wrapFR',
+-- 'wrapFR2', 'wrapBR', or 'warpBR2' to return a function that does not respect fuel.
+
+-- -----------------------------------------------------------------------------
+-- a Heap of Int
+
+-- We should really use a proper Heap here, but my attempts to make
+-- one have not succeeded in beating the simple ordered list. Another
+-- alternative is IntSet (using deleteFindMin), but that was also
+-- slower than the ordered list in my experiments --SDM 25/1/2012
+
+type IntHeap = [Int] -- ordered
+
+insertIntHeap :: Int -> [Int] -> [Int]
+insertIntHeap x [] = [x]
+insertIntHeap x (y:ys)
+ | x < y = x : y : ys
+ | x == y = x : ys
+ | otherwise = y : insertIntHeap x ys
diff --git a/compiler/cmm/MkGraph.hs b/compiler/cmm/MkGraph.hs
index 04586b1029..ecd4d4f985 100644
--- a/compiler/cmm/MkGraph.hs
+++ b/compiler/cmm/MkGraph.hs
@@ -1,29 +1,19 @@
{-# LANGUAGE GADTs #-}
--- ToDo: remove -fno-warn-warnings-deprecations
-{-# OPTIONS_GHC -fno-warn-warnings-deprecations #-}
--- ToDo: remove -fno-warn-incomplete-patterns
-{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
-
--- Module for building CmmAGraphs.
-
--- As the CmmAGraph is a wrapper over Graph CmmNode O x, it is different
--- from Hoopl's AGraph. The current clients expect functions with the
--- same names Hoopl uses, so this module cannot be in the same namespace
--- as Compiler.Hoopl.
-
module MkGraph
- ( CmmAGraph
- , emptyAGraph, (<*>), catAGraphs, outOfLine
- , mkLabel, mkMiddle, mkLast
- , withFreshLabel, withUnique, lgraphOfAGraph, labelAGraph
+ ( CmmAGraph, CgStmt(..)
+ , (<*>), catAGraphs
+ , mkLabel, mkMiddle, mkLast, outOfLine
+ , lgraphOfAGraph, labelAGraph
, stackStubExpr
- , mkNop, mkAssign, mkStore, mkCall, mkCmmCall, mkSafeCall, mkUnsafeCall, mkFinalCall
- , mkJump, mkDirectJump, mkForeignJump, mkJumpGC, mkCbranch, mkSwitch
- , mkReturn, mkReturnSimple, mkComment, mkCallEntry
- , mkBranch, mkCmmIfThenElse, mkCmmIfThen, mkCmmWhileDo
- , copyInOflow, copyInSlot, copyOutOflow, copyOutSlot
+ , mkNop, mkAssign, mkStore, mkUnsafeCall, mkFinalCall, mkCallReturnsTo
+ , mkJump, mkDirectJump, mkForeignJump, mkForeignJumpExtra, mkJumpGC
+ , mkCbranch, mkSwitch
+ , mkReturn, mkReturnSimple, mkComment, mkCallEntry, mkBranch
+ , copyInOflow, copyOutOflow
+ , noExtraStack
+ , toCall, Transfer(..)
)
where
@@ -31,250 +21,232 @@ import BlockId
import Cmm
import CmmCallConv (assignArgumentsPos, ParamLocation(..))
+
import Compiler.Hoopl hiding (Unique, (<*>), mkFirst, mkMiddle, mkLast, mkLabel, mkBranch, Shape(..))
-import qualified Compiler.Hoopl as H
-import Compiler.Hoopl.GHC (uniqueToLbl)
import FastString
import ForeignCall
import Outputable
import Prelude hiding (succ)
import SMRep (ByteOff)
-import StaticFlags
-import Unique
import UniqSupply
-import Util
+import OrdList
#include "HsVersions.h"
-{-
-A 'CmmAGraph' is an abstract version of a 'Graph CmmNode O x' from module
-'Cmm'. The difference is that the 'CmmAGraph' can be eigher open of closed at
-exit and it can supply fresh Labels and Uniques.
-
-It also supports a splicing operation <*>, which is different from the Hoopl's
-<*>, because it splices two CmmAGraphs. Specifically, it can splice Graph
-O C and Graph O x. In this case, the open beginning of the second graph is
-thrown away. In the debug mode this sequence is checked to be empty or
-containing a branch (see note [Branch follows branch]).
-
-When an CmmAGraph open at exit is being converted to a CmmGraph, the output
-exit sequence is considered unreachable. If the graph consist of one block
-only, if it not the case and we crash. Otherwise we just throw the exit
-sequence away (and in debug mode we test that it really was unreachable).
--}
-
-{-
-Node [Branch follows branch]
-============================
-Why do we say it's ok for a Branch to follow a Branch?
-Because the standard constructor mkLabel has fall-through
-semantics. So if you do a mkLabel, you finish the current block,
-giving it a label, and start a new one that branches to that label.
-Emitting a Branch at this point is fine:
- goto L1; L2: ...stuff...
--}
-
-data CmmGraphOC = Opened (Graph CmmNode O O)
- | Closed (Graph CmmNode O C)
-type CmmAGraph = UniqSM CmmGraphOC -- Graph open at entry
-
-{-
-MS: I began with
- newtype CmmAGraph = forall x. AG (UniqSM (Graph CmmNode O x))
-but that does not work well, because we cannot take the graph
-out of the monad -- we do not know the type of what we would take
-out and pattern matching does not help, as we cannot pattern match
-on a graph inside the monad.
--}
-data Transfer = Call | Jump | Ret deriving Eq
+-----------------------------------------------------------------------------
+-- Building Graphs
+
+
+-- | CmmAGraph is a chunk of code consisting of:
+--
+-- * ordinary statements (assignments, stores etc.)
+-- * jumps
+-- * labels
+-- * out-of-line labelled blocks
+--
+-- The semantics is that control falls through labels and out-of-line
+-- blocks. Everything after a jump up to the next label is by
+-- definition unreachable code, and will be discarded.
+--
+-- Two CmmAGraphs can be stuck together with <*>, with the meaning that
+-- control flows from the first to the second.
+--
+-- A 'CmmAGraph' can be turned into a 'CmmGraph' (closed at both ends)
+-- by providing a label for the entry point; see 'labelAGraph'.
+--
+type CmmAGraph = OrdList CgStmt
+
+data CgStmt
+ = CgLabel BlockId
+ | CgStmt (CmmNode O O)
+ | CgLast (CmmNode O C)
+ | CgFork BlockId CmmAGraph
+
+flattenCmmAGraph :: BlockId -> CmmAGraph -> CmmGraph
+flattenCmmAGraph id stmts =
+ CmmGraph { g_entry = id,
+ g_graph = GMany NothingO body NothingO }
+ where
+ (block, blocks) = flatten (fromOL stmts)
+ entry = blockJoinHead (CmmEntry id) block
+ body = foldr addBlock emptyBody (entry:blocks)
+
+ flatten :: [CgStmt] -> (Block CmmNode O C, [Block CmmNode C C])
+ flatten [] = panic "flatten []"
+
+ -- A label at the end of a function or fork: this label must not be reachable,
+ -- but it might be referred to from another BB that also isn't reachable.
+ -- Eliminating these has to be done with a dead-code analysis. For now,
+ -- we just make it into a well-formed block by adding a recursive jump.
+ flatten [CgLabel id]
+ = (goto_id, [blockJoinHead (CmmEntry id) goto_id] )
+ where goto_id = blockJoinTail emptyBlock (CmmBranch id)
+
+ -- A jump/branch: throw away all the code up to the next label, because
+ -- it is unreachable. Be careful to keep forks that we find on the way.
+ flatten (CgLast stmt : stmts)
+ = case dropWhile isOrdinaryStmt stmts of
+ [] ->
+ ( sing, [] )
+ [CgLabel id] ->
+ ( sing, [blockJoin (CmmEntry id) emptyBlock (CmmBranch id)] )
+ (CgLabel id : stmts) ->
+ ( sing, blockJoinHead (CmmEntry id) block : blocks )
+ where (block,blocks) = flatten stmts
+ (CgFork fork_id stmts : ss) ->
+ flatten (CgFork fork_id stmts : CgLast stmt : ss)
+ _ -> panic "MkGraph.flatten"
+ where
+ sing = blockJoinTail emptyBlock stmt
+
+ flatten (s:ss) =
+ case s of
+ CgStmt stmt -> (blockCons stmt block, blocks)
+ CgLabel id -> (blockJoinTail emptyBlock (CmmBranch id),
+ blockJoinHead (CmmEntry id) block : blocks)
+ CgFork fork_id stmts ->
+ (block, blockJoinHead (CmmEntry fork_id) fork_block : fork_blocks ++ blocks)
+ where (fork_block, fork_blocks) = flatten (fromOL stmts)
+ _ -> panic "MkGraph.flatten"
+ where (block,blocks) = flatten ss
+
+isOrdinaryStmt :: CgStmt -> Bool
+isOrdinaryStmt (CgStmt _) = True
+isOrdinaryStmt (CgLast _) = True
+isOrdinaryStmt _ = False
+
+
---------- AGraph manipulation
-emptyAGraph :: CmmAGraph
(<*>) :: CmmAGraph -> CmmAGraph -> CmmAGraph
+(<*>) = appOL
+
catAGraphs :: [CmmAGraph] -> CmmAGraph
+catAGraphs = concatOL
+
+-- | created a sequence "goto id; id:" as an AGraph
+mkLabel :: BlockId -> CmmAGraph
+mkLabel bid = unitOL (CgLabel bid)
+
+-- | creates an open AGraph from a given node
+mkMiddle :: CmmNode O O -> CmmAGraph
+mkMiddle middle = unitOL (CgStmt middle)
-mkLabel :: BlockId -> CmmAGraph -- created a sequence "goto id; id:" as an AGraph
-mkMiddle :: CmmNode O O -> CmmAGraph -- creates an open AGraph from a given node
-mkLast :: CmmNode O C -> CmmAGraph -- created a closed AGraph from a given node
+-- | created a closed AGraph from a given node
+mkLast :: CmmNode O C -> CmmAGraph
+mkLast last = unitOL (CgLast last)
-withFreshLabel :: String -> (BlockId -> CmmAGraph) -> CmmAGraph
-withUnique :: (Unique -> CmmAGraph) -> CmmAGraph
+-- | A labelled code block; should end in a last node
+outOfLine :: BlockId -> CmmAGraph -> CmmAGraph
+outOfLine l g = unitOL (CgFork l g)
+-- | allocate a fresh label for the entry point
lgraphOfAGraph :: CmmAGraph -> UniqSM CmmGraph
- -- ^ allocate a fresh label for the entry point
+lgraphOfAGraph g = do u <- getUniqueM
+ return (flattenCmmAGraph (mkBlockId u) g)
+
+-- | use the given BlockId as the label of the entry point
labelAGraph :: BlockId -> CmmAGraph -> UniqSM CmmGraph
- -- ^ use the given BlockId as the label of the entry point
+labelAGraph lbl ag = return (flattenCmmAGraph lbl ag)
---------- No-ops
mkNop :: CmmAGraph
+mkNop = nilOL
+
mkComment :: FastString -> CmmAGraph
+#ifdef DEBUG
+-- SDM: generating all those comments takes time, this saved about 4% for me
+mkComment fs = mkMiddle $ CmmComment fs
+#else
+mkComment _ = nilOL
+#endif
---------- Assignment and store
mkAssign :: CmmReg -> CmmExpr -> CmmAGraph
-mkStore :: CmmExpr -> CmmExpr -> CmmAGraph
+mkAssign l r = mkMiddle $ CmmAssign l r
----------- Calls
-mkCall :: CmmExpr -> (Convention, Convention) -> [CmmFormal] -> [CmmActual] ->
- UpdFrameOffset -> CmmAGraph
-mkCmmCall :: CmmExpr -> [CmmFormal] -> [CmmActual] ->
- UpdFrameOffset -> CmmAGraph
- -- Native C-- calling convention
-mkSafeCall :: ForeignTarget -> [CmmFormal] -> [CmmActual] -> UpdFrameOffset -> Bool -> CmmAGraph
-mkUnsafeCall :: ForeignTarget -> [CmmFormal] -> [CmmActual] -> CmmAGraph
-mkFinalCall :: CmmExpr -> CCallConv -> [CmmActual] -> UpdFrameOffset -> CmmAGraph
- -- Never returns; like exit() or barf()
+mkStore :: CmmExpr -> CmmExpr -> CmmAGraph
+mkStore l r = mkMiddle $ CmmStore l r
---------- Control transfer
-mkJump :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph
-mkDirectJump :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph
-mkJumpGC :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph
-mkForeignJump :: Convention -> CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph
-mkCbranch :: CmmExpr -> BlockId -> BlockId -> CmmAGraph
-mkSwitch :: CmmExpr -> [Maybe BlockId] -> CmmAGraph
+mkJump :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph
+mkJump e actuals updfr_off =
+ lastWithArgs Jump Old NativeNodeCall actuals updfr_off $
+ toCall e Nothing updfr_off 0
+
+mkDirectJump :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph
+mkDirectJump e actuals updfr_off =
+ lastWithArgs Jump Old NativeDirectCall actuals updfr_off $
+ toCall e Nothing updfr_off 0
+
+mkJumpGC :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph
+mkJumpGC e actuals updfr_off =
+ lastWithArgs Jump Old GC actuals updfr_off $
+ toCall e Nothing updfr_off 0
+
+mkForeignJump :: Convention -> CmmExpr -> [CmmActual] -> UpdFrameOffset
+ -> CmmAGraph
+mkForeignJump conv e actuals updfr_off =
+ mkForeignJumpExtra conv e actuals updfr_off noExtraStack
+
+mkForeignJumpExtra :: Convention -> CmmExpr -> [CmmActual]
+ -> UpdFrameOffset -> (ByteOff, [(CmmExpr, ByteOff)])
+ -> CmmAGraph
+mkForeignJumpExtra conv e actuals updfr_off extra_stack =
+ lastWithArgsAndExtraStack Jump Old conv actuals updfr_off extra_stack $
+ toCall e Nothing updfr_off 0
+
+mkCbranch :: CmmExpr -> BlockId -> BlockId -> CmmAGraph
+mkCbranch pred ifso ifnot = mkLast (CmmCondBranch pred ifso ifnot)
+
+mkSwitch :: CmmExpr -> [Maybe BlockId] -> CmmAGraph
+mkSwitch e tbl = mkLast $ CmmSwitch e tbl
+
mkReturn :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph
+mkReturn e actuals updfr_off =
+ lastWithArgs Ret Old NativeReturn actuals updfr_off $
+ toCall e Nothing updfr_off 0
+
mkReturnSimple :: [CmmActual] -> UpdFrameOffset -> CmmAGraph
+mkReturnSimple actuals updfr_off =
+ mkReturn e actuals updfr_off
+ where e = CmmLoad (CmmStackSlot Old updfr_off) gcWord
mkBranch :: BlockId -> CmmAGraph
-mkCmmIfThenElse :: CmmExpr -> CmmAGraph -> CmmAGraph -> CmmAGraph
-mkCmmIfThen :: CmmExpr -> CmmAGraph -> CmmAGraph
-mkCmmWhileDo :: CmmExpr -> CmmAGraph -> CmmAGraph
-
-outOfLine :: CmmAGraph -> CmmAGraph
--- ^ The argument is an CmmAGraph that must have an
--- empty entry sequence and be closed at the end.
--- The result is a new CmmAGraph that is open at the
--- end and goes directly from entry to exit, with the
--- original graph sitting to the side out-of-line.
---
--- Example: mkMiddle (x = 3)
--- <*> outOfLine (mkLabel L <*> ...stuff...)
--- <*> mkMiddle (y = x)
--- Control will flow directly from x=3 to y=x;
--- the block starting with L is "on the side".
---
--- N.B. algebraically forall g g' : g <*> outOfLine g' == outOfLine g' <*> g
+mkBranch bid = mkLast (CmmBranch bid)
+
+mkFinalCall :: CmmExpr -> CCallConv -> [CmmActual] -> UpdFrameOffset
+ -> CmmAGraph
+mkFinalCall f _ actuals updfr_off =
+ lastWithArgs Call Old NativeDirectCall actuals updfr_off $
+ toCall f Nothing updfr_off 0
+
+mkCallReturnsTo :: CmmExpr -> Convention -> [CmmActual]
+ -> BlockId
+ -> ByteOff
+ -> UpdFrameOffset
+ -> (ByteOff, [(CmmExpr,ByteOff)])
+ -> CmmAGraph
+mkCallReturnsTo f callConv actuals ret_lbl ret_off updfr_off extra_stack = do
+ lastWithArgsAndExtraStack Call (Young ret_lbl) callConv actuals
+ updfr_off extra_stack $
+ toCall f (Just ret_lbl) updfr_off ret_off
+
+mkUnsafeCall :: ForeignTarget -> [CmmFormal] -> [CmmActual] -> CmmAGraph
+mkUnsafeCall t fs as = mkMiddle $ CmmUnsafeForeignCall t fs as
+
--------------------------------------------------------------------------
--- ================ IMPLEMENTATION ================--
-
---------------------------------------------------
--- Raw CmmAGraph handling
-
-emptyAGraph = return $ Opened emptyGraph
-ag <*> ah = do g <- ag
- h <- ah
- return (case (g, h) of
- (Opened g, Opened h) -> Opened $ g H.<*> h
- (Opened g, Closed h) -> Closed $ g H.<*> h
- (Closed g, Opened GNil) -> Closed g
- (Closed g, Opened (GUnit e)) -> note_unreachable e $ Closed g
- (Closed g, Opened (GMany (JustO e) b x)) -> note_unreachable e $ Opened $ g H.|*><*| GMany NothingO b x
- (Closed g, Closed (GMany (JustO e) b x)) -> note_unreachable e $ Closed $ g H.|*><*| GMany NothingO b x
- :: CmmGraphOC)
-catAGraphs = foldl (<*>) emptyAGraph
-
-outOfLine ag = withFreshLabel "outOfLine" $ \l ->
- do g <- ag
- return (case g of
- Closed (GMany (JustO e) b _) -> note_unreachable e $ Opened $
- GMany (JustO $ BLast $ CmmBranch l) b (JustO $ BFirst $ CmmEntry l)
- _ -> panic "outOfLine"
- :: CmmGraphOC)
-
-note_unreachable :: Block CmmNode O x -> a -> a
-note_unreachable block graph =
- ASSERT (block_is_empty_or_label) -- Note [Branch follows branch]
- graph
- where block_is_empty_or_label :: Bool
- block_is_empty_or_label = case blockToNodeList block of
- (NothingC, [], NothingC) -> True
- (NothingC, [], JustC (CmmBranch _)) -> True
- _ -> False
-
-mkLabel bid = return $ Opened $ H.mkLast (CmmBranch bid) |*><*| H.mkFirst (CmmEntry bid)
-mkMiddle middle = return $ Opened $ H.mkMiddle middle
-mkLast last = return $ Closed $ H.mkLast last
-
-withUnique f = getUniqueM >>= f
-withFreshLabel _name f = getUniqueM >>= f . uniqueToLbl . intToUnique . getKey
-lgraphOfAGraph g = do u <- getUniqueM
- labelAGraph (mkBlockId u) g
-
-labelAGraph lbl ag = do g <- ag
- return $ CmmGraph {g_entry=lbl, g_graph=H.mkFirst (CmmEntry lbl) H.<*> closed g}
- where closed :: CmmGraphOC -> Graph CmmNode O C
- closed (Closed g) = g
- closed (Opened g@(GMany entry body (JustO exit))) =
- ASSERT (entryLabel exit `notElem` map entryLabel (postorder_dfs g))
- GMany entry body NothingO
- closed (Opened _) = panic "labelAGraph"
-
---------------------------------------------------
--- CmmAGraph constructions
-
-mkNop = emptyAGraph
-mkComment fs = mkMiddle $ CmmComment fs
-mkStore l r = mkMiddle $ CmmStore l r
-
--- NEED A COMPILER-DEBUGGING FLAG HERE
--- Sanity check: any value assigned to a pointer must be non-zero.
--- If it's 0, cause a crash immediately.
-mkAssign l r = if opt_StubDeadValues then assign l r <*> check l else assign l r
- where assign l r = mkMiddle (CmmAssign l r)
- check (CmmGlobal _) = mkNop
- check l@(CmmLocal reg) = -- if a ptr arg is NULL, cause a crash!
- if isGcPtrType ty then
- mkCmmIfThen (CmmMachOp (MO_Eq w) [r, stackStubExpr w])
- (assign l (CmmLoad (CmmLit (CmmInt 0 w)) ty))
- else mkNop
- where ty = localRegType reg
- w = typeWidth ty
- r = CmmReg l
-- Why are we inserting extra blocks that simply branch to the successors?
-- Because in addition to the branch instruction, @mkBranch@ will insert
-- a necessary adjustment to the stack pointer.
-mkCbranch pred ifso ifnot = mkLast (CmmCondBranch pred ifso ifnot)
-mkSwitch e tbl = mkLast $ CmmSwitch e tbl
-
-mkSafeCall t fs as upd i = withFreshLabel "safe call" $ body
- where
- body k =
- ( mkStore (CmmStackSlot (CallArea (Young k)) (widthInBytes wordWidth))
- (CmmLit (CmmBlock k))
- <*> mkLast (CmmForeignCall {tgt=t, res=fs, args=as, succ=k, updfr=upd, intrbl=i})
- <*> mkLabel k)
-mkUnsafeCall t fs as = mkMiddle $ CmmUnsafeForeignCall t fs as
-mkBranch bid = mkLast (CmmBranch bid)
-
-mkCmmIfThenElse e tbranch fbranch =
- withFreshLabel "end of if" $ \endif ->
- withFreshLabel "start of then" $ \tid ->
- withFreshLabel "start of else" $ \fid ->
- mkCbranch e tid fid <*>
- mkLabel tid <*> tbranch <*> mkBranch endif <*>
- mkLabel fid <*> fbranch <*> mkLabel endif
-
-mkCmmIfThen e tbranch
- = withFreshLabel "end of if" $ \endif ->
- withFreshLabel "start of then" $ \tid ->
- mkCbranch e tid endif <*>
- mkLabel tid <*> tbranch <*> mkLabel endif
-
-mkCmmWhileDo e body =
- withFreshLabel "loop test" $ \test ->
- withFreshLabel "loop head" $ \head ->
- withFreshLabel "end while" $ \endwhile ->
- -- Forrest Baskett's while-loop layout
- mkBranch test <*> mkLabel head <*> body
- <*> mkLabel test <*> mkCbranch e head endwhile
- <*> mkLabel endwhile
-- For debugging purposes, we can stub out dead stack slots:
stackStubExpr :: Width -> CmmExpr
@@ -286,12 +258,9 @@ stackStubExpr w = CmmLit (CmmInt 0 w)
-- Therefore, for copying arguments and results, we provide different
-- functions to pass the arguments in an overflow area and to pass them in spill slots.
copyInOflow :: Convention -> Area -> [CmmFormal] -> (Int, CmmAGraph)
-copyInSlot :: Convention -> [CmmFormal] -> [CmmNode O O]
-copyOutSlot :: Convention -> [LocalReg] -> [CmmNode O O]
copyInOflow conv area formals = (offset, catAGraphs $ map mkMiddle nodes)
where (offset, nodes) = copyIn oneCopyOflowI conv area formals
-copyInSlot c f = snd $ copyIn oneCopySlotI c (panic "no area for copying to slots") f
type SlotCopier = Area -> (LocalReg, ByteOff) -> (ByteOff, [CmmNode O O]) ->
(ByteOff, [CmmNode O O])
@@ -312,104 +281,86 @@ copyIn oflow conv area formals =
adjust rst x@(_, RegisterParam _) = x : rst
-- Copy-in one arg, using overflow space if needed.
-oneCopyOflowI, oneCopySlotI :: SlotCopier
+oneCopyOflowI :: SlotCopier
oneCopyOflowI area (reg, off) (n, ms) =
(max n off, CmmAssign (CmmLocal reg) (CmmLoad (CmmStackSlot area off) ty) : ms)
where ty = localRegType reg
--- Copy-in one arg, using spill slots if needed -- used for calling conventions at
--- a procpoint that is not a return point. The offset is irrelevant here...
-oneCopySlotI _ (reg, _) (n, ms) =
- (n, CmmAssign (CmmLocal reg) (CmmLoad (CmmStackSlot (RegSlot reg) w) ty) : ms)
- where ty = localRegType reg
- w = widthInBytes (typeWidth ty)
-
-
-- Factoring out the common parts of the copyout functions yielded something
-- more complicated:
-copyOutOflow :: Convention -> Transfer -> Area -> [CmmActual] -> UpdFrameOffset ->
- (Int, CmmAGraph)
+data Transfer = Call | Jump | Ret deriving Eq
+
+copyOutOflow :: Convention -> Transfer -> Area -> [CmmActual]
+ -> UpdFrameOffset
+ -> (ByteOff, [(CmmExpr,ByteOff)]) -- extra stack stuff
+ -> (Int, CmmAGraph)
+
-- Generate code to move the actual parameters into the locations
--- required by the calling convention. This includes a store for the return address.
+-- required by the calling convention. This includes a store for the
+-- return address.
--
--- The argument layout function ignores the pointer to the info table, so we slot that
--- in here. When copying-out to a young area, we set the info table for return
--- and adjust the offsets of the other parameters.
--- If this is a call instruction, we adjust the offsets of the other parameters.
-copyOutOflow conv transfer area@(CallArea a) actuals updfr_off
- = foldr co (init_offset, emptyAGraph) args'
+-- The argument layout function ignores the pointer to the info table,
+-- so we slot that in here. When copying-out to a young area, we set
+-- the info table for return and adjust the offsets of the other
+-- parameters. If this is a call instruction, we adjust the offsets
+-- of the other parameters.
+copyOutOflow conv transfer area actuals updfr_off
+ (extra_stack_off, extra_stack_stuff)
+ = foldr co (init_offset, mkNop) (args' ++ stack_params)
where
co (v, RegisterParam r) (n, ms) = (n, mkAssign (CmmGlobal r) v <*> ms)
co (v, StackParam off) (n, ms) = (max n off, mkStore (CmmStackSlot area off) v <*> ms)
+ stack_params = [ (e, StackParam (off + init_offset))
+ | (e,off) <- extra_stack_stuff ]
+
(setRA, init_offset) =
- case a of Young id -> id `seq` -- Generate a store instruction for
- -- the return address if making a call
+ case area of
+ Young id -> id `seq` -- Generate a store instruction for
+ -- the return address if making a call
if transfer == Call then
([(CmmLit (CmmBlock id), StackParam init_offset)],
widthInBytes wordWidth)
else ([], 0)
- Old -> ([], updfr_off)
+ Old -> ([], updfr_off)
+
+ arg_offset = init_offset + extra_stack_off
args :: [(CmmExpr, ParamLocation)] -- The argument and where to put it
args = assignArgumentsPos conv cmmExprType actuals
args' = foldl adjust setRA args
- where adjust rst (v, StackParam off) = (v, StackParam (off + init_offset)) : rst
+ where adjust rst (v, StackParam off) = (v, StackParam (off + arg_offset)) : rst
adjust rst x@(_, RegisterParam _) = x : rst
-copyOutOflow _ _ (RegSlot _) _ _ = panic "cannot copy arguments into a register slot"
--- Args passed only in registers and stack slots; no overflow space.
--- No return address may apply!
-copyOutSlot conv actuals = foldr co [] args
- where co (v, RegisterParam r) ms = CmmAssign (CmmGlobal r) (toExp v) : ms
- co (v, StackParam off) ms = CmmStore (CmmStackSlot (RegSlot v) off) (toExp v) : ms
- toExp r = CmmReg (CmmLocal r)
- args = assignArgumentsPos conv localRegType actuals
mkCallEntry :: Convention -> [CmmFormal] -> (Int, CmmAGraph)
-mkCallEntry conv formals = copyInOflow conv (CallArea Old) formals
+mkCallEntry conv formals = copyInOflow conv Old formals
-lastWithArgs :: Transfer -> Area -> Convention -> [CmmActual] -> UpdFrameOffset ->
- (ByteOff -> CmmAGraph) -> CmmAGraph
+lastWithArgs :: Transfer -> Area -> Convention -> [CmmActual]
+ -> UpdFrameOffset
+ -> (ByteOff -> CmmAGraph)
+ -> CmmAGraph
lastWithArgs transfer area conv actuals updfr_off last =
- let (outArgs, copies) = copyOutOflow conv transfer area actuals updfr_off in
+ lastWithArgsAndExtraStack transfer area conv actuals
+ updfr_off noExtraStack last
+
+lastWithArgsAndExtraStack :: Transfer -> Area -> Convention -> [CmmActual]
+ -> UpdFrameOffset -> (ByteOff, [(CmmExpr,ByteOff)])
+ -> (ByteOff -> CmmAGraph)
+ -> CmmAGraph
+lastWithArgsAndExtraStack transfer area conv actuals updfr_off
+ extra_stack last =
+ let (outArgs, copies) = copyOutOflow conv transfer area actuals
+ updfr_off extra_stack in
copies <*> last outArgs
--- The area created for the jump and return arguments is the same area as the
--- procedure entry.
-old :: Area
-old = CallArea Old
-toCall :: CmmExpr -> Maybe BlockId -> UpdFrameOffset -> ByteOff -> ByteOff -> CmmAGraph
+noExtraStack :: (ByteOff, [(CmmExpr,ByteOff)])
+noExtraStack = (0,[])
+
+toCall :: CmmExpr -> Maybe BlockId -> UpdFrameOffset -> ByteOff -> ByteOff
+ -> CmmAGraph
toCall e cont updfr_off res_space arg_space =
mkLast $ CmmCall e cont arg_space res_space updfr_off
-mkJump e actuals updfr_off =
- lastWithArgs Jump old NativeNodeCall actuals updfr_off $ toCall e Nothing updfr_off 0
-mkDirectJump e actuals updfr_off =
- lastWithArgs Jump old NativeDirectCall actuals updfr_off $ toCall e Nothing updfr_off 0
-mkJumpGC e actuals updfr_off =
- lastWithArgs Jump old GC actuals updfr_off $ toCall e Nothing updfr_off 0
-mkForeignJump conv e actuals updfr_off =
- lastWithArgs Jump old conv actuals updfr_off $ toCall e Nothing updfr_off 0
-mkReturn e actuals updfr_off =
- lastWithArgs Ret old NativeReturn actuals updfr_off $ toCall e Nothing updfr_off 0
- -- where e = CmmLoad (CmmStackSlot (CallArea Old) updfr_off) gcWord
-mkReturnSimple actuals updfr_off =
- lastWithArgs Ret old NativeReturn actuals updfr_off $ toCall e Nothing updfr_off 0
- where e = CmmLoad (CmmStackSlot (CallArea Old) updfr_off) gcWord
-
-mkFinalCall f _ actuals updfr_off =
- lastWithArgs Call old NativeDirectCall actuals updfr_off $ toCall f Nothing updfr_off 0
-
-mkCmmCall f results actuals = mkCall f (NativeDirectCall, NativeReturn) results actuals
-
--- I'm dropping the SRT, but that should be okay: we plan to reconstruct it later.
-mkCall f (callConv, retConv) results actuals updfr_off =
- withFreshLabel "call successor" $ \k ->
- let area = CallArea $ Young k
- (off, copyin) = copyInOflow retConv area results
- copyout = lastWithArgs Call area callConv actuals updfr_off
- (toCall f (Just k) updfr_off off)
- in (copyout <*> mkLabel k <*> copyin)
diff --git a/compiler/cmm/OldCmm.hs b/compiler/cmm/OldCmm.hs
index fc4706c8c4..aa83afbf8d 100644
--- a/compiler/cmm/OldCmm.hs
+++ b/compiler/cmm/OldCmm.hs
@@ -9,9 +9,7 @@
module OldCmm (
CmmGroup, GenCmmGroup, RawCmmGroup, CmmDecl, RawCmmDecl,
ListGraph(..),
-
- CmmInfo(..), CmmInfoTable(..), ClosureTypeInfo(..), UpdateFrame(..),
-
+ CmmInfoTable(..), ClosureTypeInfo(..),
CmmStatic(..), CmmStatics(..), CmmFormal, CmmActual,
cmmMapGraph, cmmTopMapGraph,
@@ -49,24 +47,6 @@ import ForeignCall
-- with assembly-language labels.
-----------------------------------------------------------------------------
--- Info Tables
------------------------------------------------------------------------------
-
-data CmmInfo
- = CmmInfo
- (Maybe BlockId) -- GC target. Nothing <=> CPS won't do stack check
- -- JD: NOT USED BY NEW CODE GEN
- (Maybe UpdateFrame) -- Update frame
- CmmInfoTable -- Info table
-
--- | A frame that is to be pushed before entry to the function.
--- Used to handle 'update' frames.
-data UpdateFrame
- = UpdateFrame
- CmmExpr -- Frame header. Behaves like the target of a 'jump'.
- [CmmExpr] -- Frame remainder. Behaves like the arguments of a 'jump'.
-
------------------------------------------------------------------------------
-- Cmm, CmmDecl, CmmBasicBlock
-----------------------------------------------------------------------------
@@ -85,8 +65,8 @@ data UpdateFrame
newtype ListGraph i = ListGraph [GenBasicBlock i]
-- | Cmm with the info table as a data type
-type CmmGroup = GenCmmGroup CmmStatics CmmInfo (ListGraph CmmStmt)
-type CmmDecl = GenCmmDecl CmmStatics CmmInfo (ListGraph CmmStmt)
+type CmmGroup = GenCmmGroup CmmStatics CmmInfoTable (ListGraph CmmStmt)
+type CmmDecl = GenCmmDecl CmmStatics CmmInfoTable (ListGraph CmmStmt)
-- | Cmm with the info tables converted to a list of 'CmmStatic' along with the info
-- table label. If we are building without tables-next-to-code there will be no statics
@@ -225,16 +205,9 @@ instance UserOfLocalRegs CmmCallTarget where
foldRegsUsed f set (CmmCallee e _) = foldRegsUsed f set e
foldRegsUsed f set (CmmPrim _ mStmts) = foldRegsUsed f set mStmts
-instance UserOfSlots CmmCallTarget where
- foldSlotsUsed f set (CmmCallee e _) = foldSlotsUsed f set e
- foldSlotsUsed _ set (CmmPrim {}) = set
-
instance UserOfLocalRegs a => UserOfLocalRegs (CmmHinted a) where
foldRegsUsed f set a = foldRegsUsed f set (hintlessCmm a)
-instance UserOfSlots a => UserOfSlots (CmmHinted a) where
- foldSlotsUsed f set a = foldSlotsUsed f set (hintlessCmm a)
-
instance DefinerOfLocalRegs a => DefinerOfLocalRegs (CmmHinted a) where
foldRegsDefd f set a = foldRegsDefd f set (hintlessCmm a)
diff --git a/compiler/cmm/OldCmmLint.hs b/compiler/cmm/OldCmmLint.hs
new file mode 100644
index 0000000000..72e40ce4f8
--- /dev/null
+++ b/compiler/cmm/OldCmmLint.hs
@@ -0,0 +1,209 @@
+-----------------------------------------------------------------------------
+--
+-- (c) The University of Glasgow 2004-2006
+--
+-- CmmLint: checking the correctness of Cmm statements and expressions
+--
+-----------------------------------------------------------------------------
+
+{-# OPTIONS -fno-warn-tabs #-}
+-- The above warning supression flag is a temporary kludge.
+-- While working on this module you are encouraged to remove it and
+-- detab the module (please do the detabbing in a separate patch). See
+-- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#TabsvsSpaces
+-- for details
+
+module OldCmmLint (
+ cmmLint, cmmLintTop
+ ) where
+
+import BlockId
+import OldCmm
+import CLabel
+import Outputable
+import OldPprCmm()
+import Constants
+import FastString
+import Platform
+
+import Data.Maybe
+
+-- -----------------------------------------------------------------------------
+-- Exported entry points:
+
+cmmLint :: (Outputable d, Outputable h)
+ => Platform -> GenCmmGroup d h (ListGraph CmmStmt) -> Maybe SDoc
+cmmLint platform tops = runCmmLint platform (mapM_ (lintCmmDecl platform)) tops
+
+cmmLintTop :: (Outputable d, Outputable h)
+ => Platform -> GenCmmDecl d h (ListGraph CmmStmt) -> Maybe SDoc
+cmmLintTop platform top = runCmmLint platform (lintCmmDecl platform) top
+
+runCmmLint :: Outputable a
+ => Platform -> (a -> CmmLint b) -> a -> Maybe SDoc
+runCmmLint _ l p =
+ case unCL (l p) of
+ Left err -> Just (vcat [ptext $ sLit ("Cmm lint error:"),
+ nest 2 err,
+ ptext $ sLit ("Program was:"),
+ nest 2 (ppr p)])
+ Right _ -> Nothing
+
+lintCmmDecl :: Platform -> (GenCmmDecl h i (ListGraph CmmStmt)) -> CmmLint ()
+lintCmmDecl platform (CmmProc _ lbl (ListGraph blocks))
+ = addLintInfo (text "in proc " <> pprCLabel platform lbl) $
+ let labels = foldl (\s b -> setInsert (blockId b) s) setEmpty blocks
+ in mapM_ (lintCmmBlock platform labels) blocks
+
+lintCmmDecl _ (CmmData {})
+ = return ()
+
+lintCmmBlock :: Platform -> BlockSet -> GenBasicBlock CmmStmt -> CmmLint ()
+lintCmmBlock platform labels (BasicBlock id stmts)
+ = addLintInfo (text "in basic block " <> ppr id) $
+ mapM_ (lintCmmStmt platform labels) stmts
+
+-- -----------------------------------------------------------------------------
+-- lintCmmExpr
+
+-- Checks whether a CmmExpr is "type-correct", and check for obvious-looking
+-- byte/word mismatches.
+
+lintCmmExpr :: Platform -> CmmExpr -> CmmLint CmmType
+lintCmmExpr platform (CmmLoad expr rep) = do
+ _ <- lintCmmExpr platform expr
+ -- Disabled, if we have the inlining phase before the lint phase,
+ -- we can have funny offsets due to pointer tagging. -- EZY
+ -- when (widthInBytes (typeWidth rep) >= wORD_SIZE) $
+ -- cmmCheckWordAddress expr
+ return rep
+lintCmmExpr platform expr@(CmmMachOp op args) = do
+ tys <- mapM (lintCmmExpr platform) args
+ if map (typeWidth . cmmExprType) args == machOpArgReps op
+ then cmmCheckMachOp op args tys
+ else cmmLintMachOpErr expr (map cmmExprType args) (machOpArgReps op)
+lintCmmExpr platform (CmmRegOff reg offset)
+ = lintCmmExpr platform (CmmMachOp (MO_Add rep)
+ [CmmReg reg, CmmLit (CmmInt (fromIntegral offset) rep)])
+ where rep = typeWidth (cmmRegType reg)
+lintCmmExpr _ expr =
+ return (cmmExprType expr)
+
+-- Check for some common byte/word mismatches (eg. Sp + 1)
+cmmCheckMachOp :: MachOp -> [CmmExpr] -> [CmmType] -> CmmLint CmmType
+cmmCheckMachOp op [lit@(CmmLit (CmmInt { })), reg@(CmmReg _)] tys
+ = cmmCheckMachOp op [reg, lit] tys
+cmmCheckMachOp op _ tys
+ = return (machOpResultType op tys)
+
+isOffsetOp :: MachOp -> Bool
+isOffsetOp (MO_Add _) = True
+isOffsetOp (MO_Sub _) = True
+isOffsetOp _ = False
+
+-- This expression should be an address from which a word can be loaded:
+-- check for funny-looking sub-word offsets.
+_cmmCheckWordAddress :: CmmExpr -> CmmLint ()
+_cmmCheckWordAddress e@(CmmMachOp op [arg, CmmLit (CmmInt i _)])
+ | isOffsetOp op && notNodeReg arg && i `rem` fromIntegral wORD_SIZE /= 0
+ = cmmLintDubiousWordOffset e
+_cmmCheckWordAddress e@(CmmMachOp op [CmmLit (CmmInt i _), arg])
+ | isOffsetOp op && notNodeReg arg && i `rem` fromIntegral wORD_SIZE /= 0
+ = cmmLintDubiousWordOffset e
+_cmmCheckWordAddress _
+ = return ()
+
+-- No warnings for unaligned arithmetic with the node register,
+-- which is used to extract fields from tagged constructor closures.
+notNodeReg :: CmmExpr -> Bool
+notNodeReg (CmmReg reg) | reg == nodeReg = False
+notNodeReg _ = True
+
+lintCmmStmt :: Platform -> BlockSet -> CmmStmt -> CmmLint ()
+lintCmmStmt platform labels = lint
+ where lint (CmmNop) = return ()
+ lint (CmmComment {}) = return ()
+ lint stmt@(CmmAssign reg expr) = do
+ erep <- lintCmmExpr platform expr
+ let reg_ty = cmmRegType reg
+ if (erep `cmmEqType_ignoring_ptrhood` reg_ty)
+ then return ()
+ else cmmLintAssignErr stmt erep reg_ty
+ lint (CmmStore l r) = do
+ _ <- lintCmmExpr platform l
+ _ <- lintCmmExpr platform r
+ return ()
+ lint (CmmCall target _res args _) =
+ do lintTarget platform labels target
+ mapM_ (lintCmmExpr platform . hintlessCmm) args
+ lint (CmmCondBranch e id) = checkTarget id >> lintCmmExpr platform e >> checkCond e
+ lint (CmmSwitch e branches) = do
+ mapM_ checkTarget $ catMaybes branches
+ erep <- lintCmmExpr platform e
+ if (erep `cmmEqType_ignoring_ptrhood` bWord)
+ then return ()
+ else cmmLintErr (text "switch scrutinee is not a word: " <> ppr e <>
+ text " :: " <> ppr erep)
+ lint (CmmJump e _) = lintCmmExpr platform e >> return ()
+ lint (CmmReturn) = return ()
+ lint (CmmBranch id) = checkTarget id
+ checkTarget id = if setMember id labels then return ()
+ else cmmLintErr (text "Branch to nonexistent id" <+> ppr id)
+
+lintTarget :: Platform -> BlockSet -> CmmCallTarget -> CmmLint ()
+lintTarget platform _ (CmmCallee e _) = do _ <- lintCmmExpr platform e
+ return ()
+lintTarget _ _ (CmmPrim _ Nothing) = return ()
+lintTarget platform labels (CmmPrim _ (Just stmts))
+ = mapM_ (lintCmmStmt platform labels) stmts
+
+
+checkCond :: CmmExpr -> CmmLint ()
+checkCond (CmmMachOp mop _) | isComparisonMachOp mop = return ()
+checkCond (CmmLit (CmmInt x t)) | x == 0 || x == 1, t == wordWidth = return () -- constant values
+checkCond expr
+ = cmmLintErr (hang (text "expression is not a conditional:") 2
+ (ppr expr))
+
+-- -----------------------------------------------------------------------------
+-- CmmLint monad
+
+-- just a basic error monad:
+
+newtype CmmLint a = CmmLint { unCL :: Either SDoc a }
+
+instance Monad CmmLint where
+ CmmLint m >>= k = CmmLint $ case m of
+ Left e -> Left e
+ Right a -> unCL (k a)
+ return a = CmmLint (Right a)
+
+cmmLintErr :: SDoc -> CmmLint a
+cmmLintErr msg = CmmLint (Left msg)
+
+addLintInfo :: SDoc -> CmmLint a -> CmmLint a
+addLintInfo info thing = CmmLint $
+ case unCL thing of
+ Left err -> Left (hang info 2 err)
+ Right a -> Right a
+
+cmmLintMachOpErr :: CmmExpr -> [CmmType] -> [Width] -> CmmLint a
+cmmLintMachOpErr expr argsRep opExpectsRep
+ = cmmLintErr (text "in MachOp application: " $$
+ nest 2 (ppr expr) $$
+ (text "op is expecting: " <+> ppr opExpectsRep) $$
+ (text "arguments provide: " <+> ppr argsRep))
+
+cmmLintAssignErr :: CmmStmt -> CmmType -> CmmType -> CmmLint a
+cmmLintAssignErr stmt e_ty r_ty
+ = cmmLintErr (text "in assignment: " $$
+ nest 2 (vcat [ppr stmt,
+ text "Reg ty:" <+> ppr r_ty,
+ text "Rhs ty:" <+> ppr e_ty]))
+
+
+
+cmmLintDubiousWordOffset :: CmmExpr -> CmmLint a
+cmmLintDubiousWordOffset expr
+ = cmmLintErr (text "offset is not a multiple of words: " $$
+ nest 2 (ppr expr))
diff --git a/compiler/cmm/OldPprCmm.hs b/compiler/cmm/OldPprCmm.hs
index d6a12221fb..9990fd26a4 100644
--- a/compiler/cmm/OldPprCmm.hs
+++ b/compiler/cmm/OldPprCmm.hs
@@ -63,10 +63,6 @@ instance Outputable instr => Outputable (GenBasicBlock instr) where
instance Outputable CmmStmt where
ppr s = pprStmt s
-instance Outputable CmmInfo where
- ppr i = pprInfo i
-
-
-- --------------------------------------------------------------------------
instance Outputable CmmSafety where
ppr CmmUnsafe = ptext (sLit "_unsafe_call_")
@@ -74,21 +70,6 @@ instance Outputable CmmSafety where
ppr (CmmSafe srt) = ppr srt
-- --------------------------------------------------------------------------
--- Info tables. The current pretty printer needs refinement
--- but will work for now.
---
--- For ideas on how to refine it, they used to be printed in the
--- style of C--'s 'stackdata' declaration, just inside the proc body,
--- and were labelled with the procedure name ++ "_info".
-pprInfo :: CmmInfo -> SDoc
-pprInfo (CmmInfo _gc_target update_frame info_table) =
- vcat [{-ptext (sLit "gc_target: ") <>
- maybe (ptext (sLit "<none>")) ppr gc_target,-}
- ptext (sLit "update_frame: ") <>
- maybe (ptext (sLit "<none>")) pprUpdateFrame update_frame,
- ppr info_table]
-
--- --------------------------------------------------------------------------
-- Basic blocks look like assembly blocks.
-- lbl: stmt ; stmt ; ..
pprBBlock :: Outputable stmt => GenBasicBlock stmt -> SDoc
@@ -156,18 +137,6 @@ pprStmt stmt = case stmt of
instance (Outputable a) => Outputable (CmmHinted a) where
ppr (CmmHinted a k) = ppr (a, k)
-pprUpdateFrame :: UpdateFrame -> SDoc
-pprUpdateFrame (UpdateFrame expr args) =
- hcat [ ptext (sLit "jump")
- , space
- , if isTrivialCmmExpr expr
- then pprExpr expr
- else case expr of
- CmmLoad (CmmReg _) _ -> pprExpr expr
- _ -> parens (pprExpr expr)
- , space
- , parens ( commafy $ map ppr args ) ]
-
-- --------------------------------------------------------------------------
-- goto local label. [1], section 6.6
--
diff --git a/compiler/cmm/OptimizationFuel.hs b/compiler/cmm/OptimizationFuel.hs
deleted file mode 100644
index a85b11bcc6..0000000000
--- a/compiler/cmm/OptimizationFuel.hs
+++ /dev/null
@@ -1,142 +0,0 @@
-{-# LANGUAGE TypeFamilies #-}
--- | Optimisation fuel is used to control the amount of work the optimiser does.
---
--- Every optimisation step consumes a certain amount of fuel and stops when
--- it runs out of fuel. This can be used e.g. to debug optimiser bugs: Run
--- the optimiser with varying amount of fuel to find out the exact number of
--- steps where a bug is introduced in the output.
-module OptimizationFuel
- ( OptimizationFuel, amountOfFuel, tankFilledTo, unlimitedFuel, anyFuelLeft, oneLessFuel
- , OptFuelState, initOptFuelState
- , FuelConsumer, FuelUsingMonad, FuelState
- , fuelGet, fuelSet, lastFuelPass, setFuelPass
- , fuelExhausted, fuelDec1, tryWithFuel
- , runFuelIO, runInfiniteFuelIO, fuelConsumingPass
- , FuelUniqSM
- , liftUniq
- )
-where
-
-import Data.IORef
-import Control.Monad
-import StaticFlags (opt_Fuel)
-import UniqSupply
-import Panic
-import Util
-
-import Compiler.Hoopl
-import Compiler.Hoopl.GHC (getFuel, setFuel)
-
-#include "HsVersions.h"
-
-
--- We limit the number of transactions executed using a record of flags
--- stored in an HscEnv. The flags store the name of the last optimization
--- pass and the amount of optimization fuel remaining.
-data OptFuelState =
- OptFuelState { pass_ref :: IORef String
- , fuel_ref :: IORef OptimizationFuel
- }
-initOptFuelState :: IO OptFuelState
-initOptFuelState =
- do pass_ref' <- newIORef "unoptimized program"
- fuel_ref' <- newIORef (tankFilledTo opt_Fuel)
- return OptFuelState {pass_ref = pass_ref', fuel_ref = fuel_ref'}
-
-type FuelConsumer a = OptimizationFuel -> (a, OptimizationFuel)
-
-tankFilledTo :: Int -> OptimizationFuel
-amountOfFuel :: OptimizationFuel -> Int
-
-anyFuelLeft :: OptimizationFuel -> Bool
-oneLessFuel :: OptimizationFuel -> OptimizationFuel
-unlimitedFuel :: OptimizationFuel
-
-newtype OptimizationFuel = OptimizationFuel Int
- deriving Show
-
-tankFilledTo = OptimizationFuel
-amountOfFuel (OptimizationFuel f) = f
-
-anyFuelLeft (OptimizationFuel f) = f > 0
-oneLessFuel (OptimizationFuel f) = ASSERT (f > 0) (OptimizationFuel (f - 1))
-unlimitedFuel = OptimizationFuel infiniteFuel
-
-data FuelState = FuelState { fs_fuel :: OptimizationFuel, fs_lastpass :: String }
-newtype FuelUniqSM a = FUSM { unFUSM :: FuelState -> UniqSM (a, FuelState) }
-
-fuelConsumingPass :: String -> FuelConsumer a -> FuelUniqSM a
-fuelConsumingPass name f = do setFuelPass name
- fuel <- fuelGet
- let (a, fuel') = f fuel
- fuelSet fuel'
- return a
-
-runFuelIO :: OptFuelState -> FuelUniqSM a -> IO a
-runFuelIO fs (FUSM f) =
- do pass <- readIORef (pass_ref fs)
- fuel <- readIORef (fuel_ref fs)
- u <- mkSplitUniqSupply 'u'
- let (a, FuelState fuel' pass') = initUs_ u $ f (FuelState fuel pass)
- writeIORef (pass_ref fs) pass'
- writeIORef (fuel_ref fs) fuel'
- return a
-
--- ToDo: Do we need the pass_ref when we are doing infinite fueld
--- transformations?
-runInfiniteFuelIO :: OptFuelState -> FuelUniqSM a -> IO a
-runInfiniteFuelIO fs (FUSM f) =
- do pass <- readIORef (pass_ref fs)
- u <- mkSplitUniqSupply 'u'
- let (a, FuelState _ pass') = initUs_ u $ f (FuelState unlimitedFuel pass)
- writeIORef (pass_ref fs) pass'
- return a
-
-instance Monad FuelUniqSM where
- FUSM f >>= k = FUSM (\s -> f s >>= \(a, s') -> unFUSM (k a) s')
- return a = FUSM (\s -> return (a, s))
-
-instance MonadUnique FuelUniqSM where
- getUniqueSupplyM = liftUniq getUniqueSupplyM
- getUniqueM = liftUniq getUniqueM
- getUniquesM = liftUniq getUniquesM
-
-liftUniq :: UniqSM x -> FuelUniqSM x
-liftUniq x = FUSM (\s -> x >>= (\u -> return (u, s)))
-
-class Monad m => FuelUsingMonad m where
- fuelGet :: m OptimizationFuel
- fuelSet :: OptimizationFuel -> m ()
- lastFuelPass :: m String
- setFuelPass :: String -> m ()
-
-fuelExhausted :: FuelUsingMonad m => m Bool
-fuelExhausted = fuelGet >>= return . anyFuelLeft
-
-fuelDec1 :: FuelUsingMonad m => m ()
-fuelDec1 = fuelGet >>= fuelSet . oneLessFuel
-
-tryWithFuel :: FuelUsingMonad m => a -> m (Maybe a)
-tryWithFuel r = do f <- fuelGet
- if anyFuelLeft f then fuelSet (oneLessFuel f) >> return (Just r)
- else return Nothing
-
-instance FuelUsingMonad FuelUniqSM where
- fuelGet = extract fs_fuel
- lastFuelPass = extract fs_lastpass
- fuelSet fuel = FUSM (\s -> return ((), s { fs_fuel = fuel }))
- setFuelPass pass = FUSM (\s -> return ((), s { fs_lastpass = pass }))
-
-extract :: (FuelState -> a) -> FuelUniqSM a
-extract f = FUSM (\s -> return (f s, s))
-
-instance FuelMonad FuelUniqSM where
- getFuel = liftM amountOfFuel fuelGet
- setFuel = fuelSet . tankFilledTo
-
--- Don't bother to checkpoint the unique supply; it doesn't matter
-instance CheckpointMonad FuelUniqSM where
- type Checkpoint FuelUniqSM = FuelState
- checkpoint = FUSM $ \fuel -> return (fuel, fuel)
- restart fuel = FUSM $ \_ -> return ((), fuel)
-
diff --git a/compiler/cmm/PprCmm.hs b/compiler/cmm/PprCmm.hs
index 183708c08e..dee6ee881e 100644
--- a/compiler/cmm/PprCmm.hs
+++ b/compiler/cmm/PprCmm.hs
@@ -146,8 +146,6 @@ pprConvention Slow = text "<slow-convention>"
pprConvention GC = text "<gc-convention>"
pprConvention PrimOpCall = text "<primop-call-convention>"
pprConvention PrimOpReturn = text "<primop-ret-convention>"
-pprConvention (Foreign c) = ppr c
-pprConvention (Private {}) = text "<private-convention>"
pprForeignConvention :: ForeignConvention -> SDoc
pprForeignConvention (ForeignConvention c as rs) = ppr c <> ppr as <> ppr rs
diff --git a/compiler/cmm/PprCmmExpr.hs b/compiler/cmm/PprCmmExpr.hs
index 7503127555..119f2b7239 100644
--- a/compiler/cmm/PprCmmExpr.hs
+++ b/compiler/cmm/PprCmmExpr.hs
@@ -237,12 +237,8 @@ pprLocalReg (LocalReg uniq rep)
-- Stack areas
pprArea :: Area -> SDoc
-pprArea (RegSlot r) = hcat [ text "slot<", ppr r, text ">" ]
-pprArea (CallArea id) = pprAreaId id
-
-pprAreaId :: AreaId -> SDoc
-pprAreaId Old = text "old"
-pprAreaId (Young id) = hcat [ text "young<", ppr id, text ">" ]
+pprArea Old = text "old"
+pprArea (Young id) = hcat [ text "young<", ppr id, text ">" ]
-- needs to be kept in syn with CmmExpr.hs.GlobalReg
--
diff --git a/compiler/cmm/SMRep.lhs b/compiler/cmm/SMRep.lhs
index ce30799bf6..8b3308ef97 100644
--- a/compiler/cmm/SMRep.lhs
+++ b/compiler/cmm/SMRep.lhs
@@ -21,6 +21,7 @@ module SMRep (
StgWord, StgHalfWord,
hALF_WORD_SIZE, hALF_WORD_SIZE_IN_BITS,
WordOff, ByteOff,
+ roundUpToWords,
-- * Closure repesentation
SMRep(..), -- CmmInfo sees the rep; no one else does
@@ -57,6 +58,7 @@ import FastString
import Data.Char( ord )
import Data.Word
+import Data.Bits
\end{code}
@@ -69,6 +71,9 @@ import Data.Word
\begin{code}
type WordOff = Int -- Word offset, or word count
type ByteOff = Int -- Byte offset, or byte count
+
+roundUpToWords :: ByteOff -> ByteOff
+roundUpToWords n = (n + (wORD_SIZE - 1)) .&. (complement (wORD_SIZE - 1))
\end{code}
StgWord is a type representing an StgWord on the target platform.
@@ -93,6 +98,7 @@ hALF_WORD_SIZE_IN_BITS = 32
#endif
\end{code}
+
%************************************************************************
%* *
\subsubsection[SMRep-datatype]{@SMRep@---storage manager representation}
diff --git a/compiler/cmm/cmm-notes b/compiler/cmm/cmm-notes
index 81882c8c0e..0e6a2341f2 100644
--- a/compiler/cmm/cmm-notes
+++ b/compiler/cmm/cmm-notes
@@ -24,27 +24,10 @@ More notes (June 11)
* Check in ClosureInfo:
-- NB: Results here should line up with the results of SMRep.rtsClosureType
-* Possible refactoring: Nuke AGraph in favour of
- mkIfThenElse :: Expr -> Graph -> Graph -> FCode Graph
- or even
- mkIfThenElse :: HasUniques m => Expr -> Graph -> Graph -> m Graph
- (Remmber that the .cmm file parser must use this function)
-
- or parameterise FCode over its envt; the CgState part seem useful for both
-
* "Remove redundant reloads" in CmmSpillReload should be redundant; since
insertLateReloads is now gone, every reload is reloading a live variable.
Test and nuke.
-* Stack layout is very like register assignment: find non-conflicting assigments.
- In particular we can use colouring or linear scan (etc).
-
- We'd fine-grain interference (on a word by word basis) to get maximum overlap.
- But that may make very big interference graphs. So linear scan might be
- more attactive.
-
- NB: linear scan does on-the-fly live range splitting.
-
* When stubbing dead slots be careful not to write into an area that
overlaps with an area that's in use. So stubbing needs to *follow*
stack layout.
@@ -81,10 +64,6 @@ Things to do:
Old.Cmm. We should abstract it to work on both representations, it needs only to
convert a CmmInfoTable to [CmmStatic].
- - The MkGraph currenty uses a different semantics for <*> than Hoopl. Maybe
- we could convert codeGen/StgCmm* clients to the Hoopl's semantics?
- It's all deeply unsatisfactory.
-
- Improve performance of Hoopl.
A nofib comparison of -fasm vs -fnewcodegen nofib compilation parameters
@@ -101,18 +80,12 @@ Things to do:
When compiling nofib, ghc-head + libraries compiled with -fnew-codegen
is 31.4% slower (http://fox.auryn.cz/msrc/0517_hoopl/32bit.oldghcoldgen.zipghcoldgen.txt).
- So we generate a bit better code, but it takes us longer!
-
EZY: Also importantly, Hoopl uses dramatically more memory than the
old code generator.
- Are all blockToNodeList and blockOfNodeList really needed? Maybe we could
splice blocks instead?
- In the CmmContFlowOpt.blockConcat, using Dataflow seems too clumsy. Still,
- a block catenation function would be probably nicer than blockToNodeList
- / blockOfNodeList combo.
-
- lowerSafeForeignCall seems too lowlevel. Just use Dataflow. After that
delete splitEntrySeq from HooplUtils.
@@ -129,10 +102,6 @@ Things to do:
- NB that CmmProcPoint line 283 has a hack that works around a GADT-related
bug in 6.10.
- - SDM (2010-02-26) can we remove the Foreign constructor from Convention?
- Reason: we never generate code for a function with the Foreign
- calling convention, and the code for calling foreign calls is generated
-
- AsmCodeGen has a generic Cmm optimiser; move this into new pipeline
EZY (2011-04-16): The mini-inliner has been generalized and ported,
but the constant folding and other optimizations need to still be
@@ -161,16 +130,6 @@ Things to do:
- Top-level SRT threading is a bit ugly
- - Add type/newtype for CmmModule = [CmmGroup] -- A module
- CmmGroup = [CmmTop] -- A .o file
- CmmTop = Proc | Data -- A procedure or data
-
- - This is a *change*: currently a CmmGroup is one function's-worth of code
- regardless of SplitObjs. Question: can we *always* generate M.o if there
- is just one element in the list (rather than M/M1.o, M/M2.o etc)
-
- One SRT per group.
-
- See "CAFs" below; we want to totally refactor the way SRTs are calculated
- Pull out Areas into its own module
diff --git a/compiler/codeGen/CgInfoTbls.hs b/compiler/codeGen/CgInfoTbls.hs
index 6c77255a62..7cdb1b6f7e 100644
--- a/compiler/codeGen/CgInfoTbls.hs
+++ b/compiler/codeGen/CgInfoTbls.hs
@@ -36,7 +36,7 @@ import CgBindery
import CgCallConv
import CgUtils
import CgMonad
-import CmmBuildInfoTables
+import CmmUtils
import OldCmm
import CLabel
@@ -66,10 +66,9 @@ emitClosureCodeAndInfoTable cl_info args body
-- Convert from 'ClosureInfo' to 'CmmInfo'.
-- Not used for return points. (The 'smRepClosureTypeInt' call would panic.)
-mkCmmInfo :: ClosureInfo -> FCode CmmInfo
+mkCmmInfo :: ClosureInfo -> FCode CmmInfoTable
mkCmmInfo cl_info
- = return (CmmInfo gc_target Nothing $
- CmmInfoTable { cit_lbl = infoTableLabelFromCI cl_info,
+ = return (CmmInfoTable { cit_lbl = infoTableLabelFromCI cl_info,
cit_rep = closureSMRep cl_info,
cit_prof = prof,
cit_srt = closureSRT cl_info })
@@ -79,14 +78,6 @@ mkCmmInfo cl_info
ty_descr_w8 = stringToWord8s (closureTypeDescr cl_info)
val_descr_w8 = stringToWord8s (closureValDescr cl_info)
- -- The gc_target is to inform the CPS pass when it inserts a stack check.
- -- Since that pass isn't used yet we'll punt for now.
- -- When the CPS pass is fully integrated, this should
- -- be replaced by the label that any heap check jumped to,
- -- so that branch can be shared by both the heap (from codeGen)
- -- and stack checks (from the CPS pass).
- gc_target = panic "TODO: gc_target"
-
-------------------------------------------------------------------------
--
-- Generating the info table and code for a return point
@@ -105,8 +96,7 @@ emitReturnTarget name stmts
; blks <- cgStmtsToBlocks stmts
; frame <- mkStackLayout
; let smrep = mkStackRep (mkLiveness frame)
- info = CmmInfo gc_target Nothing info_tbl
- info_tbl = CmmInfoTable { cit_lbl = info_lbl
+ info = CmmInfoTable { cit_lbl = info_lbl
, cit_prof = NoProfilingInfo
, cit_rep = smrep
, cit_srt = srt_info }
@@ -118,14 +108,6 @@ emitReturnTarget name stmts
info_lbl = mkReturnInfoLabel uniq
entry_lbl = mkReturnPtLabel uniq
- -- The gc_target is to inform the CPS pass when it inserts a stack check.
- -- Since that pass isn't used yet we'll punt for now.
- -- When the CPS pass is fully integrated, this should
- -- be replaced by the label that any heap check jumped to,
- -- so that branch can be shared by both the heap (from codeGen)
- -- and stack checks (from the CPS pass).
- gc_target = panic "TODO: gc_target"
-
-- Build stack layout information from the state of the 'FCode' monad.
-- Should go away once 'codeGen' starts using the CPS conversion
-- pass to handle the stack. Until then, this is really just
@@ -375,8 +357,8 @@ funInfoTable info_ptr
emitInfoTableAndCode
:: CLabel -- Label of entry or ret
- -> CmmInfo -- ...the info table
- -> [CmmFormal] -- ...args
+ -> CmmInfoTable -- ...the info table
+ -> [CmmFormal] -- ...args
-> [CmmBasicBlock] -- ...and body
-> Code
diff --git a/compiler/codeGen/CgMonad.lhs b/compiler/codeGen/CgMonad.lhs
index b96898f591..71da9e9ae0 100644
--- a/compiler/codeGen/CgMonad.lhs
+++ b/compiler/codeGen/CgMonad.lhs
@@ -13,8 +13,8 @@ stuff fits into the Big Picture.
module CgMonad (
Code, FCode,
- initC, thenC, thenFC, listCs, listFCs, mapCs, mapFCs,
- returnFC, fixC, fixC_, checkedAbsC,
+ initC, runC, thenC, thenFC, listCs, listFCs, mapCs, mapFCs,
+ returnFC, fixC, fixC_, checkedAbsC,
stmtC, stmtsC, labelC, emitStmts, nopC, whenC, newLabelC,
newUnique, newUniqSupply,
@@ -386,11 +386,12 @@ instance Monad FCode where
{-# INLINE thenFC #-}
{-# INLINE returnFC #-}
-initC :: DynFlags -> Module -> FCode a -> IO a
-initC dflags mod (FCode code) = do
- uniqs <- mkSplitUniqSupply 'c'
- case code (initCgInfoDown dflags mod) (initCgState uniqs) of
- (res, _) -> return res
+initC :: IO CgState
+initC = do { uniqs <- mkSplitUniqSupply 'c'
+ ; return (initCgState uniqs) }
+
+runC :: DynFlags -> Module -> CgState -> FCode a -> (a,CgState)
+runC dflags mod st (FCode code) = code (initCgInfoDown dflags mod) st
returnFC :: a -> FCode a
returnFC val = FCode $ \_ state -> (val, state)
@@ -708,7 +709,7 @@ emitDecl decl = do
state <- getState
setState $ state { cgs_tops = cgs_tops state `snocOL` decl }
-emitProc :: CmmInfo -> CLabel -> [CmmFormal] -> [CmmBasicBlock] -> Code
+emitProc :: CmmInfoTable -> CLabel -> [CmmFormal] -> [CmmBasicBlock] -> Code
emitProc info lbl [] blocks = do
let proc_block = CmmProc info lbl (ListGraph blocks)
state <- getState
@@ -720,7 +721,7 @@ emitSimpleProc :: CLabel -> Code -> Code
emitSimpleProc lbl code = do
stmts <- getCgStmts code
blks <- cgStmtsToBlocks stmts
- emitProc (CmmInfo Nothing Nothing CmmNonInfoTable) lbl [] blks
+ emitProc CmmNonInfoTable lbl [] blks
-- Get all the CmmTops (there should be no stmts)
-- Return a single Cmm which may be split from other Cmms by
diff --git a/compiler/codeGen/CodeGen.lhs b/compiler/codeGen/CodeGen.lhs
index ce12d43bbf..c9b2bf8ab0 100644
--- a/compiler/codeGen/CodeGen.lhs
+++ b/compiler/codeGen/CodeGen.lhs
@@ -30,7 +30,7 @@ import CgHpc
import CLabel
import OldCmm
-import OldPprCmm
+import OldPprCmm ()
import StgSyn
import PrelNames
@@ -45,40 +45,52 @@ import TyCon
import Module
import ErrUtils
import Panic
+import Outputable
import Util
+import OrdList
+import Stream (Stream, liftIO)
+import qualified Stream
+
+import Data.IORef
+
codeGen :: DynFlags
-> Module -- Module we are compiling
-> [TyCon] -- Type constructors
-> CollectedCCs -- (Local/global) cost-centres needing declaring/registering.
-> [(StgBinding,[(Id,[Id])])] -- Bindings to convert, with SRTs
-> HpcInfo -- Profiling info
- -> IO [CmmGroup]
+ -> Stream IO CmmGroup ()
-- N.B. returning '[Cmm]' and not 'Cmm' here makes it
-- possible for object splitting to split up the
-- pieces later.
-codeGen dflags this_mod data_tycons cost_centre_info stg_binds hpc_info = do
- showPass dflags "CodeGen"
- code_stuff <-
- initC dflags this_mod $ do
- cmm_binds <- mapM (getCmm . cgTopBinding dflags) stg_binds
- cmm_tycons <- mapM cgTyCon data_tycons
- cmm_init <- getCmm (mkModuleInit dflags cost_centre_info this_mod hpc_info)
- return (cmm_init : cmm_binds ++ cmm_tycons)
- -- Put datatype_stuff after code_stuff, because the
- -- datatype closure table (for enumeration types) to
- -- (say) PrelBase_True_closure, which is defined in
- -- code_stuff
-
- -- Note [codegen-split-init] the cmm_init block must
- -- come FIRST. This is because when -split-objs is on
- -- we need to combine this block with its
- -- initialisation routines; see Note
- -- [pipeline-split-init].
-
- dumpIfSet_dyn dflags Opt_D_dump_cmm "Cmm" (pprCmms code_stuff)
- return code_stuff
+codeGen dflags this_mod data_tycons cost_centre_info stg_binds hpc_info
+
+ = do { liftIO $ showPass dflags "CodeGen"
+
+ ; cgref <- liftIO $ newIORef =<< initC
+ ; let cg :: FCode CmmGroup -> Stream IO CmmGroup ()
+ cg fcode = do
+ cmm <- liftIO $ do
+ st <- readIORef cgref
+ let (a,st') = runC dflags this_mod st fcode
+
+ dumpIfSet_dyn dflags Opt_D_dump_cmm "Cmm" $ ppr a
+
+ -- NB. stub-out cgs_tops and cgs_stmts. This fixes
+ -- a big space leak. DO NOT REMOVE!
+ writeIORef cgref $! st'{ cgs_tops = nilOL,
+ cgs_stmts = nilOL }
+ return a
+ Stream.yield cmm
+
+ ; cg (getCmm $ mkModuleInit dflags cost_centre_info this_mod hpc_info)
+
+ ; mapM_ (cg . getCmm . cgTopBinding dflags) stg_binds
+
+ ; mapM_ (cg . cgTyCon) data_tycons
+ }
mkModuleInit
:: DynFlags
diff --git a/compiler/codeGen/StgCmm.hs b/compiler/codeGen/StgCmm.hs
index 17a7062559..696af8107e 100644
--- a/compiler/codeGen/StgCmm.hs
+++ b/compiler/codeGen/StgCmm.hs
@@ -46,6 +46,13 @@ import TyCon
import Module
import ErrUtils
import Outputable
+import Stream
+
+import OrdList
+import MkGraph
+
+import Data.IORef
+import Control.Monad (when)
import Util
codeGen :: DynFlags
@@ -54,39 +61,51 @@ codeGen :: DynFlags
-> CollectedCCs -- (Local/global) cost-centres needing declaring/registering.
-> [(StgBinding,[(Id,[Id])])] -- Bindings to convert, with SRTs
-> HpcInfo
- -> IO [CmmGroup] -- Output
+ -> Stream IO CmmGroup () -- Output as a stream, so codegen can
+ -- be interleaved with output
codeGen dflags this_mod data_tycons
cost_centre_info stg_binds hpc_info
- = do { showPass dflags "New CodeGen"
-
--- Why?
--- ; mapM_ (\x -> seq x (return ())) data_tycons
-
- ; code_stuff <- initC dflags this_mod $ do
- { cmm_binds <- mapM (getCmm . cgTopBinding dflags) stg_binds
- ; cmm_tycons <- mapM cgTyCon data_tycons
- ; cmm_init <- getCmm (mkModuleInit cost_centre_info
- this_mod hpc_info)
- ; return (cmm_init : cmm_binds ++ cmm_tycons)
- }
+ = do { liftIO $ showPass dflags "New CodeGen"
+
+ -- cg: run the code generator, and yield the resulting CmmGroup
+ -- Using an IORef to store the state is a bit crude, but otherwise
+ -- we would need to add a state monad layer.
+ ; cgref <- liftIO $ newIORef =<< initC
+ ; let cg :: FCode () -> Stream IO CmmGroup ()
+ cg fcode = do
+ cmm <- liftIO $ do
+ st <- readIORef cgref
+ let (a,st') = runC dflags this_mod st (getCmm fcode)
+
+ -- NB. stub-out cgs_tops and cgs_stmts. This fixes
+ -- a big space leak. DO NOT REMOVE!
+ writeIORef cgref $! st'{ cgs_tops = nilOL,
+ cgs_stmts = mkNop }
+ return a
+ yield cmm
+
+ -- Note [codegen-split-init] the cmm_init block must come
+ -- FIRST. This is because when -split-objs is on we need to
+ -- combine this block with its initialisation routines; see
+ -- Note [pipeline-split-init].
+ ; cg (mkModuleInit cost_centre_info this_mod hpc_info)
+
+ ; mapM_ (cg . cgTopBinding dflags) stg_binds
+
-- Put datatype_stuff after code_stuff, because the
-- datatype closure table (for enumeration types) to
-- (say) PrelBase_True_closure, which is defined in
-- code_stuff
-
- -- N.B. returning '[Cmm]' and not 'Cmm' here makes it
- -- possible for object splitting to split up the
- -- pieces later.
-
- -- Note [codegen-split-init] the cmm_init block must
- -- come FIRST. This is because when -split-objs is on
- -- we need to combine this block with its
- -- initialisation routines; see Note
- -- [pipeline-split-init].
-
- ; return code_stuff }
-
+ ; let do_tycon tycon = do
+ -- Generate a table of static closures for an
+ -- enumeration type Note that the closure pointers are
+ -- tagged.
+ when (isEnumerationTyCon tycon) $ cg (cgEnumerationTyCon tycon)
+ mapM_ (cg . cgDataCon) (tyConDataCons tycon)
+
+ ; mapM_ do_tycon data_tycons
+ }
---------------------------------------------------------------
-- Top-level bindings
@@ -108,7 +127,7 @@ cgTopBinding dflags (StgNonRec id rhs, _srts)
; info <- cgTopRhs id' rhs
; addBindC (cg_id info) info -- Add the *un-externalised* Id to the envt,
-- so we find it when we look up occurrences
- }
+ }
cgTopBinding dflags (StgRec pairs, _srts)
= do { let (bndrs, rhss) = unzip pairs
@@ -117,7 +136,7 @@ cgTopBinding dflags (StgRec pairs, _srts)
; fixC_(\ new_binds -> do
{ addBindsC new_binds
; mapFCs ( \ (b,e) -> cgTopRhs b e ) pairs' })
- ; return () }
+ ; return () }
-- Urgh! I tried moving the forkStatics call from the rhss of cgTopRhs
-- to enclose the listFCs in cgTopBinding, but that tickled the
@@ -187,65 +206,19 @@ mkModuleInit cost_centre_info this_mod hpc_info
; emitDecl (CmmData Data (Statics (mkPlainModuleInitLabel this_mod) []))
}
+
---------------------------------------------------------------
-- Generating static stuff for algebraic data types
---------------------------------------------------------------
-{- [These comments are rather out of date]
-
-Macro Kind of constructor
-CONST_INFO_TABLE@ Zero arity (no info -- compiler uses static closure)
-CHARLIKE_INFO_TABLE Charlike (no info -- compiler indexes fixed array)
-INTLIKE_INFO_TABLE Intlike; the one macro generates both info tbls
-SPEC_INFO_TABLE SPECish, and bigger than or equal to MIN_UPD_SIZE
-GEN_INFO_TABLE GENish (hence bigger than or equal to MIN_UPD_SIZE@)
-Possible info tables for constructor con:
-
-* _con_info:
- Used for dynamically let(rec)-bound occurrences of
- the constructor, and for updates. For constructors
- which are int-like, char-like or nullary, when GC occurs,
- the closure tries to get rid of itself.
-
-* _static_info:
- Static occurrences of the constructor macro: STATIC_INFO_TABLE.
-
-For zero-arity constructors, \tr{con}, we NO LONGER generate a static closure;
-it's place is taken by the top level defn of the constructor.
-
-For charlike and intlike closures there is a fixed array of static
-closures predeclared.
--}
-
-cgTyCon :: TyCon -> FCode CmmGroup -- All constructors merged together
-cgTyCon tycon
- = do { constrs <- mapM (getCmm . cgDataCon) (tyConDataCons tycon)
-
- -- Generate a table of static closures for an enumeration type
- -- Put the table after the data constructor decls, because the
- -- datatype closure table (for enumeration types)
- -- to (say) PrelBase_$wTrue_closure, which is defined in code_stuff
- -- Note that the closure pointers are tagged.
-
- -- N.B. comment says to put table after constructor decls, but
- -- code puts it before --- NR 16 Aug 2007
- ; extra <- cgEnumerationTyCon tycon
-
- ; return (concat (extra ++ constrs))
- }
-
-cgEnumerationTyCon :: TyCon -> FCode [CmmGroup]
+cgEnumerationTyCon :: TyCon -> FCode ()
cgEnumerationTyCon tycon
- | isEnumerationTyCon tycon
- = do { tbl <- getCmm $
- emitRODataLits (mkLocalClosureTableLabel (tyConName tycon) NoCafRefs)
- [ CmmLabelOff (mkLocalClosureLabel (dataConName con) NoCafRefs)
- (tagForCon con)
- | con <- tyConDataCons tycon]
- ; return [tbl] }
- | otherwise
- = return []
+ = emitRODataLits (mkLocalClosureTableLabel (tyConName tycon) NoCafRefs)
+ [ CmmLabelOff (mkLocalClosureLabel (dataConName con) NoCafRefs)
+ (tagForCon con)
+ | con <- tyConDataCons tycon]
+
cgDataCon :: DataCon -> FCode ()
-- Generate the entry code, info tables, and (for niladic constructor)
diff --git a/compiler/codeGen/StgCmmBind.hs b/compiler/codeGen/StgCmmBind.hs
index 3b166e3b6a..f98283f737 100644
--- a/compiler/codeGen/StgCmmBind.hs
+++ b/compiler/codeGen/StgCmmBind.hs
@@ -76,17 +76,17 @@ cgTopRhsClosure :: Id
cgTopRhsClosure id ccs _ upd_flag srt args body = do
{ -- LAY OUT THE OBJECT
let name = idName id
- ; lf_info <- mkClosureLFInfo id TopLevel [] upd_flag args
- ; srt_info <- getSRTInfo srt
+ ; lf_info <- mkClosureLFInfo id TopLevel [] upd_flag args
+ ; has_srt <- getSRTInfo srt
; mod_name <- getModuleName
; dflags <- getDynFlags
; let descr = closureDescription dflags mod_name name
- closure_info = mkClosureInfo True id lf_info 0 0 srt_info descr
- closure_label = mkLocalClosureLabel name (idCafInfo id)
+ closure_info = mkClosureInfo True id lf_info 0 0 descr
+ closure_label = mkLocalClosureLabel name (idCafInfo id)
cg_id_info = litIdInfo id lf_info (CmmLabel closure_label)
caffy = idCafInfo id
info_tbl = mkCmmInfo closure_info -- XXX short-cut
- closure_rep = mkStaticClosureFields info_tbl ccs caffy []
+ closure_rep = mkStaticClosureFields info_tbl ccs caffy has_srt []
-- BUILD THE OBJECT, AND GENERATE INFO TABLE (IF NECESSARY)
; emitDataLits closure_label closure_rep
@@ -110,7 +110,7 @@ cgBind (StgNonRec name rhs)
; emit (init <*> body) }
cgBind (StgRec pairs)
- = do { ((new_binds, inits), body) <- getCodeR $ fixC (\ new_binds_inits ->
+ = do { ((new_binds, inits), body) <- getCodeR $ fixC (\ new_binds_inits ->
do { addBindsC $ fst new_binds_inits -- avoid premature deconstruction
; liftM unzip $ listFCs [ cgRhs b e | (b,e) <- pairs ] })
; addBindsC new_binds
@@ -162,8 +162,8 @@ cgRhs :: Id -> StgRhs -> FCode (CgIdInfo, CmmAGraph)
cgRhs name (StgRhsCon cc con args)
= buildDynCon name cc con args
-cgRhs name (StgRhsClosure cc bi fvs upd_flag srt args body)
- = mkRhsClosure name cc bi (nonVoidIds fvs) upd_flag srt args body
+cgRhs name (StgRhsClosure cc bi fvs upd_flag _srt args body)
+ = mkRhsClosure name cc bi (nonVoidIds fvs) upd_flag args body
------------------------------------------------------------------------
-- Non-constructor right hand sides
@@ -171,7 +171,7 @@ cgRhs name (StgRhsClosure cc bi fvs upd_flag srt args body)
mkRhsClosure :: Id -> CostCentreStack -> StgBinderInfo
-> [NonVoid Id] -- Free vars
- -> UpdateFlag -> SRT
+ -> UpdateFlag
-> [Id] -- Args
-> StgExpr
-> FCode (CgIdInfo, CmmAGraph)
@@ -215,8 +215,7 @@ for semi-obvious reasons.
mkRhsClosure bndr cc bi
[NonVoid the_fv] -- Just one free var
upd_flag -- Updatable thunk
- _srt
- [] -- A thunk
+ [] -- A thunk
body@(StgCase (StgApp scrutinee [{-no args-}])
_ _ _ _ -- ignore uniq, etc.
(AlgAlt _)
@@ -247,8 +246,7 @@ mkRhsClosure bndr cc bi
mkRhsClosure bndr cc bi
fvs
upd_flag
- _srt
- [] -- No args; a thunk
+ [] -- No args; a thunk
body@(StgApp fun_id args)
| args `lengthIs` (arity-1)
@@ -269,7 +267,7 @@ mkRhsClosure bndr cc bi
arity = length fvs
---------- Default case ------------------
-mkRhsClosure bndr cc _ fvs upd_flag srt args body
+mkRhsClosure bndr cc _ fvs upd_flag args body
= do { -- LAY OUT THE OBJECT
-- If the binder is itself a free variable, then don't store
-- it in the closure. Instead, just bind it to Node on entry.
@@ -288,17 +286,16 @@ mkRhsClosure bndr cc _ fvs upd_flag srt args body
-- MAKE CLOSURE INFO FOR THIS CLOSURE
; lf_info <- mkClosureLFInfo bndr NotTopLevel fvs upd_flag args
; mod_name <- getModuleName
- ; c_srt <- getSRTInfo srt
- ; dflags <- getDynFlags
- ; let name = idName bndr
- descr = closureDescription dflags mod_name name
- fv_details :: [(NonVoid Id, VirtualHpOffset)]
+ ; dflags <- getDynFlags
+ ; let name = idName bndr
+ descr = closureDescription dflags mod_name name
+ fv_details :: [(NonVoid Id, VirtualHpOffset)]
(tot_wds, ptr_wds, fv_details)
= mkVirtHeapOffsets (isLFThunk lf_info)
(addIdReps (map stripNV reduced_fvs))
closure_info = mkClosureInfo False -- Not static
bndr lf_info tot_wds ptr_wds
- c_srt descr
+ descr
-- BUILD ITS INFO TABLE AND CODE
; forkClosureBody $
@@ -345,8 +342,7 @@ cgStdThunk bndr _cc _bndr_info _body lf_info payload
descr = closureDescription dflags mod_name (idName bndr)
closure_info = mkClosureInfo False -- Not static
bndr lf_info tot_wds ptr_wds
- NoC_SRT -- No SRT for a std-form closure
- descr
+ descr
-- ; (use_cc, blame_cc) <- chooseDynCostCentres cc [{- no args-}] body
; let use_cc = curCCS; blame_cc = curCCS
@@ -546,10 +542,10 @@ emitBlackHoleCode is_single_entry = do
whenC eager_blackholing $ do
tickyBlackHole (not is_single_entry)
- emit (mkStore (cmmOffsetW (CmmReg nodeReg) fixedHdrSize)
- (CmmReg (CmmGlobal CurrentTSO)))
+ emitStore (cmmOffsetW (CmmReg nodeReg) fixedHdrSize)
+ (CmmReg (CmmGlobal CurrentTSO))
emitPrimCall [] MO_WriteBarrier []
- emit (mkStore (CmmReg nodeReg) (CmmReg (CmmGlobal EagerBlackholeInfo)))
+ emitStore (CmmReg nodeReg) (CmmReg (CmmGlobal EagerBlackholeInfo))
setupUpdate :: ClosureInfo -> LocalReg -> FCode () -> FCode ()
-- Nota Bene: this function does not change Node (even if it's a CAF),
@@ -598,7 +594,7 @@ pushUpdateFrame es body
offset <- foldM push updfr es
withUpdFrameOff offset body
where push off e =
- do emit (mkStore (CmmStackSlot (CallArea Old) base) e)
+ do emitStore (CmmStackSlot Old base) e
return base
where base = off + widthInBytes (cmmExprWidth e)
@@ -666,13 +662,14 @@ link_caf _is_upd = do
-- node is live, so save it.
-- see Note [atomic CAF entry] in rts/sm/Storage.c
- ; emit $ mkCmmIfThen
- (CmmMachOp mo_wordEq [ CmmReg (CmmLocal ret), CmmLit zeroCLit]) $
+ ; updfr <- getUpdFrameOff
+ ; emit =<< mkCmmIfThen
+ (CmmMachOp mo_wordEq [ CmmReg (CmmLocal ret), CmmLit zeroCLit])
-- re-enter R1. Doing this directly is slightly dodgy; we're
-- assuming lots of things, like the stack pointer hasn't
-- moved since we entered the CAF.
- let target = entryCode (closureInfoPtr (CmmReg nodeReg)) in
- mkJump target [] 0
+ (let target = entryCode (closureInfoPtr (CmmReg nodeReg)) in
+ mkJump target [] updfr)
; return hp_rel }
diff --git a/compiler/codeGen/StgCmmClosure.hs b/compiler/codeGen/StgCmmClosure.hs
index 483a67c1fa..8023abddec 100644
--- a/compiler/codeGen/StgCmmClosure.hs
+++ b/compiler/codeGen/StgCmmClosure.hs
@@ -657,7 +657,6 @@ data ClosureInfo
-- the rest is just an unpacked CmmInfoTable.
closureInfoLabel :: !CLabel,
closureSMRep :: !SMRep, -- representation used by storage mgr
- closureSRT :: !C_SRT, -- What SRT applies to this closure
closureProf :: !ProfilingInfo
}
@@ -667,7 +666,7 @@ mkCmmInfo ClosureInfo {..}
= CmmInfoTable { cit_lbl = closureInfoLabel
, cit_rep = closureSMRep
, cit_prof = closureProf
- , cit_srt = closureSRT }
+ , cit_srt = NoC_SRT }
--------------------------------------
@@ -678,16 +677,14 @@ mkClosureInfo :: Bool -- Is static
-> Id
-> LambdaFormInfo
-> Int -> Int -- Total and pointer words
- -> C_SRT
- -> String -- String descriptor
+ -> String -- String descriptor
-> ClosureInfo
-mkClosureInfo is_static id lf_info tot_wds ptr_wds srt_info val_descr
+mkClosureInfo is_static id lf_info tot_wds ptr_wds val_descr
= ClosureInfo { closureName = name,
closureLFInfo = lf_info,
- closureInfoLabel = info_lbl,
- closureSMRep = sm_rep, -- These four fields are a
- closureSRT = srt_info, -- CmmInfoTable
- closureProf = prof } -- ---
+ closureInfoLabel = info_lbl, -- These three fields are
+ closureSMRep = sm_rep, -- (almost) an info table
+ closureProf = prof } -- (we don't have an SRT yet)
where
name = idName id
sm_rep = mkHeapRep is_static ptr_wds nonptr_wds (lfClosureType lf_info)
@@ -920,15 +917,21 @@ cafBlackHoleInfoTable
, cit_prof = NoProfilingInfo
, cit_srt = NoC_SRT }
-staticClosureNeedsLink :: CmmInfoTable -> Bool
+staticClosureNeedsLink :: Bool -> CmmInfoTable -> Bool
-- A static closure needs a link field to aid the GC when traversing
-- the static closure graph. But it only needs such a field if either
-- a) it has an SRT
-- b) it's a constructor with one or more pointer fields
-- In case (b), the constructor's fields themselves play the role
-- of the SRT.
-staticClosureNeedsLink info_tbl@CmmInfoTable{ cit_rep = smrep }
+--
+-- At this point, the cit_srt field has not been calculated (that
+-- happens right at the end of the Cmm pipeline), but we do have the
+-- VarSet of CAFs that CoreToStg attached, and if that is empty there
+-- will definitely not be an SRT.
+--
+staticClosureNeedsLink has_srt CmmInfoTable{ cit_rep = smrep }
| isConRep smrep = not (isStaticNoCafCon smrep)
- | otherwise = needsSRT (cit_srt info_tbl)
-staticClosureNeedsLink _ = False
+ | otherwise = has_srt -- needsSRT (cit_srt info_tbl)
+staticClosureNeedsLink _ _ = False
diff --git a/compiler/codeGen/StgCmmCon.hs b/compiler/codeGen/StgCmmCon.hs
index a7af5662e9..c348570a54 100644
--- a/compiler/codeGen/StgCmmCon.hs
+++ b/compiler/codeGen/StgCmmCon.hs
@@ -92,6 +92,7 @@ cgTopRhsCon id con args
info_tbl
dontCareCCS -- Because it's static data
caffy -- Has CAF refs
+ False -- no SRT
payload
-- BUILD THE OBJECT
diff --git a/compiler/codeGen/StgCmmEnv.hs b/compiler/codeGen/StgCmmEnv.hs
index f128e3ad60..2edd09da12 100644
--- a/compiler/codeGen/StgCmmEnv.hs
+++ b/compiler/codeGen/StgCmmEnv.hs
@@ -27,7 +27,7 @@ module StgCmmEnv (
bindArgsToRegs, bindToReg, rebindToReg,
bindArgToReg, idToReg,
- getArgAmode, getNonVoidArgAmodes,
+ getArgAmode, getNonVoidArgAmodes,
getCgIdInfo,
maybeLetNoEscape,
) where
@@ -212,7 +212,6 @@ getNonVoidArgAmodes (arg:args)
; amodes <- getNonVoidArgAmodes args
; return ( amode : amodes ) }
-
------------------------------------------------------------------------
-- Interface functions for binding and re-binding names
------------------------------------------------------------------------
diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs
index 9faad02f46..68bfb6d9fe 100644
--- a/compiler/codeGen/StgCmmExpr.hs
+++ b/compiler/codeGen/StgCmmExpr.hs
@@ -77,7 +77,7 @@ cgExpr (StgLetNoEscape _ _ binds expr) =
; let join_id = mkBlockId (uniqFromSupply us)
; cgLneBinds join_id binds
; cgExpr expr
- ; emit $ mkLabel join_id}
+ ; emitLabel join_id}
cgExpr (StgCase expr _live_vars _save_vars bndr srt alt_type alts) =
cgCase expr bndr srt alt_type alts
@@ -130,7 +130,7 @@ cgLetNoEscapeRhs
cgLetNoEscapeRhs join_id local_cc bndr rhs =
do { (info, rhs_body) <- getCodeR $ cgLetNoEscapeRhsBody local_cc bndr rhs
; let (bid, _) = expectJust "cgLetNoEscapeRhs" $ maybeLetNoEscape info
- ; emit (outOfLine $ mkLabel bid <*> rhs_body <*> mkBranch join_id)
+ ; emitOutOfLine bid $ rhs_body <*> mkBranch join_id
; return info
}
@@ -278,21 +278,69 @@ Hence: two basic plans for
data GcPlan
= GcInAlts -- Put a GC check at the start the case alternatives,
[LocalReg] -- which binds these registers
- SRT -- using this SRT
- | NoGcInAlts -- The scrutinee is a primitive value, or a call to a
+ | NoGcInAlts -- The scrutinee is a primitive value, or a call to a
-- primitive op which does no GC. Absorb the allocation
-- of the case alternative(s) into the upstream check
-------------------------------------
--- See Note [case on Bool]
cgCase :: StgExpr -> Id -> SRT -> AltType -> [StgAlt] -> FCode ()
+
+cgCase (StgOpApp (StgPrimOp op) args _) bndr _srt (AlgAlt tycon) alts
+ | isEnumerationTyCon tycon -- Note [case on bool]
+ = do { tag_expr <- do_enum_primop op args
+
+ -- If the binder is not dead, convert the tag to a constructor
+ -- and assign it.
+ ; when (not (isDeadBinder bndr)) $ do
+ { tmp_reg <- bindArgToReg (NonVoid bndr)
+ ; emitAssign (CmmLocal tmp_reg)
+ (tagToClosure tycon tag_expr) }
+
+ ; (mb_deflt, branches) <- cgAlgAltRhss NoGcInAlts Nothing
+ (NonVoid bndr) alts
+ ; emitSwitch tag_expr branches mb_deflt 0 (tyConFamilySize tycon - 1)
+ }
+ where
+ do_enum_primop :: PrimOp -> [StgArg] -> FCode CmmExpr
+ do_enum_primop TagToEnumOp [arg] -- No code!
+ = getArgAmode (NonVoid arg)
+ do_enum_primop primop args
+ = do tmp <- newTemp bWord
+ cgPrimOp [tmp] primop args
+ return (CmmReg (CmmLocal tmp))
+
{-
-cgCase (OpApp ) bndr srt AlgAlt [(DataAlt flase, a2]
- | isBoolTy (idType bndr)
- , isDeadBndr bndr
- =
+Note [case on bool]
+
+This special case handles code like
+
+ case a <# b of
+ True ->
+ False ->
+
+If we let the ordinary case code handle it, we'll get something like
+
+ tmp1 = a < b
+ tmp2 = Bool_closure_tbl[tmp1]
+ if (tmp2 & 7 != 0) then ... // normal tagged case
+
+but this junk won't optimise away. What we really want is just an
+inline comparison:
+
+ if (a < b) then ...
+
+So we add a special case to generate
+
+ tmp1 = a < b
+ if (tmp1 == 0) then ...
+
+and later optimisations will further improve this.
+
+We should really change all these primops to return Int# instead, that
+would make this special case go away.
-}
+
-- Note [ticket #3132]: we might be looking at a case of a lifted Id
-- that was cast to an unlifted type. The Id will always be bottom,
-- but we don't want the code generator to fall over here. If we
@@ -319,7 +367,7 @@ cgCase (StgApp v []) bndr _ alt_type@(PrimAlt _) alts
do { when (not reps_compatible) $
panic "cgCase: reps do not match, perhaps a dodgy unsafeCoerce?"
; v_info <- getCgIdInfo v
- ; emit (mkAssign (CmmLocal (idToReg (NonVoid bndr))) (idInfoToAmode v_info))
+ ; emitAssign (CmmLocal (idToReg (NonVoid bndr))) (idInfoToAmode v_info)
; _ <- bindArgsToRegs [NonVoid bndr]
; cgAlts NoGcInAlts (NonVoid bndr) alt_type alts }
where
@@ -330,8 +378,11 @@ cgCase scrut@(StgApp v []) _ _ (PrimAlt _) _
do { mb_cc <- maybeSaveCostCentre True
; withSequel (AssignTo [idToReg (NonVoid v)] False) (cgExpr scrut)
; restoreCurrentCostCentre mb_cc
- ; emit $ mkComment $ mkFastString "should be unreachable code"
- ; emit $ withFreshLabel "l" (\l -> mkLabel l <*> mkBranch l)}
+ ; emitComment $ mkFastString "should be unreachable code"
+ ; l <- newLabelC
+ ; emitLabel l
+ ; emit (mkBranch l)
+ }
{-
case seq# a s of v
@@ -349,7 +400,7 @@ cgCase (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _) bndr srt alt_type alts
= -- handle seq#, same return convention as vanilla 'a'.
cgCase (StgApp a []) bndr srt alt_type alts
-cgCase scrut bndr srt alt_type alts
+cgCase scrut bndr _srt alt_type alts
= -- the general case
do { up_hp_usg <- getVirtHp -- Upstream heap usage
; let ret_bndrs = chooseReturnBndrs bndr alt_type alts
@@ -359,7 +410,7 @@ cgCase scrut bndr srt alt_type alts
| isSingleton alts = False
| up_hp_usg > 0 = False
| otherwise = True
- gc_plan = if gcInAlts then GcInAlts alt_regs srt else NoGcInAlts
+ gc_plan = if gcInAlts then GcInAlts alt_regs else NoGcInAlts
; mb_cc <- maybeSaveCostCentre simple_scrut
; withSequel (AssignTo alt_regs gcInAlts) (cgExpr scrut)
@@ -417,14 +468,14 @@ chooseReturnBndrs _ _ _ = panic "chooseReturnBndrs"
cgAlts :: GcPlan -> NonVoid Id -> AltType -> [StgAlt] -> FCode ()
-- At this point the result of the case are in the binders
cgAlts gc_plan _bndr PolyAlt [(_, _, _, rhs)]
- = maybeAltHeapCheck gc_plan (cgExpr rhs)
+ = maybeAltHeapCheck gc_plan Nothing (cgExpr rhs)
cgAlts gc_plan _bndr (UbxTupAlt _) [(_, _, _, rhs)]
- = maybeAltHeapCheck gc_plan (cgExpr rhs)
+ = maybeAltHeapCheck gc_plan Nothing (cgExpr rhs)
-- Here bndrs are *already* in scope, so don't rebind them
cgAlts gc_plan bndr (PrimAlt _) alts
- = do { tagged_cmms <- cgAltRhss gc_plan bndr alts
+ = do { tagged_cmms <- cgAltRhss gc_plan Nothing bndr alts
; let bndr_reg = CmmLocal (idToReg bndr)
(DEFAULT,deflt) = head tagged_cmms
@@ -433,20 +484,17 @@ cgAlts gc_plan bndr (PrimAlt _) alts
tagged_cmms' = [(lit,code)
| (LitAlt lit, code) <- tagged_cmms]
- ; emit (mkCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' deflt) }
+ ; emitCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' deflt }
cgAlts gc_plan bndr (AlgAlt tycon) alts
- = do { tagged_cmms <- cgAltRhss gc_plan bndr alts
-
+ = do { retry_lbl <- newLabelC
+ ; emitLabel retry_lbl -- Note [alg-alt heap checks]
+
+ ; (mb_deflt, branches) <- cgAlgAltRhss gc_plan (Just retry_lbl)
+ bndr alts
+
; let fam_sz = tyConFamilySize tycon
bndr_reg = CmmLocal (idToReg bndr)
- mb_deflt = case tagged_cmms of
- ((DEFAULT,rhs) : _) -> Just rhs
- _other -> Nothing
- -- DEFAULT is always first, if present
-
- branches = [ (dataConTagZ con, cmm)
- | (DataAlt con, cmm) <- tagged_cmms ]
-- Is the constructor tag in the node reg?
; if isSmallFamily fam_sz
@@ -467,23 +515,68 @@ cgAlts gc_plan bndr (AlgAlt tycon) alts
cgAlts _ _ _ _ = panic "cgAlts"
-- UbxTupAlt and PolyAlt have only one alternative
+
+-- Note [alg-alt heap check]
+--
+-- In an algebraic case with more than one alternative, we will have
+-- code like
+--
+-- L0:
+-- x = R1
+-- goto L1
+-- L1:
+-- if (x & 7 >= 2) then goto L2 else goto L3
+-- L2:
+-- Hp = Hp + 16
+-- if (Hp > HpLim) then goto L4
+-- ...
+-- L4:
+-- call gc() returns to L5
+-- L5:
+-- x = R1
+-- goto L1
+
-------------------
-cgAltRhss :: GcPlan -> NonVoid Id -> [StgAlt] -> FCode [(AltCon, CmmAGraph)]
-cgAltRhss gc_plan bndr alts
+cgAlgAltRhss :: GcPlan -> Maybe BlockId -> NonVoid Id -> [StgAlt]
+ -> FCode ( Maybe CmmAGraph
+ , [(ConTagZ, CmmAGraph)] )
+cgAlgAltRhss gc_plan retry_lbl bndr alts
+ = do { tagged_cmms <- cgAltRhss gc_plan retry_lbl bndr alts
+
+ ; let { mb_deflt = case tagged_cmms of
+ ((DEFAULT,rhs) : _) -> Just rhs
+ _other -> Nothing
+ -- DEFAULT is always first, if present
+
+ ; branches = [ (dataConTagZ con, cmm)
+ | (DataAlt con, cmm) <- tagged_cmms ]
+ }
+
+ ; return (mb_deflt, branches)
+ }
+
+
+-------------------
+cgAltRhss :: GcPlan -> Maybe BlockId -> NonVoid Id -> [StgAlt]
+ -> FCode [(AltCon, CmmAGraph)]
+cgAltRhss gc_plan retry_lbl bndr alts
= forkAlts (map cg_alt alts)
where
base_reg = idToReg bndr
cg_alt :: StgAlt -> FCode (AltCon, CmmAGraph)
cg_alt (con, bndrs, _uses, rhs)
= getCodeR $
- maybeAltHeapCheck gc_plan $
+ maybeAltHeapCheck gc_plan retry_lbl $
do { _ <- bindConArgs con base_reg bndrs
; cgExpr rhs
; return con }
-maybeAltHeapCheck :: GcPlan -> FCode a -> FCode a
-maybeAltHeapCheck NoGcInAlts code = code
-maybeAltHeapCheck (GcInAlts regs _) code = altHeapCheck regs code
+maybeAltHeapCheck :: GcPlan -> Maybe BlockId -> FCode a -> FCode a
+maybeAltHeapCheck NoGcInAlts _ code = code
+maybeAltHeapCheck (GcInAlts regs) mlbl code =
+ case mlbl of
+ Nothing -> altHeapCheck regs code
+ Just retry_lbl -> altHeapCheckReturnsTo regs retry_lbl code
-----------------------------------------------------------------------------
-- Tail calls
@@ -517,8 +610,8 @@ cgIdApp fun_id args
cgLneJump :: BlockId -> [LocalReg] -> [StgArg] -> FCode ()
cgLneJump blk_id lne_regs args -- Join point; discard sequel
= do { cmm_args <- getNonVoidArgAmodes args
- ; emit (mkMultiAssign lne_regs cmm_args
- <*> mkBranch blk_id) }
+ ; emitMultiAssign lne_regs cmm_args
+ ; emit (mkBranch blk_id) }
cgTailCall :: Id -> CgIdInfo -> [StgArg] -> FCode ()
cgTailCall fun_id fun_info args = do
@@ -529,27 +622,21 @@ cgTailCall fun_id fun_info args = do
ReturnIt -> emitReturn [fun] -- ToDo: does ReturnIt guarantee tagged?
EnterIt -> ASSERT( null args ) -- Discarding arguments
- do { let fun' = CmmLoad fun (cmmExprType fun)
- ; [ret,call] <- forkAlts [
- getCode $ emitReturn [fun], -- Is tagged; no need to untag
- getCode $ do -- emit (mkAssign nodeReg fun)
- emitCall (NativeNodeCall, NativeReturn)
- (entryCode fun') [fun]] -- Not tagged
- ; emit (mkCmmIfThenElse (cmmIsTagged fun) ret call) }
-
- SlowCall -> do -- A slow function call via the RTS apply routines
+ emitEnter fun
+
+ SlowCall -> do -- A slow function call via the RTS apply routines
{ tickySlowCall lf_info args
- ; emit $ mkComment $ mkFastString "slowCall"
+ ; emitComment $ mkFastString "slowCall"
; slowCall fun args }
-- A direct function call (possibly with some left-over arguments)
DirectEntry lbl arity -> do
{ tickyDirectCall arity args
; if node_points then
- do emit $ mkComment $ mkFastString "directEntry"
- emit (mkAssign nodeReg fun)
+ do emitComment $ mkFastString "directEntry"
+ emitAssign nodeReg fun
directCall lbl arity args
- else do emit $ mkComment $ mkFastString "directEntry else"
+ else do emitComment $ mkFastString "directEntry else"
directCall lbl arity args }
JumpToIt {} -> panic "cgTailCall" -- ???
@@ -561,33 +648,67 @@ cgTailCall fun_id fun_info args = do
node_points = nodeMustPointToIt lf_info
-{- Note [case on Bool]
- ~~~~~~~~~~~~~~~~~~~
-A case on a Boolean value does two things:
- 1. It looks up the Boolean in a closure table and assigns the
- result to the binder.
- 2. It branches to the True or False case through analysis
- of the closure assigned to the binder.
-But the indirection through the closure table is unnecessary
-if the assignment to the binder will be dead code (use isDeadBndr).
+emitEnter :: CmmExpr -> FCode ()
+emitEnter fun = do
+ { adjustHpBackwards
+ ; sequel <- getSequel
+ ; updfr_off <- getUpdFrameOff
+ ; case sequel of
+ -- For a return, we have the option of generating a tag-test or
+ -- not. If the value is tagged, we can return directly, which
+ -- is quicker than entering the value. This is a code
+ -- size/speed trade-off: when optimising for speed rather than
+ -- size we could generate the tag test.
+ --
+ -- Right now, we do what the old codegen did, and omit the tag
+ -- test, just generating an enter.
+ Return _ -> do
+ { let entry = entryCode $ closureInfoPtr $ CmmReg nodeReg
+ ; emit $ mkForeignJump NativeNodeCall entry
+ [cmmUntag fun] updfr_off
+ }
+
+ -- The result will be scrutinised in the sequel. This is where
+ -- we generate a tag-test to avoid entering the closure if
+ -- possible.
+ --
+ -- The generated code will be something like this:
+ --
+ -- R1 = fun -- copyout
+ -- if (fun & 7 != 0) goto Lcall else goto Lret
+ -- Lcall:
+ -- call [fun] returns to Lret
+ -- Lret:
+ -- fun' = R1 -- copyin
+ -- ...
+ --
+ -- Note in particular that the label Lret is used as a
+ -- destination by both the tag-test and the call. This is
+ -- becase Lret will necessarily be a proc-point, and we want to
+ -- ensure that we generate only one proc-point for this
+ -- sequence.
+ --
+ AssignTo res_regs _ -> do
+ { lret <- newLabelC
+ ; lcall <- newLabelC
+ ; let area = Young lret
+ ; let (off, copyin) = copyInOflow NativeReturn area res_regs
+ (outArgs, copyout) = copyOutOflow NativeNodeCall Call area
+ [fun] updfr_off (0,[])
+ -- refer to fun via nodeReg after the copyout, to avoid having
+ -- both live simultaneously; this sometimes enables fun to be
+ -- inlined in the RHS of the R1 assignment.
+ ; let entry = entryCode (closureInfoPtr (CmmReg nodeReg))
+ the_call = toCall entry (Just lret) updfr_off off outArgs
+ ; emit $
+ copyout <*>
+ mkCbranch (cmmIsTagged (CmmReg nodeReg)) lret lcall <*>
+ outOfLine lcall the_call <*>
+ mkLabel lret <*>
+ copyin
+ }
+ }
-The following example illustrates how badly the code turns out:
- STG:
- case <=## [ww_s7Hx y_s7HD] of wild2_sbH8 {
- GHC.Types.False -> <true code> // sbH8 dead
- GHC.Types.True -> <false code> // sbH8 dead
- };
- Cmm:
- _s7HD::F64 = F64[_sbH7::I64 + 7]; // MidAssign
- _ccsW::I64 = %MO_F_Le_W64(_s7Hx::F64, _s7HD::F64); // MidAssign
- // emitReturn // MidComment
- _sbH8::I64 = I64[ghczmprim_GHCziBool_Bool_closure_tbl + (_ccsW::I64 << 3)]; // MidAssign
- _ccsX::I64 = _sbH8::I64 & 7; // MidAssign
- if (_ccsX::I64 >= 2) goto ccsH; else goto ccsI; // LastCondBranch
-
-The assignments to _sbH8 and _ccsX are completely unnecessary.
-Instead, we should branch based on the value of _ccsW.
--}
{- Note [Better Alt Heap Checks]
If two function calls can share a return point, then they will also
diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs
index 5bc0f7af4e..c67e0e0c95 100644
--- a/compiler/codeGen/StgCmmForeign.hs
+++ b/compiler/codeGen/StgCmmForeign.hs
@@ -22,6 +22,7 @@ import StgCmmEnv
import StgCmmMonad
import StgCmmUtils
import StgCmmClosure
+import StgCmmLayout
import BlockId
import Cmm
@@ -45,15 +46,16 @@ import Control.Monad
-- Code generation for Foreign Calls
-----------------------------------------------------------------------------
-cgForeignCall :: [LocalReg] -- r1,r2 where to put the results
- -> [ForeignHint]
- -> ForeignCall -- the op
+-- | emit code for a foreign call, and return the results to the sequel.
+--
+cgForeignCall :: ForeignCall -- the op
-> [StgArg] -- x,y arguments
+ -> Type -- result type
-> FCode ()
--- Emits code for an unsafe foreign call: r1, r2 = foo( x, y, z )
-cgForeignCall results result_hints (CCall (CCallSpec target cconv safety)) stg_args
+cgForeignCall (CCall (CCallSpec target cconv safety)) stg_args res_ty
= do { cmm_args <- getFCallArgs stg_args
+ ; (res_regs, res_hints) <- newUnboxedTupleRegs res_ty
; let ((call_args, arg_hints), cmm_target)
= case target of
StaticTarget _ _ False ->
@@ -63,7 +65,7 @@ cgForeignCall results result_hints (CCall (CCallSpec target cconv safety)) stg_a
= case mPkgId of
Nothing -> ForeignLabelInThisPackage
Just pkgId -> ForeignLabelInPackage pkgId
- size = call_size cmm_args
+ size = call_size cmm_args
in ( unzip cmm_args
, CmmLit (CmmLabel
(mkForeignLabel lbl size labelSource IsFunction)))
@@ -71,13 +73,31 @@ cgForeignCall results result_hints (CCall (CCallSpec target cconv safety)) stg_a
DynamicTarget -> case cmm_args of
(fn,_):rest -> (unzip rest, fn)
[] -> panic "cgForeignCall []"
- fc = ForeignConvention cconv arg_hints result_hints
+ fc = ForeignConvention cconv arg_hints res_hints
call_target = ForeignTarget cmm_target fc
- ; srt <- getSRTInfo NoSRT -- SLPJ: Not sure what SRT
- -- is right here
- -- JD: Does it matter in the new codegen?
- ; emitForeignCall safety results call_target call_args srt CmmMayReturn }
+ -- we want to emit code for the call, and then emitReturn.
+ -- However, if the sequel is AssignTo, we shortcut a little
+ -- and generate a foreign call that assigns the results
+ -- directly. Otherwise we end up generating a bunch of
+ -- useless "r = r" assignments, which are not merely annoying:
+ -- they prevent the common block elimination from working correctly
+ -- in the case of a safe foreign call.
+ -- See Note [safe foreign call convention]
+ --
+ ; sequel <- getSequel
+ ; case sequel of
+ AssignTo assign_to_these _ ->
+ do { emitForeignCall safety assign_to_these call_target
+ call_args CmmMayReturn
+ }
+
+ _something_else ->
+ do { emitForeignCall safety res_regs call_target
+ call_args CmmMayReturn
+ ; emitReturn (map (CmmReg . CmmLocal) res_regs)
+ }
+ }
where
-- in the stdcall calling convention, the symbol needs @size appended
-- to it, where size is the total number of bytes of arguments. We
@@ -88,16 +108,83 @@ cgForeignCall results result_hints (CCall (CCallSpec target cconv safety)) stg_a
| otherwise = Nothing
-- ToDo: this might not be correct for 64-bit API
- arg_size (arg, _) = max (widthInBytes $ typeWidth $ cmmExprType arg) wORD_SIZE
+ arg_size (arg, _) = max (widthInBytes $ typeWidth $ cmmExprType arg)
+ wORD_SIZE
+
+{- Note [safe foreign call convention]
+
+The simple thing to do for a safe foreign call would be the same as an
+unsafe one: just
+
+ emitForeignCall ...
+ emitReturn ...
+
+but consider what happens in this case
+
+ case foo x y z of
+ (# s, r #) -> ...
+
+The sequel is AssignTo [r]. The call to newUnboxedTupleRegs picks [r]
+as the result reg, and we generate
+
+ r = foo(x,y,z) returns to L1 -- emitForeignCall
+ L1:
+ r = r -- emitReturn
+ goto L2
+L2:
+ ...
+
+Now L1 is a proc point (by definition, it is the continuation of the
+safe foreign call). If L2 does a heap check, then L2 will also be a
+proc point.
+
+Furthermore, the stack layout algorithm has to arrange to save r
+somewhere between the call and the jump to L1, which is annoying: we
+would have to treat r differently from the other live variables, which
+have to be saved *before* the call.
+
+So we adopt a special convention for safe foreign calls: the results
+are copied out according to the NativeReturn convention by the call,
+and the continuation of the call should copyIn the results. (The
+copyOut code is actually inserted when the safe foreign call is
+lowered later). The result regs attached to the safe foreign call are
+only used temporarily to hold the results before they are copied out.
+
+We will now generate this:
+
+ r = foo(x,y,z) returns to L1
+ L1:
+ r = R1 -- copyIn, inserted by mkSafeCall
+ goto L2
+ L2:
+ ... r ...
+
+And when the safe foreign call is lowered later (see Note [lower safe
+foreign calls]) we get this:
+
+ suspendThread()
+ r = foo(x,y,z)
+ resumeThread()
+ R1 = r -- copyOut, inserted by lowerSafeForeignCall
+ jump L1
+ L1:
+ r = R1 -- copyIn, inserted by mkSafeCall
+ goto L2
+ L2:
+ ... r ...
+
+Now consider what happens if L2 does a heap check: the Adams
+optimisation kicks in and commons up L1 with the heap-check
+continuation, resulting in just one proc point instead of two. Yay!
+-}
+
emitCCall :: [(CmmFormal,ForeignHint)]
-> CmmExpr
-> [(CmmActual,ForeignHint)]
-> FCode ()
emitCCall hinted_results fn hinted_args
- = emitForeignCall PlayRisky results target args
- NoC_SRT -- No SRT b/c we PlayRisky
- CmmMayReturn
+ = emitForeignCall PlayRisky results target args CmmMayReturn
where
(args, arg_hints) = unzip hinted_args
(results, result_hints) = unzip hinted_results
@@ -107,7 +194,7 @@ emitCCall hinted_results fn hinted_args
emitPrimCall :: [CmmFormal] -> CallishMachOp -> [CmmActual] -> FCode ()
emitPrimCall res op args
- = emitForeignCall PlayRisky res (PrimTarget op) args NoC_SRT CmmMayReturn
+ = emitForeignCall PlayRisky res (PrimTarget op) args CmmMayReturn
-- alternative entry point, used by CmmParse
emitForeignCall
@@ -115,11 +202,10 @@ emitForeignCall
-> [CmmFormal] -- where to put the results
-> ForeignTarget -- the op
-> [CmmActual] -- arguments
- -> C_SRT -- the SRT of the calls continuation
-> CmmReturnInfo -- This can say "never returns"
-- only RTS procedures do this
-> FCode ()
-emitForeignCall safety results target args _srt _ret
+emitForeignCall safety results target args _ret
| not (playSafe safety) = do
let (caller_save, caller_load) = callerSaveVolatileRegs
emit caller_save
@@ -129,7 +215,9 @@ emitForeignCall safety results target args _srt _ret
| otherwise = do
updfr_off <- getUpdFrameOff
temp_target <- load_target_into_temp target
- emit $ mkSafeCall temp_target results args updfr_off (playInterruptible safety)
+ emit =<< mkSafeCall temp_target results args updfr_off
+ (playInterruptible safety)
+
{-
@@ -162,7 +250,7 @@ maybe_assign_temp e
-- expressions, which are wrong here.
-- this is a NonPtr because it only duplicates an existing
reg <- newTemp (cmmExprType e) --TODO FIXME NOW
- emit (mkAssign (CmmLocal reg) e)
+ emitAssign (CmmLocal reg) e
return (CmmReg (CmmLocal reg))
-- -----------------------------------------------------------------------------
@@ -184,12 +272,12 @@ saveThreadState =
emitSaveThreadState :: BlockId -> FCode ()
emitSaveThreadState bid = do
-- CurrentTSO->stackobj->sp = Sp;
- emit $ mkStore (cmmOffset (CmmLoad (cmmOffset stgCurrentTSO tso_stackobj) bWord) stack_SP)
- (CmmStackSlot (CallArea (Young bid)) (widthInBytes (typeWidth gcWord)))
+ emitStore (cmmOffset (CmmLoad (cmmOffset stgCurrentTSO tso_stackobj) bWord) stack_SP)
+ (CmmStackSlot (Young bid) (widthInBytes (typeWidth gcWord)))
emit closeNursery
-- and save the current cost centre stack in the TSO when profiling:
when opt_SccProfilingOn $
- emit (mkStore (cmmOffset stgCurrentTSO tso_CCCS) curCCS)
+ emitStore (cmmOffset stgCurrentTSO tso_CCCS) curCCS
-- CurrentNursery->free = Hp+1;
closeNursery :: CmmAGraph
diff --git a/compiler/codeGen/StgCmmHeap.hs b/compiler/codeGen/StgCmmHeap.hs
index 25161722f7..611304b5e0 100644
--- a/compiler/codeGen/StgCmmHeap.hs
+++ b/compiler/codeGen/StgCmmHeap.hs
@@ -10,7 +10,7 @@ module StgCmmHeap (
getVirtHp, setVirtHp, setRealHp,
getHpRelOffset, hpRel,
- entryHeapCheck, altHeapCheck,
+ entryHeapCheck, altHeapCheck, altHeapCheckReturnsTo,
mkVirtHeapOffsets, mkVirtConstrOffsets,
mkStaticClosureFields, mkStaticClosure,
@@ -20,7 +20,6 @@ module StgCmmHeap (
#include "HsVersions.h"
-import CmmType
import StgSyn
import CLabel
import StgCmmLayout
@@ -34,6 +33,7 @@ import StgCmmEnv
import MkGraph
+import Hoopl hiding ((<*>), mkBranch)
import SMRep
import Cmm
import CmmUtils
@@ -109,7 +109,7 @@ allocDynClosureCmm info_tbl lf_info use_cc _blame_cc amodes_w_offsets
-- ALLOCATE THE OBJECT
; base <- getHpRelOffset info_offset
- ; emit (mkComment $ mkFastString "allocDynClosure")
+ ; emitComment $ mkFastString "allocDynClosure"
; emitSetDynHdr base info_ptr use_cc
; let (cmm_args, offsets) = unzip amodes_w_offsets
; hpStore base cmm_args offsets
@@ -151,9 +151,10 @@ mkStaticClosureFields
:: CmmInfoTable
-> CostCentreStack
-> CafInfo
+ -> Bool -- SRT is non-empty?
-> [CmmLit] -- Payload
-> [CmmLit] -- The full closure
-mkStaticClosureFields info_tbl ccs caf_refs payload
+mkStaticClosureFields info_tbl ccs caf_refs has_srt payload
= mkStaticClosure info_lbl ccs payload padding
static_link_field saved_info_field
where
@@ -178,8 +179,10 @@ mkStaticClosureFields info_tbl ccs caf_refs payload
| otherwise = ASSERT(null payload) [mkIntCLit 0]
static_link_field
- | is_caf || staticClosureNeedsLink info_tbl = [static_link_value]
- | otherwise = []
+ | is_caf || staticClosureNeedsLink has_srt info_tbl
+ = [static_link_value]
+ | otherwise
+ = []
saved_info_field
| is_caf = [mkIntCLit 0]
@@ -335,11 +338,12 @@ entryHeapCheck cl_info offset nodeSet arity args code
args' = map (CmmReg . CmmLocal) args
setN = case nodeSet of
- Just n -> mkAssign nodeReg (CmmReg $ CmmLocal n)
+ Just _ -> mkNop -- No need to assign R1, it already
+ -- points to the closure
Nothing -> mkAssign nodeReg $
CmmLit (CmmLabel $ staticClosureLabel cl_info)
- {- Thunks: Set R1 = node, jump GCEnter1
+ {- Thunks: jump GCEnter1
Function (fast): Set R1 = node, jump GCFun
Function (slow): Set R1 = node, call generic_gc -}
gc_call upd = setN <*> gc_lbl upd
@@ -354,7 +358,10 @@ entryHeapCheck cl_info offset nodeSet arity args code
- GC calls, but until then this fishy code works -}
updfr_sz <- getUpdFrameOff
- heapCheck True (gc_call updfr_sz) code
+
+ loop_id <- newLabelC
+ emitLabel loop_id
+ heapCheck True (gc_call updfr_sz <*> mkBranch loop_id) code
{-
-- This code is slightly outdated now and we could easily keep the above
@@ -400,21 +407,29 @@ entryHeapCheck cl_info offset nodeSet arity args code
-}
---------------------------------------------------------------
--- A heap/stack check at in a case alternative
+-- ------------------------------------------------------------
+-- A heap/stack check in a case alternative
altHeapCheck :: [LocalReg] -> FCode a -> FCode a
altHeapCheck regs code
+ = do loop_id <- newLabelC
+ emitLabel loop_id
+ altHeapCheckReturnsTo regs loop_id code
+
+altHeapCheckReturnsTo :: [LocalReg] -> Label -> FCode a -> FCode a
+altHeapCheckReturnsTo regs retry_lbl code
= do updfr_sz <- getUpdFrameOff
- heapCheck False (gc_call updfr_sz) code
+ gc_call_code <- gc_call updfr_sz
+ heapCheck False (gc_call_code <*> mkBranch retry_lbl) code
where
reg_exprs = map (CmmReg . CmmLocal) regs
+ -- Note [stg_gc arguments]
gc_call sp =
case rts_label regs of
- Just gc -> mkCall (CmmLit gc) (GC, GC) regs reg_exprs sp
- Nothing -> mkCall generic_gc (GC, GC) [] [] sp
+ Just gc -> mkCall (CmmLit gc) (GC, GC) regs reg_exprs sp (0,[])
+ Nothing -> mkCall generic_gc (GC, GC) [] [] sp (0,[])
rts_label [reg]
| isGcPtrType ty = Just (mkGcLabel "stg_gc_unpt_r1")
@@ -432,6 +447,23 @@ altHeapCheck regs code
rts_label _ = Nothing
+-- Note [stg_gc arguments]
+-- It might seem that we could avoid passing the arguments to the
+-- stg_gc function, because they are already in the right registers.
+-- While this is usually the case, it isn't always. Sometimes the
+-- code generator has cleverly avoided the eval in a case, e.g. in
+-- ffi/should_run/4221.hs we found
+--
+-- case a_r1mb of z
+-- FunPtr x y -> ...
+--
+-- where a_r1mb is bound a top-level constructor, and is known to be
+-- evaluated. The codegen just assigns x, y and z, and continues;
+-- R1 is never assigned.
+--
+-- So we'll have to rely on optimisations to eliminatethese
+-- assignments where possible.
+
-- | The generic GC procedure; no params, no results
generic_gc :: CmmExpr
@@ -447,7 +479,7 @@ heapCheck checkStack do_gc code
= getHeapUsage $ \ hpHw ->
-- Emit heap checks, but be sure to do it lazily so
-- that the conditionals on hpHw don't cause a black hole
- do { emit $ do_checks checkStack hpHw do_gc
+ do { codeOnly $ do_checks checkStack hpHw do_gc
; tickyAllocHeap hpHw
; doGranAllocate hpHw
; setRealHp hpHw
@@ -456,22 +488,25 @@ heapCheck checkStack do_gc code
do_checks :: Bool -- Should we check the stack?
-> WordOff -- Heap headroom
-> CmmAGraph -- What to do on failure
- -> CmmAGraph
-do_checks checkStack alloc do_gc
- = withFreshLabel "gc" $ \ loop_id ->
- withFreshLabel "gc" $ \ gc_id ->
- mkLabel loop_id
- <*> (let hpCheck = if alloc == 0 then mkNop
- else mkAssign hpReg bump_hp <*>
- mkCmmIfThen hp_oflo (alloc_n <*> mkBranch gc_id)
- in if checkStack
- then mkCmmIfThenElse sp_oflo (mkBranch gc_id) hpCheck
- else hpCheck)
- <*> mkComment (mkFastString "outOfLine should follow:")
- <*> outOfLine (mkLabel gc_id
- <*> mkComment (mkFastString "outOfLine here")
- <*> do_gc
- <*> mkBranch loop_id)
+ -> FCode ()
+do_checks checkStack alloc do_gc = do
+ gc_id <- newLabelC
+ hp_check <- if alloc == 0
+ then return mkNop
+ else do
+ ifthen <- mkCmmIfThen hp_oflo (alloc_n <*> mkBranch gc_id)
+ return (mkAssign hpReg bump_hp <*> ifthen)
+
+ if checkStack
+ then emit =<< mkCmmIfThenElse sp_oflo (mkBranch gc_id) hp_check
+ else emit hp_check
+
+ emit $ mkComment (mkFastString "outOfLine should follow:")
+
+ emitOutOfLine gc_id $
+ mkComment (mkFastString "outOfLine here") <*>
+ do_gc -- this is expected to jump back somewhere
+
-- Test for stack pointer exhaustion, then
-- bump heap pointer, and test for heap exhaustion
-- Note that we don't move the heap pointer unless the
diff --git a/compiler/codeGen/StgCmmLayout.hs b/compiler/codeGen/StgCmmLayout.hs
index 86986efdfa..9593af1f50 100644
--- a/compiler/codeGen/StgCmmLayout.hs
+++ b/compiler/codeGen/StgCmmLayout.hs
@@ -15,7 +15,7 @@
module StgCmmLayout (
mkArgDescr,
- emitCall, emitReturn,
+ emitCall, emitReturn, adjustHpBackwards,
emitClosureProcAndInfoTable,
emitClosureAndInfoTable,
@@ -41,10 +41,12 @@ import StgCmmEnv
import StgCmmTicky
import StgCmmMonad
import StgCmmUtils
+import StgCmmProf
import MkGraph
import SMRep
import Cmm
+import CmmUtils
import CLabel
import StgSyn
import Id
@@ -52,6 +54,7 @@ import Name
import TyCon ( PrimRep(..) )
import BasicTypes ( RepArity )
import StaticFlags
+import Module
import Constants
import Util
@@ -63,38 +66,60 @@ import FastString
-- Call and return sequences
------------------------------------------------------------------------
-emitReturn :: [CmmExpr] -> FCode ()
--- Return multiple values to the sequel
+-- | Return multiple values to the sequel
+--
+-- If the sequel is @Return@
+--
+-- > return (x,y)
+--
+-- If the sequel is @AssignTo [p,q]@
+--
+-- > p=x; q=y;
--
--- If the sequel is Return
--- return (x,y)
--- If the sequel is AssignTo [p,q]
--- p=x; q=y;
+emitReturn :: [CmmExpr] -> FCode ()
emitReturn results
= do { sequel <- getSequel;
; updfr_off <- getUpdFrameOff
- ; emit $ mkComment $ mkFastString ("emitReturn: " ++ show sequel)
+ ; emitComment $ mkFastString ("emitReturn: " ++ show sequel)
; case sequel of
Return _ ->
do { adjustHpBackwards
; emit (mkReturnSimple results updfr_off) }
AssignTo regs adjust ->
do { if adjust then adjustHpBackwards else return ()
- ; emit (mkMultiAssign regs results) }
+ ; emitMultiAssign regs results }
}
+
+-- | @emitCall conv fun args@ makes a call to the entry-code of @fun@,
+-- using the call/return convention @conv@, passing @args@, and
+-- returning the results to the current sequel.
+--
emitCall :: (Convention, Convention) -> CmmExpr -> [CmmExpr] -> FCode ()
--- (cgCall fun args) makes a call to the entry-code of 'fun',
--- passing 'args', and returning the results to the current sequel
-emitCall convs@(callConv, _) fun args
+emitCall convs fun args
+ = emitCallWithExtraStack convs fun args noExtraStack
+
+
+-- | @emitCallWithExtraStack conv fun args stack@ makes a call to the
+-- entry-code of @fun@, using the call/return convention @conv@,
+-- passing @args@, pushing some extra stack frames described by
+-- @stack@, and returning the results to the current sequel.
+--
+emitCallWithExtraStack
+ :: (Convention, Convention) -> CmmExpr -> [CmmExpr]
+ -> (ByteOff, [(CmmExpr,ByteOff)]) -> FCode ()
+emitCallWithExtraStack convs@(callConv, _) fun args extra_stack
= do { adjustHpBackwards
; sequel <- getSequel
; updfr_off <- getUpdFrameOff
- ; emit $ mkComment $ mkFastString ("emitCall: " ++ show sequel)
- ; case sequel of
- Return _ -> emit (mkForeignJump callConv fun args updfr_off)
- AssignTo res_regs _ -> emit (mkCall fun convs res_regs args updfr_off)
- }
+ ; emitComment $ mkFastString ("emitCallWithExtraStack: " ++ show sequel)
+ ; case sequel of
+ Return _ ->
+ emit $ mkForeignJumpExtra callConv fun args updfr_off extra_stack
+ AssignTo res_regs _ -> do
+ emit =<< mkCall fun convs res_regs args updfr_off extra_stack
+ }
+
adjustHpBackwards :: FCode ()
-- This function adjusts and heap pointers just before a tail call or
@@ -127,59 +152,137 @@ adjustHpBackwards
-- Making calls: directCall and slowCall
-------------------------------------------------------------------------
+-- General plan is:
+-- - we'll make *one* fast call, either to the function itself
+-- (directCall) or to stg_ap_<pat>_fast (slowCall)
+-- Any left-over arguments will be pushed on the stack,
+--
+-- e.g. Sp[old+8] = arg1
+-- Sp[old+16] = arg2
+-- Sp[old+32] = stg_ap_pp_info
+-- R2 = arg3
+-- R3 = arg4
+-- call f() return to Nothing updfr_off: 32
+
+
directCall :: CLabel -> RepArity -> [StgArg] -> FCode ()
-- (directCall f n args)
-- calls f(arg1, ..., argn), and applies the result to the remaining args
-- The function f has arity n, and there are guaranteed at least n args
-- Both arity and args include void args
directCall lbl arity stg_args
- = do { cmm_args <- getNonVoidArgAmodes stg_args
- ; direct_call "directCall" lbl arity cmm_args (argsReps stg_args) }
+ = do { argreps <- getArgRepsAmodes stg_args
+ ; direct_call "directCall" lbl arity argreps }
+
slowCall :: CmmExpr -> [StgArg] -> FCode ()
-- (slowCall fun args) applies fun to args, returning the results to Sequel
slowCall fun stg_args
- = do { cmm_args <- getNonVoidArgAmodes stg_args
- ; slow_call fun cmm_args (argsReps stg_args) }
+ = do { dflags <- getDynFlags
+ ; argsreps <- getArgRepsAmodes stg_args
+ ; let (rts_fun, arity) = slowCallPattern (map fst argsreps)
+ ; call <- getCode $ direct_call "slow_call"
+ (mkRtsApFastLabel rts_fun) arity argsreps
+ ; emitComment $ mkFastString ("slow_call for " ++
+ showSDoc dflags (ppr fun) ++
+ " with pat " ++ unpackFS rts_fun)
+ ; emit (mkAssign nodeReg fun <*> call)
+ }
+
--------------
-direct_call :: String -> CLabel -> RepArity -> [CmmExpr] -> [ArgRep] -> FCode ()
--- NB1: (length args) may be less than (length reps), because
--- the args exclude the void ones
--- NB2: 'arity' refers to the *reps*
-direct_call caller lbl arity args reps
- | debugIsOn && arity > length reps -- Too few args
+direct_call :: String -> CLabel -> RepArity -> [(ArgRep,Maybe CmmExpr)] -> FCode ()
+direct_call caller lbl arity args
+ | debugIsOn && arity > length args -- Too few args
= do -- Caller should ensure that there enough args!
- pprPanic "direct_call" (text caller <+> ppr arity
- <+> ppr lbl <+> ppr (length reps)
- <+> ppr args <+> ppr reps )
-
- | null rest_reps -- Precisely the right number of arguments
- = emitCall (NativeDirectCall, NativeReturn) target args
-
- | otherwise -- Over-saturated call
- = ASSERT( arity == length initial_reps )
- do { pap_id <- newTemp gcWord
- ; withSequel (AssignTo [pap_id] True)
- (emitCall (NativeDirectCall, NativeReturn) target fast_args)
- ; slow_call (CmmReg (CmmLocal pap_id))
- rest_args rest_reps }
+ pprPanic "direct_call" $
+ text caller <+> ppr arity <+>
+ ppr lbl <+> ppr (length args) <+>
+ ppr (map snd args) <+> ppr (map fst args)
+
+ | null rest_args -- Precisely the right number of arguments
+ = emitCall (NativeDirectCall, NativeReturn) target (nonVArgs args)
+
+ | otherwise -- Note [over-saturated calls]
+ = emitCallWithExtraStack (NativeDirectCall, NativeReturn)
+ target (nonVArgs fast_args) (mkStkOffsets stack_args)
where
target = CmmLit (CmmLabel lbl)
- (initial_reps, rest_reps) = splitAt arity reps
- arg_arity = count isNonV initial_reps
- (fast_args, rest_args) = splitAt arg_arity args
+ (fast_args, rest_args) = splitAt arity args
+ stack_args = slowArgs rest_args
---------------
-slow_call :: CmmExpr -> [CmmExpr] -> [ArgRep] -> FCode ()
-slow_call fun args reps
- = do dflags <- getDynFlags
- call <- getCode $ direct_call "slow_call" (mkRtsApFastLabel rts_fun) arity args reps
- emit $ mkComment $ mkFastString ("slow_call for " ++ showSDoc dflags (ppr fun) ++
- " with pat " ++ unpackFS rts_fun)
- emit (mkAssign nodeReg fun <*> call)
+
+-- When constructing calls, it is easier to keep the ArgReps and the
+-- CmmExprs zipped together. However, a void argument has no
+-- representation, so we need to use Maybe CmmExpr (the alternative of
+-- using zeroCLit or even undefined would work, but would be ugly).
+--
+getArgRepsAmodes :: [StgArg] -> FCode [(ArgRep, Maybe CmmExpr)]
+getArgRepsAmodes = mapM getArgRepAmode
+ where getArgRepAmode arg
+ | V <- rep = return (V, Nothing)
+ | otherwise = do expr <- getArgAmode (NonVoid arg)
+ return (rep, Just expr)
+ where rep = toArgRep (argPrimRep arg)
+
+nonVArgs :: [(ArgRep, Maybe CmmExpr)] -> [CmmExpr]
+nonVArgs [] = []
+nonVArgs ((_,Nothing) : args) = nonVArgs args
+nonVArgs ((_,Just arg) : args) = arg : nonVArgs args
+
+{-
+Note [over-saturated calls]
+
+The natural thing to do for an over-saturated call would be to call
+the function with the correct number of arguments, and then apply the
+remaining arguments to the value returned, e.g.
+
+ f a b c d (where f has arity 2)
+ -->
+ r = call f(a,b)
+ call r(c,d)
+
+but this entails
+ - saving c and d on the stack
+ - making a continuation info table
+ - at the continuation, loading c and d off the stack into regs
+ - finally, call r
+
+Note that since there are a fixed number of different r's
+(e.g. stg_ap_pp_fast), we can also pre-compile continuations
+that correspond to each of them, rather than generating a fresh
+one for each over-saturated call.
+
+Not only does this generate much less code, it is faster too. We will
+generate something like:
+
+Sp[old+16] = c
+Sp[old+24] = d
+Sp[old+32] = stg_ap_pp_info
+call f(a,b) -- usual calling convention
+
+For the purposes of the CmmCall node, we count this extra stack as
+just more arguments that we are passing on the stack (cml_args).
+-}
+
+-- | 'slowArgs' takes a list of function arguments and prepares them for
+-- pushing on the stack for "extra" arguments to a function which requires
+-- fewer arguments than we currently have.
+slowArgs :: [(ArgRep, Maybe CmmExpr)] -> [(ArgRep, Maybe CmmExpr)]
+slowArgs [] = []
+slowArgs args -- careful: reps contains voids (V), but args does not
+ | opt_SccProfilingOn = save_cccs ++ this_pat ++ slowArgs rest_args
+ | otherwise = this_pat ++ slowArgs rest_args
where
- (rts_fun, arity) = slowCallPattern reps
+ (arg_pat, n) = slowCallPattern (map fst args)
+ (call_args, rest_args) = splitAt n args
+
+ stg_ap_pat = mkCmmRetInfoLabel rtsPackageId arg_pat
+ this_pat = (N, Just (mkLblExpr stg_ap_pat)) : call_args
+ save_cccs = [(N, Just (mkLblExpr save_cccs_lbl)), (N, Just curCCS)]
+ save_cccs_lbl = mkCmmRetInfoLabel rtsPackageId (fsLit "stg_restore_cccs")
+
+
-- These cases were found to cover about 99% of all slow calls:
slowCallPattern :: [ArgRep] -> (FastString, RepArity)
@@ -202,6 +305,30 @@ slowCallPattern [] = (fsLit "stg_ap_0", 0)
-------------------------------------------------------------------------
+-- Fix the byte-offsets of a bunch of things to push on the stack
+
+-- This is used for pushing slow-call continuations.
+-- See Note [over-saturated calls].
+
+mkStkOffsets
+ :: [(ArgRep, Maybe CmmExpr)] -- things to make offsets for
+ -> ( ByteOff -- OUTPUTS: Topmost allocated word
+ , [(CmmExpr, ByteOff)] ) -- things with offsets (voids filtered out)
+mkStkOffsets things
+ = loop 0 [] (reverse things)
+ where
+ loop offset offs [] = (offset,offs)
+ loop offset offs ((_,Nothing):things) = loop offset offs things
+ -- ignore Void arguments
+ loop offset offs ((rep,Just thing):things)
+ = loop thing_off ((thing, thing_off):offs) things
+ where
+ thing_off = offset + argRepSizeW rep * wORD_SIZE
+ -- offset of thing is offset+size, because we're
+ -- growing the stack *downwards* as the offsets increase.
+
+
+-------------------------------------------------------------------------
-- Classifying arguments: ArgRep
-------------------------------------------------------------------------
@@ -237,10 +364,7 @@ isNonV :: ArgRep -> Bool
isNonV V = False
isNonV _ = True
-argsReps :: [StgArg] -> [ArgRep]
-argsReps = map (toArgRep . argPrimRep)
-
-argRepSizeW :: ArgRep -> WordOff -- Size in words
+argRepSizeW :: ArgRep -> WordOff -- Size in words
argRepSizeW N = 1
argRepSizeW P = 1
argRepSizeW F = 1
diff --git a/compiler/codeGen/StgCmmMonad.hs b/compiler/codeGen/StgCmmMonad.hs
index 4eea38e22c..cc9919a4a0 100644
--- a/compiler/codeGen/StgCmmMonad.hs
+++ b/compiler/codeGen/StgCmmMonad.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE GADTs #-}
-----------------------------------------------------------------------------
--
-- Monad for Stg to C-- code generation
@@ -16,16 +17,21 @@
module StgCmmMonad (
FCode, -- type
- initC, thenC, thenFC, listCs, listFCs, mapCs, mapFCs,
+ initC, runC, thenC, thenFC, listCs, listFCs, mapCs, mapFCs,
returnFC, fixC, fixC_, nopC, whenC,
newUnique, newUniqSupply,
+ newLabelC, emitLabel,
+
emit, emitDecl, emitProc, emitProcWithConvention, emitSimpleProc,
+ emitOutOfLine, emitAssign, emitStore, emitComment,
getCmm, cgStmtsToBlocks,
getCodeR, getCode, getHeapUsage,
- forkClosureBody, forkStatics, forkAlts, forkProc, codeOnly,
+ mkCmmIfThenElse, mkCmmIfThen, mkCall, mkCmmCall, mkSafeCall,
+
+ forkClosureBody, forkStatics, forkAlts, forkProc, codeOnly,
ConTagZ,
@@ -69,12 +75,12 @@ import VarEnv
import OrdList
import Unique
import UniqSupply
-import FastString(sLit)
+import FastString
import Outputable
import Control.Monad
import Data.List
-import Prelude hiding( sequence )
+import Prelude hiding( sequence, succ )
import qualified Prelude( sequence )
infixr 9 `thenC` -- Right-associative!
@@ -95,12 +101,12 @@ instance Monad FCode where
{-# INLINE thenFC #-}
{-# INLINE returnFC #-}
-initC :: DynFlags -> Module -> FCode a -> IO a
-initC dflags mod (FCode code)
- = do { uniqs <- mkSplitUniqSupply 'c'
- ; case code (initCgInfoDown dflags mod) (initCgState uniqs) of
- (res, _) -> return res
- }
+initC :: IO CgState
+initC = do { uniqs <- mkSplitUniqSupply 'c'
+ ; return (initCgState uniqs) }
+
+runC :: DynFlags -> Module -> CgState -> FCode a -> (a,CgState)
+runC dflags mod st (FCode code) = code (initCgInfoDown dflags mod) st
returnFC :: a -> FCode a
returnFC val = FCode (\_info_down state -> (val, state))
@@ -270,6 +276,8 @@ data HeapUsage =
type VirtualHpOffset = WordOff
+
+
initCgState :: UniqSupply -> CgState
initCgState uniqs
= MkCgState { cgs_stmts = mkNop, cgs_tops = nilOL,
@@ -308,7 +316,6 @@ initHpUsage = HeapUsage { virtHp = 0, realHp = 0 }
maxHpHw :: HeapUsage -> VirtualHpOffset -> HeapUsage
hp_usg `maxHpHw` hw = hp_usg { virtHp = virtHp hp_usg `max` hw }
-
--------------------------------------------------------
-- Operators for getting and setting the state and "info_down".
--------------------------------------------------------
@@ -591,6 +598,33 @@ getHeapUsage fcode
-- ----------------------------------------------------------------------------
-- Combinators for emitting code
+emitCgStmt :: CgStmt -> FCode ()
+emitCgStmt stmt
+ = do { state <- getState
+ ; setState $ state { cgs_stmts = cgs_stmts state `snocOL` stmt }
+ }
+
+emitLabel :: BlockId -> FCode ()
+emitLabel id = emitCgStmt (CgLabel id)
+
+emitComment :: FastString -> FCode ()
+#if 0 /* def DEBUG */
+emitComment s = emitCgStmt (CgStmt (CmmComment s))
+#else
+emitComment _ = return ()
+#endif
+
+emitAssign :: CmmReg -> CmmExpr -> FCode ()
+emitAssign l r = emitCgStmt (CgStmt (CmmAssign l r))
+
+emitStore :: CmmExpr -> CmmExpr -> FCode ()
+emitStore l r = emitCgStmt (CgStmt (CmmStore l r))
+
+
+newLabelC :: FCode BlockId
+newLabelC = do { u <- newUnique
+ ; return $ mkBlockId u }
+
emit :: CmmAGraph -> FCode ()
emit ag
= do { state <- getState
@@ -601,6 +635,9 @@ emitDecl decl
= do { state <- getState
; setState $ state { cgs_tops = cgs_tops state `snocOL` decl } }
+emitOutOfLine :: BlockId -> CmmAGraph -> FCode ()
+emitOutOfLine l stmts = emitCgStmt (CgFork l stmts)
+
emitProcWithConvention :: Convention -> CmmInfoTable -> CLabel -> [CmmFormal] ->
CmmAGraph -> FCode ()
emitProcWithConvention conv info lbl args blocks
@@ -629,6 +666,55 @@ getCmm code
; setState $ state2 { cgs_tops = cgs_tops state1 }
; return (fromOL (cgs_tops state2)) }
+
+mkCmmIfThenElse :: CmmExpr -> CmmAGraph -> CmmAGraph -> FCode CmmAGraph
+mkCmmIfThenElse e tbranch fbranch = do
+ endif <- newLabelC
+ tid <- newLabelC
+ fid <- newLabelC
+ return $ mkCbranch e tid fid <*>
+ mkLabel tid <*> tbranch <*> mkBranch endif <*>
+ mkLabel fid <*> fbranch <*> mkLabel endif
+
+mkCmmIfThen :: CmmExpr -> CmmAGraph -> FCode CmmAGraph
+mkCmmIfThen e tbranch = do
+ endif <- newLabelC
+ tid <- newLabelC
+ return $ mkCbranch e tid endif <*>
+ mkLabel tid <*> tbranch <*> mkLabel endif
+
+
+mkCall :: CmmExpr -> (Convention, Convention) -> [CmmFormal] -> [CmmActual]
+ -> UpdFrameOffset -> (ByteOff,[(CmmExpr,ByteOff)]) -> FCode CmmAGraph
+mkCall f (callConv, retConv) results actuals updfr_off extra_stack = do
+ k <- newLabelC
+ let area = Young k
+ (off, copyin) = copyInOflow retConv area results
+ copyout = mkCallReturnsTo f callConv actuals k off updfr_off extra_stack
+ return (copyout <*> mkLabel k <*> copyin)
+
+mkCmmCall :: CmmExpr -> [CmmFormal] -> [CmmActual] -> UpdFrameOffset
+ -> FCode CmmAGraph
+mkCmmCall f results actuals updfr_off
+ = mkCall f (NativeDirectCall, NativeReturn) results actuals updfr_off (0,[])
+
+
+mkSafeCall :: ForeignTarget -> [CmmFormal] -> [CmmActual]
+ -> UpdFrameOffset -> Bool
+ -> FCode CmmAGraph
+mkSafeCall t fs as upd i = do
+ k <- newLabelC
+ let (_off, copyout) = copyInOflow NativeReturn (Young k) fs
+ -- see Note [safe foreign call convention]
+ return
+ ( mkStore (CmmStackSlot (Young k) (widthInBytes wordWidth))
+ (CmmLit (CmmBlock k))
+ <*> mkLast (CmmForeignCall { tgt=t, res=fs, args=as, succ=k
+ , updfr=upd, intrbl=i })
+ <*> mkLabel k
+ <*> copyout
+ )
+
-- ----------------------------------------------------------------------------
-- CgStmts
@@ -640,4 +726,3 @@ cgStmtsToBlocks :: CmmAGraph -> FCode CmmGraph
cgStmtsToBlocks stmts
= do { us <- newUniqSupply
; return (initUs_ us (lgraphOfAGraph stmts)) }
-
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
index efa234b5a6..bd783a3b30 100644
--- a/compiler/codeGen/StgCmmPrim.hs
+++ b/compiler/codeGen/StgCmmPrim.hs
@@ -14,7 +14,9 @@
-- for details
module StgCmmPrim (
- cgOpApp
+ cgOpApp,
+ cgPrimOp -- internal(ish), used by cgCase to get code for a
+ -- comparison without also turning it into a Bool.
) where
#include "HsVersions.h"
@@ -67,14 +69,9 @@ cgOpApp :: StgOp -- The op
-- Foreign calls
cgOpApp (StgFCallOp fcall _) stg_args res_ty
- = do { (res_regs, res_hints) <- newUnboxedTupleRegs res_ty
- -- Choose result regs r1, r2
- -- Note [Foreign call results]
- ; cgForeignCall res_regs res_hints fcall stg_args
- -- r1, r2 = foo( x, y )
- ; emitReturn (map (CmmReg . CmmLocal) res_regs) }
- -- return (r1, r2)
-
+ = cgForeignCall fcall stg_args res_ty
+ -- Note [Foreign call results]
+
-- tagToEnum# is special: we need to pull the constructor
-- out of the table, and perform an appropriate return.
@@ -229,23 +226,23 @@ emitPrimOp [res] SparkOp [arg]
[(tmp2,NoHint)]
(CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit "newSpark"))))
[(CmmReg (CmmGlobal BaseReg), AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)]
- emit (mkAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
+ emitAssign (CmmLocal res) (CmmReg (CmmLocal tmp))
emitPrimOp [res] GetCCSOfOp [arg]
- = emit (mkAssign (CmmLocal res) val)
+ = emitAssign (CmmLocal res) val
where
val | opt_SccProfilingOn = costCentreFrom (cmmUntag arg)
| otherwise = CmmLit zeroCLit
emitPrimOp [res] GetCurrentCCSOp [_dummy_arg]
- = emit (mkAssign (CmmLocal res) curCCS)
+ = emitAssign (CmmLocal res) curCCS
emitPrimOp [res] ReadMutVarOp [mutv]
- = emit (mkAssign (CmmLocal res) (cmmLoadIndexW mutv fixedHdrSize gcWord))
+ = emitAssign (CmmLocal res) (cmmLoadIndexW mutv fixedHdrSize gcWord)
emitPrimOp [] WriteMutVarOp [mutv,var]
= do
- emit (mkStore (cmmOffsetW mutv fixedHdrSize) var)
+ emitStore (cmmOffsetW mutv fixedHdrSize) var
emitCCall
[{-no results-}]
(CmmLit (CmmLabel mkDirty_MUT_VAR_Label))
@@ -269,32 +266,32 @@ emitPrimOp res@[] TouchOp args@[_arg]
-- #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a)
emitPrimOp [res] ByteArrayContents_Char [arg]
- = emit (mkAssign (CmmLocal res) (cmmOffsetB arg arrWordsHdrSize))
+ = emitAssign (CmmLocal res) (cmmOffsetB arg arrWordsHdrSize)
-- #define stableNameToIntzh(r,s) (r = ((StgStableName *)s)->sn)
emitPrimOp [res] StableNameToIntOp [arg]
- = emit (mkAssign (CmmLocal res) (cmmLoadIndexW arg fixedHdrSize bWord))
+ = emitAssign (CmmLocal res) (cmmLoadIndexW arg fixedHdrSize bWord)
-- #define eqStableNamezh(r,sn1,sn2) \
-- (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn))
emitPrimOp [res] EqStableNameOp [arg1,arg2]
- = emit (mkAssign (CmmLocal res) (CmmMachOp mo_wordEq [
+ = emitAssign (CmmLocal res) (CmmMachOp mo_wordEq [
cmmLoadIndexW arg1 fixedHdrSize bWord,
cmmLoadIndexW arg2 fixedHdrSize bWord
- ]))
+ ])
emitPrimOp [res] ReallyUnsafePtrEqualityOp [arg1,arg2]
- = emit (mkAssign (CmmLocal res) (CmmMachOp mo_wordEq [arg1,arg2]))
+ = emitAssign (CmmLocal res) (CmmMachOp mo_wordEq [arg1,arg2])
-- #define addrToHValuezh(r,a) r=(P_)a
emitPrimOp [res] AddrToAnyOp [arg]
- = emit (mkAssign (CmmLocal res) arg)
+ = emitAssign (CmmLocal res) arg
-- #define dataToTagzh(r,a) r=(GET_TAG(((StgClosure *)a)->header.info))
-- Note: argument may be tagged!
emitPrimOp [res] DataToTagOp [arg]
- = emit (mkAssign (CmmLocal res) (getConstrTag (cmmUntag arg)))
+ = emitAssign (CmmLocal res) (getConstrTag (cmmUntag arg))
{- Freezing arrays-of-ptrs requires changing an info table, for the
benefit of the generational collector. It needs to scavenge mutable
@@ -317,7 +314,7 @@ emitPrimOp [res] UnsafeFreezeArrayArrayOp [arg]
-- #define unsafeFreezzeByteArrayzh(r,a) r=(a)
emitPrimOp [res] UnsafeFreezeByteArrayOp [arg]
- = emit (mkAssign (CmmLocal res) arg)
+ = emitAssign (CmmLocal res) arg
-- Copying pointer arrays
@@ -497,11 +494,11 @@ emitPrimOp [res] PopCntOp [w] = emitPopCntCall res w wordWidth
-- The rest just translate straightforwardly
emitPrimOp [res] op [arg]
| nopOp op
- = emit (mkAssign (CmmLocal res) arg)
+ = emitAssign (CmmLocal res) arg
| Just (mop,rep) <- narrowOp op
- = emit (mkAssign (CmmLocal res) $
- CmmMachOp (mop rep wordWidth) [CmmMachOp (mop wordWidth rep) [arg]])
+ = emitAssign (CmmLocal res) $
+ CmmMachOp (mop rep wordWidth) [CmmMachOp (mop wordWidth rep) [arg]]
emitPrimOp r@[res] op args
| Just prim <- callishOp op
@@ -746,15 +743,15 @@ loadArrPtrsSize addr = CmmLoad (cmmOffsetB addr off) bWord
mkBasicIndexedRead :: ByteOff -> Maybe MachOp -> CmmType
-> LocalReg -> CmmExpr -> CmmExpr -> FCode ()
mkBasicIndexedRead off Nothing read_rep res base idx
- = emit (mkAssign (CmmLocal res) (cmmLoadIndexOffExpr off read_rep base idx))
+ = emitAssign (CmmLocal res) (cmmLoadIndexOffExpr off read_rep base idx)
mkBasicIndexedRead off (Just cast) read_rep res base idx
- = emit (mkAssign (CmmLocal res) (CmmMachOp cast [
- cmmLoadIndexOffExpr off read_rep base idx]))
+ = emitAssign (CmmLocal res) (CmmMachOp cast [
+ cmmLoadIndexOffExpr off read_rep base idx])
mkBasicIndexedWrite :: ByteOff -> Maybe MachOp
-> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
mkBasicIndexedWrite off Nothing base idx val
- = emit (mkStore (cmmIndexOffExpr off (typeWidth (cmmExprType val)) base idx) val)
+ = emitStore (cmmIndexOffExpr off (typeWidth (cmmExprType val)) base idx) val
mkBasicIndexedWrite off (Just cast) base idx val
= mkBasicIndexedWrite off Nothing base idx (CmmMachOp cast [val])
@@ -805,7 +802,7 @@ doCopyMutableByteArrayOp = emitCopyByteArray copy
getCode $ emitMemmoveCall dst_p src_p bytes (CmmLit (mkIntCLit 1)),
getCode $ emitMemcpyCall dst_p src_p bytes (CmmLit (mkIntCLit 1))
]
- emit $ mkCmmIfThenElse (cmmEqWord src dst) moveCall cpyCall
+ emit =<< mkCmmIfThenElse (cmmEqWord src dst) moveCall cpyCall
emitCopyByteArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
-> FCode ())
@@ -875,7 +872,7 @@ doCopyMutableArrayOp = emitCopyArray copy
getCode $ emitMemmoveCall dst_p src_p bytes (CmmLit (mkIntCLit wORD_SIZE)),
getCode $ emitMemcpyCall dst_p src_p bytes (CmmLit (mkIntCLit wORD_SIZE))
]
- emit $ mkCmmIfThenElse (cmmEqWord src dst) moveCall cpyCall
+ emit =<< mkCmmIfThenElse (cmmEqWord src dst) moveCall cpyCall
emitCopyArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
-> FCode ())
diff --git a/compiler/codeGen/StgCmmProf.hs b/compiler/codeGen/StgCmmProf.hs
index 6a53317385..9ff4d0be07 100644
--- a/compiler/codeGen/StgCmmProf.hs
+++ b/compiler/codeGen/StgCmmProf.hs
@@ -103,7 +103,7 @@ initUpdFrameProf :: CmmExpr -> FCode ()
-- Initialise the profiling field of an update frame
initUpdFrameProf frame_amode
= ifProfiling $ -- frame->header.prof.ccs = CCCS
- emit (mkStore (cmmOffsetB frame_amode oFFSET_StgHeader_ccs) curCCS)
+ emitStore (cmmOffsetB frame_amode oFFSET_StgHeader_ccs) curCCS
-- frame->header.prof.hp.rs = NULL (or frame-header.prof.hp.ldvw = 0)
-- is unnecessary because it is not used anyhow.
@@ -143,7 +143,7 @@ saveCurrentCostCentre
= return Nothing
| otherwise
= do { local_cc <- newTemp ccType
- ; emit (mkAssign (CmmLocal local_cc) curCCS)
+ ; emitAssign (CmmLocal local_cc) curCCS
; return (Just local_cc) }
restoreCurrentCostCentre :: Maybe LocalReg -> FCode ()
@@ -338,9 +338,9 @@ ldvEnter cl_ptr
-- if (era > 0) {
-- LDVW((c)) = (LDVW((c)) & LDV_CREATE_MASK) |
-- era | LDV_STATE_USE }
- emit (mkCmmIfThenElse (CmmMachOp mo_wordUGt [loadEra, CmmLit zeroCLit])
+ emit =<< mkCmmIfThenElse (CmmMachOp mo_wordUGt [loadEra, CmmLit zeroCLit])
(mkStore ldv_wd new_ldv_wd)
- mkNop)
+ mkNop
where
-- don't forget to substract node's tag
ldv_wd = ldvWord cl_ptr
diff --git a/compiler/codeGen/StgCmmTicky.hs b/compiler/codeGen/StgCmmTicky.hs
index d0432315ab..698bf32709 100644
--- a/compiler/codeGen/StgCmmTicky.hs
+++ b/compiler/codeGen/StgCmmTicky.hs
@@ -180,7 +180,7 @@ registerTickyCtr :: CLabel -> FCode ()
-- ticky_entry_ctrs = & (f_ct); /* mark it as "registered" */
-- f_ct.registeredp = 1 }
registerTickyCtr ctr_lbl
- = emit (mkCmmIfThen test (catAGraphs register_stmts))
+ = emit =<< mkCmmIfThen test (catAGraphs register_stmts)
where
-- krc: code generator doesn't handle Not, so we test for Eq 0 instead
test = CmmMachOp (MO_Eq wordWidth)
@@ -352,7 +352,7 @@ bumpHistogram _lbl _n
bumpHistogramE :: LitString -> CmmExpr -> FCode ()
bumpHistogramE lbl n
= do t <- newTemp cLong
- emit (mkAssign (CmmLocal t) n)
+ emitAssign (CmmLocal t) n
emit (mkCmmIfThen (CmmMachOp (MO_U_Le cLongWidth) [CmmReg (CmmLocal t), eight])
(mkAssign (CmmLocal t) eight))
emit (addToMem cLong
diff --git a/compiler/codeGen/StgCmmUtils.hs b/compiler/codeGen/StgCmmUtils.hs
index bb4a653c05..273e59b0b5 100644
--- a/compiler/codeGen/StgCmmUtils.hs
+++ b/compiler/codeGen/StgCmmUtils.hs
@@ -18,12 +18,11 @@ module StgCmmUtils (
emitDataLits, mkDataLits,
emitRODataLits, mkRODataLits,
emitRtsCall, emitRtsCallWithVols, emitRtsCallWithResult, emitRtsCallGen,
- assignTemp, newTemp, withTemp,
+ assignTemp, newTemp,
newUnboxedTupleRegs,
- mkMultiAssign, mkCmmSwitch, mkCmmLitSwitch,
- emitSwitch,
+ emitMultiAssign, emitCmmLitSwitch, emitSwitch,
tagToClosure, mkTaggedObjectLoad,
@@ -72,6 +71,7 @@ import Module
import Literal
import Digraph
import ListSetOps
+import VarSet
import Util
import Unique
import DynFlags
@@ -204,14 +204,14 @@ emitRtsCallGen
emitRtsCallGen res pkg fun args _vols safe
= do { updfr_off <- getUpdFrameOff
; emit caller_save
- ; emit $ call updfr_off
+ ; call updfr_off
; emit caller_load }
where
call updfr_off =
if safe then
- mkCmmCall fun_expr res' args' updfr_off
+ emit =<< mkCmmCall fun_expr res' args' updfr_off
else
- mkUnsafeCall (ForeignTarget fun_expr
+ emit $ mkUnsafeCall (ForeignTarget fun_expr
(ForeignConvention CCallConv arg_hints res_hints)) res' args'
(args', arg_hints) = unzip args
(res', res_hints) = unzip res
@@ -441,7 +441,7 @@ assignTemp :: CmmExpr -> FCode LocalReg
assignTemp (CmmReg (CmmLocal reg)) = return reg
assignTemp e = do { uniq <- newUnique
; let reg = LocalReg uniq (cmmExprType e)
- ; emit (mkAssign (CmmLocal reg) e)
+ ; emitAssign (CmmLocal reg) e
; return reg }
newTemp :: CmmType -> FCode LocalReg
@@ -471,10 +471,10 @@ newUnboxedTupleRegs res_ty
-------------------------------------------------------------------------
--- mkMultiAssign
+-- emitMultiAssign
-------------------------------------------------------------------------
-mkMultiAssign :: [LocalReg] -> [CmmExpr] -> CmmAGraph
+emitMultiAssign :: [LocalReg] -> [CmmExpr] -> FCode ()
-- Emit code to perform the assignments in the
-- input simultaneously, using temporary variables when necessary.
@@ -489,14 +489,13 @@ type Stmt = (LocalReg, CmmExpr) -- r := e
-- s1 assigns to something s2 uses
-- that is, if s1 should *follow* s2 in the final order
-mkMultiAssign [] [] = mkNop
-mkMultiAssign [reg] [rhs] = mkAssign (CmmLocal reg) rhs
-mkMultiAssign regs rhss = ASSERT( equalLength regs rhss )
- unscramble ([1..] `zip` (regs `zip` rhss))
+emitMultiAssign [] [] = return ()
+emitMultiAssign [reg] [rhs] = emitAssign (CmmLocal reg) rhs
+emitMultiAssign regs rhss = ASSERT( equalLength regs rhss )
+ unscramble ([1..] `zip` (regs `zip` rhss))
-unscramble :: [Vrtx] -> CmmAGraph
-unscramble vertices
- = catAGraphs (map do_component components)
+unscramble :: [Vrtx] -> FCode ()
+unscramble vertices = mapM_ do_component components
where
edges :: [ (Vrtx, Key, [Key]) ]
edges = [ (vertex, key1, edges_from stmt1)
@@ -511,19 +510,19 @@ unscramble vertices
-- do_components deal with one strongly-connected component
-- Not cyclic, or singleton? Just do it
- do_component :: SCC Vrtx -> CmmAGraph
- do_component (AcyclicSCC (_,stmt)) = mk_graph stmt
+ do_component :: SCC Vrtx -> FCode ()
+ do_component (AcyclicSCC (_,stmt)) = mk_graph stmt
do_component (CyclicSCC []) = panic "do_component"
do_component (CyclicSCC [(_,stmt)]) = mk_graph stmt
-- Cyclic? Then go via temporaries. Pick one to
-- break the loop and try again with the rest.
- do_component (CyclicSCC ((_,first_stmt) : rest))
- = withUnique $ \u ->
+ do_component (CyclicSCC ((_,first_stmt) : rest)) = do
+ u <- newUnique
let (to_tmp, from_tmp) = split u first_stmt
- in mk_graph to_tmp
- <*> unscramble rest
- <*> mk_graph from_tmp
+ mk_graph to_tmp
+ unscramble rest
+ mk_graph from_tmp
split :: Unique -> Stmt -> (Stmt, Stmt)
split uniq (reg, rhs)
@@ -532,8 +531,8 @@ unscramble vertices
rep = cmmExprType rhs
tmp = LocalReg uniq rep
- mk_graph :: Stmt -> CmmAGraph
- mk_graph (reg, rhs) = mkAssign (CmmLocal reg) rhs
+ mk_graph :: Stmt -> FCode ()
+ mk_graph (reg, rhs) = emitAssign (CmmLocal reg) rhs
mustFollow :: Stmt -> Stmt -> Bool
(reg, _) `mustFollow` (_, rhs) = CmmLocal reg `regUsedIn` rhs
@@ -551,7 +550,7 @@ emitSwitch :: CmmExpr -- Tag to switch on
-> FCode ()
emitSwitch tag_expr branches mb_deflt lo_tag hi_tag
= do { dflags <- getDynFlags
- ; emit (mkCmmSwitch (via_C dflags) tag_expr branches mb_deflt lo_tag hi_tag) }
+ ; mkCmmSwitch (via_C dflags) tag_expr branches mb_deflt lo_tag hi_tag }
where
via_C dflags | HscC <- hscTarget dflags = True
| otherwise = False
@@ -563,38 +562,40 @@ mkCmmSwitch :: Bool -- True <=> never generate a conditional tree
-> Maybe CmmAGraph -- Default branch (if any)
-> ConTagZ -> ConTagZ -- Min and Max possible values; behaviour
-- outside this range is undefined
- -> CmmAGraph
+ -> FCode ()
-- First, two rather common cases in which there is no work to do
-mkCmmSwitch _ _ [] (Just code) _ _ = code
-mkCmmSwitch _ _ [(_,code)] Nothing _ _ = code
+mkCmmSwitch _ _ [] (Just code) _ _ = emit code
+mkCmmSwitch _ _ [(_,code)] Nothing _ _ = emit code
-- Right, off we go
-mkCmmSwitch via_C tag_expr branches mb_deflt lo_tag hi_tag
- = withFreshLabel "switch join" $ \ join_lbl ->
- label_default join_lbl mb_deflt $ \ mb_deflt ->
- label_branches join_lbl branches $ \ branches ->
- assignTemp' tag_expr $ \tag_expr' ->
+mkCmmSwitch via_C tag_expr branches mb_deflt lo_tag hi_tag = do
+ join_lbl <- newLabelC
+ mb_deflt_lbl <- label_default join_lbl mb_deflt
+ branches_lbls <- label_branches join_lbl branches
+ tag_expr' <- assignTemp' tag_expr
- mk_switch tag_expr' (sortBy (comparing fst) branches) mb_deflt
- lo_tag hi_tag via_C
- -- Sort the branches before calling mk_switch
- <*> mkLabel join_lbl
+ emit =<< mk_switch tag_expr' (sortBy (comparing fst) branches_lbls)
+ mb_deflt_lbl lo_tag hi_tag via_C
+
+ -- Sort the branches before calling mk_switch
+
+ emitLabel join_lbl
mk_switch :: CmmExpr -> [(ConTagZ, BlockId)]
-> Maybe BlockId
-> ConTagZ -> ConTagZ -> Bool
- -> CmmAGraph
+ -> FCode CmmAGraph
-- SINGLETON TAG RANGE: no case analysis to do
mk_switch _tag_expr [(tag, lbl)] _ lo_tag hi_tag _via_C
| lo_tag == hi_tag
= ASSERT( tag == lo_tag )
- mkBranch lbl
+ return (mkBranch lbl)
-- SINGLETON BRANCH, NO DEFAULT: no case analysis to do
mk_switch _tag_expr [(_tag,lbl)] Nothing _ _ _
- = mkBranch lbl
+ = return (mkBranch lbl)
-- The simplifier might have eliminated a case
-- so we may have e.g. case xs of
-- [] -> e
@@ -603,7 +604,7 @@ mk_switch _tag_expr [(_tag,lbl)] Nothing _ _ _
-- SINGLETON BRANCH: one equality check to do
mk_switch tag_expr [(tag,lbl)] (Just deflt) _ _ _
- = mkCbranch cond deflt lbl
+ = return (mkCbranch cond deflt lbl)
where
cond = cmmNeWord tag_expr (CmmLit (mkIntCLit tag))
-- We have lo_tag < hi_tag, but there's only one branch,
@@ -636,30 +637,34 @@ mk_switch tag_expr branches mb_deflt lo_tag hi_tag via_C
arms :: [Maybe BlockId]
arms = [ find_branch i | i <- [real_lo_tag..real_hi_tag]]
in
- mkSwitch (cmmOffset tag_expr (- real_lo_tag)) arms
+ return (mkSwitch (cmmOffset tag_expr (- real_lo_tag)) arms)
-- if we can knock off a bunch of default cases with one if, then do so
| Just deflt <- mb_deflt, (lowest_branch - lo_tag) >= n_branches
- = mkCmmIfThenElse
+ = do stmts <- mk_switch tag_expr branches mb_deflt
+ lowest_branch hi_tag via_C
+ mkCmmIfThenElse
(cmmULtWord tag_expr (CmmLit (mkIntCLit lowest_branch)))
(mkBranch deflt)
- (mk_switch tag_expr branches mb_deflt
- lowest_branch hi_tag via_C)
+ stmts
| Just deflt <- mb_deflt, (hi_tag - highest_branch) >= n_branches
- = mkCmmIfThenElse
+ = do stmts <- mk_switch tag_expr branches mb_deflt
+ lo_tag highest_branch via_C
+ mkCmmIfThenElse
(cmmUGtWord tag_expr (CmmLit (mkIntCLit highest_branch)))
(mkBranch deflt)
- (mk_switch tag_expr branches mb_deflt
- lo_tag highest_branch via_C)
+ stmts
| otherwise -- Use an if-tree
- = mkCmmIfThenElse
+ = do lo_stmts <- mk_switch tag_expr lo_branches mb_deflt
+ lo_tag (mid_tag-1) via_C
+ hi_stmts <- mk_switch tag_expr hi_branches mb_deflt
+ mid_tag hi_tag via_C
+ mkCmmIfThenElse
(cmmUGeWord tag_expr (CmmLit (mkIntCLit mid_tag)))
- (mk_switch tag_expr hi_branches mb_deflt
- mid_tag hi_tag via_C)
- (mk_switch tag_expr lo_branches mb_deflt
- lo_tag (mid_tag-1) via_C)
+ hi_stmts
+ lo_stmts
-- we test (e >= mid_tag) rather than (e < mid_tag), because
-- the former works better when e is a comparison, and there
-- are two tags 0 & 1 (mid_tag == 1). In this case, the code
@@ -714,30 +719,31 @@ mk_switch tag_expr branches mb_deflt lo_tag hi_tag via_C
is_lo (t,_) = t < mid_tag
--------------
-mkCmmLitSwitch :: CmmExpr -- Tag to switch on
+emitCmmLitSwitch :: CmmExpr -- Tag to switch on
-> [(Literal, CmmAGraph)] -- Tagged branches
-> CmmAGraph -- Default branch (always)
- -> CmmAGraph -- Emit the code
+ -> FCode () -- Emit the code
-- Used for general literals, whose size might not be a word,
-- where there is always a default case, and where we don't know
-- the range of values for certain. For simplicity we always generate a tree.
--
-- ToDo: for integers we could do better here, perhaps by generalising
-- mk_switch and using that. --SDM 15/09/2004
-mkCmmLitSwitch _scrut [] deflt = deflt
-mkCmmLitSwitch scrut branches deflt
- = assignTemp' scrut $ \ scrut' ->
- withFreshLabel "switch join" $ \ join_lbl ->
- label_code join_lbl deflt $ \ deflt ->
- label_branches join_lbl branches $ \ branches ->
- mk_lit_switch scrut' deflt (sortBy (comparing fst) branches)
- <*> mkLabel join_lbl
+emitCmmLitSwitch _scrut [] deflt = emit deflt
+emitCmmLitSwitch scrut branches deflt = do
+ scrut' <- assignTemp' scrut
+ join_lbl <- newLabelC
+ deflt_lbl <- label_code join_lbl deflt
+ branches_lbls <- label_branches join_lbl branches
+ emit =<< mk_lit_switch scrut' deflt_lbl
+ (sortBy (comparing fst) branches_lbls)
+ emitLabel join_lbl
mk_lit_switch :: CmmExpr -> BlockId
-> [(Literal,BlockId)]
- -> CmmAGraph
+ -> FCode CmmAGraph
mk_lit_switch scrut deflt [(lit,blk)]
- = mkCbranch (CmmMachOp ne [scrut, CmmLit cmm_lit]) deflt blk
+ = return (mkCbranch (CmmMachOp ne [scrut, CmmLit cmm_lit]) deflt blk)
where
cmm_lit = mkSimpleLit lit
cmm_ty = cmmLitType cmm_lit
@@ -745,9 +751,9 @@ mk_lit_switch scrut deflt [(lit,blk)]
ne = if isFloatType cmm_ty then MO_F_Ne rep else MO_Ne rep
mk_lit_switch scrut deflt_blk_id branches
- = mkCmmIfThenElse cond
- (mk_lit_switch scrut deflt_blk_id lo_branches)
- (mk_lit_switch scrut deflt_blk_id hi_branches)
+ = do lo_blk <- mk_lit_switch scrut deflt_blk_id lo_branches
+ hi_blk <- mk_lit_switch scrut deflt_blk_id hi_branches
+ mkCmmIfThenElse cond lo_blk hi_blk
where
n_branches = length branches
(mid_lit,_) = branches !! (n_branches `div` 2)
@@ -761,49 +767,42 @@ mk_lit_switch scrut deflt_blk_id branches
--------------
-label_default :: BlockId -> Maybe CmmAGraph
- -> (Maybe BlockId -> CmmAGraph)
- -> CmmAGraph
-label_default _ Nothing thing_inside
- = thing_inside Nothing
-label_default join_lbl (Just code) thing_inside
- = label_code join_lbl code $ \ lbl ->
- thing_inside (Just lbl)
+label_default :: BlockId -> Maybe CmmAGraph -> FCode (Maybe BlockId)
+label_default _ Nothing
+ = return Nothing
+label_default join_lbl (Just code)
+ = do lbl <- label_code join_lbl code
+ return (Just lbl)
--------------
-label_branches :: BlockId -> [(a,CmmAGraph)]
- -> ([(a,BlockId)] -> CmmAGraph)
- -> CmmAGraph
-label_branches _join_lbl [] thing_inside
- = thing_inside []
-label_branches join_lbl ((tag,code):branches) thing_inside
- = label_code join_lbl code $ \ lbl ->
- label_branches join_lbl branches $ \ branches' ->
- thing_inside ((tag,lbl):branches')
+label_branches :: BlockId -> [(a,CmmAGraph)] -> FCode [(a,BlockId)]
+label_branches _join_lbl []
+ = return []
+label_branches join_lbl ((tag,code):branches)
+ = do lbl <- label_code join_lbl code
+ branches' <- label_branches join_lbl branches
+ return ((tag,lbl):branches')
--------------
-label_code :: BlockId -> CmmAGraph -> (BlockId -> CmmAGraph) -> CmmAGraph
--- (label_code J code fun)
+label_code :: BlockId -> CmmAGraph -> FCode BlockId
+-- label_code J code
-- generates
--- [L: code; goto J] fun L
-label_code join_lbl code thing_inside
- = withFreshLabel "switch" $ \lbl ->
- outOfLine (mkLabel lbl <*> code <*> mkBranch join_lbl)
- <*> thing_inside lbl
-
+-- [L: code; goto J]
+-- and returns L
+label_code join_lbl code = do
+ lbl <- newLabelC
+ emitOutOfLine lbl (code <*> mkBranch join_lbl)
+ return lbl
--------------
-assignTemp' :: CmmExpr -> (CmmExpr -> CmmAGraph) -> CmmAGraph
-assignTemp' e thing_inside
- | isTrivialCmmExpr e = thing_inside e
- | otherwise = withTemp (cmmExprType e) $ \ lreg ->
- let reg = CmmLocal lreg in
- mkAssign reg e <*> thing_inside (CmmReg reg)
-
-withTemp :: CmmType -> (LocalReg -> CmmAGraph) -> CmmAGraph
-withTemp rep thing_inside
- = withUnique $ \uniq -> thing_inside (LocalReg uniq rep)
-
+assignTemp' :: CmmExpr -> FCode CmmExpr
+assignTemp' e
+ | isTrivialCmmExpr e = return e
+ | otherwise = do
+ lreg <- newTemp (cmmExprType e)
+ let reg = CmmLocal lreg
+ emitAssign reg e
+ return (CmmReg reg)
-------------------------------------------------------------------------
--
@@ -811,36 +810,13 @@ withTemp rep thing_inside
--
-------------------------------------------------------------------------
--- There is just one SRT for each top level binding; all the nested
--- bindings use sub-sections of this SRT. The label is passed down to
--- the nested bindings via the monad.
-
-getSRTInfo :: SRT -> FCode C_SRT
-getSRTInfo (SRTEntries {}) = panic "getSRTInfo"
-
-getSRTInfo (SRT off len bmp)
- | len > hALF_WORD_SIZE_IN_BITS || bmp == [fromIntegral srt_escape]
- = do { id <- newUnique
- -- ; top_srt <- getSRTLabel
- ; let srt_desc_lbl = mkLargeSRTLabel id
- -- JD: We're not constructing and emitting SRTs in the back end,
- -- which renders this code wrong (it now names a now-non-existent label).
- -- ; emitRODataLits srt_desc_lbl
- -- ( cmmLabelOffW top_srt off
- -- : mkWordCLit (fromIntegral len)
- -- : map mkWordCLit bmp)
- ; return (C_SRT srt_desc_lbl 0 srt_escape) }
-
- | otherwise
- = do { top_srt <- getSRTLabel
- ; return (C_SRT top_srt off (fromIntegral (head bmp))) }
- -- The fromIntegral converts to StgHalfWord
-
-getSRTInfo NoSRT
- = -- TODO: Should we panic in this case?
- -- Someone obviously thinks there should be an SRT
- return NoC_SRT
-
+-- | Returns 'True' if there is a non-empty SRT, or 'False' otherwise
+-- NB. the SRT attached to an StgBind is still used in the new codegen
+-- to decide whether we need a static link field on a static closure
+-- or not.
+getSRTInfo :: SRT -> FCode Bool
+getSRTInfo (SRTEntries vs) = return (not (isEmptyVarSet vs))
+getSRTInfo _ = return False
srt_escape :: StgHalfWord
srt_escape = -1
diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in
index 4371bca95e..3c13bb4704 100644
--- a/compiler/ghc.cabal.in
+++ b/compiler/ghc.cabal.in
@@ -185,16 +185,15 @@ Library
CmmOpt
CmmParse
CmmProcPoint
- CmmSpillReload
CmmRewriteAssignments
- CmmStackLayout
CmmType
CmmUtils
+ CmmLayoutStack
MkGraph
OldCmm
+ OldCmmLint
OldCmmUtils
OldPprCmm
- OptimizationFuel
PprBase
PprC
PprCmm
@@ -440,6 +439,7 @@ Library
Pretty
Serialized
State
+ Stream
StringBuffer
UniqFM
UniqSet
@@ -473,6 +473,8 @@ Library
Vectorise.Env
Vectorise.Exp
Vectorise
+ Hoopl.Dataflow
+ Hoopl
Exposed-Modules:
AsmCodeGen
diff --git a/compiler/ghc.mk b/compiler/ghc.mk
index f541841046..1ea6159812 100644
--- a/compiler/ghc.mk
+++ b/compiler/ghc.mk
@@ -355,7 +355,7 @@ ifeq "$(GhcProfiled)" "YES"
# parts of the compiler of interest, and then add further cost centres
# as necessary. Turn on -auto-all for individual modules like this:
-compiler/main/DriverPipeline_HC_OPTS += -auto-all
+# compiler/main/DriverPipeline_HC_OPTS += -auto-all
compiler/main/GhcMake_HC_OPTS += -auto-all
compiler/main/GHC_HC_OPTS += -auto-all
diff --git a/compiler/main/CodeOutput.lhs b/compiler/main/CodeOutput.lhs
index 24906671cd..e92eb4f34c 100644
--- a/compiler/main/CodeOutput.lhs
+++ b/compiler/main/CodeOutput.lhs
@@ -15,22 +15,22 @@ import UniqSupply ( mkSplitUniqSupply )
import Finder ( mkStubPaths )
import PprC ( writeCs )
-import CmmLint ( cmmLint )
+import OldCmmLint ( cmmLint )
import Packages
import OldCmm ( RawCmmGroup )
import HscTypes
import DynFlags
import Config
import SysTools
+import Stream (Stream)
+import qualified Stream
import ErrUtils
import Outputable
import Module
-import Maybes ( firstJusts )
import SrcLoc
import Control.Exception
-import Control.Monad
import System.Directory
import System.FilePath
import System.IO
@@ -48,19 +48,26 @@ codeOutput :: DynFlags
-> ModLocation
-> ForeignStubs
-> [PackageId]
- -> [RawCmmGroup] -- Compiled C--
+ -> Stream IO RawCmmGroup () -- Compiled C--
-> IO (Bool{-stub_h_exists-}, Maybe FilePath{-stub_c_exists-})
-codeOutput dflags this_mod location foreign_stubs pkg_deps flat_abstractC
+codeOutput dflags this_mod location foreign_stubs pkg_deps cmm_stream
=
- do { when (dopt Opt_DoCmmLinting dflags) $ do
+ do {
+ -- Lint each CmmGroup as it goes past
+ ; let linted_cmm_stream =
+ if dopt Opt_DoCmmLinting dflags
+ then Stream.mapM do_lint cmm_stream
+ else cmm_stream
+
+ do_lint cmm = do
{ showPass dflags "CmmLint"
- ; let lints = map (cmmLint (targetPlatform dflags)) flat_abstractC
- ; case firstJusts lints of
+ ; case cmmLint (targetPlatform dflags) cmm of
Just err -> do { log_action dflags dflags SevDump noSrcSpan defaultDumpStyle err
; ghcExit dflags 1
}
Nothing -> return ()
+ ; return cmm
}
; showPass dflags "CodeOutput"
@@ -68,9 +75,9 @@ codeOutput dflags this_mod location foreign_stubs pkg_deps flat_abstractC
; stubs_exist <- outputForeignStubs dflags this_mod location foreign_stubs
; case hscTarget dflags of {
HscInterpreted -> return ();
- HscAsm -> outputAsm dflags filenm flat_abstractC;
- HscC -> outputC dflags filenm flat_abstractC pkg_deps;
- HscLlvm -> outputLlvm dflags filenm flat_abstractC;
+ HscAsm -> outputAsm dflags filenm linted_cmm_stream;
+ HscC -> outputC dflags filenm linted_cmm_stream pkg_deps;
+ HscLlvm -> outputLlvm dflags filenm linted_cmm_stream;
HscNothing -> panic "codeOutput: HscNothing"
}
; return stubs_exist
@@ -90,12 +97,16 @@ doOutput filenm io_action = bracket (openFile filenm WriteMode) hClose io_action
\begin{code}
outputC :: DynFlags
-> FilePath
- -> [RawCmmGroup]
+ -> Stream IO RawCmmGroup ()
-> [PackageId]
-> IO ()
-outputC dflags filenm flat_absC packages
+outputC dflags filenm cmm_stream packages
= do
+ -- ToDo: make the C backend consume the C-- incrementally, by
+ -- pushing the cmm_stream inside (c.f. nativeCodeGen)
+ rawcmms <- Stream.collect cmm_stream
+
-- figure out which header files to #include in the generated .hc file:
--
-- * extra_includes from packages
@@ -117,7 +128,7 @@ outputC dflags filenm flat_absC packages
doOutput filenm $ \ h -> do
hPutStr h ("/* GHC_PACKAGES " ++ unwords pkg_names ++ "\n*/\n")
hPutStr h cc_injects
- writeCs dflags h flat_absC
+ writeCs dflags h rawcmms
\end{code}
@@ -128,14 +139,14 @@ outputC dflags filenm flat_absC packages
%************************************************************************
\begin{code}
-outputAsm :: DynFlags -> FilePath -> [RawCmmGroup] -> IO ()
-outputAsm dflags filenm flat_absC
+outputAsm :: DynFlags -> FilePath -> Stream IO RawCmmGroup () -> IO ()
+outputAsm dflags filenm cmm_stream
| cGhcWithNativeCodeGen == "YES"
= do ncg_uniqs <- mkSplitUniqSupply 'n'
{-# SCC "OutputAsm" #-} doOutput filenm $
\f -> {-# SCC "NativeCodeGen" #-}
- nativeCodeGen dflags f ncg_uniqs flat_absC
+ nativeCodeGen dflags f ncg_uniqs cmm_stream
| otherwise
= panic "This compiler was built without a native code generator"
@@ -149,12 +160,17 @@ outputAsm dflags filenm flat_absC
%************************************************************************
\begin{code}
-outputLlvm :: DynFlags -> FilePath -> [RawCmmGroup] -> IO ()
-outputLlvm dflags filenm flat_absC
+outputLlvm :: DynFlags -> FilePath -> Stream IO RawCmmGroup () -> IO ()
+outputLlvm dflags filenm cmm_stream
= do ncg_uniqs <- mkSplitUniqSupply 'n'
+
+ -- ToDo: make the LLVM backend consume the C-- incrementally,
+ -- by pushing the cmm_stream inside (c.f. nativeCodeGen)
+ rawcmms <- Stream.collect cmm_stream
+
{-# SCC "llvm_output" #-} doOutput filenm $
\f -> {-# SCC "llvm_CodeGen" #-}
- llvmCodeGen dflags f ncg_uniqs flat_absC
+ llvmCodeGen dflags f ncg_uniqs rawcmms
\end{code}
@@ -240,4 +256,3 @@ outputForeignStubs_help fname doc_str header footer
= do writeFile fname (header ++ doc_str ++ '\n':footer ++ "\n")
return True
\end{code}
-
diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs
index 53aa39f04e..60b6e82bb7 100644
--- a/compiler/main/DynFlags.hs
+++ b/compiler/main/DynFlags.hs
@@ -164,9 +164,9 @@ data DynFlag
= Opt_D_dump_cmm
| Opt_D_dump_raw_cmm
| Opt_D_dump_cmmz
- | Opt_D_dump_cmmz_pretty
-- All of the cmmz subflags (there are a lot!) Automatically
-- enabled if you run -ddump-cmmz
+ | Opt_D_dump_cmmz_cfg
| Opt_D_dump_cmmz_cbe
| Opt_D_dump_cmmz_proc
| Opt_D_dump_cmmz_spills
@@ -1675,7 +1675,7 @@ dynamic_flags = [
, Flag "ddump-cmm" (setDumpFlag Opt_D_dump_cmm)
, Flag "ddump-raw-cmm" (setDumpFlag Opt_D_dump_raw_cmm)
, Flag "ddump-cmmz" (setDumpFlag Opt_D_dump_cmmz)
- , Flag "ddump-cmmz-pretty" (setDumpFlag Opt_D_dump_cmmz_pretty)
+ , Flag "ddump-cmmz-cfg" (setDumpFlag Opt_D_dump_cmmz_cbe)
, Flag "ddump-cmmz-cbe" (setDumpFlag Opt_D_dump_cmmz_cbe)
, Flag "ddump-cmmz-spills" (setDumpFlag Opt_D_dump_cmmz_spills)
, Flag "ddump-cmmz-proc" (setDumpFlag Opt_D_dump_cmmz_proc)
diff --git a/compiler/main/HscMain.hs b/compiler/main/HscMain.hs
index 562332d52a..0b03e83029 100644
--- a/compiler/main/HscMain.hs
+++ b/compiler/main/HscMain.hs
@@ -119,13 +119,12 @@ import TyCon
import Name
import SimplStg ( stg2stg )
import CodeGen ( codeGen )
-import OldCmm as Old ( CmmGroup )
-import PprCmm ( pprCmms )
+import qualified OldCmm as Old
+import qualified Cmm as New
import CmmParse ( parseCmmFile )
import CmmBuildInfoTables
import CmmPipeline
import CmmInfo
-import OptimizationFuel ( initOptFuelState )
import CmmCvt
import CodeOutput
import NameEnv ( emptyNameEnv )
@@ -147,6 +146,9 @@ import UniqFM ( emptyUFM )
import UniqSupply ( initUs_ )
import Bag
import Exception
+import qualified Stream
+import Stream (Stream)
+
import Util
import Data.List
@@ -172,7 +174,6 @@ newHscEnv dflags = do
nc_var <- newIORef (initNameCache us knownKeyNames)
fc_var <- newIORef emptyUFM
mlc_var <- newIORef emptyModuleEnv
- optFuel <- initOptFuelState
return HscEnv { hsc_dflags = dflags,
hsc_targets = [],
hsc_mod_graph = [],
@@ -182,7 +183,6 @@ newHscEnv dflags = do
hsc_NC = nc_var,
hsc_FC = fc_var,
hsc_MLC = mlc_var,
- hsc_OptFuel = optFuel,
hsc_type_env_var = Nothing }
@@ -1276,20 +1276,27 @@ hscGenHardCode cgguts mod_summary = do
cost_centre_info
stg_binds hpc_info
else {-# SCC "CodeGen" #-}
- codeGen dflags this_mod data_tycons
- cost_centre_info
- stg_binds hpc_info
+ return (codeGen dflags this_mod data_tycons
+ cost_centre_info
+ stg_binds hpc_info)
+
------------------ Code output -----------------------
- rawcmms <- {-# SCC "cmmToRawCmm" #-}
+ rawcmms0 <- {-# SCC "cmmToRawCmm" #-}
cmmToRawCmm platform cmms
- dumpIfSet_dyn dflags Opt_D_dump_raw_cmm "Raw Cmm" (ppr rawcmms)
+
+ let dump a = do dumpIfSet_dyn dflags Opt_D_dump_raw_cmm "Raw Cmm"
+ (ppr a)
+ return a
+ rawcmms1 = Stream.mapM dump rawcmms0
+
(_stub_h_exists, stub_c_exists)
<- {-# SCC "codeOutput" #-}
codeOutput dflags this_mod location foreign_stubs
- dependencies rawcmms
+ dependencies rawcmms1
return stub_c_exists
+
hscInteractive :: (ModIface, ModDetails, CgGuts)
-> ModSummary
-> Hsc (InteractiveStatus, ModIface, ModDetails)
@@ -1335,7 +1342,7 @@ hscCompileCmmFile hsc_env filename = runHsc hsc_env $ do
let dflags = hsc_dflags hsc_env
cmm <- ioMsgMaybe $ parseCmmFile dflags filename
liftIO $ do
- rawCmms <- cmmToRawCmm (targetPlatform dflags) [cmm]
+ rawCmms <- cmmToRawCmm (targetPlatform dflags) (Stream.yield cmm)
_ <- codeOutput dflags no_mod no_loc NoStubs [] rawCmms
return ()
where
@@ -1350,24 +1357,52 @@ tryNewCodeGen :: HscEnv -> Module -> [TyCon]
-> CollectedCCs
-> [(StgBinding,[(Id,[Id])])]
-> HpcInfo
- -> IO [Old.CmmGroup]
+ -> IO (Stream IO Old.CmmGroup ())
+ -- Note we produce a 'Stream' of CmmGroups, so that the
+ -- backend can be run incrementally. Otherwise it generates all
+ -- the C-- up front, which has a significant space cost.
tryNewCodeGen hsc_env this_mod data_tycons
cost_centre_info stg_binds hpc_info = do
let dflags = hsc_dflags hsc_env
- prog <- StgCmm.codeGen dflags this_mod data_tycons
+
+ let cmm_stream :: Stream IO New.CmmGroup ()
+ cmm_stream = {-# SCC "StgCmm" #-}
+ StgCmm.codeGen dflags this_mod data_tycons
cost_centre_info stg_binds hpc_info
- dumpIfSet_dyn dflags Opt_D_dump_cmmz "Cmm produced by new codegen"
- (pprCmms prog)
+
+ -- codegen consumes a stream of CmmGroup, and produces a new
+ -- stream of CmmGroup (not necessarily synchronised: one
+ -- CmmGroup on input may produce many CmmGroups on output due
+ -- to proc-point splitting).
+
+ let dump1 a = do dumpIfSet_dyn dflags Opt_D_dump_cmmz
+ "Cmm produced by new codegen" (ppr a)
+ return a
+
+ ppr_stream1 = Stream.mapM dump1 cmm_stream
-- We are building a single SRT for the entire module, so
-- we must thread it through all the procedures as we cps-convert them.
us <- mkSplitUniqSupply 'S'
let initTopSRT = initUs_ us emptySRT
- (topSRT, prog) <- foldM (cmmPipeline hsc_env) (initTopSRT, []) prog
- let prog' = map cmmOfZgraph (srtToData topSRT : prog)
- dumpIfSet_dyn dflags Opt_D_dump_cmmz "Output Cmm" (ppr prog')
- return prog'
+ let run_pipeline topSRT cmmgroup = do
+ (topSRT, cmmgroup) <- cmmPipeline hsc_env topSRT cmmgroup
+ return (topSRT,cmmOfZgraph cmmgroup)
+
+ let pipeline_stream = {-# SCC "cmmPipeline" #-} do
+ topSRT <- Stream.mapAccumL run_pipeline initTopSRT ppr_stream1
+ Stream.yield (cmmOfZgraph (srtToData topSRT))
+
+ let
+ dump2 a = do dumpIfSet_dyn dflags Opt_D_dump_cmmz "Output Cmm" $ ppr a
+ return a
+
+ ppr_stream2 = Stream.mapM dump2 pipeline_stream
+
+ return ppr_stream2
+
+
myCoreToStg :: DynFlags -> Module -> CoreProgram
-> IO ( [(StgBinding,[(Id,[Id])])] -- output program
diff --git a/compiler/main/HscTypes.lhs b/compiler/main/HscTypes.lhs
index 156f081d3e..adaa9a3171 100644
--- a/compiler/main/HscTypes.lhs
+++ b/compiler/main/HscTypes.lhs
@@ -142,7 +142,6 @@ import Packages hiding ( Version(..) )
import DynFlags
import DriverPhases
import BasicTypes
-import OptimizationFuel ( OptFuelState )
import IfaceSyn
import CoreSyn ( CoreRule, CoreVect )
import Maybes
@@ -318,11 +317,6 @@ data HscEnv
-- ^ This caches the location of modules, so we don't have to
-- search the filesystem multiple times. See also 'hsc_FC'.
- hsc_OptFuel :: OptFuelState,
- -- ^ Settings to control the use of \"optimization fuel\":
- -- by limiting the number of transformations,
- -- we can use binary search to help find compiler bugs.
-
hsc_type_env_var :: Maybe (Module, IORef TypeEnv)
-- ^ Used for one-shot compilation only, to initialise
-- the 'IfGblEnv'. See 'TcRnTypes.tcg_type_env_var' for
diff --git a/compiler/nativeGen/AsmCodeGen.lhs b/compiler/nativeGen/AsmCodeGen.lhs
index 51adf46005..4b49fe304e 100644
--- a/compiler/nativeGen/AsmCodeGen.lhs
+++ b/compiler/nativeGen/AsmCodeGen.lhs
@@ -71,6 +71,8 @@ import FastString
import UniqSet
import ErrUtils
import Module
+import Stream (Stream)
+import qualified Stream
-- DEBUGGING ONLY
--import OrdList
@@ -147,7 +149,7 @@ data NcgImpl statics instr jumpDest = NcgImpl {
}
--------------------
-nativeCodeGen :: DynFlags -> Handle -> UniqSupply -> [RawCmmGroup] -> IO ()
+nativeCodeGen :: DynFlags -> Handle -> UniqSupply -> Stream IO RawCmmGroup () -> IO ()
nativeCodeGen dflags h us cmms
= let platform = targetPlatform dflags
nCG' :: (Outputable statics, Outputable instr, Instruction instr) => NcgImpl statics instr jumpDest -> IO ()
@@ -209,16 +211,16 @@ nativeCodeGen dflags h us cmms
nativeCodeGen' :: (Outputable statics, Outputable instr, Instruction instr)
=> DynFlags
-> NcgImpl statics instr jumpDest
- -> Handle -> UniqSupply -> [RawCmmGroup] -> IO ()
+ -> Handle -> UniqSupply -> Stream IO RawCmmGroup () -> IO ()
nativeCodeGen' dflags ncgImpl h us cmms
= do
let platform = targetPlatform dflags
- split_cmms = concat $ map add_split cmms
+ split_cmms = Stream.map add_split cmms
-- BufHandle is a performance hack. We could hide it inside
-- Pretty if it weren't for the fact that we do lots of little
-- printDocs here (in order to do codegen in constant space).
bufh <- newBufHandle h
- (imports, prof) <- cmmNativeGens dflags ncgImpl bufh us split_cmms [] [] 0
+ (imports, prof) <- cmmNativeGenStream dflags ncgImpl bufh us split_cmms [] [] 0
bFlush bufh
let (native, colorStats, linearStats)
@@ -272,6 +274,34 @@ nativeCodeGen' dflags ncgImpl h us cmms
split_marker = CmmProc Nothing mkSplitMarkerLabel (ListGraph [])
+cmmNativeGenStream :: (Outputable statics, Outputable instr, Instruction instr)
+ => DynFlags
+ -> NcgImpl statics instr jumpDest
+ -> BufHandle
+ -> UniqSupply
+ -> Stream IO RawCmmGroup ()
+ -> [[CLabel]]
+ -> [ ([NatCmmDecl statics instr],
+ Maybe [Color.RegAllocStats statics instr],
+ Maybe [Linear.RegAllocStats]) ]
+ -> Int
+ -> IO ( [[CLabel]],
+ [([NatCmmDecl statics instr],
+ Maybe [Color.RegAllocStats statics instr],
+ Maybe [Linear.RegAllocStats])] )
+
+cmmNativeGenStream dflags ncgImpl h us cmm_stream impAcc profAcc count
+ = do
+ r <- Stream.runStream cmm_stream
+ case r of
+ Left () -> return (reverse impAcc, reverse profAcc)
+ Right (cmms, cmm_stream') -> do
+ (impAcc,profAcc,us') <- cmmNativeGens dflags ncgImpl h us cmms
+ impAcc profAcc count
+ cmmNativeGenStream dflags ncgImpl h us' cmm_stream'
+ impAcc profAcc count
+
+
-- | Do native code generation on all these cmms.
--
cmmNativeGens :: (Outputable statics, Outputable instr, Instruction instr)
@@ -287,11 +317,12 @@ cmmNativeGens :: (Outputable statics, Outputable instr, Instruction instr)
-> Int
-> IO ( [[CLabel]],
[([NatCmmDecl statics instr],
- Maybe [Color.RegAllocStats statics instr],
- Maybe [Linear.RegAllocStats])] )
+ Maybe [Color.RegAllocStats statics instr],
+ Maybe [Linear.RegAllocStats])],
+ UniqSupply )
-cmmNativeGens _ _ _ _ [] impAcc profAcc _
- = return (reverse impAcc, reverse profAcc)
+cmmNativeGens _ _ _ us [] impAcc profAcc _
+ = return (impAcc,profAcc,us)
cmmNativeGens dflags ncgImpl h us (cmm : cmms) impAcc profAcc count
= do
@@ -817,7 +848,11 @@ Ideas for other things we could do (put these in Hoopl please!):
cmmToCmm :: DynFlags -> RawCmmDecl -> (RawCmmDecl, [CLabel])
cmmToCmm _ top@(CmmData _ _) = (top, [])
cmmToCmm dflags (CmmProc info lbl (ListGraph blocks)) = runCmmOpt dflags $ do
- blocks' <- mapM cmmBlockConFold (cmmMiniInline dflags (cmmEliminateDeadBlocks blocks))
+ let reachable_blocks | dopt Opt_TryNewCodeGen dflags = blocks
+ | otherwise = cmmEliminateDeadBlocks blocks
+ -- The new codegen path has already eliminated unreachable blocks by now
+
+ blocks' <- mapM cmmBlockConFold (cmmMiniInline dflags reachable_blocks)
return $ CmmProc info lbl (ListGraph blocks')
newtype CmmOptM a = CmmOptM (([CLabel], DynFlags) -> (# a, [CLabel] #))
@@ -911,7 +946,8 @@ cmmExprConFold referenceKind expr = do
dflags <- getDynFlags
-- Skip constant folding if new code generator is running
-- (this optimization is done in Hoopl)
- let expr' = if dopt Opt_TryNewCodeGen dflags
+ -- SDM: re-enabled for now, while cmmRewriteAssignments is turned off
+ let expr' = if False -- dopt Opt_TryNewCodeGen dflags
then expr
else cmmExprCon (targetPlatform dflags) expr
cmmExprNative referenceKind expr'
diff --git a/compiler/simplStg/SimplStg.lhs b/compiler/simplStg/SimplStg.lhs
index d5024ab2e0..635df3ce41 100644
--- a/compiler/simplStg/SimplStg.lhs
+++ b/compiler/simplStg/SimplStg.lhs
@@ -55,7 +55,10 @@ stg2stg dflags module_name binds
; (processed_binds, _, cost_centres)
<- foldl_mn do_stg_pass (binds', us0, ccs) (getStgToDo dflags)
- ; let srt_binds = computeSRTs (unarise us1 processed_binds)
+ ; let un_binds = unarise us1 processed_binds
+ ; let srt_binds
+ | dopt Opt_TryNewCodeGen dflags = zip un_binds (repeat [])
+ | otherwise = computeSRTs un_binds
; dumpIfSet_dyn dflags Opt_D_dump_stg "STG syntax:"
(pprStgBindingsWithSRTs srt_binds)
diff --git a/compiler/utils/OrdList.lhs b/compiler/utils/OrdList.lhs
index a56cdf3f58..7e3b24a5da 100644
--- a/compiler/utils/OrdList.lhs
+++ b/compiler/utils/OrdList.lhs
@@ -27,11 +27,14 @@ infixl 5 `snocOL`
infixr 5 `consOL`
data OrdList a
- = Many [a] -- Invariant: non-empty
+ = None
+ | One a
+ | Many [a] -- Invariant: non-empty
+ | Cons a (OrdList a)
+ | Snoc (OrdList a) a
| Two (OrdList a) -- Invariant: non-empty
(OrdList a) -- Invariant: non-empty
- | One a
- | None
+
nilOL :: OrdList a
isNilOL :: OrdList a -> Bool
@@ -44,22 +47,33 @@ concatOL :: [OrdList a] -> OrdList a
nilOL = None
unitOL as = One as
-snocOL None b = One b
-snocOL as b = Two as (One b)
-consOL a None = One a
-consOL a bs = Two (One a) bs
+snocOL as b = Snoc as b
+consOL a bs = Cons a bs
concatOL aas = foldr appOL None aas
isNilOL None = True
isNilOL _ = False
-appOL None bs = bs
-appOL as None = as
-appOL as bs = Two as bs
+None `appOL` b = b
+a `appOL` None = a
+One a `appOL` b = Cons a b
+a `appOL` One b = Snoc a b
+a `appOL` b = Two a b
+
+fromOL :: OrdList a -> [a]
+fromOL a = go a []
+ where go None acc = acc
+ go (One a) acc = a : acc
+ go (Cons a b) acc = a : go b acc
+ go (Snoc a b) acc = go a (b:acc)
+ go (Two a b) acc = go a (go b acc)
+ go (Many xs) acc = xs ++ acc
mapOL :: (a -> b) -> OrdList a -> OrdList b
mapOL _ None = None
mapOL f (One x) = One (f x)
+mapOL f (Cons x xs) = Cons (f x) (mapOL f xs)
+mapOL f (Snoc xs x) = Snoc (mapOL f xs) (f x)
mapOL f (Two x y) = Two (mapOL f x) (mapOL f y)
mapOL f (Many xs) = Many (map f xs)
@@ -69,24 +83,19 @@ instance Functor OrdList where
foldrOL :: (a->b->b) -> b -> OrdList a -> b
foldrOL _ z None = z
foldrOL k z (One x) = k x z
+foldrOL k z (Cons x xs) = k x (foldrOL k z xs)
+foldrOL k z (Snoc xs x) = foldrOL k (k x z) xs
foldrOL k z (Two b1 b2) = foldrOL k (foldrOL k z b2) b1
foldrOL k z (Many xs) = foldr k z xs
foldlOL :: (b->a->b) -> b -> OrdList a -> b
foldlOL _ z None = z
foldlOL k z (One x) = k z x
+foldlOL k z (Cons x xs) = foldlOL k (k z x) xs
+foldlOL k z (Snoc xs x) = k (foldlOL k z xs) x
foldlOL k z (Two b1 b2) = foldlOL k (foldlOL k z b1) b2
foldlOL k z (Many xs) = foldl k z xs
-fromOL :: OrdList a -> [a]
-fromOL ol
- = flat ol []
- where
- flat None rest = rest
- flat (One x) rest = x:rest
- flat (Two a b) rest = flat a (flat b rest)
- flat (Many xs) rest = xs ++ rest
-
toOL :: [a] -> OrdList a
toOL [] = None
toOL xs = Many xs
diff --git a/compiler/utils/Stream.hs b/compiler/utils/Stream.hs
new file mode 100644
index 0000000000..2fa76d2345
--- /dev/null
+++ b/compiler/utils/Stream.hs
@@ -0,0 +1,97 @@
+-- -----------------------------------------------------------------------------
+--
+-- (c) The University of Glasgow 2012
+--
+-- Monadic streams
+--
+-- -----------------------------------------------------------------------------
+
+module Stream (
+ Stream(..), yield, liftIO,
+ collect, fromList,
+ Stream.map, Stream.mapM, Stream.mapAccumL
+ ) where
+
+-- |
+-- @Stream m a b@ is a computation in some Monad @m@ that delivers a sequence
+-- of elements of type @a@ followed by a result of type @b@.
+--
+-- More concretely, a value of type @Stream m a b@ can be run using @runStream@
+-- in the Monad @m@, and it delivers either
+--
+-- * the final result: @Left b@, or
+-- * @Right (a,str)@, where @a@ is the next element in the stream, and @str@
+-- is a computation to get the rest of the stream.
+--
+-- Stream is itself a Monad, and provides an operation 'yield' that
+-- produces a new element of the stream. This makes it convenient to turn
+-- existing monadic computations into streams.
+--
+-- The idea is that Stream is useful for making a monadic computation
+-- that produces values from time to time. This can be used for
+-- knitting together two complex monadic operations, so that the
+-- producer does not have to produce all its values before the
+-- consumer starts consuming them. We make the producer into a
+-- Stream, and the consumer pulls on the stream each time it wants a
+-- new value.
+--
+newtype Stream m a b = Stream { runStream :: m (Either b (a, Stream m a b)) }
+
+instance Monad m => Monad (Stream m a) where
+ return a = Stream (return (Left a))
+
+ Stream m >>= k = Stream $ do
+ r <- m
+ case r of
+ Left b -> runStream (k b)
+ Right (a,str) -> return (Right (a, str >>= k))
+
+yield :: Monad m => a -> Stream m a ()
+yield a = Stream (return (Right (a, return ())))
+
+liftIO :: IO a -> Stream IO b a
+liftIO io = Stream $ io >>= return . Left
+
+-- | Turn a Stream into an ordinary list, by demanding all the elements.
+collect :: Monad m => Stream m a () -> m [a]
+collect str = go str []
+ where
+ go str acc = do
+ r <- runStream str
+ case r of
+ Left () -> return (reverse acc)
+ Right (a, str') -> go str' (a:acc)
+
+-- | Turn a list into a 'Stream', by yielding each element in turn.
+fromList :: Monad m => [a] -> Stream m a ()
+fromList = mapM_ yield
+
+-- | Apply a function to each element of a 'Stream', lazilly
+map :: Monad m => (a -> b) -> Stream m a x -> Stream m b x
+map f str = Stream $ do
+ r <- runStream str
+ case r of
+ Left x -> return (Left x)
+ Right (a, str') -> return (Right (f a, Stream.map f str'))
+
+-- | Apply a monadic operation to each element of a 'Stream', lazilly
+mapM :: Monad m => (a -> m b) -> Stream m a x -> Stream m b x
+mapM f str = Stream $ do
+ r <- runStream str
+ case r of
+ Left x -> return (Left x)
+ Right (a, str') -> do
+ b <- f a
+ return (Right (b, Stream.mapM f str'))
+
+-- | analog of the list-based 'mapAccumL' on Streams. This is a simple
+-- way to map over a Stream while carrying some state around.
+mapAccumL :: Monad m => (c -> a -> m (c,b)) -> c -> Stream m a ()
+ -> Stream m b c
+mapAccumL f c str = Stream $ do
+ r <- runStream str
+ case r of
+ Left () -> return (Left c)
+ Right (a, str') -> do
+ (c',b) <- f c a
+ return (Right (b, mapAccumL f c' str'))