summaryrefslogtreecommitdiff
path: root/compiler/cmm
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/cmm')
-rw-r--r--compiler/cmm/CLabel.hs94
-rw-r--r--compiler/cmm/Cmm.hs21
-rw-r--r--compiler/cmm/CmmCPS.hs29
-rw-r--r--compiler/cmm/CmmExpr.hs6
-rw-r--r--compiler/cmm/CmmLint.hs16
-rw-r--r--compiler/cmm/CmmLive.hs4
-rw-r--r--compiler/cmm/CmmNode.hs2
-rw-r--r--compiler/cmm/CmmOpt.hs80
-rw-r--r--compiler/cmm/CmmParse.y28
-rw-r--r--compiler/cmm/CmmProcPoint.hs2
-rw-r--r--compiler/cmm/CmmSpillReload.hs10
-rw-r--r--compiler/cmm/CmmStackLayout.hs154
-rw-r--r--compiler/cmm/MkGraph.hs2
-rw-r--r--compiler/cmm/OptimizationFuel.hs29
-rw-r--r--compiler/cmm/PprC.hs41
-rw-r--r--compiler/cmm/cmm-notes29
16 files changed, 329 insertions, 218 deletions
diff --git a/compiler/cmm/CLabel.hs b/compiler/cmm/CLabel.hs
index 4d9596197e..3451c7d5a9 100644
--- a/compiler/cmm/CLabel.hs
+++ b/compiler/cmm/CLabel.hs
@@ -51,9 +51,7 @@ module CLabel (
mkAsmTempLabel,
- mkModuleInitLabel,
- mkPlainModuleInitLabel,
- mkModuleInitTableLabel,
+ mkPlainModuleInitLabel,
mkSplitMarkerLabel,
mkDirty_MUT_VAR_Label,
@@ -70,10 +68,7 @@ module CLabel (
mkRtsPrimOpLabel,
mkRtsSlowTickyCtrLabel,
- moduleRegdLabel,
- moduleRegTableLabel,
-
- mkSelectorInfoLabel,
+ mkSelectorInfoLabel,
mkSelectorEntryLabel,
mkCmmInfoLabel,
@@ -102,7 +97,6 @@ module CLabel (
mkDeadStripPreventer,
mkHpcTicksLabel,
- mkHpcModuleNameLabel,
hasCAF,
infoLblToEntryLbl, entryLblToInfoLbl, cvtToClosureLbl, cvtToSRTLbl,
@@ -202,23 +196,9 @@ data CLabel
| StringLitLabel
{-# UNPACK #-} !Unique
- | ModuleInitLabel
- Module -- the module name
- String -- its "way"
- -- at some point we might want some kind of version number in
- -- the module init label, to guard against compiling modules in
- -- the wrong order. We can't use the interface file version however,
- -- because we don't always recompile modules which depend on a module
- -- whose version has changed.
-
- | PlainModuleInitLabel -- without the version & way info
+ | PlainModuleInitLabel -- without the version & way info
Module
- | ModuleInitTableLabel -- table of imported modules to init
- Module
-
- | ModuleRegdLabel
-
| CC_Label CostCentre
| CCS_Label CostCentreStack
@@ -242,9 +222,6 @@ data CLabel
-- | Per-module table of tick locations
| HpcTicksLabel Module
- -- | Per-module name of the module for Hpc
- | HpcModuleNameLabel
-
-- | Label of an StgLargeSRT
| LargeSRTLabel
{-# UNPACK #-} !Unique
@@ -490,7 +467,6 @@ mkRtsSlowTickyCtrLabel pat = RtsLabel (RtsSlowTickyCtr pat)
-- Constructing Code Coverage Labels
mkHpcTicksLabel = HpcTicksLabel
-mkHpcModuleNameLabel = HpcModuleNameLabel
-- Constructing labels used for dynamic linking
@@ -515,19 +491,9 @@ mkStringLitLabel = StringLitLabel
mkAsmTempLabel :: Uniquable a => a -> CLabel
mkAsmTempLabel a = AsmTempLabel (getUnique a)
-mkModuleInitLabel :: Module -> String -> CLabel
-mkModuleInitLabel mod way = ModuleInitLabel mod way
-
mkPlainModuleInitLabel :: Module -> CLabel
mkPlainModuleInitLabel mod = PlainModuleInitLabel mod
-mkModuleInitTableLabel :: Module -> CLabel
-mkModuleInitTableLabel mod = ModuleInitTableLabel mod
-
-moduleRegdLabel = ModuleRegdLabel
-moduleRegTableLabel = ModuleInitTableLabel
-
-
-- -----------------------------------------------------------------------------
-- Converting between info labels and entry/ret labels.
@@ -591,10 +557,7 @@ needsCDecl (LargeSRTLabel _) = False
needsCDecl (LargeBitmapLabel _) = False
needsCDecl (IdLabel _ _ _) = True
needsCDecl (CaseLabel _ _) = True
-needsCDecl (ModuleInitLabel _ _) = True
-needsCDecl (PlainModuleInitLabel _) = True
-needsCDecl (ModuleInitTableLabel _) = True
-needsCDecl ModuleRegdLabel = False
+needsCDecl (PlainModuleInitLabel _) = True
needsCDecl (StringLitLabel _) = False
needsCDecl (AsmTempLabel _) = False
@@ -612,7 +575,6 @@ needsCDecl l@(ForeignLabel{}) = not (isMathFun l)
needsCDecl (CC_Label _) = True
needsCDecl (CCS_Label _) = True
needsCDecl (HpcTicksLabel _) = True
-needsCDecl HpcModuleNameLabel = False
-- | Check whether a label is a local temporary for native code generation
@@ -630,7 +592,7 @@ maybeAsmTemp _ = Nothing
-- | Check whether a label corresponds to a C function that has
-- a prototype in a system header somehere, or is built-in
--- to the C compiler. For these labels we abovoid generating our
+-- to the C compiler. For these labels we avoid generating our
-- own C prototypes.
isMathFun :: CLabel -> Bool
isMathFun (ForeignLabel fs _ _ _) = fs `elementOfUniqSet` math_funs
@@ -725,11 +687,8 @@ externallyVisibleCLabel :: CLabel -> Bool -- not C "static"
externallyVisibleCLabel (CaseLabel _ _) = False
externallyVisibleCLabel (StringLitLabel _) = False
externallyVisibleCLabel (AsmTempLabel _) = False
-externallyVisibleCLabel (ModuleInitLabel _ _) = True
externallyVisibleCLabel (PlainModuleInitLabel _)= True
-externallyVisibleCLabel (ModuleInitTableLabel _)= False
-externallyVisibleCLabel ModuleRegdLabel = False
-externallyVisibleCLabel (RtsLabel _) = True
+externallyVisibleCLabel (RtsLabel _) = True
externallyVisibleCLabel (CmmLabel _ _ _) = True
externallyVisibleCLabel (ForeignLabel{}) = True
externallyVisibleCLabel (IdLabel name _ _) = isExternalName name
@@ -737,8 +696,7 @@ externallyVisibleCLabel (CC_Label _) = True
externallyVisibleCLabel (CCS_Label _) = True
externallyVisibleCLabel (DynamicLinkerLabel _ _) = False
externallyVisibleCLabel (HpcTicksLabel _) = True
-externallyVisibleCLabel HpcModuleNameLabel = False
-externallyVisibleCLabel (LargeBitmapLabel _) = False
+externallyVisibleCLabel (LargeBitmapLabel _) = False
externallyVisibleCLabel (LargeSRTLabel _) = False
-- -----------------------------------------------------------------------------
@@ -777,9 +735,7 @@ labelType (RtsLabel (RtsApInfoTable _ _)) = DataLabel
labelType (RtsLabel (RtsApFast _)) = CodeLabel
labelType (CaseLabel _ CaseReturnInfo) = DataLabel
labelType (CaseLabel _ _) = CodeLabel
-labelType (ModuleInitLabel _ _) = CodeLabel
labelType (PlainModuleInitLabel _) = CodeLabel
-labelType (ModuleInitTableLabel _) = DataLabel
labelType (LargeSRTLabel _) = DataLabel
labelType (LargeBitmapLabel _) = DataLabel
labelType (ForeignLabel _ _ _ IsFunction) = CodeLabel
@@ -837,10 +793,8 @@ labelDynamic this_pkg lbl =
CmmLabel pkg _ _ -> True
#endif
- ModuleInitLabel m _ -> not opt_Static && this_pkg /= (modulePackageId m)
PlainModuleInitLabel m -> not opt_Static && this_pkg /= (modulePackageId m)
- ModuleInitTableLabel m -> not opt_Static && this_pkg /= (modulePackageId m)
-
+
-- Note that DynamicLinkerLabels do NOT require dynamic linking themselves.
_ -> False
@@ -896,8 +850,8 @@ instance Outputable CLabel where
pprCLabel :: CLabel -> SDoc
-#if ! OMIT_NATIVE_CODEGEN
pprCLabel (AsmTempLabel u)
+ | cGhcWithNativeCodeGen == "YES"
= getPprStyle $ \ sty ->
if asmStyle sty then
ptext asmTempLabelPrefix <> pprUnique u
@@ -905,23 +859,22 @@ pprCLabel (AsmTempLabel u)
char '_' <> pprUnique u
pprCLabel (DynamicLinkerLabel info lbl)
+ | cGhcWithNativeCodeGen == "YES"
= pprDynamicLinkerAsmLabel info lbl
pprCLabel PicBaseLabel
+ | cGhcWithNativeCodeGen == "YES"
= ptext (sLit "1b")
pprCLabel (DeadStripPreventer lbl)
+ | cGhcWithNativeCodeGen == "YES"
= pprCLabel lbl <> ptext (sLit "_dsp")
-#endif
-pprCLabel lbl =
-#if ! OMIT_NATIVE_CODEGEN
- getPprStyle $ \ sty ->
- if asmStyle sty then
- maybe_underscore (pprAsmCLbl lbl)
- else
-#endif
- pprCLbl lbl
+pprCLabel lbl
+ = getPprStyle $ \ sty ->
+ if cGhcWithNativeCodeGen == "YES" && asmStyle sty
+ then maybe_underscore (pprAsmCLbl lbl)
+ else pprCLbl lbl
maybe_underscore doc
| underscorePrefix = pp_cSEP <> doc
@@ -1008,9 +961,6 @@ pprCLbl (RtsLabel (RtsPrimOp primop))
pprCLbl (RtsLabel (RtsSlowTickyCtr pat))
= ptext (sLit "SLOW_CALL_") <> text pat <> ptext (sLit "_ctr")
-pprCLbl ModuleRegdLabel
- = ptext (sLit "_module_registered")
-
pprCLbl (ForeignLabel str _ _ _)
= ftext str
@@ -1019,22 +969,12 @@ pprCLbl (IdLabel name cafs flavor) = ppr name <> ppIdFlavor flavor
pprCLbl (CC_Label cc) = ppr cc
pprCLbl (CCS_Label ccs) = ppr ccs
-pprCLbl (ModuleInitLabel mod way)
- = ptext (sLit "__stginit_") <> ppr mod
- <> char '_' <> text way
-
pprCLbl (PlainModuleInitLabel mod)
= ptext (sLit "__stginit_") <> ppr mod
-pprCLbl (ModuleInitTableLabel mod)
- = ptext (sLit "__stginittable_") <> ppr mod
-
pprCLbl (HpcTicksLabel mod)
= ptext (sLit "_hpc_tickboxes_") <> ppr mod <> ptext (sLit "_hpc")
-pprCLbl HpcModuleNameLabel
- = ptext (sLit "_hpc_module_name_str")
-
ppIdFlavor :: IdLabelInfo -> SDoc
ppIdFlavor x = pp_cSEP <>
(case x of
diff --git a/compiler/cmm/Cmm.hs b/compiler/cmm/Cmm.hs
index 2e9f952f7b..54b4b11662 100644
--- a/compiler/cmm/Cmm.hs
+++ b/compiler/cmm/Cmm.hs
@@ -9,10 +9,11 @@
#endif
module Cmm
- ( CmmGraph(..), CmmBlock
+ ( CmmGraph, GenCmmGraph(..), CmmBlock
, CmmStackInfo(..), CmmTopInfo(..), Cmm, CmmTop
, CmmReplGraph, CmmFwdRewrite, CmmBwdRewrite
+ , modifyGraph
, lastNode, replaceLastNode, insertBetween
, ofBlockMap, toBlockMap, insertBlock
, ofBlockList, toBlockList, bodyToBlockList
@@ -41,7 +42,8 @@ import Panic
-------------------------------------------------
-- CmmBlock, CmmGraph and Cmm
-data CmmGraph = CmmGraph { g_entry :: BlockId, g_graph :: Graph CmmNode C C }
+type CmmGraph = GenCmmGraph CmmNode
+data GenCmmGraph n = CmmGraph { g_entry :: BlockId, g_graph :: Graph n C C }
type CmmBlock = Block CmmNode C C
type CmmReplGraph e x = FuelUniqSM (Maybe (Graph CmmNode e x))
@@ -56,6 +58,9 @@ type CmmTop = GenCmmTop CmmStatic CmmTopInfo CmmGraph
-------------------------------------------------
-- Manipulating CmmGraphs
+modifyGraph :: (Graph n C C -> Graph n' C C) -> GenCmmGraph n -> GenCmmGraph n'
+modifyGraph f g = CmmGraph {g_entry=g_entry g, g_graph=f (g_graph g)}
+
toBlockMap :: CmmGraph -> LabelMap CmmBlock
toBlockMap (CmmGraph {g_graph=GMany NothingO body NothingO}) = body
@@ -150,26 +155,26 @@ insertBetween b ms succId = insert $ lastNode b
-- Running dataflow analysis and/or rewrites
-- Constructing forward and backward analysis-only pass
-analFwd :: Monad m => DataflowLattice f -> FwdTransfer CmmNode f -> FwdPass m CmmNode f
-analBwd :: Monad m => DataflowLattice f -> BwdTransfer CmmNode f -> BwdPass m CmmNode f
+analFwd :: Monad m => DataflowLattice f -> FwdTransfer n f -> FwdPass m n f
+analBwd :: Monad m => DataflowLattice f -> BwdTransfer n f -> BwdPass m n f
analFwd lat xfer = analRewFwd lat xfer noFwdRewrite
analBwd lat xfer = analRewBwd lat xfer noBwdRewrite
-- Constructing forward and backward analysis + rewrite pass
-analRewFwd :: Monad m => DataflowLattice f -> FwdTransfer CmmNode f -> FwdRewrite m CmmNode f -> FwdPass m CmmNode f
-analRewBwd :: Monad m => DataflowLattice f -> BwdTransfer CmmNode f -> BwdRewrite m CmmNode f -> BwdPass m CmmNode f
+analRewFwd :: Monad m => DataflowLattice f -> FwdTransfer n f -> FwdRewrite m n f -> FwdPass m n f
+analRewBwd :: Monad m => DataflowLattice f -> BwdTransfer n f -> BwdRewrite m n f -> BwdPass m n f
analRewFwd lat xfer rew = FwdPass {fp_lattice = lat, fp_transfer = xfer, fp_rewrite = rew}
analRewBwd lat xfer rew = BwdPass {bp_lattice = lat, bp_transfer = xfer, bp_rewrite = rew}
-- Running forward and backward dataflow analysis + optional rewrite
-dataflowPassFwd :: CmmGraph -> [(BlockId, f)] -> FwdPass FuelUniqSM CmmNode f -> FuelUniqSM (CmmGraph, BlockEnv f)
+dataflowPassFwd :: NonLocal n => GenCmmGraph n -> [(BlockId, f)] -> FwdPass FuelUniqSM n f -> FuelUniqSM (GenCmmGraph n, BlockEnv f)
dataflowPassFwd (CmmGraph {g_entry=entry, g_graph=graph}) facts fwd = do
(graph, facts, NothingO) <- analyzeAndRewriteFwd fwd (JustC [entry]) graph (mkFactBase (fp_lattice fwd) facts)
return (CmmGraph {g_entry=entry, g_graph=graph}, facts)
-dataflowPassBwd :: CmmGraph -> [(BlockId, f)] -> BwdPass FuelUniqSM CmmNode f -> FuelUniqSM (CmmGraph, BlockEnv f)
+dataflowPassBwd :: NonLocal n => GenCmmGraph n -> [(BlockId, f)] -> BwdPass FuelUniqSM n f -> FuelUniqSM (GenCmmGraph n, BlockEnv f)
dataflowPassBwd (CmmGraph {g_entry=entry, g_graph=graph}) facts bwd = do
(graph, facts, NothingO) <- analyzeAndRewriteBwd bwd (JustC [entry]) graph (mkFactBase (bp_lattice bwd) facts)
return (CmmGraph {g_entry=entry, g_graph=graph}, facts)
diff --git a/compiler/cmm/CmmCPS.hs b/compiler/cmm/CmmCPS.hs
index 372562cfca..aad00371a1 100644
--- a/compiler/cmm/CmmCPS.hs
+++ b/compiler/cmm/CmmCPS.hs
@@ -71,10 +71,10 @@ cpsTop _ p@(CmmData {}) = return ([], [(Map.empty, p)])
cpsTop hsc_env (CmmProc h@(TopInfo {stack_info=StackInfo {arg_space=entry_off}}) l g) =
do
-- Why bother doing it this early?
- -- g <- dual_rewrite Opt_D_dump_cmmz "spills and reloads"
+ -- g <- dual_rewrite run Opt_D_dump_cmmz "spills and reloads"
-- (dualLivenessWithInsertion callPPs) g
-- g <- run $ insertLateReloads g -- Duplicate reloads just before uses
- -- g <- dual_rewrite Opt_D_dump_cmmz "Dead Assignment Elimination"
+ -- g <- dual_rewrite runOptimization Opt_D_dump_cmmz "Dead Assignment Elimination"
-- (removeDeadAssignmentsAndReloads callPPs) g
dump Opt_D_dump_cmmz "Pre common block elimination" g
g <- return $ elimCommonBlocks g
@@ -91,16 +91,16 @@ cpsTop hsc_env (CmmProc h@(TopInfo {stack_info=StackInfo {arg_space=entry_off}})
----------- Spills and reloads -------------------
g <-
-- pprTrace "pre Spills" (ppr g) $
- dual_rewrite Opt_D_dump_cmmz "spills and reloads"
+ dual_rewrite run Opt_D_dump_cmmz "spills and reloads"
(dualLivenessWithInsertion procPoints) g
-- Insert spills at defns; reloads at return points
g <-
-- pprTrace "pre insertLateReloads" (ppr g) $
- run $ insertLateReloads g -- Duplicate reloads just before uses
+ runOptimization $ insertLateReloads g -- Duplicate reloads just before uses
dump Opt_D_dump_cmmz "Post late reloads" g
g <-
-- pprTrace "post insertLateReloads" (ppr g) $
- dual_rewrite Opt_D_dump_cmmz "Dead Assignment Elimination"
+ dual_rewrite runOptimization Opt_D_dump_cmmz "Dead Assignment Elimination"
(removeDeadAssignmentsAndReloads procPoints) g
-- Remove redundant reloads (and any other redundant asst)
@@ -112,12 +112,13 @@ cpsTop hsc_env (CmmProc h@(TopInfo {stack_info=StackInfo {arg_space=entry_off}})
--------------- Stack layout ----------------
slotEnv <- run $ liveSlotAnal g
+ let spEntryMap = getSpEntryMap entry_off g
mbpprTrace "live slot analysis results: " (ppr slotEnv) $ return ()
- let areaMap = layout procPoints slotEnv entry_off g
+ let areaMap = layout procPoints spEntryMap slotEnv entry_off g
mbpprTrace "areaMap" (ppr areaMap) $ return ()
------------ Manifest the stack pointer --------
- g <- run $ manifestSP areaMap entry_off g
+ g <- run $ manifestSP spEntryMap areaMap entry_off g
dump Opt_D_dump_cmmz "after manifestSP" g
-- UGH... manifestSP can require updates to the procPointMap.
-- We can probably do something quicker here for the update...
@@ -146,12 +147,16 @@ cpsTop hsc_env (CmmProc h@(TopInfo {stack_info=StackInfo {arg_space=entry_off}})
where dflags = hsc_dflags hsc_env
mbpprTrace x y z = if dopt Opt_D_dump_cmmz dflags then pprTrace x y z else z
dump f txt g = dumpIfSet_dyn dflags f txt (ppr g)
-
- run = runFuelIO (hsc_OptFuel hsc_env)
-
- dual_rewrite flag txt pass g =
+ -- Runs a required transformation/analysis
+ run = runInfiniteFuelIO (hsc_OptFuel hsc_env)
+ -- Runs an optional transformation/analysis (and should
+ -- thus be subject to optimization fuel)
+ runOptimization = runFuelIO (hsc_OptFuel hsc_env)
+
+ -- pass 'run' or 'runOptimization' for 'r'
+ dual_rewrite r flag txt pass g =
do dump flag ("Pre " ++ txt) g
- g <- run $ pass g
+ g <- r $ pass g
dump flag ("Post " ++ txt) $ g
return g
diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs
index 3ae2996213..55a5b73ac5 100644
--- a/compiler/cmm/CmmExpr.hs
+++ b/compiler/cmm/CmmExpr.hs
@@ -42,8 +42,8 @@ data CmmExpr
| CmmRegOff CmmReg Int
-- CmmRegOff reg i
-- ** is shorthand only, meaning **
- -- CmmMachOp (MO_S_Add rep (CmmReg reg) (CmmLit (CmmInt i rep)))
- -- where rep = cmmRegType reg
+ -- CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt (fromIntegral i) rep)]
+ -- where rep = typeWidth (cmmRegType reg)
instance Eq CmmExpr where -- Equality ignores the types
CmmLit l1 == CmmLit l2 = l1==l2
@@ -124,6 +124,8 @@ cmmExprType (CmmReg reg) = cmmRegType reg
cmmExprType (CmmMachOp op args) = machOpResultType op (map cmmExprType args)
cmmExprType (CmmRegOff reg _) = cmmRegType reg
cmmExprType (CmmStackSlot _ _) = bWord -- an address
+-- Careful though: what is stored at the stack slot may be bigger than
+-- an address
cmmLitType :: CmmLit -> CmmType
cmmLitType (CmmInt _ width) = cmmBits width
diff --git a/compiler/cmm/CmmLint.hs b/compiler/cmm/CmmLint.hs
index 95b1eef6a3..32fead337e 100644
--- a/compiler/cmm/CmmLint.hs
+++ b/compiler/cmm/CmmLint.hs
@@ -24,7 +24,6 @@ import OldPprCmm()
import Constants
import FastString
-import Control.Monad
import Data.Maybe
-- -----------------------------------------------------------------------------
@@ -70,8 +69,10 @@ lintCmmBlock labels (BasicBlock id stmts)
lintCmmExpr :: CmmExpr -> CmmLint CmmType
lintCmmExpr (CmmLoad expr rep) = do
_ <- lintCmmExpr expr
- when (widthInBytes (typeWidth rep) >= wORD_SIZE) $
- cmmCheckWordAddress expr
+ -- Disabled, if we have the inlining phase before the lint phase,
+ -- we can have funny offsets due to pointer tagging. -- EZY
+ -- when (widthInBytes (typeWidth rep) >= wORD_SIZE) $
+ -- cmmCheckWordAddress expr
return rep
lintCmmExpr expr@(CmmMachOp op args) = do
tys <- mapM lintCmmExpr args
@@ -99,14 +100,14 @@ isOffsetOp _ = False
-- This expression should be an address from which a word can be loaded:
-- check for funny-looking sub-word offsets.
-cmmCheckWordAddress :: CmmExpr -> CmmLint ()
-cmmCheckWordAddress e@(CmmMachOp op [arg, CmmLit (CmmInt i _)])
+_cmmCheckWordAddress :: CmmExpr -> CmmLint ()
+_cmmCheckWordAddress e@(CmmMachOp op [arg, CmmLit (CmmInt i _)])
| isOffsetOp op && notNodeReg arg && i `rem` fromIntegral wORD_SIZE /= 0
= cmmLintDubiousWordOffset e
-cmmCheckWordAddress e@(CmmMachOp op [CmmLit (CmmInt i _), arg])
+_cmmCheckWordAddress e@(CmmMachOp op [CmmLit (CmmInt i _), arg])
| isOffsetOp op && notNodeReg arg && i `rem` fromIntegral wORD_SIZE /= 0
= cmmLintDubiousWordOffset e
-cmmCheckWordAddress _
+_cmmCheckWordAddress _
= return ()
-- No warnings for unaligned arithmetic with the node register,
@@ -152,6 +153,7 @@ lintTarget (CmmPrim {}) = return ()
checkCond :: CmmExpr -> CmmLint ()
checkCond (CmmMachOp mop _) | isComparisonMachOp mop = return ()
+checkCond (CmmLit (CmmInt x t)) | x == 0 || x == 1, t == wordWidth = return () -- constant values
checkCond expr = cmmLintErr (hang (text "expression is not a conditional:") 2
(ppr expr))
diff --git a/compiler/cmm/CmmLive.hs b/compiler/cmm/CmmLive.hs
index 78867b0ce3..c87a3a9b33 100644
--- a/compiler/cmm/CmmLive.hs
+++ b/compiler/cmm/CmmLive.hs
@@ -63,12 +63,12 @@ gen a live = foldRegsUsed extendRegSet live a
kill :: DefinerOfLocalRegs a => a -> RegSet -> RegSet
kill a live = foldRegsDefd delOneFromUniqSet live a
+-- Testing!
xferLive :: BwdTransfer CmmNode CmmLive
xferLive = mkBTransfer3 fst mid lst
where fst _ f = f
mid :: CmmNode O O -> CmmLive -> CmmLive
- mid n f = gen_kill n $ case n of CmmUnsafeForeignCall {} -> emptyRegSet
- _ -> f
+ mid n f = gen_kill n f
lst :: CmmNode O C -> FactBase CmmLive -> CmmLive
lst n f = gen_kill n $ case n of CmmCall {} -> emptyRegSet
CmmForeignCall {} -> emptyRegSet
diff --git a/compiler/cmm/CmmNode.hs b/compiler/cmm/CmmNode.hs
index 93564ac946..e67321c0b0 100644
--- a/compiler/cmm/CmmNode.hs
+++ b/compiler/cmm/CmmNode.hs
@@ -92,6 +92,8 @@ data CmmNode e x where
A MidForeign call is used for *unsafe* foreign calls;
a LastForeign call is used for *safe* foreign calls.
Unsafe ones are easy: think of them as a "fat machine instruction".
+In particular, they do *not* kill all live registers (there was a bit
+of code in GHC that conservatively assumed otherwise.)
Safe ones are trickier. A safe foreign call
r = f(x)
diff --git a/compiler/cmm/CmmOpt.hs b/compiler/cmm/CmmOpt.hs
index 0dec26da6f..a2eecd5c48 100644
--- a/compiler/cmm/CmmOpt.hs
+++ b/compiler/cmm/CmmOpt.hs
@@ -14,6 +14,7 @@
-----------------------------------------------------------------------------
module CmmOpt (
+ cmmEliminateDeadBlocks,
cmmMiniInline,
cmmMachOpFold,
cmmLoopifyForC,
@@ -30,10 +31,69 @@ import UniqFM
import Unique
import FastTypes
import Outputable
+import BlockId
import Data.Bits
import Data.Word
import Data.Int
+import Data.Maybe
+import Data.List
+
+import Compiler.Hoopl hiding (Unique)
+
+-- -----------------------------------------------------------------------------
+-- Eliminates dead blocks
+
+{-
+We repeatedly expand the set of reachable blocks until we hit a
+fixpoint, and then prune any blocks that were not in this set. This is
+actually a required optimization, as dead blocks can cause problems
+for invariants in the linear register allocator (and possibly other
+places.)
+-}
+
+-- Deep fold over statements could probably be abstracted out, but it
+-- might not be worth the effort since OldCmm is moribund
+cmmEliminateDeadBlocks :: [CmmBasicBlock] -> [CmmBasicBlock]
+cmmEliminateDeadBlocks [] = []
+cmmEliminateDeadBlocks blocks@(BasicBlock base_id _:_) =
+ let -- Calculate what's reachable from what block
+ reachableMap = foldl' f emptyUFM blocks -- lazy in values
+ where f m (BasicBlock block_id stmts) = addToUFM m block_id (reachableFrom stmts)
+ reachableFrom stmts = foldl stmt [] stmts
+ where
+ stmt m CmmNop = m
+ stmt m (CmmComment _) = m
+ stmt m (CmmAssign _ e) = expr m e
+ stmt m (CmmStore e1 e2) = expr (expr m e1) e2
+ stmt m (CmmCall c _ as _ _) = f (actuals m as) c
+ where f m (CmmCallee e _) = expr m e
+ f m (CmmPrim _) = m
+ stmt m (CmmBranch b) = b:m
+ stmt m (CmmCondBranch e b) = b:(expr m e)
+ stmt m (CmmSwitch e bs) = catMaybes bs ++ expr m e
+ stmt m (CmmJump e as) = expr (actuals m as) e
+ stmt m (CmmReturn as) = actuals m as
+ actuals m as = foldl' (\m h -> expr m (hintlessCmm h)) m as
+ -- We have to do a deep fold into CmmExpr because
+ -- there may be a BlockId in the CmmBlock literal.
+ expr m (CmmLit l) = lit m l
+ expr m (CmmLoad e _) = expr m e
+ expr m (CmmReg _) = m
+ expr m (CmmMachOp _ es) = foldl' expr m es
+ expr m (CmmStackSlot _ _) = m
+ expr m (CmmRegOff _ _) = m
+ lit m (CmmBlock b) = b:m
+ lit m _ = m
+ -- go todo done
+ reachable = go [base_id] (setEmpty :: BlockSet)
+ where go [] m = m
+ go (x:xs) m
+ | setMember x m = go xs m
+ | otherwise = go (add ++ xs) (setInsert x m)
+ where add = fromMaybe (panic "cmmEliminateDeadBlocks: unknown block")
+ (lookupUFM reachableMap x)
+ in filter (\(BasicBlock block_id _) -> setMember block_id reachable) blocks
-- -----------------------------------------------------------------------------
-- The mini-inliner
@@ -115,12 +175,15 @@ cmmMiniInlineStmts uses (stmt@(CmmAssign (CmmLocal (LocalReg u _)) expr) : stmts
cmmMiniInlineStmts uses (stmt:stmts)
= stmt : cmmMiniInlineStmts uses stmts
-lookForInline u expr (stmt : rest)
+lookForInline u expr stmts = lookForInline' u expr regset stmts
+ where regset = foldRegsUsed extendRegSet emptyRegSet expr
+
+lookForInline' u expr regset (stmt : rest)
| Just 1 <- lookupUFM (countUses stmt) u, ok_to_inline
= Just (inlineStmt u expr stmt : rest)
| ok_to_skip
- = case lookForInline u expr rest of
+ = case lookForInline' u expr regset rest of
Nothing -> Nothing
Just stmts -> Just (stmt:stmts)
@@ -137,13 +200,18 @@ lookForInline u expr (stmt : rest)
CmmCall{} -> hasNoGlobalRegs expr
_ -> True
- -- We can skip over assignments to other tempoararies, because we
- -- know that expressions aren't side-effecting and temporaries are
- -- single-assignment.
+ -- Expressions aren't side-effecting. Temporaries may or may not
+ -- be single-assignment depending on the source (the old code
+ -- generator creates single-assignment code, but hand-written Cmm
+ -- and Cmm from the new code generator is not single-assignment.)
+ -- So we do an extra check to make sure that the register being
+ -- changed is not one we were relying on. I don't know how much of a
+ -- performance hit this is (we have to create a regset for every
+ -- instruction.) -- EZY
ok_to_skip = case stmt of
CmmNop -> True
CmmComment{} -> True
- CmmAssign (CmmLocal (LocalReg u' _)) rhs | u' /= u -> True
+ CmmAssign (CmmLocal r@(LocalReg u' _)) rhs | u' /= u && not (r `elemRegSet` regset) -> True
CmmAssign g@(CmmGlobal _) rhs -> not (g `regUsedIn` expr)
_other -> False
diff --git a/compiler/cmm/CmmParse.y b/compiler/cmm/CmmParse.y
index 8c2498e5f8..4dc7e3214f 100644
--- a/compiler/cmm/CmmParse.y
+++ b/compiler/cmm/CmmParse.y
@@ -396,13 +396,15 @@ stmt :: { ExtCode }
| NAME '(' exprs0 ')' ';'
{% stmtMacro $1 $3 }
| 'switch' maybe_range expr '{' arms default '}'
- { doSwitch $2 $3 $5 $6 }
+ { do as <- sequence $5; doSwitch $2 $3 as $6 }
| 'goto' NAME ';'
{ do l <- lookupLabel $2; stmtEC (CmmBranch l) }
| 'jump' expr maybe_actuals ';'
{ do e1 <- $2; e2 <- sequence $3; stmtEC (CmmJump e1 e2) }
| 'return' maybe_actuals ';'
{ do e <- sequence $2; stmtEC (CmmReturn e) }
+ | 'if' bool_expr 'goto' NAME
+ { do l <- lookupLabel $4; cmmRawIf $2 l }
| 'if' bool_expr '{' body '}' else
{ cmmIfThenElse $2 $4 $6 }
@@ -441,12 +443,16 @@ maybe_range :: { Maybe (Int,Int) }
: '[' INT '..' INT ']' { Just (fromIntegral $2, fromIntegral $4) }
| {- empty -} { Nothing }
-arms :: { [([Int],ExtCode)] }
+arms :: { [ExtFCode ([Int],Either BlockId ExtCode)] }
: {- empty -} { [] }
| arm arms { $1 : $2 }
-arm :: { ([Int],ExtCode) }
- : 'case' ints ':' '{' body '}' { ($2, $5) }
+arm :: { ExtFCode ([Int],Either BlockId ExtCode) }
+ : 'case' ints ':' arm_body { do b <- $4; return ($2, b) }
+
+arm_body :: { ExtFCode (Either BlockId ExtCode) }
+ : '{' body '}' { return (Right $2) }
+ | 'goto' NAME ';' { do l <- lookupLabel $2; return (Left l) }
ints :: { [Int] }
: INT { [ fromIntegral $1 ] }
@@ -458,6 +464,8 @@ default :: { Maybe ExtCode }
-- 'default' branches
| {- empty -} { Nothing }
+-- Note: OldCmm doesn't support a first class 'else' statement, though
+-- CmmNode does.
else :: { ExtCode }
: {- empty -} { nopEC }
| 'else' '{' body '}' { $3 }
@@ -952,6 +960,10 @@ cmmIfThenElse cond then_part else_part = do
-- fall through to join
code (labelC join_id)
+cmmRawIf cond then_id = do
+ c <- cond
+ emitCond c then_id
+
-- 'emitCond cond true_id' emits code to test whether the cond is true,
-- branching to true_id if so, and falling through otherwise.
emitCond (BoolTest e) then_id = do
@@ -991,7 +1003,7 @@ emitCond (e1 `BoolAnd` e2) then_id = do
-- optional range on the switch (eg. switch [0..7] {...}), or by
-- the minimum/maximum values from the branches.
-doSwitch :: Maybe (Int,Int) -> ExtFCode CmmExpr -> [([Int],ExtCode)]
+doSwitch :: Maybe (Int,Int) -> ExtFCode CmmExpr -> [([Int],Either BlockId ExtCode)]
-> Maybe ExtCode -> ExtCode
doSwitch mb_range scrut arms deflt
= do
@@ -1018,12 +1030,12 @@ doSwitch mb_range scrut arms deflt
-- ToDo: check for out of range and jump to default if necessary
stmtEC (CmmSwitch expr entries)
where
- emitArm :: ([Int],ExtCode) -> ExtFCode [(Int,BlockId)]
- emitArm (ints,code) = do
+ emitArm :: ([Int],Either BlockId ExtCode) -> ExtFCode [(Int,BlockId)]
+ emitArm (ints,Left blockid) = return [ (i,blockid) | i <- ints ]
+ emitArm (ints,Right code) = do
blockid <- forkLabelledCodeEC code
return [ (i,blockid) | i <- ints ]
-
-- -----------------------------------------------------------------------------
-- Putting it all together
diff --git a/compiler/cmm/CmmProcPoint.hs b/compiler/cmm/CmmProcPoint.hs
index d0d54d909d..fbe979b9ab 100644
--- a/compiler/cmm/CmmProcPoint.hs
+++ b/compiler/cmm/CmmProcPoint.hs
@@ -378,6 +378,8 @@ add_CopyOuts protos procPoints g = foldGraphBlocks mb_copy_out (return mapEmpty)
-- 4. build info tables for the procedures -- and update the info table for
-- the SRTs in the entry procedure as well.
-- Input invariant: A block should only be reachable from a single ProcPoint.
+-- ToDo: use the _ret naming convention that the old code generator
+-- used. -- EZY
splitAtProcPoints :: CLabel -> ProcPointSet-> ProcPointSet -> BlockEnv Status ->
CmmTop -> FuelUniqSM [CmmTop]
splitAtProcPoints entry_label callPPs procPoints procMap
diff --git a/compiler/cmm/CmmSpillReload.hs b/compiler/cmm/CmmSpillReload.hs
index 4e2dd38fd3..17364ad052 100644
--- a/compiler/cmm/CmmSpillReload.hs
+++ b/compiler/cmm/CmmSpillReload.hs
@@ -100,11 +100,11 @@ dualLiveTransfers entry procPoints = mkBTransfer3 first middle last
where check live id x = if id == entry then noLiveOnEntry id (in_regs live) x else x
middle :: CmmNode O O -> DualLive -> DualLive
- middle m live = changeStack updSlots $ changeRegs (xferLiveMiddle m) (changeRegs regs_in live)
- where xferLiveMiddle = case getBTransfer3 xferLive of (_, middle, _) -> middle
- regs_in :: RegSet -> RegSet
- regs_in live = case m of CmmUnsafeForeignCall {} -> emptyRegSet
- _ -> live
+ middle m = changeStack updSlots
+ . changeRegs updRegs
+ where -- Reuse middle of liveness analysis from CmmLive
+ updRegs = case getBTransfer3 xferLive of (_, middle, _) -> middle m
+
updSlots live = foldSlotsUsed reload (foldSlotsDefd spill live m) m
spill live s@(RegSlot r, _, _) = check s $ deleteFromRegSet live r
spill live _ = live
diff --git a/compiler/cmm/CmmStackLayout.hs b/compiler/cmm/CmmStackLayout.hs
index 01543c444e..c0fb6af037 100644
--- a/compiler/cmm/CmmStackLayout.hs
+++ b/compiler/cmm/CmmStackLayout.hs
@@ -13,7 +13,7 @@
module CmmStackLayout
( SlotEnv, liveSlotAnal, liveSlotTransfers, removeLiveSlotDefs
- , layout, manifestSP, igraph, areaBuilder
+ , getSpEntryMap, layout, manifestSP, igraph, areaBuilder
, stubSlotsOnDeath ) -- to help crash early during debugging
where
@@ -195,7 +195,7 @@ liveLastOut env l =
type Set x = Map x ()
data IGraphBuilder n =
Builder { foldNodes :: forall z. SubArea -> (n -> z -> z) -> z -> z
- , _wordsOccupied :: AreaMap -> AreaMap -> n -> [Int]
+ , _wordsOccupied :: AreaSizeMap -> AreaMap -> n -> [Int]
}
areaBuilder :: IGraphBuilder Area
@@ -242,10 +242,13 @@ igraph builder env g = foldr interfere Map.empty (postorderDfs g)
-- what's the highest offset (in bytes) used in each Area?
-- We'll need to allocate that much space for each Area.
+-- Mapping of areas to area sizes (not offsets!)
+type AreaSizeMap = AreaMap
+
-- JD: WHY CAN'T THIS COME FROM THE slot-liveness info?
-getAreaSize :: ByteOff -> CmmGraph -> AreaMap
+getAreaSize :: ByteOff -> CmmGraph -> AreaSizeMap
-- The domain of the returned mapping consists only of Areas
- -- used for (a) variable spill slots, and (b) parameter passing ares for calls
+ -- used for (a) variable spill slots, and (b) parameter passing areas for calls
getAreaSize entry_off g =
foldGraphBlocks (foldBlockNodesF3 (first, add_regslots, last))
(Map.singleton (CallArea Old) entry_off) g
@@ -266,10 +269,11 @@ getAreaSize entry_off g =
-- The 'max' is important. Two calls, to f and g, might share a common
-- continuation (and hence a common CallArea), but their number of overflow
-- parameters might differ.
+ -- EZY: Ought to use insert with combining function...
-- Find the Stack slots occupied by the subarea's conflicts
-conflictSlots :: Ord x => IGPair x -> AreaMap -> AreaMap -> SubArea -> Set Int
+conflictSlots :: Ord x => IGPair x -> AreaSizeMap -> AreaMap -> SubArea -> Set Int
conflictSlots (ig, Builder foldNodes wordsOccupied) areaSize areaMap subarea =
foldNodes subarea foldNode Map.empty
where foldNode n set = Map.foldRightWithKey conflict set $ Map.findWithDefault Map.empty n ig
@@ -278,10 +282,10 @@ conflictSlots (ig, Builder foldNodes wordsOccupied) areaSize areaMap subarea =
liveInSlots areaMap n set = foldr setAdd set (wordsOccupied areaSize areaMap n)
setAdd w s = Map.insert w () s
--- Find any open space on the stack, starting from the offset.
--- If the area is a CallArea or a spill slot for a pointer, then it must
--- be word-aligned.
-freeSlotFrom :: Ord x => IGPair x -> AreaMap -> Int -> AreaMap -> Area -> Int
+-- Find any open space for 'area' on the stack, starting from the
+-- 'offset'. If the area is a CallArea or a spill slot for a pointer,
+-- then it must be word-aligned.
+freeSlotFrom :: Ord x => IGPair x -> AreaSizeMap -> Int -> AreaMap -> Area -> Int
freeSlotFrom ig areaSize offset areaMap area =
let size = Map.lookup area areaSize `orElse` 0
conflicts = conflictSlots ig areaSize areaMap (area, size, size)
@@ -299,11 +303,24 @@ freeSlotFrom ig areaSize offset areaMap area =
in findSpace (align (offset + size)) size
-- Find an open space on the stack, and assign it to the area.
-allocSlotFrom :: Ord x => IGPair x -> AreaMap -> Int -> AreaMap -> Area -> AreaMap
+allocSlotFrom :: Ord x => IGPair x -> AreaSizeMap -> Int -> AreaMap -> Area -> AreaMap
allocSlotFrom ig areaSize from areaMap area =
if Map.member area areaMap then areaMap
else Map.insert area (freeSlotFrom ig areaSize from areaMap area) areaMap
+-- Figure out all of the offsets from the slot location; this will be
+-- non-zero for procpoints.
+type SpEntryMap = BlockEnv Int
+getSpEntryMap :: Int -> CmmGraph -> SpEntryMap
+getSpEntryMap entry_off g@(CmmGraph {g_entry = entry})
+ = foldGraphBlocks add_sp_off (mapInsert entry entry_off emptyBlockMap) g
+ where add_sp_off :: CmmBlock -> BlockEnv Int -> BlockEnv Int
+ add_sp_off b env =
+ case lastNode b of
+ CmmCall {cml_cont=Just succ, cml_ret_args=off} -> mapInsert succ off env
+ CmmForeignCall {succ=succ} -> mapInsert succ wORD_SIZE env
+ _ -> env
+
-- | Greedy stack layout.
-- Compute liveness, build the interference graph, and allocate slots for the areas.
-- We visit each basic block in a (generally) forward order.
@@ -326,12 +343,16 @@ allocSlotFrom ig areaSize from areaMap area =
-- Note: The stack pointer only has to be younger than the youngest live stack slot
-- at proc points. Otherwise, the stack pointer can point anywhere.
-layout :: ProcPointSet -> SlotEnv -> ByteOff -> CmmGraph -> AreaMap
+layout :: ProcPointSet -> SpEntryMap -> SlotEnv -> ByteOff -> CmmGraph -> AreaMap
-- The domain of the returned map includes an Area for EVERY block
-- including each block that is not the successor of a call (ie is not a proc-point)
--- That's how we return the info of what the SP should be at the entry of every block
+-- That's how we return the info of what the SP should be at the entry of every non
+-- procpoint block. However, note that procpoint blocks have their
+-- /slot/ stored, which is not necessarily the value of the SP on entry
+-- to the block (in fact, it probably isn't, due to argument passing).
+-- See [Procpoint Sp offset]
-layout procPoints env entry_off g =
+layout procPoints spEntryMap env entry_off g =
let ig = (igraph areaBuilder env g, areaBuilder)
env' bid = mapLookup bid env `orElse` panic "unknown blockId in igraph"
areaSize = getAreaSize entry_off g
@@ -370,21 +391,87 @@ layout procPoints env entry_off g =
allocMid m areaMap = foldSlotsDefd alloc' (foldSlotsUsed alloc' areaMap m) m
allocLast bid l areaMap =
foldr (setSuccSPs inSp) areaMap' (successors l)
- where inSp = expectJust "sp in" $ Map.lookup (CallArea (Young bid)) areaMap
+ where inSp = slot + spOffset -- [Procpoint Sp offset]
+ -- If it's not in the map, we should use our previous
+ -- calculation unchanged.
+ spOffset = mapLookup bid spEntryMap `orElse` 0
+ slot = expectJust "slot in" $ Map.lookup (CallArea (Young bid)) areaMap
areaMap' = foldSlotsDefd alloc' (foldSlotsUsed alloc' areaMap l) l
alloc' areaMap (a@(RegSlot _), _, _) = allocVarSlot areaMap a
alloc' areaMap _ = areaMap
- initMap = Map.insert (CallArea (Young (g_entry g))) 0 $
- Map.insert (CallArea Old) 0 Map.empty
-
+ initMap = Map.insert (CallArea (Young (g_entry g))) 0
+ . Map.insert (CallArea Old) 0
+ $ Map.empty
+
areaMap = foldl layoutAreas initMap (postorderDfs g)
in -- pprTrace "ProcPoints" (ppr procPoints) $
- -- pprTrace "Area SizeMap" (ppr areaSize) $
- -- pprTrace "Entry SP" (ppr entrySp) $
- -- pprTrace "Area Map" (ppr areaMap) $
+ -- pprTrace "Area SizeMap" (ppr areaSize) $
+ -- pprTrace "Entry offset" (ppr entry_off) $
+ -- pprTrace "Area Map" (ppr areaMap) $
areaMap
+{- Note [Procpoint Sp offset]
+
+The calculation of inSp is a little tricky. (Un)fortunately, if you get
+it wrong, you will get inefficient but correct code. You know you've
+got it wrong if the generated stack pointer bounces up and down for no
+good reason.
+
+Why can't we just set inSp to the location of the slot? (This is what
+the code used to do.) The trouble is when we actually hit the proc
+point the start of the slot will not be the same as the actual Sp due
+to argument passing:
+
+ a:
+ I32[(young<b> + 4)] = cde;
+ // Stack pointer is moved to young end (bottom) of young<b> for call
+ // +-------+
+ // | arg 1 |
+ // +-------+ <- Sp
+ call (I32[foobar::I32])(...) returns to Just b (4) (4) with update frame 4;
+ b:
+ // After call, stack pointer is above the old end (top) of
+ // young<b> (the difference is spOffset)
+ // +-------+ <- Sp
+ // | arg 1 |
+ // +-------+
+
+If we blithely set the Sp to be the same as the slot (the young end of
+young<b>), an adjustment will be necessary when we go to the next block.
+This is wasteful. So, instead, for the next block after a procpoint,
+the actual Sp should be set to the same as the true Sp when we just
+entered the procpoint. Then manifestSP will automatically do the right
+thing.
+
+Questions you may ask:
+
+1. Why don't we need to change the mapping for the procpoint itself?
+ Because manifestSP does its own calculation of the true stack value,
+ manifestSP will notice the discrepancy between the actual stack
+ pointer and the slot start, and adjust all of its memory accesses
+ accordingly. So the only problem is when we adjust the Sp in
+ preparation for the successor block; that's why this code is here and
+ not in setSuccSPs.
+
+2. Why don't we make the procpoint call area and the true offset match
+ up? If we did that, we would never use memory above the true value
+ of the stack pointer, thus wasting all of the stack we used to store
+ arguments. You might think that some clever changes to the slot
+ offsets, using negative offsets, might fix it, but this does not make
+ semantic sense.
+
+3. If manifestSP is already calculating the true stack value, why we can't
+ do this trick inside manifestSP itself? The reason is that if two
+ branches join with inconsistent SPs, one of them has to be fixed: we
+ can't know what the fix should be without already knowing what the
+ chosen location of SP is on the next successor. (This is
+ the "succ already knows incoming SP" case), This calculation cannot
+ be easily done in manifestSP, since it processes the nodes
+ /backwards/. So we need to have figured this out before we hit
+ manifestSP.
+-}
+
-- After determining the stack layout, we can:
-- 1. Replace references to stack Areas with addresses relative to the stack
-- pointer.
@@ -394,8 +481,8 @@ layout procPoints env entry_off g =
-- stack pointer to be younger than the live values on the stack at proc points.
-- 3. Compute the maximum stack offset used in the procedure and replace
-- the stack high-water mark with that offset.
-manifestSP :: AreaMap -> ByteOff -> CmmGraph -> FuelUniqSM CmmGraph
-manifestSP areaMap entry_off g@(CmmGraph {g_entry=entry}) =
+manifestSP :: SpEntryMap -> AreaMap -> ByteOff -> CmmGraph -> FuelUniqSM CmmGraph
+manifestSP spEntryMap areaMap entry_off g@(CmmGraph {g_entry=entry}) =
ofBlockMap entry `liftM` foldl replB (return mapEmpty) (postorderDfs g)
where slot a = -- pprTrace "slot" (ppr a) $
Map.lookup a areaMap `orElse` panic "unallocated Area"
@@ -404,13 +491,6 @@ manifestSP areaMap entry_off g@(CmmGraph {g_entry=entry}) =
sp_high = maxSlot slot g
proc_entry_sp = slot (CallArea Old) + entry_off
- add_sp_off :: CmmBlock -> BlockEnv Int -> BlockEnv Int
- add_sp_off b env =
- case lastNode b of
- CmmCall {cml_cont=Just succ, cml_ret_args=off} -> mapInsert succ off env
- CmmForeignCall {succ=succ} -> mapInsert succ wORD_SIZE env
- _ -> env
- spEntryMap = foldGraphBlocks add_sp_off (mapInsert entry entry_off emptyBlockMap) g
spOffset id = mapLookup id spEntryMap `orElse` 0
sp_on_entry id | id == entry = proc_entry_sp
@@ -427,10 +507,26 @@ manifestSP areaMap entry_off g@(CmmGraph {g_entry=entry}) =
where spIn = sp_on_entry (entryLabel block)
middle spOff m = mapExpDeep (replSlot spOff) m
+ -- XXX there shouldn't be any global registers in the
+ -- CmmCall, so there shouldn't be any slots in
+ -- CmmCall... check that...
last spOff l = mapExpDeep (replSlot spOff) l
replSlot spOff (CmmStackSlot a i) = CmmRegOff (CmmGlobal Sp) (spOff - (slot a + i))
replSlot _ (CmmLit CmmHighStackMark) = -- replacing the high water mark
CmmLit (CmmInt (toInteger (max 0 (sp_high - proc_entry_sp))) (typeWidth bWord))
+ -- Invariant: Sp is always greater than SpLim. Thus, if
+ -- the high water mark is zero, we can optimize away the
+ -- conditional branch. Relies on dead code elimination
+ -- to get rid of the dead GC blocks.
+ -- EZY: Maybe turn this into a guard that checks if a
+ -- statement is stack-check ish? Maybe we should make
+ -- an actual mach-op for it, so there's no chance of
+ -- mixing this up with something else...
+ replSlot _ (CmmMachOp (MO_U_Lt _)
+ [CmmMachOp (MO_Sub _)
+ [ CmmReg (CmmGlobal Sp)
+ , CmmLit (CmmInt 0 _)],
+ CmmReg (CmmGlobal SpLim)]) = CmmLit (CmmInt 0 wordWidth)
replSlot _ e = e
replLast :: MaybeC C (CmmNode C O) -> [CmmNode O O] -> CmmNode O C -> FuelUniqSM [CmmBlock]
diff --git a/compiler/cmm/MkGraph.hs b/compiler/cmm/MkGraph.hs
index 69b481b501..c9e422fb4e 100644
--- a/compiler/cmm/MkGraph.hs
+++ b/compiler/cmm/MkGraph.hs
@@ -24,7 +24,7 @@ module MkGraph
, copyInOflow, copyInSlot, copyOutOflow, copyOutSlot
-- Reexport of needed Cmm stuff
, Convention(..), ForeignConvention(..), ForeignTarget(..)
- , CmmStackInfo(..), CmmTopInfo(..), CmmGraph(..)
+ , CmmStackInfo(..), CmmTopInfo(..), CmmGraph, GenCmmGraph(..)
, Cmm, CmmTop
)
where
diff --git a/compiler/cmm/OptimizationFuel.hs b/compiler/cmm/OptimizationFuel.hs
index 057a96521f..f624c1c7b6 100644
--- a/compiler/cmm/OptimizationFuel.hs
+++ b/compiler/cmm/OptimizationFuel.hs
@@ -6,12 +6,12 @@
-- the optimiser with varying amount of fuel to find out the exact number of
-- steps where a bug is introduced in the output.
module OptimizationFuel
- ( OptimizationFuel, amountOfFuel, tankFilledTo, anyFuelLeft, oneLessFuel
+ ( OptimizationFuel, amountOfFuel, tankFilledTo, unlimitedFuel, anyFuelLeft, oneLessFuel
, OptFuelState, initOptFuelState
, FuelConsumer, FuelUsingMonad, FuelState
, fuelGet, fuelSet, lastFuelPass, setFuelPass
, fuelExhausted, fuelDec1, tryWithFuel
- , runFuelIO, fuelConsumingPass
+ , runFuelIO, runInfiniteFuelIO, fuelConsumingPass
, FuelUniqSM
, liftUniq
)
@@ -21,9 +21,7 @@ import Data.IORef
import Control.Monad
import StaticFlags (opt_Fuel)
import UniqSupply
-#ifdef DEBUG
import Panic
-#endif
import Compiler.Hoopl
import Compiler.Hoopl.GHC (getFuel, setFuel)
@@ -51,8 +49,8 @@ amountOfFuel :: OptimizationFuel -> Int
anyFuelLeft :: OptimizationFuel -> Bool
oneLessFuel :: OptimizationFuel -> OptimizationFuel
+unlimitedFuel :: OptimizationFuel
-#ifdef DEBUG
newtype OptimizationFuel = OptimizationFuel Int
deriving Show
@@ -61,16 +59,7 @@ amountOfFuel (OptimizationFuel f) = f
anyFuelLeft (OptimizationFuel f) = f > 0
oneLessFuel (OptimizationFuel f) = ASSERT (f > 0) (OptimizationFuel (f - 1))
-#else
--- type OptimizationFuel = State# () -- would like this, but it won't work
-data OptimizationFuel = OptimizationFuel
- deriving Show
-tankFilledTo _ = OptimizationFuel
-amountOfFuel _ = maxBound
-
-anyFuelLeft _ = True
-oneLessFuel _ = OptimizationFuel
-#endif
+unlimitedFuel = OptimizationFuel infiniteFuel
data FuelState = FuelState { fs_fuel :: OptimizationFuel, fs_lastpass :: String }
newtype FuelUniqSM a = FUSM { unFUSM :: FuelState -> UniqSM (a, FuelState) }
@@ -92,6 +81,16 @@ runFuelIO fs (FUSM f) =
writeIORef (fuel_ref fs) fuel'
return a
+-- ToDo: Do we need the pass_ref when we are doing infinite fueld
+-- transformations?
+runInfiniteFuelIO :: OptFuelState -> FuelUniqSM a -> IO a
+runInfiniteFuelIO fs (FUSM f) =
+ do pass <- readIORef (pass_ref fs)
+ u <- mkSplitUniqSupply 'u'
+ let (a, FuelState _ pass') = initUs_ u $ f (FuelState unlimitedFuel pass)
+ writeIORef (pass_ref fs) pass'
+ return a
+
instance Monad FuelUniqSM where
FUSM f >>= k = FUSM (\s -> f s >>= \(a, s') -> unFUSM (k a) s')
return a = FUSM (\s -> return (a, s))
diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs
index 10c9f18310..aa7d914253 100644
--- a/compiler/cmm/PprC.hs
+++ b/compiler/cmm/PprC.hs
@@ -50,6 +50,7 @@ import Outputable
import Constants
import BasicTypes
import CLabel
+import Util
-- The rest
import Data.List
@@ -63,10 +64,6 @@ import Data.Word
import Data.Array.ST
import Control.Monad.ST
-#if x86_64_TARGET_ARCH
-import StaticFlags ( opt_Unregisterised )
-#endif
-
#if defined(alpha_TARGET_ARCH) || defined(mips_TARGET_ARCH) || defined(mipsel_TARGET_ARCH) || defined(arm_TARGET_ARCH)
#define BEWARE_LOAD_STORE_ALIGNMENT
#endif
@@ -104,18 +101,19 @@ pprTop (CmmProc info clbl (ListGraph blocks)) =
then pprDataExterns info $$
pprWordArray (entryLblToInfoLbl clbl) info
else empty) $$
- (case blocks of
- [] -> empty
- -- the first block doesn't get a label:
- (BasicBlock _ stmts : rest) -> vcat [
+ (vcat [
blankLine,
extern_decls,
(if (externallyVisibleCLabel clbl)
then mkFN_ else mkIF_) (pprCLabel clbl) <+> lbrace,
nest 8 temp_decls,
nest 8 mkFB_,
- nest 8 (vcat (map pprStmt stmts)) $$
- vcat (map pprBBlock rest),
+ case blocks of
+ [] -> empty
+ -- the first block doesn't get a label:
+ (BasicBlock _ stmts : rest) ->
+ nest 8 (vcat (map pprStmt stmts)) $$
+ vcat (map pprBBlock rest),
nest 8 mkFE_,
rbrace ]
)
@@ -818,17 +816,6 @@ pprCall ppr_fn cconv results args _
| otherwise
=
-#if x86_64_TARGET_ARCH
- -- HACK around gcc optimisations.
- -- x86_64 needs a __DISCARD__() here, to create a barrier between
- -- putting the arguments into temporaries and passing the arguments
- -- to the callee, because the argument expressions may refer to
- -- machine registers that are also used for passing arguments in the
- -- C calling convention.
- (if (not opt_Unregisterised)
- then ptext (sLit "__DISCARD__();")
- else empty) $$
-#endif
ppr_assign results (ppr_fn <> parens (commafy (map pprArg args))) <> semi
where
ppr_assign [] rhs = rhs
@@ -1022,18 +1009,6 @@ machRep_S_CType _ = panic "machRep_S_CType"
pprStringInCStyle :: [Word8] -> SDoc
pprStringInCStyle s = doubleQuotes (text (concatMap charToC s))
-charToC :: Word8 -> String
-charToC w =
- case chr (fromIntegral w) of
- '\"' -> "\\\""
- '\'' -> "\\\'"
- '\\' -> "\\\\"
- c | c >= ' ' && c <= '~' -> [c]
- | otherwise -> ['\\',
- chr (ord '0' + ord c `div` 64),
- chr (ord '0' + ord c `div` 8 `mod` 8),
- chr (ord '0' + ord c `mod` 8)]
-
-- ---------------------------------------------------------------------------
-- Initialising static objects with floating-point numbers. We can't
-- just emit the floating point number, because C will cast it to an int
diff --git a/compiler/cmm/cmm-notes b/compiler/cmm/cmm-notes
index 0852711f96..e787f18b17 100644
--- a/compiler/cmm/cmm-notes
+++ b/compiler/cmm/cmm-notes
@@ -15,14 +15,11 @@ Things to do:
This will fix the spill before stack check problem but only really as a side
effect. A 'real fix' probably requires making the spiller know about sp checks.
- - There is some silly stuff happening with the Sp. We end up with code like:
- Sp = Sp + 8; R1 = _vwf::I64; Sp = Sp -8
- Seems to be perhaps caused by the issue above but also maybe a optimisation
- pass needed?
+ EZY: I don't understand this comment. David Terei, can you clarify?
- - Proc pass all arguments on the stack, adding more code and slowing down things
- a lot. We either need to fix this or even better would be to get rid of
- proc points.
+ - Proc points pass all arguments on the stack, adding more code and
+ slowing down things a lot. We either need to fix this or even better
+ would be to get rid of proc points.
- CmmInfo.cmmToRawCmm uses Old.Cmm, so it is called after converting Cmm.Cmm to
Old.Cmm. We should abstract it to work on both representations, it needs only to
@@ -32,7 +29,7 @@ Things to do:
we could convert codeGen/StgCmm* clients to the Hoopl's semantics?
It's all deeply unsatisfactory.
- - Improve preformance of Hoopl.
+ - Improve performance of Hoopl.
A nofib comparison of -fasm vs -fnewcodegen nofib compilation parameters
(using the same ghc-cmm branch +libraries compiled by the old codegenerator)
@@ -50,6 +47,9 @@ Things to do:
So we generate a bit better code, but it takes us longer!
+ EZY: Also importantly, Hoopl uses dramatically more memory than the
+ old code generator.
+
- Are all blockToNodeList and blockOfNodeList really needed? Maybe we could
splice blocks instead?
@@ -57,7 +57,7 @@ Things to do:
a block catenation function would be probably nicer than blockToNodeList
/ blockOfNodeList combo.
- - loweSafeForeignCall seems too lowlevel. Just use Dataflow. After that
+ - lowerSafeForeignCall seems too lowlevel. Just use Dataflow. After that
delete splitEntrySeq from HooplUtils.
- manifestSP seems to touch a lot of the graph representation. It is
@@ -76,6 +76,9 @@ Things to do:
calling convention, and the code for calling foreign calls is generated
- AsmCodeGen has a generic Cmm optimiser; move this into new pipeline
+ EZY (2011-04-16): The mini-inliner has been generalized and ported,
+ but the constant folding and other optimizations need to still be
+ ported.
- AsmCodeGen has post-native-cg branch eliminator (shortCutBranches);
we ultimately want to share this with the Cmm branch eliminator.
@@ -113,7 +116,7 @@ Things to do:
- See "CAFs" below; we want to totally refactor the way SRTs are calculated
- Pull out Areas into its own module
- Parameterise AreaMap
+ Parameterise AreaMap (note there are type synonyms in CmmStackLayout!)
Add ByteWidth = Int
type SubArea = (Area, ByteOff, ByteWidth)
ByteOff should not be defined in SMRep -- that is too high up the hierarchy
@@ -293,8 +296,8 @@ cpsTop:
insert spills/reloads across
LastCalls, and
Branches to proc-points
- Now sink those reloads:
- - CmmSpillReload.insertLateReloads
+ Now sink those reloads (and other instructions):
+ - CmmSpillReload.rewriteAssignments
- CmmSpillReload.removeDeadAssignmentsAndReloads
* CmmStackLayout.stubSlotsOnDeath
@@ -344,7 +347,7 @@ to J that way. This is an awkward choice. (We think that we currently
never pass variables to join points via arguments.)
Furthermore, there is *no way* to pass q to J in a register (other
-than a paramter register).
+than a parameter register).
What we want is to do register allocation across the whole caboodle.
Then we could drop all the code that deals with the above awkward