diff options
Diffstat (limited to 'compiler/cmm')
-rw-r--r-- | compiler/cmm/CLabel.hs | 94 | ||||
-rw-r--r-- | compiler/cmm/Cmm.hs | 21 | ||||
-rw-r--r-- | compiler/cmm/CmmCPS.hs | 29 | ||||
-rw-r--r-- | compiler/cmm/CmmExpr.hs | 6 | ||||
-rw-r--r-- | compiler/cmm/CmmLint.hs | 16 | ||||
-rw-r--r-- | compiler/cmm/CmmLive.hs | 4 | ||||
-rw-r--r-- | compiler/cmm/CmmNode.hs | 2 | ||||
-rw-r--r-- | compiler/cmm/CmmOpt.hs | 80 | ||||
-rw-r--r-- | compiler/cmm/CmmParse.y | 28 | ||||
-rw-r--r-- | compiler/cmm/CmmProcPoint.hs | 2 | ||||
-rw-r--r-- | compiler/cmm/CmmSpillReload.hs | 10 | ||||
-rw-r--r-- | compiler/cmm/CmmStackLayout.hs | 154 | ||||
-rw-r--r-- | compiler/cmm/MkGraph.hs | 2 | ||||
-rw-r--r-- | compiler/cmm/OptimizationFuel.hs | 29 | ||||
-rw-r--r-- | compiler/cmm/PprC.hs | 41 | ||||
-rw-r--r-- | compiler/cmm/cmm-notes | 29 |
16 files changed, 329 insertions, 218 deletions
diff --git a/compiler/cmm/CLabel.hs b/compiler/cmm/CLabel.hs index 4d9596197e..3451c7d5a9 100644 --- a/compiler/cmm/CLabel.hs +++ b/compiler/cmm/CLabel.hs @@ -51,9 +51,7 @@ module CLabel ( mkAsmTempLabel, - mkModuleInitLabel, - mkPlainModuleInitLabel, - mkModuleInitTableLabel, + mkPlainModuleInitLabel, mkSplitMarkerLabel, mkDirty_MUT_VAR_Label, @@ -70,10 +68,7 @@ module CLabel ( mkRtsPrimOpLabel, mkRtsSlowTickyCtrLabel, - moduleRegdLabel, - moduleRegTableLabel, - - mkSelectorInfoLabel, + mkSelectorInfoLabel, mkSelectorEntryLabel, mkCmmInfoLabel, @@ -102,7 +97,6 @@ module CLabel ( mkDeadStripPreventer, mkHpcTicksLabel, - mkHpcModuleNameLabel, hasCAF, infoLblToEntryLbl, entryLblToInfoLbl, cvtToClosureLbl, cvtToSRTLbl, @@ -202,23 +196,9 @@ data CLabel | StringLitLabel {-# UNPACK #-} !Unique - | ModuleInitLabel - Module -- the module name - String -- its "way" - -- at some point we might want some kind of version number in - -- the module init label, to guard against compiling modules in - -- the wrong order. We can't use the interface file version however, - -- because we don't always recompile modules which depend on a module - -- whose version has changed. - - | PlainModuleInitLabel -- without the version & way info + | PlainModuleInitLabel -- without the version & way info Module - | ModuleInitTableLabel -- table of imported modules to init - Module - - | ModuleRegdLabel - | CC_Label CostCentre | CCS_Label CostCentreStack @@ -242,9 +222,6 @@ data CLabel -- | Per-module table of tick locations | HpcTicksLabel Module - -- | Per-module name of the module for Hpc - | HpcModuleNameLabel - -- | Label of an StgLargeSRT | LargeSRTLabel {-# UNPACK #-} !Unique @@ -490,7 +467,6 @@ mkRtsSlowTickyCtrLabel pat = RtsLabel (RtsSlowTickyCtr pat) -- Constructing Code Coverage Labels mkHpcTicksLabel = HpcTicksLabel -mkHpcModuleNameLabel = HpcModuleNameLabel -- Constructing labels used for dynamic linking @@ -515,19 +491,9 @@ mkStringLitLabel = StringLitLabel mkAsmTempLabel :: Uniquable a => a -> CLabel mkAsmTempLabel a = AsmTempLabel (getUnique a) -mkModuleInitLabel :: Module -> String -> CLabel -mkModuleInitLabel mod way = ModuleInitLabel mod way - mkPlainModuleInitLabel :: Module -> CLabel mkPlainModuleInitLabel mod = PlainModuleInitLabel mod -mkModuleInitTableLabel :: Module -> CLabel -mkModuleInitTableLabel mod = ModuleInitTableLabel mod - -moduleRegdLabel = ModuleRegdLabel -moduleRegTableLabel = ModuleInitTableLabel - - -- ----------------------------------------------------------------------------- -- Converting between info labels and entry/ret labels. @@ -591,10 +557,7 @@ needsCDecl (LargeSRTLabel _) = False needsCDecl (LargeBitmapLabel _) = False needsCDecl (IdLabel _ _ _) = True needsCDecl (CaseLabel _ _) = True -needsCDecl (ModuleInitLabel _ _) = True -needsCDecl (PlainModuleInitLabel _) = True -needsCDecl (ModuleInitTableLabel _) = True -needsCDecl ModuleRegdLabel = False +needsCDecl (PlainModuleInitLabel _) = True needsCDecl (StringLitLabel _) = False needsCDecl (AsmTempLabel _) = False @@ -612,7 +575,6 @@ needsCDecl l@(ForeignLabel{}) = not (isMathFun l) needsCDecl (CC_Label _) = True needsCDecl (CCS_Label _) = True needsCDecl (HpcTicksLabel _) = True -needsCDecl HpcModuleNameLabel = False -- | Check whether a label is a local temporary for native code generation @@ -630,7 +592,7 @@ maybeAsmTemp _ = Nothing -- | Check whether a label corresponds to a C function that has -- a prototype in a system header somehere, or is built-in --- to the C compiler. For these labels we abovoid generating our +-- to the C compiler. For these labels we avoid generating our -- own C prototypes. isMathFun :: CLabel -> Bool isMathFun (ForeignLabel fs _ _ _) = fs `elementOfUniqSet` math_funs @@ -725,11 +687,8 @@ externallyVisibleCLabel :: CLabel -> Bool -- not C "static" externallyVisibleCLabel (CaseLabel _ _) = False externallyVisibleCLabel (StringLitLabel _) = False externallyVisibleCLabel (AsmTempLabel _) = False -externallyVisibleCLabel (ModuleInitLabel _ _) = True externallyVisibleCLabel (PlainModuleInitLabel _)= True -externallyVisibleCLabel (ModuleInitTableLabel _)= False -externallyVisibleCLabel ModuleRegdLabel = False -externallyVisibleCLabel (RtsLabel _) = True +externallyVisibleCLabel (RtsLabel _) = True externallyVisibleCLabel (CmmLabel _ _ _) = True externallyVisibleCLabel (ForeignLabel{}) = True externallyVisibleCLabel (IdLabel name _ _) = isExternalName name @@ -737,8 +696,7 @@ externallyVisibleCLabel (CC_Label _) = True externallyVisibleCLabel (CCS_Label _) = True externallyVisibleCLabel (DynamicLinkerLabel _ _) = False externallyVisibleCLabel (HpcTicksLabel _) = True -externallyVisibleCLabel HpcModuleNameLabel = False -externallyVisibleCLabel (LargeBitmapLabel _) = False +externallyVisibleCLabel (LargeBitmapLabel _) = False externallyVisibleCLabel (LargeSRTLabel _) = False -- ----------------------------------------------------------------------------- @@ -777,9 +735,7 @@ labelType (RtsLabel (RtsApInfoTable _ _)) = DataLabel labelType (RtsLabel (RtsApFast _)) = CodeLabel labelType (CaseLabel _ CaseReturnInfo) = DataLabel labelType (CaseLabel _ _) = CodeLabel -labelType (ModuleInitLabel _ _) = CodeLabel labelType (PlainModuleInitLabel _) = CodeLabel -labelType (ModuleInitTableLabel _) = DataLabel labelType (LargeSRTLabel _) = DataLabel labelType (LargeBitmapLabel _) = DataLabel labelType (ForeignLabel _ _ _ IsFunction) = CodeLabel @@ -837,10 +793,8 @@ labelDynamic this_pkg lbl = CmmLabel pkg _ _ -> True #endif - ModuleInitLabel m _ -> not opt_Static && this_pkg /= (modulePackageId m) PlainModuleInitLabel m -> not opt_Static && this_pkg /= (modulePackageId m) - ModuleInitTableLabel m -> not opt_Static && this_pkg /= (modulePackageId m) - + -- Note that DynamicLinkerLabels do NOT require dynamic linking themselves. _ -> False @@ -896,8 +850,8 @@ instance Outputable CLabel where pprCLabel :: CLabel -> SDoc -#if ! OMIT_NATIVE_CODEGEN pprCLabel (AsmTempLabel u) + | cGhcWithNativeCodeGen == "YES" = getPprStyle $ \ sty -> if asmStyle sty then ptext asmTempLabelPrefix <> pprUnique u @@ -905,23 +859,22 @@ pprCLabel (AsmTempLabel u) char '_' <> pprUnique u pprCLabel (DynamicLinkerLabel info lbl) + | cGhcWithNativeCodeGen == "YES" = pprDynamicLinkerAsmLabel info lbl pprCLabel PicBaseLabel + | cGhcWithNativeCodeGen == "YES" = ptext (sLit "1b") pprCLabel (DeadStripPreventer lbl) + | cGhcWithNativeCodeGen == "YES" = pprCLabel lbl <> ptext (sLit "_dsp") -#endif -pprCLabel lbl = -#if ! OMIT_NATIVE_CODEGEN - getPprStyle $ \ sty -> - if asmStyle sty then - maybe_underscore (pprAsmCLbl lbl) - else -#endif - pprCLbl lbl +pprCLabel lbl + = getPprStyle $ \ sty -> + if cGhcWithNativeCodeGen == "YES" && asmStyle sty + then maybe_underscore (pprAsmCLbl lbl) + else pprCLbl lbl maybe_underscore doc | underscorePrefix = pp_cSEP <> doc @@ -1008,9 +961,6 @@ pprCLbl (RtsLabel (RtsPrimOp primop)) pprCLbl (RtsLabel (RtsSlowTickyCtr pat)) = ptext (sLit "SLOW_CALL_") <> text pat <> ptext (sLit "_ctr") -pprCLbl ModuleRegdLabel - = ptext (sLit "_module_registered") - pprCLbl (ForeignLabel str _ _ _) = ftext str @@ -1019,22 +969,12 @@ pprCLbl (IdLabel name cafs flavor) = ppr name <> ppIdFlavor flavor pprCLbl (CC_Label cc) = ppr cc pprCLbl (CCS_Label ccs) = ppr ccs -pprCLbl (ModuleInitLabel mod way) - = ptext (sLit "__stginit_") <> ppr mod - <> char '_' <> text way - pprCLbl (PlainModuleInitLabel mod) = ptext (sLit "__stginit_") <> ppr mod -pprCLbl (ModuleInitTableLabel mod) - = ptext (sLit "__stginittable_") <> ppr mod - pprCLbl (HpcTicksLabel mod) = ptext (sLit "_hpc_tickboxes_") <> ppr mod <> ptext (sLit "_hpc") -pprCLbl HpcModuleNameLabel - = ptext (sLit "_hpc_module_name_str") - ppIdFlavor :: IdLabelInfo -> SDoc ppIdFlavor x = pp_cSEP <> (case x of diff --git a/compiler/cmm/Cmm.hs b/compiler/cmm/Cmm.hs index 2e9f952f7b..54b4b11662 100644 --- a/compiler/cmm/Cmm.hs +++ b/compiler/cmm/Cmm.hs @@ -9,10 +9,11 @@ #endif module Cmm - ( CmmGraph(..), CmmBlock + ( CmmGraph, GenCmmGraph(..), CmmBlock , CmmStackInfo(..), CmmTopInfo(..), Cmm, CmmTop , CmmReplGraph, CmmFwdRewrite, CmmBwdRewrite + , modifyGraph , lastNode, replaceLastNode, insertBetween , ofBlockMap, toBlockMap, insertBlock , ofBlockList, toBlockList, bodyToBlockList @@ -41,7 +42,8 @@ import Panic ------------------------------------------------- -- CmmBlock, CmmGraph and Cmm -data CmmGraph = CmmGraph { g_entry :: BlockId, g_graph :: Graph CmmNode C C } +type CmmGraph = GenCmmGraph CmmNode +data GenCmmGraph n = CmmGraph { g_entry :: BlockId, g_graph :: Graph n C C } type CmmBlock = Block CmmNode C C type CmmReplGraph e x = FuelUniqSM (Maybe (Graph CmmNode e x)) @@ -56,6 +58,9 @@ type CmmTop = GenCmmTop CmmStatic CmmTopInfo CmmGraph ------------------------------------------------- -- Manipulating CmmGraphs +modifyGraph :: (Graph n C C -> Graph n' C C) -> GenCmmGraph n -> GenCmmGraph n' +modifyGraph f g = CmmGraph {g_entry=g_entry g, g_graph=f (g_graph g)} + toBlockMap :: CmmGraph -> LabelMap CmmBlock toBlockMap (CmmGraph {g_graph=GMany NothingO body NothingO}) = body @@ -150,26 +155,26 @@ insertBetween b ms succId = insert $ lastNode b -- Running dataflow analysis and/or rewrites -- Constructing forward and backward analysis-only pass -analFwd :: Monad m => DataflowLattice f -> FwdTransfer CmmNode f -> FwdPass m CmmNode f -analBwd :: Monad m => DataflowLattice f -> BwdTransfer CmmNode f -> BwdPass m CmmNode f +analFwd :: Monad m => DataflowLattice f -> FwdTransfer n f -> FwdPass m n f +analBwd :: Monad m => DataflowLattice f -> BwdTransfer n f -> BwdPass m n f analFwd lat xfer = analRewFwd lat xfer noFwdRewrite analBwd lat xfer = analRewBwd lat xfer noBwdRewrite -- Constructing forward and backward analysis + rewrite pass -analRewFwd :: Monad m => DataflowLattice f -> FwdTransfer CmmNode f -> FwdRewrite m CmmNode f -> FwdPass m CmmNode f -analRewBwd :: Monad m => DataflowLattice f -> BwdTransfer CmmNode f -> BwdRewrite m CmmNode f -> BwdPass m CmmNode f +analRewFwd :: Monad m => DataflowLattice f -> FwdTransfer n f -> FwdRewrite m n f -> FwdPass m n f +analRewBwd :: Monad m => DataflowLattice f -> BwdTransfer n f -> BwdRewrite m n f -> BwdPass m n f analRewFwd lat xfer rew = FwdPass {fp_lattice = lat, fp_transfer = xfer, fp_rewrite = rew} analRewBwd lat xfer rew = BwdPass {bp_lattice = lat, bp_transfer = xfer, bp_rewrite = rew} -- Running forward and backward dataflow analysis + optional rewrite -dataflowPassFwd :: CmmGraph -> [(BlockId, f)] -> FwdPass FuelUniqSM CmmNode f -> FuelUniqSM (CmmGraph, BlockEnv f) +dataflowPassFwd :: NonLocal n => GenCmmGraph n -> [(BlockId, f)] -> FwdPass FuelUniqSM n f -> FuelUniqSM (GenCmmGraph n, BlockEnv f) dataflowPassFwd (CmmGraph {g_entry=entry, g_graph=graph}) facts fwd = do (graph, facts, NothingO) <- analyzeAndRewriteFwd fwd (JustC [entry]) graph (mkFactBase (fp_lattice fwd) facts) return (CmmGraph {g_entry=entry, g_graph=graph}, facts) -dataflowPassBwd :: CmmGraph -> [(BlockId, f)] -> BwdPass FuelUniqSM CmmNode f -> FuelUniqSM (CmmGraph, BlockEnv f) +dataflowPassBwd :: NonLocal n => GenCmmGraph n -> [(BlockId, f)] -> BwdPass FuelUniqSM n f -> FuelUniqSM (GenCmmGraph n, BlockEnv f) dataflowPassBwd (CmmGraph {g_entry=entry, g_graph=graph}) facts bwd = do (graph, facts, NothingO) <- analyzeAndRewriteBwd bwd (JustC [entry]) graph (mkFactBase (bp_lattice bwd) facts) return (CmmGraph {g_entry=entry, g_graph=graph}, facts) diff --git a/compiler/cmm/CmmCPS.hs b/compiler/cmm/CmmCPS.hs index 372562cfca..aad00371a1 100644 --- a/compiler/cmm/CmmCPS.hs +++ b/compiler/cmm/CmmCPS.hs @@ -71,10 +71,10 @@ cpsTop _ p@(CmmData {}) = return ([], [(Map.empty, p)]) cpsTop hsc_env (CmmProc h@(TopInfo {stack_info=StackInfo {arg_space=entry_off}}) l g) = do -- Why bother doing it this early? - -- g <- dual_rewrite Opt_D_dump_cmmz "spills and reloads" + -- g <- dual_rewrite run Opt_D_dump_cmmz "spills and reloads" -- (dualLivenessWithInsertion callPPs) g -- g <- run $ insertLateReloads g -- Duplicate reloads just before uses - -- g <- dual_rewrite Opt_D_dump_cmmz "Dead Assignment Elimination" + -- g <- dual_rewrite runOptimization Opt_D_dump_cmmz "Dead Assignment Elimination" -- (removeDeadAssignmentsAndReloads callPPs) g dump Opt_D_dump_cmmz "Pre common block elimination" g g <- return $ elimCommonBlocks g @@ -91,16 +91,16 @@ cpsTop hsc_env (CmmProc h@(TopInfo {stack_info=StackInfo {arg_space=entry_off}}) ----------- Spills and reloads ------------------- g <- -- pprTrace "pre Spills" (ppr g) $ - dual_rewrite Opt_D_dump_cmmz "spills and reloads" + dual_rewrite run Opt_D_dump_cmmz "spills and reloads" (dualLivenessWithInsertion procPoints) g -- Insert spills at defns; reloads at return points g <- -- pprTrace "pre insertLateReloads" (ppr g) $ - run $ insertLateReloads g -- Duplicate reloads just before uses + runOptimization $ insertLateReloads g -- Duplicate reloads just before uses dump Opt_D_dump_cmmz "Post late reloads" g g <- -- pprTrace "post insertLateReloads" (ppr g) $ - dual_rewrite Opt_D_dump_cmmz "Dead Assignment Elimination" + dual_rewrite runOptimization Opt_D_dump_cmmz "Dead Assignment Elimination" (removeDeadAssignmentsAndReloads procPoints) g -- Remove redundant reloads (and any other redundant asst) @@ -112,12 +112,13 @@ cpsTop hsc_env (CmmProc h@(TopInfo {stack_info=StackInfo {arg_space=entry_off}}) --------------- Stack layout ---------------- slotEnv <- run $ liveSlotAnal g + let spEntryMap = getSpEntryMap entry_off g mbpprTrace "live slot analysis results: " (ppr slotEnv) $ return () - let areaMap = layout procPoints slotEnv entry_off g + let areaMap = layout procPoints spEntryMap slotEnv entry_off g mbpprTrace "areaMap" (ppr areaMap) $ return () ------------ Manifest the stack pointer -------- - g <- run $ manifestSP areaMap entry_off g + g <- run $ manifestSP spEntryMap areaMap entry_off g dump Opt_D_dump_cmmz "after manifestSP" g -- UGH... manifestSP can require updates to the procPointMap. -- We can probably do something quicker here for the update... @@ -146,12 +147,16 @@ cpsTop hsc_env (CmmProc h@(TopInfo {stack_info=StackInfo {arg_space=entry_off}}) where dflags = hsc_dflags hsc_env mbpprTrace x y z = if dopt Opt_D_dump_cmmz dflags then pprTrace x y z else z dump f txt g = dumpIfSet_dyn dflags f txt (ppr g) - - run = runFuelIO (hsc_OptFuel hsc_env) - - dual_rewrite flag txt pass g = + -- Runs a required transformation/analysis + run = runInfiniteFuelIO (hsc_OptFuel hsc_env) + -- Runs an optional transformation/analysis (and should + -- thus be subject to optimization fuel) + runOptimization = runFuelIO (hsc_OptFuel hsc_env) + + -- pass 'run' or 'runOptimization' for 'r' + dual_rewrite r flag txt pass g = do dump flag ("Pre " ++ txt) g - g <- run $ pass g + g <- r $ pass g dump flag ("Post " ++ txt) $ g return g diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs index 3ae2996213..55a5b73ac5 100644 --- a/compiler/cmm/CmmExpr.hs +++ b/compiler/cmm/CmmExpr.hs @@ -42,8 +42,8 @@ data CmmExpr | CmmRegOff CmmReg Int -- CmmRegOff reg i -- ** is shorthand only, meaning ** - -- CmmMachOp (MO_S_Add rep (CmmReg reg) (CmmLit (CmmInt i rep))) - -- where rep = cmmRegType reg + -- CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt (fromIntegral i) rep)] + -- where rep = typeWidth (cmmRegType reg) instance Eq CmmExpr where -- Equality ignores the types CmmLit l1 == CmmLit l2 = l1==l2 @@ -124,6 +124,8 @@ cmmExprType (CmmReg reg) = cmmRegType reg cmmExprType (CmmMachOp op args) = machOpResultType op (map cmmExprType args) cmmExprType (CmmRegOff reg _) = cmmRegType reg cmmExprType (CmmStackSlot _ _) = bWord -- an address +-- Careful though: what is stored at the stack slot may be bigger than +-- an address cmmLitType :: CmmLit -> CmmType cmmLitType (CmmInt _ width) = cmmBits width diff --git a/compiler/cmm/CmmLint.hs b/compiler/cmm/CmmLint.hs index 95b1eef6a3..32fead337e 100644 --- a/compiler/cmm/CmmLint.hs +++ b/compiler/cmm/CmmLint.hs @@ -24,7 +24,6 @@ import OldPprCmm() import Constants import FastString -import Control.Monad import Data.Maybe -- ----------------------------------------------------------------------------- @@ -70,8 +69,10 @@ lintCmmBlock labels (BasicBlock id stmts) lintCmmExpr :: CmmExpr -> CmmLint CmmType lintCmmExpr (CmmLoad expr rep) = do _ <- lintCmmExpr expr - when (widthInBytes (typeWidth rep) >= wORD_SIZE) $ - cmmCheckWordAddress expr + -- Disabled, if we have the inlining phase before the lint phase, + -- we can have funny offsets due to pointer tagging. -- EZY + -- when (widthInBytes (typeWidth rep) >= wORD_SIZE) $ + -- cmmCheckWordAddress expr return rep lintCmmExpr expr@(CmmMachOp op args) = do tys <- mapM lintCmmExpr args @@ -99,14 +100,14 @@ isOffsetOp _ = False -- This expression should be an address from which a word can be loaded: -- check for funny-looking sub-word offsets. -cmmCheckWordAddress :: CmmExpr -> CmmLint () -cmmCheckWordAddress e@(CmmMachOp op [arg, CmmLit (CmmInt i _)]) +_cmmCheckWordAddress :: CmmExpr -> CmmLint () +_cmmCheckWordAddress e@(CmmMachOp op [arg, CmmLit (CmmInt i _)]) | isOffsetOp op && notNodeReg arg && i `rem` fromIntegral wORD_SIZE /= 0 = cmmLintDubiousWordOffset e -cmmCheckWordAddress e@(CmmMachOp op [CmmLit (CmmInt i _), arg]) +_cmmCheckWordAddress e@(CmmMachOp op [CmmLit (CmmInt i _), arg]) | isOffsetOp op && notNodeReg arg && i `rem` fromIntegral wORD_SIZE /= 0 = cmmLintDubiousWordOffset e -cmmCheckWordAddress _ +_cmmCheckWordAddress _ = return () -- No warnings for unaligned arithmetic with the node register, @@ -152,6 +153,7 @@ lintTarget (CmmPrim {}) = return () checkCond :: CmmExpr -> CmmLint () checkCond (CmmMachOp mop _) | isComparisonMachOp mop = return () +checkCond (CmmLit (CmmInt x t)) | x == 0 || x == 1, t == wordWidth = return () -- constant values checkCond expr = cmmLintErr (hang (text "expression is not a conditional:") 2 (ppr expr)) diff --git a/compiler/cmm/CmmLive.hs b/compiler/cmm/CmmLive.hs index 78867b0ce3..c87a3a9b33 100644 --- a/compiler/cmm/CmmLive.hs +++ b/compiler/cmm/CmmLive.hs @@ -63,12 +63,12 @@ gen a live = foldRegsUsed extendRegSet live a kill :: DefinerOfLocalRegs a => a -> RegSet -> RegSet kill a live = foldRegsDefd delOneFromUniqSet live a +-- Testing! xferLive :: BwdTransfer CmmNode CmmLive xferLive = mkBTransfer3 fst mid lst where fst _ f = f mid :: CmmNode O O -> CmmLive -> CmmLive - mid n f = gen_kill n $ case n of CmmUnsafeForeignCall {} -> emptyRegSet - _ -> f + mid n f = gen_kill n f lst :: CmmNode O C -> FactBase CmmLive -> CmmLive lst n f = gen_kill n $ case n of CmmCall {} -> emptyRegSet CmmForeignCall {} -> emptyRegSet diff --git a/compiler/cmm/CmmNode.hs b/compiler/cmm/CmmNode.hs index 93564ac946..e67321c0b0 100644 --- a/compiler/cmm/CmmNode.hs +++ b/compiler/cmm/CmmNode.hs @@ -92,6 +92,8 @@ data CmmNode e x where A MidForeign call is used for *unsafe* foreign calls; a LastForeign call is used for *safe* foreign calls. Unsafe ones are easy: think of them as a "fat machine instruction". +In particular, they do *not* kill all live registers (there was a bit +of code in GHC that conservatively assumed otherwise.) Safe ones are trickier. A safe foreign call r = f(x) diff --git a/compiler/cmm/CmmOpt.hs b/compiler/cmm/CmmOpt.hs index 0dec26da6f..a2eecd5c48 100644 --- a/compiler/cmm/CmmOpt.hs +++ b/compiler/cmm/CmmOpt.hs @@ -14,6 +14,7 @@ ----------------------------------------------------------------------------- module CmmOpt ( + cmmEliminateDeadBlocks, cmmMiniInline, cmmMachOpFold, cmmLoopifyForC, @@ -30,10 +31,69 @@ import UniqFM import Unique import FastTypes import Outputable +import BlockId import Data.Bits import Data.Word import Data.Int +import Data.Maybe +import Data.List + +import Compiler.Hoopl hiding (Unique) + +-- ----------------------------------------------------------------------------- +-- Eliminates dead blocks + +{- +We repeatedly expand the set of reachable blocks until we hit a +fixpoint, and then prune any blocks that were not in this set. This is +actually a required optimization, as dead blocks can cause problems +for invariants in the linear register allocator (and possibly other +places.) +-} + +-- Deep fold over statements could probably be abstracted out, but it +-- might not be worth the effort since OldCmm is moribund +cmmEliminateDeadBlocks :: [CmmBasicBlock] -> [CmmBasicBlock] +cmmEliminateDeadBlocks [] = [] +cmmEliminateDeadBlocks blocks@(BasicBlock base_id _:_) = + let -- Calculate what's reachable from what block + reachableMap = foldl' f emptyUFM blocks -- lazy in values + where f m (BasicBlock block_id stmts) = addToUFM m block_id (reachableFrom stmts) + reachableFrom stmts = foldl stmt [] stmts + where + stmt m CmmNop = m + stmt m (CmmComment _) = m + stmt m (CmmAssign _ e) = expr m e + stmt m (CmmStore e1 e2) = expr (expr m e1) e2 + stmt m (CmmCall c _ as _ _) = f (actuals m as) c + where f m (CmmCallee e _) = expr m e + f m (CmmPrim _) = m + stmt m (CmmBranch b) = b:m + stmt m (CmmCondBranch e b) = b:(expr m e) + stmt m (CmmSwitch e bs) = catMaybes bs ++ expr m e + stmt m (CmmJump e as) = expr (actuals m as) e + stmt m (CmmReturn as) = actuals m as + actuals m as = foldl' (\m h -> expr m (hintlessCmm h)) m as + -- We have to do a deep fold into CmmExpr because + -- there may be a BlockId in the CmmBlock literal. + expr m (CmmLit l) = lit m l + expr m (CmmLoad e _) = expr m e + expr m (CmmReg _) = m + expr m (CmmMachOp _ es) = foldl' expr m es + expr m (CmmStackSlot _ _) = m + expr m (CmmRegOff _ _) = m + lit m (CmmBlock b) = b:m + lit m _ = m + -- go todo done + reachable = go [base_id] (setEmpty :: BlockSet) + where go [] m = m + go (x:xs) m + | setMember x m = go xs m + | otherwise = go (add ++ xs) (setInsert x m) + where add = fromMaybe (panic "cmmEliminateDeadBlocks: unknown block") + (lookupUFM reachableMap x) + in filter (\(BasicBlock block_id _) -> setMember block_id reachable) blocks -- ----------------------------------------------------------------------------- -- The mini-inliner @@ -115,12 +175,15 @@ cmmMiniInlineStmts uses (stmt@(CmmAssign (CmmLocal (LocalReg u _)) expr) : stmts cmmMiniInlineStmts uses (stmt:stmts) = stmt : cmmMiniInlineStmts uses stmts -lookForInline u expr (stmt : rest) +lookForInline u expr stmts = lookForInline' u expr regset stmts + where regset = foldRegsUsed extendRegSet emptyRegSet expr + +lookForInline' u expr regset (stmt : rest) | Just 1 <- lookupUFM (countUses stmt) u, ok_to_inline = Just (inlineStmt u expr stmt : rest) | ok_to_skip - = case lookForInline u expr rest of + = case lookForInline' u expr regset rest of Nothing -> Nothing Just stmts -> Just (stmt:stmts) @@ -137,13 +200,18 @@ lookForInline u expr (stmt : rest) CmmCall{} -> hasNoGlobalRegs expr _ -> True - -- We can skip over assignments to other tempoararies, because we - -- know that expressions aren't side-effecting and temporaries are - -- single-assignment. + -- Expressions aren't side-effecting. Temporaries may or may not + -- be single-assignment depending on the source (the old code + -- generator creates single-assignment code, but hand-written Cmm + -- and Cmm from the new code generator is not single-assignment.) + -- So we do an extra check to make sure that the register being + -- changed is not one we were relying on. I don't know how much of a + -- performance hit this is (we have to create a regset for every + -- instruction.) -- EZY ok_to_skip = case stmt of CmmNop -> True CmmComment{} -> True - CmmAssign (CmmLocal (LocalReg u' _)) rhs | u' /= u -> True + CmmAssign (CmmLocal r@(LocalReg u' _)) rhs | u' /= u && not (r `elemRegSet` regset) -> True CmmAssign g@(CmmGlobal _) rhs -> not (g `regUsedIn` expr) _other -> False diff --git a/compiler/cmm/CmmParse.y b/compiler/cmm/CmmParse.y index 8c2498e5f8..4dc7e3214f 100644 --- a/compiler/cmm/CmmParse.y +++ b/compiler/cmm/CmmParse.y @@ -396,13 +396,15 @@ stmt :: { ExtCode } | NAME '(' exprs0 ')' ';' {% stmtMacro $1 $3 } | 'switch' maybe_range expr '{' arms default '}' - { doSwitch $2 $3 $5 $6 } + { do as <- sequence $5; doSwitch $2 $3 as $6 } | 'goto' NAME ';' { do l <- lookupLabel $2; stmtEC (CmmBranch l) } | 'jump' expr maybe_actuals ';' { do e1 <- $2; e2 <- sequence $3; stmtEC (CmmJump e1 e2) } | 'return' maybe_actuals ';' { do e <- sequence $2; stmtEC (CmmReturn e) } + | 'if' bool_expr 'goto' NAME + { do l <- lookupLabel $4; cmmRawIf $2 l } | 'if' bool_expr '{' body '}' else { cmmIfThenElse $2 $4 $6 } @@ -441,12 +443,16 @@ maybe_range :: { Maybe (Int,Int) } : '[' INT '..' INT ']' { Just (fromIntegral $2, fromIntegral $4) } | {- empty -} { Nothing } -arms :: { [([Int],ExtCode)] } +arms :: { [ExtFCode ([Int],Either BlockId ExtCode)] } : {- empty -} { [] } | arm arms { $1 : $2 } -arm :: { ([Int],ExtCode) } - : 'case' ints ':' '{' body '}' { ($2, $5) } +arm :: { ExtFCode ([Int],Either BlockId ExtCode) } + : 'case' ints ':' arm_body { do b <- $4; return ($2, b) } + +arm_body :: { ExtFCode (Either BlockId ExtCode) } + : '{' body '}' { return (Right $2) } + | 'goto' NAME ';' { do l <- lookupLabel $2; return (Left l) } ints :: { [Int] } : INT { [ fromIntegral $1 ] } @@ -458,6 +464,8 @@ default :: { Maybe ExtCode } -- 'default' branches | {- empty -} { Nothing } +-- Note: OldCmm doesn't support a first class 'else' statement, though +-- CmmNode does. else :: { ExtCode } : {- empty -} { nopEC } | 'else' '{' body '}' { $3 } @@ -952,6 +960,10 @@ cmmIfThenElse cond then_part else_part = do -- fall through to join code (labelC join_id) +cmmRawIf cond then_id = do + c <- cond + emitCond c then_id + -- 'emitCond cond true_id' emits code to test whether the cond is true, -- branching to true_id if so, and falling through otherwise. emitCond (BoolTest e) then_id = do @@ -991,7 +1003,7 @@ emitCond (e1 `BoolAnd` e2) then_id = do -- optional range on the switch (eg. switch [0..7] {...}), or by -- the minimum/maximum values from the branches. -doSwitch :: Maybe (Int,Int) -> ExtFCode CmmExpr -> [([Int],ExtCode)] +doSwitch :: Maybe (Int,Int) -> ExtFCode CmmExpr -> [([Int],Either BlockId ExtCode)] -> Maybe ExtCode -> ExtCode doSwitch mb_range scrut arms deflt = do @@ -1018,12 +1030,12 @@ doSwitch mb_range scrut arms deflt -- ToDo: check for out of range and jump to default if necessary stmtEC (CmmSwitch expr entries) where - emitArm :: ([Int],ExtCode) -> ExtFCode [(Int,BlockId)] - emitArm (ints,code) = do + emitArm :: ([Int],Either BlockId ExtCode) -> ExtFCode [(Int,BlockId)] + emitArm (ints,Left blockid) = return [ (i,blockid) | i <- ints ] + emitArm (ints,Right code) = do blockid <- forkLabelledCodeEC code return [ (i,blockid) | i <- ints ] - -- ----------------------------------------------------------------------------- -- Putting it all together diff --git a/compiler/cmm/CmmProcPoint.hs b/compiler/cmm/CmmProcPoint.hs index d0d54d909d..fbe979b9ab 100644 --- a/compiler/cmm/CmmProcPoint.hs +++ b/compiler/cmm/CmmProcPoint.hs @@ -378,6 +378,8 @@ add_CopyOuts protos procPoints g = foldGraphBlocks mb_copy_out (return mapEmpty) -- 4. build info tables for the procedures -- and update the info table for -- the SRTs in the entry procedure as well. -- Input invariant: A block should only be reachable from a single ProcPoint. +-- ToDo: use the _ret naming convention that the old code generator +-- used. -- EZY splitAtProcPoints :: CLabel -> ProcPointSet-> ProcPointSet -> BlockEnv Status -> CmmTop -> FuelUniqSM [CmmTop] splitAtProcPoints entry_label callPPs procPoints procMap diff --git a/compiler/cmm/CmmSpillReload.hs b/compiler/cmm/CmmSpillReload.hs index 4e2dd38fd3..17364ad052 100644 --- a/compiler/cmm/CmmSpillReload.hs +++ b/compiler/cmm/CmmSpillReload.hs @@ -100,11 +100,11 @@ dualLiveTransfers entry procPoints = mkBTransfer3 first middle last where check live id x = if id == entry then noLiveOnEntry id (in_regs live) x else x middle :: CmmNode O O -> DualLive -> DualLive - middle m live = changeStack updSlots $ changeRegs (xferLiveMiddle m) (changeRegs regs_in live) - where xferLiveMiddle = case getBTransfer3 xferLive of (_, middle, _) -> middle - regs_in :: RegSet -> RegSet - regs_in live = case m of CmmUnsafeForeignCall {} -> emptyRegSet - _ -> live + middle m = changeStack updSlots + . changeRegs updRegs + where -- Reuse middle of liveness analysis from CmmLive + updRegs = case getBTransfer3 xferLive of (_, middle, _) -> middle m + updSlots live = foldSlotsUsed reload (foldSlotsDefd spill live m) m spill live s@(RegSlot r, _, _) = check s $ deleteFromRegSet live r spill live _ = live diff --git a/compiler/cmm/CmmStackLayout.hs b/compiler/cmm/CmmStackLayout.hs index 01543c444e..c0fb6af037 100644 --- a/compiler/cmm/CmmStackLayout.hs +++ b/compiler/cmm/CmmStackLayout.hs @@ -13,7 +13,7 @@ module CmmStackLayout ( SlotEnv, liveSlotAnal, liveSlotTransfers, removeLiveSlotDefs - , layout, manifestSP, igraph, areaBuilder + , getSpEntryMap, layout, manifestSP, igraph, areaBuilder , stubSlotsOnDeath ) -- to help crash early during debugging where @@ -195,7 +195,7 @@ liveLastOut env l = type Set x = Map x () data IGraphBuilder n = Builder { foldNodes :: forall z. SubArea -> (n -> z -> z) -> z -> z - , _wordsOccupied :: AreaMap -> AreaMap -> n -> [Int] + , _wordsOccupied :: AreaSizeMap -> AreaMap -> n -> [Int] } areaBuilder :: IGraphBuilder Area @@ -242,10 +242,13 @@ igraph builder env g = foldr interfere Map.empty (postorderDfs g) -- what's the highest offset (in bytes) used in each Area? -- We'll need to allocate that much space for each Area. +-- Mapping of areas to area sizes (not offsets!) +type AreaSizeMap = AreaMap + -- JD: WHY CAN'T THIS COME FROM THE slot-liveness info? -getAreaSize :: ByteOff -> CmmGraph -> AreaMap +getAreaSize :: ByteOff -> CmmGraph -> AreaSizeMap -- The domain of the returned mapping consists only of Areas - -- used for (a) variable spill slots, and (b) parameter passing ares for calls + -- used for (a) variable spill slots, and (b) parameter passing areas for calls getAreaSize entry_off g = foldGraphBlocks (foldBlockNodesF3 (first, add_regslots, last)) (Map.singleton (CallArea Old) entry_off) g @@ -266,10 +269,11 @@ getAreaSize entry_off g = -- The 'max' is important. Two calls, to f and g, might share a common -- continuation (and hence a common CallArea), but their number of overflow -- parameters might differ. + -- EZY: Ought to use insert with combining function... -- Find the Stack slots occupied by the subarea's conflicts -conflictSlots :: Ord x => IGPair x -> AreaMap -> AreaMap -> SubArea -> Set Int +conflictSlots :: Ord x => IGPair x -> AreaSizeMap -> AreaMap -> SubArea -> Set Int conflictSlots (ig, Builder foldNodes wordsOccupied) areaSize areaMap subarea = foldNodes subarea foldNode Map.empty where foldNode n set = Map.foldRightWithKey conflict set $ Map.findWithDefault Map.empty n ig @@ -278,10 +282,10 @@ conflictSlots (ig, Builder foldNodes wordsOccupied) areaSize areaMap subarea = liveInSlots areaMap n set = foldr setAdd set (wordsOccupied areaSize areaMap n) setAdd w s = Map.insert w () s --- Find any open space on the stack, starting from the offset. --- If the area is a CallArea or a spill slot for a pointer, then it must --- be word-aligned. -freeSlotFrom :: Ord x => IGPair x -> AreaMap -> Int -> AreaMap -> Area -> Int +-- Find any open space for 'area' on the stack, starting from the +-- 'offset'. If the area is a CallArea or a spill slot for a pointer, +-- then it must be word-aligned. +freeSlotFrom :: Ord x => IGPair x -> AreaSizeMap -> Int -> AreaMap -> Area -> Int freeSlotFrom ig areaSize offset areaMap area = let size = Map.lookup area areaSize `orElse` 0 conflicts = conflictSlots ig areaSize areaMap (area, size, size) @@ -299,11 +303,24 @@ freeSlotFrom ig areaSize offset areaMap area = in findSpace (align (offset + size)) size -- Find an open space on the stack, and assign it to the area. -allocSlotFrom :: Ord x => IGPair x -> AreaMap -> Int -> AreaMap -> Area -> AreaMap +allocSlotFrom :: Ord x => IGPair x -> AreaSizeMap -> Int -> AreaMap -> Area -> AreaMap allocSlotFrom ig areaSize from areaMap area = if Map.member area areaMap then areaMap else Map.insert area (freeSlotFrom ig areaSize from areaMap area) areaMap +-- Figure out all of the offsets from the slot location; this will be +-- non-zero for procpoints. +type SpEntryMap = BlockEnv Int +getSpEntryMap :: Int -> CmmGraph -> SpEntryMap +getSpEntryMap entry_off g@(CmmGraph {g_entry = entry}) + = foldGraphBlocks add_sp_off (mapInsert entry entry_off emptyBlockMap) g + where add_sp_off :: CmmBlock -> BlockEnv Int -> BlockEnv Int + add_sp_off b env = + case lastNode b of + CmmCall {cml_cont=Just succ, cml_ret_args=off} -> mapInsert succ off env + CmmForeignCall {succ=succ} -> mapInsert succ wORD_SIZE env + _ -> env + -- | Greedy stack layout. -- Compute liveness, build the interference graph, and allocate slots for the areas. -- We visit each basic block in a (generally) forward order. @@ -326,12 +343,16 @@ allocSlotFrom ig areaSize from areaMap area = -- Note: The stack pointer only has to be younger than the youngest live stack slot -- at proc points. Otherwise, the stack pointer can point anywhere. -layout :: ProcPointSet -> SlotEnv -> ByteOff -> CmmGraph -> AreaMap +layout :: ProcPointSet -> SpEntryMap -> SlotEnv -> ByteOff -> CmmGraph -> AreaMap -- The domain of the returned map includes an Area for EVERY block -- including each block that is not the successor of a call (ie is not a proc-point) --- That's how we return the info of what the SP should be at the entry of every block +-- That's how we return the info of what the SP should be at the entry of every non +-- procpoint block. However, note that procpoint blocks have their +-- /slot/ stored, which is not necessarily the value of the SP on entry +-- to the block (in fact, it probably isn't, due to argument passing). +-- See [Procpoint Sp offset] -layout procPoints env entry_off g = +layout procPoints spEntryMap env entry_off g = let ig = (igraph areaBuilder env g, areaBuilder) env' bid = mapLookup bid env `orElse` panic "unknown blockId in igraph" areaSize = getAreaSize entry_off g @@ -370,21 +391,87 @@ layout procPoints env entry_off g = allocMid m areaMap = foldSlotsDefd alloc' (foldSlotsUsed alloc' areaMap m) m allocLast bid l areaMap = foldr (setSuccSPs inSp) areaMap' (successors l) - where inSp = expectJust "sp in" $ Map.lookup (CallArea (Young bid)) areaMap + where inSp = slot + spOffset -- [Procpoint Sp offset] + -- If it's not in the map, we should use our previous + -- calculation unchanged. + spOffset = mapLookup bid spEntryMap `orElse` 0 + slot = expectJust "slot in" $ Map.lookup (CallArea (Young bid)) areaMap areaMap' = foldSlotsDefd alloc' (foldSlotsUsed alloc' areaMap l) l alloc' areaMap (a@(RegSlot _), _, _) = allocVarSlot areaMap a alloc' areaMap _ = areaMap - initMap = Map.insert (CallArea (Young (g_entry g))) 0 $ - Map.insert (CallArea Old) 0 Map.empty - + initMap = Map.insert (CallArea (Young (g_entry g))) 0 + . Map.insert (CallArea Old) 0 + $ Map.empty + areaMap = foldl layoutAreas initMap (postorderDfs g) in -- pprTrace "ProcPoints" (ppr procPoints) $ - -- pprTrace "Area SizeMap" (ppr areaSize) $ - -- pprTrace "Entry SP" (ppr entrySp) $ - -- pprTrace "Area Map" (ppr areaMap) $ + -- pprTrace "Area SizeMap" (ppr areaSize) $ + -- pprTrace "Entry offset" (ppr entry_off) $ + -- pprTrace "Area Map" (ppr areaMap) $ areaMap +{- Note [Procpoint Sp offset] + +The calculation of inSp is a little tricky. (Un)fortunately, if you get +it wrong, you will get inefficient but correct code. You know you've +got it wrong if the generated stack pointer bounces up and down for no +good reason. + +Why can't we just set inSp to the location of the slot? (This is what +the code used to do.) The trouble is when we actually hit the proc +point the start of the slot will not be the same as the actual Sp due +to argument passing: + + a: + I32[(young<b> + 4)] = cde; + // Stack pointer is moved to young end (bottom) of young<b> for call + // +-------+ + // | arg 1 | + // +-------+ <- Sp + call (I32[foobar::I32])(...) returns to Just b (4) (4) with update frame 4; + b: + // After call, stack pointer is above the old end (top) of + // young<b> (the difference is spOffset) + // +-------+ <- Sp + // | arg 1 | + // +-------+ + +If we blithely set the Sp to be the same as the slot (the young end of +young<b>), an adjustment will be necessary when we go to the next block. +This is wasteful. So, instead, for the next block after a procpoint, +the actual Sp should be set to the same as the true Sp when we just +entered the procpoint. Then manifestSP will automatically do the right +thing. + +Questions you may ask: + +1. Why don't we need to change the mapping for the procpoint itself? + Because manifestSP does its own calculation of the true stack value, + manifestSP will notice the discrepancy between the actual stack + pointer and the slot start, and adjust all of its memory accesses + accordingly. So the only problem is when we adjust the Sp in + preparation for the successor block; that's why this code is here and + not in setSuccSPs. + +2. Why don't we make the procpoint call area and the true offset match + up? If we did that, we would never use memory above the true value + of the stack pointer, thus wasting all of the stack we used to store + arguments. You might think that some clever changes to the slot + offsets, using negative offsets, might fix it, but this does not make + semantic sense. + +3. If manifestSP is already calculating the true stack value, why we can't + do this trick inside manifestSP itself? The reason is that if two + branches join with inconsistent SPs, one of them has to be fixed: we + can't know what the fix should be without already knowing what the + chosen location of SP is on the next successor. (This is + the "succ already knows incoming SP" case), This calculation cannot + be easily done in manifestSP, since it processes the nodes + /backwards/. So we need to have figured this out before we hit + manifestSP. +-} + -- After determining the stack layout, we can: -- 1. Replace references to stack Areas with addresses relative to the stack -- pointer. @@ -394,8 +481,8 @@ layout procPoints env entry_off g = -- stack pointer to be younger than the live values on the stack at proc points. -- 3. Compute the maximum stack offset used in the procedure and replace -- the stack high-water mark with that offset. -manifestSP :: AreaMap -> ByteOff -> CmmGraph -> FuelUniqSM CmmGraph -manifestSP areaMap entry_off g@(CmmGraph {g_entry=entry}) = +manifestSP :: SpEntryMap -> AreaMap -> ByteOff -> CmmGraph -> FuelUniqSM CmmGraph +manifestSP spEntryMap areaMap entry_off g@(CmmGraph {g_entry=entry}) = ofBlockMap entry `liftM` foldl replB (return mapEmpty) (postorderDfs g) where slot a = -- pprTrace "slot" (ppr a) $ Map.lookup a areaMap `orElse` panic "unallocated Area" @@ -404,13 +491,6 @@ manifestSP areaMap entry_off g@(CmmGraph {g_entry=entry}) = sp_high = maxSlot slot g proc_entry_sp = slot (CallArea Old) + entry_off - add_sp_off :: CmmBlock -> BlockEnv Int -> BlockEnv Int - add_sp_off b env = - case lastNode b of - CmmCall {cml_cont=Just succ, cml_ret_args=off} -> mapInsert succ off env - CmmForeignCall {succ=succ} -> mapInsert succ wORD_SIZE env - _ -> env - spEntryMap = foldGraphBlocks add_sp_off (mapInsert entry entry_off emptyBlockMap) g spOffset id = mapLookup id spEntryMap `orElse` 0 sp_on_entry id | id == entry = proc_entry_sp @@ -427,10 +507,26 @@ manifestSP areaMap entry_off g@(CmmGraph {g_entry=entry}) = where spIn = sp_on_entry (entryLabel block) middle spOff m = mapExpDeep (replSlot spOff) m + -- XXX there shouldn't be any global registers in the + -- CmmCall, so there shouldn't be any slots in + -- CmmCall... check that... last spOff l = mapExpDeep (replSlot spOff) l replSlot spOff (CmmStackSlot a i) = CmmRegOff (CmmGlobal Sp) (spOff - (slot a + i)) replSlot _ (CmmLit CmmHighStackMark) = -- replacing the high water mark CmmLit (CmmInt (toInteger (max 0 (sp_high - proc_entry_sp))) (typeWidth bWord)) + -- Invariant: Sp is always greater than SpLim. Thus, if + -- the high water mark is zero, we can optimize away the + -- conditional branch. Relies on dead code elimination + -- to get rid of the dead GC blocks. + -- EZY: Maybe turn this into a guard that checks if a + -- statement is stack-check ish? Maybe we should make + -- an actual mach-op for it, so there's no chance of + -- mixing this up with something else... + replSlot _ (CmmMachOp (MO_U_Lt _) + [CmmMachOp (MO_Sub _) + [ CmmReg (CmmGlobal Sp) + , CmmLit (CmmInt 0 _)], + CmmReg (CmmGlobal SpLim)]) = CmmLit (CmmInt 0 wordWidth) replSlot _ e = e replLast :: MaybeC C (CmmNode C O) -> [CmmNode O O] -> CmmNode O C -> FuelUniqSM [CmmBlock] diff --git a/compiler/cmm/MkGraph.hs b/compiler/cmm/MkGraph.hs index 69b481b501..c9e422fb4e 100644 --- a/compiler/cmm/MkGraph.hs +++ b/compiler/cmm/MkGraph.hs @@ -24,7 +24,7 @@ module MkGraph , copyInOflow, copyInSlot, copyOutOflow, copyOutSlot -- Reexport of needed Cmm stuff , Convention(..), ForeignConvention(..), ForeignTarget(..) - , CmmStackInfo(..), CmmTopInfo(..), CmmGraph(..) + , CmmStackInfo(..), CmmTopInfo(..), CmmGraph, GenCmmGraph(..) , Cmm, CmmTop ) where diff --git a/compiler/cmm/OptimizationFuel.hs b/compiler/cmm/OptimizationFuel.hs index 057a96521f..f624c1c7b6 100644 --- a/compiler/cmm/OptimizationFuel.hs +++ b/compiler/cmm/OptimizationFuel.hs @@ -6,12 +6,12 @@ -- the optimiser with varying amount of fuel to find out the exact number of -- steps where a bug is introduced in the output. module OptimizationFuel - ( OptimizationFuel, amountOfFuel, tankFilledTo, anyFuelLeft, oneLessFuel + ( OptimizationFuel, amountOfFuel, tankFilledTo, unlimitedFuel, anyFuelLeft, oneLessFuel , OptFuelState, initOptFuelState , FuelConsumer, FuelUsingMonad, FuelState , fuelGet, fuelSet, lastFuelPass, setFuelPass , fuelExhausted, fuelDec1, tryWithFuel - , runFuelIO, fuelConsumingPass + , runFuelIO, runInfiniteFuelIO, fuelConsumingPass , FuelUniqSM , liftUniq ) @@ -21,9 +21,7 @@ import Data.IORef import Control.Monad import StaticFlags (opt_Fuel) import UniqSupply -#ifdef DEBUG import Panic -#endif import Compiler.Hoopl import Compiler.Hoopl.GHC (getFuel, setFuel) @@ -51,8 +49,8 @@ amountOfFuel :: OptimizationFuel -> Int anyFuelLeft :: OptimizationFuel -> Bool oneLessFuel :: OptimizationFuel -> OptimizationFuel +unlimitedFuel :: OptimizationFuel -#ifdef DEBUG newtype OptimizationFuel = OptimizationFuel Int deriving Show @@ -61,16 +59,7 @@ amountOfFuel (OptimizationFuel f) = f anyFuelLeft (OptimizationFuel f) = f > 0 oneLessFuel (OptimizationFuel f) = ASSERT (f > 0) (OptimizationFuel (f - 1)) -#else --- type OptimizationFuel = State# () -- would like this, but it won't work -data OptimizationFuel = OptimizationFuel - deriving Show -tankFilledTo _ = OptimizationFuel -amountOfFuel _ = maxBound - -anyFuelLeft _ = True -oneLessFuel _ = OptimizationFuel -#endif +unlimitedFuel = OptimizationFuel infiniteFuel data FuelState = FuelState { fs_fuel :: OptimizationFuel, fs_lastpass :: String } newtype FuelUniqSM a = FUSM { unFUSM :: FuelState -> UniqSM (a, FuelState) } @@ -92,6 +81,16 @@ runFuelIO fs (FUSM f) = writeIORef (fuel_ref fs) fuel' return a +-- ToDo: Do we need the pass_ref when we are doing infinite fueld +-- transformations? +runInfiniteFuelIO :: OptFuelState -> FuelUniqSM a -> IO a +runInfiniteFuelIO fs (FUSM f) = + do pass <- readIORef (pass_ref fs) + u <- mkSplitUniqSupply 'u' + let (a, FuelState _ pass') = initUs_ u $ f (FuelState unlimitedFuel pass) + writeIORef (pass_ref fs) pass' + return a + instance Monad FuelUniqSM where FUSM f >>= k = FUSM (\s -> f s >>= \(a, s') -> unFUSM (k a) s') return a = FUSM (\s -> return (a, s)) diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs index 10c9f18310..aa7d914253 100644 --- a/compiler/cmm/PprC.hs +++ b/compiler/cmm/PprC.hs @@ -50,6 +50,7 @@ import Outputable import Constants import BasicTypes import CLabel +import Util -- The rest import Data.List @@ -63,10 +64,6 @@ import Data.Word import Data.Array.ST import Control.Monad.ST -#if x86_64_TARGET_ARCH -import StaticFlags ( opt_Unregisterised ) -#endif - #if defined(alpha_TARGET_ARCH) || defined(mips_TARGET_ARCH) || defined(mipsel_TARGET_ARCH) || defined(arm_TARGET_ARCH) #define BEWARE_LOAD_STORE_ALIGNMENT #endif @@ -104,18 +101,19 @@ pprTop (CmmProc info clbl (ListGraph blocks)) = then pprDataExterns info $$ pprWordArray (entryLblToInfoLbl clbl) info else empty) $$ - (case blocks of - [] -> empty - -- the first block doesn't get a label: - (BasicBlock _ stmts : rest) -> vcat [ + (vcat [ blankLine, extern_decls, (if (externallyVisibleCLabel clbl) then mkFN_ else mkIF_) (pprCLabel clbl) <+> lbrace, nest 8 temp_decls, nest 8 mkFB_, - nest 8 (vcat (map pprStmt stmts)) $$ - vcat (map pprBBlock rest), + case blocks of + [] -> empty + -- the first block doesn't get a label: + (BasicBlock _ stmts : rest) -> + nest 8 (vcat (map pprStmt stmts)) $$ + vcat (map pprBBlock rest), nest 8 mkFE_, rbrace ] ) @@ -818,17 +816,6 @@ pprCall ppr_fn cconv results args _ | otherwise = -#if x86_64_TARGET_ARCH - -- HACK around gcc optimisations. - -- x86_64 needs a __DISCARD__() here, to create a barrier between - -- putting the arguments into temporaries and passing the arguments - -- to the callee, because the argument expressions may refer to - -- machine registers that are also used for passing arguments in the - -- C calling convention. - (if (not opt_Unregisterised) - then ptext (sLit "__DISCARD__();") - else empty) $$ -#endif ppr_assign results (ppr_fn <> parens (commafy (map pprArg args))) <> semi where ppr_assign [] rhs = rhs @@ -1022,18 +1009,6 @@ machRep_S_CType _ = panic "machRep_S_CType" pprStringInCStyle :: [Word8] -> SDoc pprStringInCStyle s = doubleQuotes (text (concatMap charToC s)) -charToC :: Word8 -> String -charToC w = - case chr (fromIntegral w) of - '\"' -> "\\\"" - '\'' -> "\\\'" - '\\' -> "\\\\" - c | c >= ' ' && c <= '~' -> [c] - | otherwise -> ['\\', - chr (ord '0' + ord c `div` 64), - chr (ord '0' + ord c `div` 8 `mod` 8), - chr (ord '0' + ord c `mod` 8)] - -- --------------------------------------------------------------------------- -- Initialising static objects with floating-point numbers. We can't -- just emit the floating point number, because C will cast it to an int diff --git a/compiler/cmm/cmm-notes b/compiler/cmm/cmm-notes index 0852711f96..e787f18b17 100644 --- a/compiler/cmm/cmm-notes +++ b/compiler/cmm/cmm-notes @@ -15,14 +15,11 @@ Things to do: This will fix the spill before stack check problem but only really as a side
effect. A 'real fix' probably requires making the spiller know about sp checks.
- - There is some silly stuff happening with the Sp. We end up with code like:
- Sp = Sp + 8; R1 = _vwf::I64; Sp = Sp -8
- Seems to be perhaps caused by the issue above but also maybe a optimisation
- pass needed?
+ EZY: I don't understand this comment. David Terei, can you clarify?
- - Proc pass all arguments on the stack, adding more code and slowing down things
- a lot. We either need to fix this or even better would be to get rid of
- proc points.
+ - Proc points pass all arguments on the stack, adding more code and
+ slowing down things a lot. We either need to fix this or even better
+ would be to get rid of proc points.
- CmmInfo.cmmToRawCmm uses Old.Cmm, so it is called after converting Cmm.Cmm to
Old.Cmm. We should abstract it to work on both representations, it needs only to
@@ -32,7 +29,7 @@ Things to do: we could convert codeGen/StgCmm* clients to the Hoopl's semantics?
It's all deeply unsatisfactory.
- - Improve preformance of Hoopl.
+ - Improve performance of Hoopl.
A nofib comparison of -fasm vs -fnewcodegen nofib compilation parameters
(using the same ghc-cmm branch +libraries compiled by the old codegenerator)
@@ -50,6 +47,9 @@ Things to do: So we generate a bit better code, but it takes us longer!
+ EZY: Also importantly, Hoopl uses dramatically more memory than the
+ old code generator.
+
- Are all blockToNodeList and blockOfNodeList really needed? Maybe we could
splice blocks instead?
@@ -57,7 +57,7 @@ Things to do: a block catenation function would be probably nicer than blockToNodeList
/ blockOfNodeList combo.
- - loweSafeForeignCall seems too lowlevel. Just use Dataflow. After that
+ - lowerSafeForeignCall seems too lowlevel. Just use Dataflow. After that
delete splitEntrySeq from HooplUtils.
- manifestSP seems to touch a lot of the graph representation. It is
@@ -76,6 +76,9 @@ Things to do: calling convention, and the code for calling foreign calls is generated
- AsmCodeGen has a generic Cmm optimiser; move this into new pipeline
+ EZY (2011-04-16): The mini-inliner has been generalized and ported,
+ but the constant folding and other optimizations need to still be
+ ported.
- AsmCodeGen has post-native-cg branch eliminator (shortCutBranches);
we ultimately want to share this with the Cmm branch eliminator.
@@ -113,7 +116,7 @@ Things to do: - See "CAFs" below; we want to totally refactor the way SRTs are calculated
- Pull out Areas into its own module
- Parameterise AreaMap
+ Parameterise AreaMap (note there are type synonyms in CmmStackLayout!)
Add ByteWidth = Int
type SubArea = (Area, ByteOff, ByteWidth)
ByteOff should not be defined in SMRep -- that is too high up the hierarchy
@@ -293,8 +296,8 @@ cpsTop: insert spills/reloads across
LastCalls, and
Branches to proc-points
- Now sink those reloads:
- - CmmSpillReload.insertLateReloads
+ Now sink those reloads (and other instructions):
+ - CmmSpillReload.rewriteAssignments
- CmmSpillReload.removeDeadAssignmentsAndReloads
* CmmStackLayout.stubSlotsOnDeath
@@ -344,7 +347,7 @@ to J that way. This is an awkward choice. (We think that we currently never pass variables to join points via arguments.)
Furthermore, there is *no way* to pass q to J in a register (other
-than a paramter register).
+than a parameter register).
What we want is to do register allocation across the whole caboodle.
Then we could drop all the code that deals with the above awkward
|