diff options
author | Kavon Farvardin <kavon@farvard.in> | 2018-09-23 15:29:37 -0500 |
---|---|---|
committer | Kavon Farvardin <kavon@farvard.in> | 2018-09-23 15:29:37 -0500 |
commit | 84c2ad99582391005b5e873198b15e9e9eb4f78d (patch) | |
tree | caa8c2f2ec7e97fbb4977263c6817c9af5025cf4 /compiler/cmm | |
parent | 8ddb47cfcf5776e9a3c55fd37947c8a95e00fa12 (diff) | |
parent | e68b439fe5de61b9a2ca51af472185c62ccb8b46 (diff) | |
download | haskell-wip/T13904.tar.gz |
update to current master againwip/T13904
Diffstat (limited to 'compiler/cmm')
40 files changed, 2134 insertions, 1165 deletions
diff --git a/compiler/cmm/Bitmap.hs b/compiler/cmm/Bitmap.hs index a5cff38a98..e6ac15f4a8 100644 --- a/compiler/cmm/Bitmap.hs +++ b/compiler/cmm/Bitmap.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE CPP, BangPatterns #-} +{-# LANGUAGE BangPatterns #-} -- -- (c) The University of Glasgow 2003-2006 @@ -15,8 +15,7 @@ module Bitmap ( seqBitmap, ) where -#include "HsVersions.h" -#include "../includes/MachDeps.h" +import GhcPrelude import SMRep import DynFlags diff --git a/compiler/cmm/BlockId.hs b/compiler/cmm/BlockId.hs index 8f11ad194b..4f4e0e8c53 100644 --- a/compiler/cmm/BlockId.hs +++ b/compiler/cmm/BlockId.hs @@ -8,14 +8,15 @@ module BlockId , blockLbl, infoTblLbl ) where +import GhcPrelude + import CLabel import IdInfo import Name import Unique import UniqSupply -import Hoopl.Label (Label, uniqueToLbl) -import Hoopl.Unique (intToUnique) +import Hoopl.Label (Label, mkHooplLabel) ---------------------------------------------------------------- --- Block Ids, their environments, and their sets @@ -32,13 +33,14 @@ compilation unit in which it appears. type BlockId = Label mkBlockId :: Unique -> BlockId -mkBlockId unique = uniqueToLbl $ intToUnique $ getKey unique +mkBlockId unique = mkHooplLabel $ getKey unique newBlockId :: MonadUnique m => m BlockId newBlockId = mkBlockId <$> getUniqueM blockLbl :: BlockId -> CLabel -blockLbl label = mkEntryLabel (mkFCallName (getUnique label) "block") NoCafRefs +blockLbl label = mkLocalBlockLabel (getUnique label) infoTblLbl :: BlockId -> CLabel -infoTblLbl label = mkInfoTableLabel (mkFCallName (getUnique label) "block") NoCafRefs +infoTblLbl label + = mkBlockInfoTableLabel (mkFCallName (getUnique label) "block") NoCafRefs diff --git a/compiler/cmm/BlockId.hs-boot b/compiler/cmm/BlockId.hs-boot new file mode 100644 index 0000000000..3ad4141184 --- /dev/null +++ b/compiler/cmm/BlockId.hs-boot @@ -0,0 +1,8 @@ +module BlockId (BlockId, mkBlockId) where + +import Hoopl.Label (Label) +import Unique (Unique) + +type BlockId = Label + +mkBlockId :: Unique -> BlockId diff --git a/compiler/cmm/CLabel.hs b/compiler/cmm/CLabel.hs index 62c8037e9c..12c3357e47 100644 --- a/compiler/cmm/CLabel.hs +++ b/compiler/cmm/CLabel.hs @@ -15,30 +15,22 @@ module CLabel ( mkClosureLabel, mkSRTLabel, - mkTopSRTLabel, mkInfoTableLabel, mkEntryLabel, - mkSlowEntryLabel, - mkConEntryLabel, mkRednCountsLabel, mkConInfoTableLabel, - mkLargeSRTLabel, mkApEntryLabel, mkApInfoTableLabel, mkClosureTableLabel, mkBytesLabel, + mkLocalBlockLabel, mkLocalClosureLabel, mkLocalInfoTableLabel, - mkLocalEntryLabel, - mkLocalConEntryLabel, - mkLocalConInfoTableLabel, mkLocalClosureTableLabel, - mkReturnPtLabel, - mkReturnInfoLabel, - mkAltLabel, - mkDefaultLabel, + mkBlockInfoTableLabel, + mkBitmapLabel, mkStringLitLabel, @@ -53,18 +45,18 @@ module CLabel ( mkBHUpdInfoLabel, mkIndStaticInfoLabel, mkMainCapabilityLabel, - mkMAP_FROZEN_infoLabel, - mkMAP_FROZEN0_infoLabel, + mkMAP_FROZEN_CLEAN_infoLabel, + mkMAP_FROZEN_DIRTY_infoLabel, mkMAP_DIRTY_infoLabel, - mkSMAP_FROZEN_infoLabel, - mkSMAP_FROZEN0_infoLabel, + mkSMAP_FROZEN_CLEAN_infoLabel, + mkSMAP_FROZEN_DIRTY_infoLabel, mkSMAP_DIRTY_infoLabel, - mkEMPTY_MVAR_infoLabel, + mkBadAlignmentLabel, mkArrWords_infoLabel, + mkSRTInfoLabel, mkTopTickyCtrLabel, mkCAFBlackHoleInfoTableLabel, - mkCAFBlackHoleEntryLabel, mkRtsPrimOpLabel, mkRtsSlowFastTickyCtrLabel, @@ -102,21 +94,28 @@ module CLabel ( mkHpcTicksLabel, + -- * Predicates hasCAF, - needsCDecl, maybeAsmTemp, externallyVisibleCLabel, + needsCDecl, maybeLocalBlockLabel, externallyVisibleCLabel, isMathFun, isCFunctionLabel, isGcPtrLabel, labelDynamic, + isLocalCLabel, -- * Conversions - toClosureLbl, toSlowEntryLbl, toEntryLbl, toInfoLbl, toRednCountsLbl, hasHaskellName, + toClosureLbl, toSlowEntryLbl, toEntryLbl, toInfoLbl, hasHaskellName, - pprCLabel + pprCLabel, + isInfoTableLabel, + isConInfoTableLabel ) where #include "HsVersions.h" +import GhcPrelude + import IdInfo import BasicTypes +import {-# SOURCE #-} BlockId (BlockId, mkBlockId) import Packages import Module import Name @@ -135,8 +134,8 @@ import PprCore ( {- instances -} ) -- ----------------------------------------------------------------------------- -- The CLabel type -{- - | CLabel is an abstract type that supports the following operations: +{- | + 'CLabel' is an abstract type that supports the following operations: - Pretty printing @@ -155,6 +154,25 @@ import PprCore ( {- instances -} ) more than one declaration for any given label). - Converting an info table label into an entry label. + + CLabel usage is a bit messy in GHC as they are used in a number of different + contexts: + + - By the C-- AST to identify labels + + - By the unregisterised C code generator ("PprC") for naming functions (hence + the name 'CLabel') + + - By the native and LLVM code generators to identify labels + + For extra fun, each of these uses a slightly different subset of constructors + (e.g. 'AsmTempLabel' and 'AsmTempDerivedLabel' are used only in the NCG and + LLVM backends). + + In general, we use 'IdLabel' to represent Haskell things early in the + pipeline. However, later optimization passes will often represent blocks they + create with 'LocalBlockLabel' where there is no obvious 'Name' to hang off the + label. -} data CLabel @@ -177,6 +195,14 @@ data CLabel | RtsLabel RtsLabelInfo + -- | A label associated with a block. These aren't visible outside of the + -- compilation unit in which they are defined. These are generally used to + -- name blocks produced by Cmm-to-Cmm passes and the native code generator, + -- where we don't have a 'Name' to associate the label to and therefore can't + -- use 'IdLabel'. + | LocalBlockLabel + {-# UNPACK #-} !Unique + -- | A 'C' (or otherwise foreign) label. -- | ForeignLabel @@ -190,14 +216,13 @@ data CLabel FunctionOrData - -- | A family of labels related to a particular case expression. - | CaseLabel - {-# UNPACK #-} !Unique -- Unique says which case expression - CaseLabelInfo - + -- | Local temporary label used for native (or LLVM) code generation; must not + -- appear outside of these contexts. Use primarily for debug information | AsmTempLabel {-# UNPACK #-} !Unique + -- | A label \"derived\" from another 'CLabel' by the addition of a suffix. + -- Must not occur outside of the NCG or LLVM code generators. | AsmTempDerivedLabel CLabel FastString -- suffix @@ -229,10 +254,7 @@ data CLabel | HpcTicksLabel Module -- | Static reference table - | SRTLabel !Unique - - -- | Label of an StgLargeSRT - | LargeSRTLabel + | SRTLabel {-# UNPACK #-} !Unique -- | A bitmap (function or case return) @@ -256,14 +278,12 @@ instance Ord CLabel where compare b1 b2 `thenCmp` compare c1 c2 compare (RtsLabel a1) (RtsLabel a2) = compare a1 a2 + compare (LocalBlockLabel u1) (LocalBlockLabel u2) = nonDetCmpUnique u1 u2 compare (ForeignLabel a1 b1 c1 d1) (ForeignLabel a2 b2 c2 d2) = compare a1 a2 `thenCmp` compare b1 b2 `thenCmp` compare c1 c2 `thenCmp` compare d1 d2 - compare (CaseLabel u1 a1) (CaseLabel u2 a2) = - nonDetCmpUnique u1 u2 `thenCmp` - compare a1 a2 compare (AsmTempLabel u1) (AsmTempLabel u2) = nonDetCmpUnique u1 u2 compare (AsmTempDerivedLabel a1 b1) (AsmTempDerivedLabel a2 b2) = compare a1 a2 `thenCmp` @@ -284,8 +304,6 @@ instance Ord CLabel where compare a1 a2 compare (SRTLabel u1) (SRTLabel u2) = nonDetCmpUnique u1 u2 - compare (LargeSRTLabel u1) (LargeSRTLabel u2) = - nonDetCmpUnique u1 u2 compare (LargeBitmapLabel u1) (LargeBitmapLabel u2) = nonDetCmpUnique u1 u2 compare IdLabel{} _ = LT @@ -294,10 +312,10 @@ instance Ord CLabel where compare _ CmmLabel{} = GT compare RtsLabel{} _ = LT compare _ RtsLabel{} = GT + compare LocalBlockLabel{} _ = LT + compare _ LocalBlockLabel{} = GT compare ForeignLabel{} _ = LT compare _ ForeignLabel{} = GT - compare CaseLabel{} _ = LT - compare _ CaseLabel{} = GT compare AsmTempLabel{} _ = LT compare _ AsmTempLabel{} = GT compare AsmTempDerivedLabel{} _ = LT @@ -318,8 +336,6 @@ instance Ord CLabel where compare _ HpcTicksLabel{} = GT compare SRTLabel{} _ = LT compare _ SRTLabel{} = GT - compare LargeSRTLabel{} _ = LT - compare _ LargeSRTLabel{} = GT -- | Record where a foreign label is stored. data ForeignLabelSource @@ -350,7 +366,8 @@ data ForeignLabelSource pprDebugCLabel :: CLabel -> SDoc pprDebugCLabel lbl = case lbl of - IdLabel{} -> ppr lbl <> (parens $ text "IdLabel") + IdLabel _ _ info-> ppr lbl <> (parens $ text "IdLabel" + <> whenPprDebug (text ":" <> text (show info))) CmmLabel pkg _name _info -> ppr lbl <> (parens $ text "CmmLabel" <+> ppr pkg) @@ -367,9 +384,6 @@ pprDebugCLabel lbl data IdLabelInfo = Closure -- ^ Label for closure - | SRT -- ^ Static reference table (TODO: could be removed - -- with the old code generator, but might be needed - -- when we implement the New SRT Plan) | InfoTable -- ^ Info tables for closures; always read-only | Entry -- ^ Entry point | Slow -- ^ Slow entry point @@ -386,16 +400,11 @@ data IdLabelInfo | Bytes -- ^ Content of a string literal. See -- Note [Bytes label]. + | BlockInfoTable -- ^ Like LocalInfoTable but for a proc-point block + -- instead of a closure entry-point. + -- See Note [Proc-point local block entry-point]. - deriving (Eq, Ord) - - -data CaseLabelInfo - = CaseReturnPt - | CaseReturnInfo - | CaseAlt ConTag - | CaseDefault - deriving (Eq, Ord) + deriving (Eq, Ord, Show) data RtsLabelInfo @@ -443,73 +452,88 @@ data DynamicLinkerLabelInfo -- Constructing IdLabels -- These are always local: -mkSlowEntryLabel :: Name -> CafInfo -> CLabel -mkSlowEntryLabel name c = IdLabel name c Slow -mkTopSRTLabel :: Unique -> CLabel -mkTopSRTLabel u = SRTLabel u +mkSRTLabel :: Unique -> CLabel +mkSRTLabel u = SRTLabel u -mkSRTLabel :: Name -> CafInfo -> CLabel mkRednCountsLabel :: Name -> CLabel -mkSRTLabel name c = IdLabel name c SRT mkRednCountsLabel name = IdLabel name NoCafRefs RednCounts -- Note [ticky for LNE] -- These have local & (possibly) external variants: mkLocalClosureLabel :: Name -> CafInfo -> CLabel mkLocalInfoTableLabel :: Name -> CafInfo -> CLabel -mkLocalEntryLabel :: Name -> CafInfo -> CLabel mkLocalClosureTableLabel :: Name -> CafInfo -> CLabel mkLocalClosureLabel name c = IdLabel name c Closure mkLocalInfoTableLabel name c = IdLabel name c LocalInfoTable -mkLocalEntryLabel name c = IdLabel name c LocalEntry mkLocalClosureTableLabel name c = IdLabel name c ClosureTable mkClosureLabel :: Name -> CafInfo -> CLabel mkInfoTableLabel :: Name -> CafInfo -> CLabel mkEntryLabel :: Name -> CafInfo -> CLabel mkClosureTableLabel :: Name -> CafInfo -> CLabel -mkLocalConInfoTableLabel :: CafInfo -> Name -> CLabel -mkLocalConEntryLabel :: CafInfo -> Name -> CLabel mkConInfoTableLabel :: Name -> CafInfo -> CLabel mkBytesLabel :: Name -> CLabel mkClosureLabel name c = IdLabel name c Closure mkInfoTableLabel name c = IdLabel name c InfoTable mkEntryLabel name c = IdLabel name c Entry mkClosureTableLabel name c = IdLabel name c ClosureTable -mkLocalConInfoTableLabel c con = IdLabel con c ConInfoTable -mkLocalConEntryLabel c con = IdLabel con c ConEntry mkConInfoTableLabel name c = IdLabel name c ConInfoTable mkBytesLabel name = IdLabel name NoCafRefs Bytes -mkConEntryLabel :: Name -> CafInfo -> CLabel -mkConEntryLabel name c = IdLabel name c ConEntry +mkBlockInfoTableLabel :: Name -> CafInfo -> CLabel +mkBlockInfoTableLabel name c = IdLabel name c BlockInfoTable + -- See Note [Proc-point local block entry-point]. -- Constructing Cmm Labels mkDirty_MUT_VAR_Label, mkSplitMarkerLabel, mkUpdInfoLabel, mkBHUpdInfoLabel, mkIndStaticInfoLabel, mkMainCapabilityLabel, - mkMAP_FROZEN_infoLabel, mkMAP_FROZEN0_infoLabel, mkMAP_DIRTY_infoLabel, - mkEMPTY_MVAR_infoLabel, mkTopTickyCtrLabel, - mkCAFBlackHoleInfoTableLabel, mkCAFBlackHoleEntryLabel, - mkArrWords_infoLabel, mkSMAP_FROZEN_infoLabel, mkSMAP_FROZEN0_infoLabel, - mkSMAP_DIRTY_infoLabel :: CLabel + mkMAP_FROZEN_CLEAN_infoLabel, mkMAP_FROZEN_DIRTY_infoLabel, + mkMAP_DIRTY_infoLabel, + mkArrWords_infoLabel, + mkTopTickyCtrLabel, + mkCAFBlackHoleInfoTableLabel, + mkSMAP_FROZEN_CLEAN_infoLabel, mkSMAP_FROZEN_DIRTY_infoLabel, + mkSMAP_DIRTY_infoLabel, mkBadAlignmentLabel :: CLabel mkDirty_MUT_VAR_Label = mkForeignLabel (fsLit "dirty_MUT_VAR") Nothing ForeignLabelInExternalPackage IsFunction mkSplitMarkerLabel = CmmLabel rtsUnitId (fsLit "__stg_split_marker") CmmCode mkUpdInfoLabel = CmmLabel rtsUnitId (fsLit "stg_upd_frame") CmmInfo mkBHUpdInfoLabel = CmmLabel rtsUnitId (fsLit "stg_bh_upd_frame" ) CmmInfo mkIndStaticInfoLabel = CmmLabel rtsUnitId (fsLit "stg_IND_STATIC") CmmInfo mkMainCapabilityLabel = CmmLabel rtsUnitId (fsLit "MainCapability") CmmData -mkMAP_FROZEN_infoLabel = CmmLabel rtsUnitId (fsLit "stg_MUT_ARR_PTRS_FROZEN") CmmInfo -mkMAP_FROZEN0_infoLabel = CmmLabel rtsUnitId (fsLit "stg_MUT_ARR_PTRS_FROZEN0") CmmInfo +mkMAP_FROZEN_CLEAN_infoLabel = CmmLabel rtsUnitId (fsLit "stg_MUT_ARR_PTRS_FROZEN_CLEAN") CmmInfo +mkMAP_FROZEN_DIRTY_infoLabel = CmmLabel rtsUnitId (fsLit "stg_MUT_ARR_PTRS_FROZEN_DIRTY") CmmInfo mkMAP_DIRTY_infoLabel = CmmLabel rtsUnitId (fsLit "stg_MUT_ARR_PTRS_DIRTY") CmmInfo -mkEMPTY_MVAR_infoLabel = CmmLabel rtsUnitId (fsLit "stg_EMPTY_MVAR") CmmInfo mkTopTickyCtrLabel = CmmLabel rtsUnitId (fsLit "top_ct") CmmData mkCAFBlackHoleInfoTableLabel = CmmLabel rtsUnitId (fsLit "stg_CAF_BLACKHOLE") CmmInfo -mkCAFBlackHoleEntryLabel = CmmLabel rtsUnitId (fsLit "stg_CAF_BLACKHOLE") CmmEntry mkArrWords_infoLabel = CmmLabel rtsUnitId (fsLit "stg_ARR_WORDS") CmmInfo -mkSMAP_FROZEN_infoLabel = CmmLabel rtsUnitId (fsLit "stg_SMALL_MUT_ARR_PTRS_FROZEN") CmmInfo -mkSMAP_FROZEN0_infoLabel = CmmLabel rtsUnitId (fsLit "stg_SMALL_MUT_ARR_PTRS_FROZEN0") CmmInfo +mkSMAP_FROZEN_CLEAN_infoLabel = CmmLabel rtsUnitId (fsLit "stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN") CmmInfo +mkSMAP_FROZEN_DIRTY_infoLabel = CmmLabel rtsUnitId (fsLit "stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY") CmmInfo mkSMAP_DIRTY_infoLabel = CmmLabel rtsUnitId (fsLit "stg_SMALL_MUT_ARR_PTRS_DIRTY") CmmInfo +mkBadAlignmentLabel = CmmLabel rtsUnitId (fsLit "stg_badAlignment") CmmEntry + +mkSRTInfoLabel :: Int -> CLabel +mkSRTInfoLabel n = CmmLabel rtsUnitId lbl CmmInfo + where + lbl = + case n of + 1 -> fsLit "stg_SRT_1" + 2 -> fsLit "stg_SRT_2" + 3 -> fsLit "stg_SRT_3" + 4 -> fsLit "stg_SRT_4" + 5 -> fsLit "stg_SRT_5" + 6 -> fsLit "stg_SRT_6" + 7 -> fsLit "stg_SRT_7" + 8 -> fsLit "stg_SRT_8" + 9 -> fsLit "stg_SRT_9" + 10 -> fsLit "stg_SRT_10" + 11 -> fsLit "stg_SRT_11" + 12 -> fsLit "stg_SRT_12" + 13 -> fsLit "stg_SRT_13" + 14 -> fsLit "stg_SRT_14" + 15 -> fsLit "stg_SRT_15" + 16 -> fsLit "stg_SRT_16" + _ -> panic "mkSRTInfoLabel" ----- mkCmmInfoLabel, mkCmmEntryLabel, mkCmmRetInfoLabel, mkCmmRetLabel, @@ -524,6 +548,8 @@ mkCmmCodeLabel pkg str = CmmLabel pkg str CmmCode mkCmmDataLabel pkg str = CmmLabel pkg str CmmData mkCmmClosureLabel pkg str = CmmLabel pkg str CmmClosure +mkLocalBlockLabel :: Unique -> CLabel +mkLocalBlockLabel u = LocalBlockLabel u -- Constructing RtsLabels mkRtsPrimOpLabel :: PrimOp -> CLabel @@ -592,13 +618,24 @@ isSomeRODataLabel (IdLabel _ _ ClosureTable) = True isSomeRODataLabel (IdLabel _ _ ConInfoTable) = True isSomeRODataLabel (IdLabel _ _ InfoTable) = True isSomeRODataLabel (IdLabel _ _ LocalInfoTable) = True --- static reference tables defined in haskell (.hs) -isSomeRODataLabel (IdLabel _ _ SRT) = True -isSomeRODataLabel (SRTLabel _) = True +isSomeRODataLabel (IdLabel _ _ BlockInfoTable) = True -- info table defined in cmm (.cmm) isSomeRODataLabel (CmmLabel _ _ CmmInfo) = True isSomeRODataLabel _lbl = False +-- | Whether label is points to some kind of info table +isInfoTableLabel :: CLabel -> Bool +isInfoTableLabel (IdLabel _ _ InfoTable) = True +isInfoTableLabel (IdLabel _ _ LocalInfoTable) = True +isInfoTableLabel (IdLabel _ _ ConInfoTable) = True +isInfoTableLabel (IdLabel _ _ BlockInfoTable) = True +isInfoTableLabel _ = False + +-- | Whether label is points to constructor info table +isConInfoTableLabel :: CLabel -> Bool +isConInfoTableLabel (IdLabel _ _ ConInfoTable) = True +isConInfoTableLabel _ = False + -- | Get the label size field from a ForeignLabel foreignLabelStdcallInfo :: CLabel -> Maybe Int foreignLabelStdcallInfo (ForeignLabel _ info _ _) = info @@ -606,22 +643,9 @@ foreignLabelStdcallInfo _lbl = Nothing -- Constructing Large*Labels -mkLargeSRTLabel :: Unique -> CLabel mkBitmapLabel :: Unique -> CLabel -mkLargeSRTLabel uniq = LargeSRTLabel uniq mkBitmapLabel uniq = LargeBitmapLabel uniq - --- Constructin CaseLabels -mkReturnPtLabel :: Unique -> CLabel -mkReturnInfoLabel :: Unique -> CLabel -mkAltLabel :: Unique -> ConTag -> CLabel -mkDefaultLabel :: Unique -> CLabel -mkReturnPtLabel uniq = CaseLabel uniq CaseReturnPt -mkReturnInfoLabel uniq = CaseLabel uniq CaseReturnInfo -mkAltLabel uniq tag = CaseLabel uniq (CaseAlt tag) -mkDefaultLabel uniq = CaseLabel uniq CaseDefault - -- Constructing Cost Center Labels mkCCLabel :: CostCentre -> CLabel mkCCSLabel :: CostCentreStack -> CLabel @@ -682,31 +706,29 @@ toClosureLbl (CmmLabel m str _) = CmmLabel m str CmmClosure toClosureLbl l = pprPanic "toClosureLbl" (ppr l) toSlowEntryLbl :: CLabel -> CLabel +toSlowEntryLbl (IdLabel n _ BlockInfoTable) + = pprPanic "toSlowEntryLbl" (ppr n) toSlowEntryLbl (IdLabel n c _) = IdLabel n c Slow toSlowEntryLbl l = pprPanic "toSlowEntryLbl" (ppr l) toEntryLbl :: CLabel -> CLabel toEntryLbl (IdLabel n c LocalInfoTable) = IdLabel n c LocalEntry toEntryLbl (IdLabel n c ConInfoTable) = IdLabel n c ConEntry +toEntryLbl (IdLabel n _ BlockInfoTable) = mkLocalBlockLabel (nameUnique n) + -- See Note [Proc-point local block entry-point]. toEntryLbl (IdLabel n c _) = IdLabel n c Entry -toEntryLbl (CaseLabel n CaseReturnInfo) = CaseLabel n CaseReturnPt toEntryLbl (CmmLabel m str CmmInfo) = CmmLabel m str CmmEntry toEntryLbl (CmmLabel m str CmmRetInfo) = CmmLabel m str CmmRet toEntryLbl l = pprPanic "toEntryLbl" (ppr l) toInfoLbl :: CLabel -> CLabel -toInfoLbl (IdLabel n c Entry) = IdLabel n c InfoTable toInfoLbl (IdLabel n c LocalEntry) = IdLabel n c LocalInfoTable toInfoLbl (IdLabel n c ConEntry) = IdLabel n c ConInfoTable toInfoLbl (IdLabel n c _) = IdLabel n c InfoTable -toInfoLbl (CaseLabel n CaseReturnPt) = CaseLabel n CaseReturnInfo toInfoLbl (CmmLabel m str CmmEntry) = CmmLabel m str CmmInfo toInfoLbl (CmmLabel m str CmmRet) = CmmLabel m str CmmRetInfo toInfoLbl l = pprPanic "CLabel.toInfoLbl" (ppr l) -toRednCountsLbl :: CLabel -> Maybe CLabel -toRednCountsLbl = fmap mkRednCountsLabel . hasHaskellName - hasHaskellName :: CLabel -> Maybe Name hasHaskellName (IdLabel n _ _) = Just n hasHaskellName _ = Nothing @@ -747,10 +769,9 @@ needsCDecl :: CLabel -> Bool -- don't bother declaring Bitmap labels, we always make sure -- they are defined before use. needsCDecl (SRTLabel _) = True -needsCDecl (LargeSRTLabel _) = False needsCDecl (LargeBitmapLabel _) = False needsCDecl (IdLabel _ _ _) = True -needsCDecl (CaseLabel _ _) = True +needsCDecl (LocalBlockLabel _) = True needsCDecl (StringLitLabel _) = False needsCDecl (AsmTempLabel _) = False @@ -773,11 +794,11 @@ needsCDecl (DynamicLinkerLabel {}) = panic "needsCDecl DynamicLinkerLabel" needsCDecl PicBaseLabel = panic "needsCDecl PicBaseLabel" needsCDecl (DeadStripPreventer {}) = panic "needsCDecl DeadStripPreventer" --- | If a label is a local temporary used for native code generation --- then return just its unique, otherwise nothing. -maybeAsmTemp :: CLabel -> Maybe Unique -maybeAsmTemp (AsmTempLabel uq) = Just uq -maybeAsmTemp _ = Nothing +-- | If a label is a local block label then return just its 'BlockId', otherwise +-- 'Nothing'. +maybeLocalBlockLabel :: CLabel -> Maybe BlockId +maybeLocalBlockLabel (LocalBlockLabel uq) = Just $ mkBlockId uq +maybeLocalBlockLabel _ = Nothing -- | Check whether a label corresponds to a C function that has @@ -880,11 +901,11 @@ math_funs = mkUniqSet [ -- externally visible if it has to be declared as exported -- in the .o file's symbol table; that is, made non-static. externallyVisibleCLabel :: CLabel -> Bool -- not C "static" -externallyVisibleCLabel (CaseLabel _ _) = False externallyVisibleCLabel (StringLitLabel _) = False externallyVisibleCLabel (AsmTempLabel _) = False externallyVisibleCLabel (AsmTempDerivedLabel _ _)= False externallyVisibleCLabel (RtsLabel _) = True +externallyVisibleCLabel (LocalBlockLabel _) = False externallyVisibleCLabel (CmmLabel _ _ _) = True externallyVisibleCLabel (ForeignLabel{}) = True externallyVisibleCLabel (IdLabel name _ info) = isExternalName name && externallyVisibleIdLabel info @@ -894,14 +915,13 @@ externallyVisibleCLabel (DynamicLinkerLabel _ _) = False externallyVisibleCLabel (HpcTicksLabel _) = True externallyVisibleCLabel (LargeBitmapLabel _) = False externallyVisibleCLabel (SRTLabel _) = False -externallyVisibleCLabel (LargeSRTLabel _) = False externallyVisibleCLabel (PicBaseLabel {}) = panic "externallyVisibleCLabel PicBaseLabel" externallyVisibleCLabel (DeadStripPreventer {}) = panic "externallyVisibleCLabel DeadStripPreventer" externallyVisibleIdLabel :: IdLabelInfo -> Bool -externallyVisibleIdLabel SRT = False externallyVisibleIdLabel LocalInfoTable = False externallyVisibleIdLabel LocalEntry = False +externallyVisibleIdLabel BlockInfoTable = False externallyVisibleIdLabel _ = True -- ----------------------------------------------------------------------------- @@ -928,6 +948,7 @@ isGcPtrLabel lbl = case labelType lbl of -- | Work out the general type of data at the address of this label -- whether it be code, data, or static GC object. labelType :: CLabel -> CLabelType +labelType (IdLabel _ _ info) = idInfoLabelType info labelType (CmmLabel _ _ CmmData) = DataLabel labelType (CmmLabel _ _ CmmClosure) = GcPtrLabel labelType (CmmLabel _ _ CmmCode) = CodeLabel @@ -939,20 +960,28 @@ labelType (CmmLabel _ _ CmmRet) = CodeLabel labelType (RtsLabel (RtsSelectorInfoTable _ _)) = DataLabel labelType (RtsLabel (RtsApInfoTable _ _)) = DataLabel labelType (RtsLabel (RtsApFast _)) = CodeLabel -labelType (CaseLabel _ CaseReturnInfo) = DataLabel -labelType (CaseLabel _ _) = CodeLabel +labelType (RtsLabel _) = DataLabel +labelType (LocalBlockLabel _) = CodeLabel labelType (SRTLabel _) = DataLabel -labelType (LargeSRTLabel _) = DataLabel -labelType (LargeBitmapLabel _) = DataLabel labelType (ForeignLabel _ _ _ IsFunction) = CodeLabel -labelType (IdLabel _ _ info) = idInfoLabelType info -labelType _ = DataLabel +labelType (ForeignLabel _ _ _ IsData) = DataLabel +labelType (AsmTempLabel _) = panic "labelType(AsmTempLabel)" +labelType (AsmTempDerivedLabel _ _) = panic "labelType(AsmTempDerivedLabel)" +labelType (StringLitLabel _) = DataLabel +labelType (CC_Label _) = DataLabel +labelType (CCS_Label _) = DataLabel +labelType (DynamicLinkerLabel _ _) = DataLabel -- Is this right? +labelType PicBaseLabel = DataLabel +labelType (DeadStripPreventer _) = DataLabel +labelType (HpcTicksLabel _) = DataLabel +labelType (LargeBitmapLabel _) = DataLabel idInfoLabelType :: IdLabelInfo -> CLabelType idInfoLabelType info = case info of InfoTable -> DataLabel LocalInfoTable -> DataLabel + BlockInfoTable -> DataLabel Closure -> GcPtrLabel ConInfoTable -> DataLabel ClosureTable -> DataLabel @@ -962,28 +991,48 @@ idInfoLabelType info = -- ----------------------------------------------------------------------------- --- Does a CLabel need dynamic linkage? +-- | Is a 'CLabel' defined in the current module being compiled? +-- +-- Sometimes we can optimise references within a compilation unit in ways that +-- we couldn't for inter-module references. This provides a conservative +-- estimate of whether a 'CLabel' lives in the current module. +isLocalCLabel :: Module -> CLabel -> Bool +isLocalCLabel this_mod lbl = + case lbl of + IdLabel name _ _ + | isInternalName name -> True + | otherwise -> nameModule name == this_mod + LocalBlockLabel _ -> True + _ -> False + +-- ----------------------------------------------------------------------------- + +-- | Does a 'CLabel' need dynamic linkage? +-- -- When referring to data in code, we need to know whether -- that data resides in a DLL or not. [Win32 only.] -- @labelDynamic@ returns @True@ if the label is located -- in a DLL, be it a data reference or not. - labelDynamic :: DynFlags -> Module -> CLabel -> Bool labelDynamic dflags this_mod lbl = case lbl of -- is the RTS in a DLL or not? - RtsLabel _ -> (WayDyn `elem` ways dflags) && (this_pkg /= rtsUnitId) + RtsLabel _ -> + (gopt Opt_ExternalDynamicRefs dflags) && (this_pkg /= rtsUnitId) - IdLabel n _ _ -> isDllName dflags this_mod n + IdLabel n _ _ -> + isDllName dflags this_mod n -- When compiling in the "dyn" way, each package is to be linked into -- its own shared library. CmmLabel pkg _ _ | os == OSMinGW32 -> - (WayDyn `elem` ways dflags) && (this_pkg /= pkg) + (gopt Opt_ExternalDynamicRefs dflags) && (this_pkg /= pkg) | otherwise -> - True + gopt Opt_ExternalDynamicRefs dflags + + LocalBlockLabel _ -> False ForeignLabel _ _ source _ -> if os == OSMinGW32 @@ -999,14 +1048,15 @@ labelDynamic dflags this_mod lbl = -- When compiling in the "dyn" way, each package is to be -- linked into its own DLL. ForeignLabelInPackage pkgId -> - (WayDyn `elem` ways dflags) && (this_pkg /= pkgId) + (gopt Opt_ExternalDynamicRefs dflags) && (this_pkg /= pkgId) else -- On Mac OS X and on ELF platforms, false positives are OK, -- so we claim that all foreign imports come from dynamic -- libraries True - HpcTicksLabel m -> (WayDyn `elem` ways dflags) && this_mod /= m + HpcTicksLabel m -> + (gopt Opt_ExternalDynamicRefs dflags) && this_mod /= m -- Note that DynamicLinkerLabels do NOT require dynamic linking themselves. _ -> False @@ -1028,7 +1078,6 @@ internal names. <type> is one of the following: info Info table srt Static reference table - srtd Static reference table descriptor entry Entry code (function, closure) slow Slow entry code (if any) ret Direct return address @@ -1082,6 +1131,18 @@ Note [Bytes label] ~~~~~~~~~~~~~~~~~~ For a top-level string literal 'foo', we have just one symbol 'foo_bytes', which points to a static data block containing the content of the literal. + +Note [Proc-point local block entry-points] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +A label for a proc-point local block entry-point has no "_entry" suffix. With +`infoTblLbl` we derive an info table label from a proc-point block ID. If +we convert such an info table label into an entry label we must produce +the label without an "_entry" suffix. So an info table label records +the fact that it was derived from a block ID in `IdLabelInfo` as +`BlockInfoTable`. + +The info table label and the local block label are both local labels +and are not externally visible. -} instance Outputable CLabel where @@ -1089,19 +1150,19 @@ instance Outputable CLabel where pprCLabel :: Platform -> CLabel -> SDoc +pprCLabel _ (LocalBlockLabel u) + = tempLabelPrefixOrUnderscore <> pprUniqueAlways u + pprCLabel platform (AsmTempLabel u) - | cGhcWithNativeCodeGen == "YES" - = getPprStyle $ \ sty -> - if asmStyle sty then - ptext (asmTempLabelPrefix platform) <> pprUniqueAlways u - else - char '_' <> pprUniqueAlways u + | not (platformUnregisterised platform) + = tempLabelPrefixOrUnderscore <> pprUniqueAlways u pprCLabel platform (AsmTempDerivedLabel l suf) | cGhcWithNativeCodeGen == "YES" = ptext (asmTempLabelPrefix platform) - <> case l of AsmTempLabel u -> pprUniqueAlways u - _other -> pprCLabel platform l + <> case l of AsmTempLabel u -> pprUniqueAlways u + LocalBlockLabel u -> pprUniqueAlways u + _other -> pprCLabel platform l <> ftext suf pprCLabel platform (DynamicLinkerLabel info lbl) @@ -1114,7 +1175,15 @@ pprCLabel _ PicBaseLabel pprCLabel platform (DeadStripPreventer lbl) | cGhcWithNativeCodeGen == "YES" - = pprCLabel platform lbl <> text "_dsp" + = + {- + `lbl` can be temp one but we need to ensure that dsp label will stay + in the final binary so we prepend non-temp prefix ("dsp_") and + optional `_` (underscore) because this is how you mark non-temp symbols + on some platforms (Darwin) + -} + maybe_underscore $ text "dsp_" + <> pprCLabel platform lbl <> text "_dsp" pprCLabel _ (StringLitLabel u) | cGhcWithNativeCodeGen == "YES" @@ -1144,29 +1213,24 @@ pprCLbl :: CLabel -> SDoc pprCLbl (StringLitLabel u) = pprUniqueAlways u <> text "_str" -pprCLbl (CaseLabel u CaseReturnPt) - = hcat [pprUniqueAlways u, text "_ret"] -pprCLbl (CaseLabel u CaseReturnInfo) - = hcat [pprUniqueAlways u, text "_info"] -pprCLbl (CaseLabel u (CaseAlt tag)) - = hcat [pprUniqueAlways u, pp_cSEP, int tag, text "_alt"] -pprCLbl (CaseLabel u CaseDefault) - = hcat [pprUniqueAlways u, text "_dflt"] - pprCLbl (SRTLabel u) - = pprUniqueAlways u <> pp_cSEP <> text "srt" + = tempLabelPrefixOrUnderscore <> pprUniqueAlways u <> pp_cSEP <> text "srt" -pprCLbl (LargeSRTLabel u) = pprUniqueAlways u <> pp_cSEP <> text "srtd" -pprCLbl (LargeBitmapLabel u) = text "b" <> pprUniqueAlways u <> pp_cSEP <> text "btm" +pprCLbl (LargeBitmapLabel u) = + tempLabelPrefixOrUnderscore + <> char 'b' <> pprUniqueAlways u <> pp_cSEP <> text "btm" -- Some bitsmaps for tuple constructors have a numeric tag (e.g. '7') -- until that gets resolved we'll just force them to start --- with a letter so the label will be legal assmbly code. +-- with a letter so the label will be legal assembly code. pprCLbl (CmmLabel _ str CmmCode) = ftext str pprCLbl (CmmLabel _ str CmmData) = ftext str pprCLbl (CmmLabel _ str CmmPrimCall) = ftext str +pprCLbl (LocalBlockLabel u) = + tempLabelPrefixOrUnderscore <> text "blk_" <> pprUniqueAlways u + pprCLbl (RtsLabel (RtsApFast str)) = ftext str <> text "_fast" pprCLbl (RtsLabel (RtsSelectorInfoTable upd_reqd offset)) @@ -1229,7 +1293,8 @@ pprCLbl (RtsLabel (RtsSlowFastTickyCtr pat)) pprCLbl (ForeignLabel str _ _ _) = ftext str -pprCLbl (IdLabel name _cafs flavor) = ppr name <> ppIdFlavor flavor +pprCLbl (IdLabel name _cafs flavor) = + internalNamePrefix name <> ppr name <> ppIdFlavor flavor pprCLbl (CC_Label cc) = ppr cc pprCLbl (CCS_Label ccs) = ppr ccs @@ -1247,7 +1312,6 @@ ppIdFlavor :: IdLabelInfo -> SDoc ppIdFlavor x = pp_cSEP <> (case x of Closure -> text "closure" - SRT -> text "srt" InfoTable -> text "info" LocalInfoTable -> text "info" Entry -> text "entry" @@ -1258,6 +1322,7 @@ ppIdFlavor x = pp_cSEP <> ConInfoTable -> text "con_info" ClosureTable -> text "closure_tbl" Bytes -> text "bytes" + BlockInfoTable -> text "info" ) @@ -1272,6 +1337,24 @@ instance Outputable ForeignLabelSource where ForeignLabelInThisPackage -> parens $ text "this package" ForeignLabelInExternalPackage -> parens $ text "external package" +internalNamePrefix :: Name -> SDoc +internalNamePrefix name = getPprStyle $ \ sty -> + if codeStyle sty && isRandomGenerated then + sdocWithPlatform $ \platform -> + ptext (asmTempLabelPrefix platform) + else + empty + where + isRandomGenerated = not $ isExternalName name + +tempLabelPrefixOrUnderscore :: SDoc +tempLabelPrefixOrUnderscore = sdocWithPlatform $ \platform -> + getPprStyle $ \ sty -> + if asmStyle sty then + ptext (asmTempLabelPrefix platform) + else + char '_' + -- ----------------------------------------------------------------------------- -- Machine-dependent knowledge about labels. @@ -1285,53 +1368,62 @@ asmTempLabelPrefix platform = case platformOS platform of _ -> sLit ".L" pprDynamicLinkerAsmLabel :: Platform -> DynamicLinkerLabelInfo -> CLabel -> SDoc -pprDynamicLinkerAsmLabel platform dllInfo lbl - = if platformOS platform == OSDarwin - then if platformArch platform == ArchX86_64 - then case dllInfo of - CodeStub -> char 'L' <> ppr lbl <> text "$stub" - SymbolPtr -> char 'L' <> ppr lbl <> text "$non_lazy_ptr" - GotSymbolPtr -> ppr lbl <> text "@GOTPCREL" - GotSymbolOffset -> ppr lbl - else case dllInfo of - CodeStub -> char 'L' <> ppr lbl <> text "$stub" - SymbolPtr -> char 'L' <> ppr lbl <> text "$non_lazy_ptr" - _ -> panic "pprDynamicLinkerAsmLabel" - - else if platformOS platform == OSAIX - then case dllInfo of - SymbolPtr -> text "LC.." <> ppr lbl -- GCC's naming convention - _ -> panic "pprDynamicLinkerAsmLabel" - - else if osElfTarget (platformOS platform) - then if platformArch platform == ArchPPC - then case dllInfo of - CodeStub -> -- See Note [.LCTOC1 in PPC PIC code] - ppr lbl <> text "+32768@plt" - SymbolPtr -> text ".LC_" <> ppr lbl - _ -> panic "pprDynamicLinkerAsmLabel" - else if platformArch platform == ArchX86_64 - then case dllInfo of - CodeStub -> ppr lbl <> text "@plt" - GotSymbolPtr -> ppr lbl <> text "@gotpcrel" - GotSymbolOffset -> ppr lbl - SymbolPtr -> text ".LC_" <> ppr lbl - else if platformArch platform == ArchPPC_64 ELF_V1 - || platformArch platform == ArchPPC_64 ELF_V2 - then case dllInfo of - GotSymbolPtr -> text ".LC_" <> ppr lbl - <> text "@toc" - GotSymbolOffset -> ppr lbl - SymbolPtr -> text ".LC_" <> ppr lbl - _ -> panic "pprDynamicLinkerAsmLabel" - else case dllInfo of - CodeStub -> ppr lbl <> text "@plt" - SymbolPtr -> text ".LC_" <> ppr lbl - GotSymbolPtr -> ppr lbl <> text "@got" - GotSymbolOffset -> ppr lbl <> text "@gotoff" - else if platformOS platform == OSMinGW32 - then case dllInfo of - SymbolPtr -> text "__imp_" <> ppr lbl - _ -> panic "pprDynamicLinkerAsmLabel" - else panic "pprDynamicLinkerAsmLabel" - +pprDynamicLinkerAsmLabel platform dllInfo lbl = + case platformOS platform of + OSDarwin + | platformArch platform == ArchX86_64 -> + case dllInfo of + CodeStub -> char 'L' <> ppr lbl <> text "$stub" + SymbolPtr -> char 'L' <> ppr lbl <> text "$non_lazy_ptr" + GotSymbolPtr -> ppr lbl <> text "@GOTPCREL" + GotSymbolOffset -> ppr lbl + | otherwise -> + case dllInfo of + CodeStub -> char 'L' <> ppr lbl <> text "$stub" + SymbolPtr -> char 'L' <> ppr lbl <> text "$non_lazy_ptr" + _ -> panic "pprDynamicLinkerAsmLabel" + + OSAIX -> + case dllInfo of + SymbolPtr -> text "LC.." <> ppr lbl -- GCC's naming convention + _ -> panic "pprDynamicLinkerAsmLabel" + + _ | osElfTarget (platformOS platform) -> elfLabel + + OSMinGW32 -> + case dllInfo of + SymbolPtr -> text "__imp_" <> ppr lbl + _ -> panic "pprDynamicLinkerAsmLabel" + + _ -> panic "pprDynamicLinkerAsmLabel" + where + elfLabel + | platformArch platform == ArchPPC + = case dllInfo of + CodeStub -> -- See Note [.LCTOC1 in PPC PIC code] + ppr lbl <> text "+32768@plt" + SymbolPtr -> text ".LC_" <> ppr lbl + _ -> panic "pprDynamicLinkerAsmLabel" + + | platformArch platform == ArchX86_64 + = case dllInfo of + CodeStub -> ppr lbl <> text "@plt" + GotSymbolPtr -> ppr lbl <> text "@gotpcrel" + GotSymbolOffset -> ppr lbl + SymbolPtr -> text ".LC_" <> ppr lbl + + | platformArch platform == ArchPPC_64 ELF_V1 + || platformArch platform == ArchPPC_64 ELF_V2 + = case dllInfo of + GotSymbolPtr -> text ".LC_" <> ppr lbl + <> text "@toc" + GotSymbolOffset -> ppr lbl + SymbolPtr -> text ".LC_" <> ppr lbl + _ -> panic "pprDynamicLinkerAsmLabel" + + | otherwise + = case dllInfo of + CodeStub -> ppr lbl <> text "@plt" + SymbolPtr -> text ".LC_" <> ppr lbl + GotSymbolPtr -> ppr lbl <> text "@got" + GotSymbolOffset -> ppr lbl <> text "@gotoff" diff --git a/compiler/cmm/Cmm.hs b/compiler/cmm/Cmm.hs index dbd54236f5..eb34618e38 100644 --- a/compiler/cmm/Cmm.hs +++ b/compiler/cmm/Cmm.hs @@ -1,5 +1,5 @@ -- Cmm representations using Hoopl's Graph CmmNode e x. -{-# LANGUAGE CPP, GADTs #-} +{-# LANGUAGE GADTs #-} module Cmm ( -- * Cmm top-level datatypes @@ -18,7 +18,6 @@ module Cmm ( -- * Info Tables CmmTopInfo(..), CmmStackInfo(..), CmmInfoTable(..), topInfoTable, ClosureTypeInfo(..), - C_SRT(..), needsSRT, ProfilingInfo(..), ConstrDescription, -- * Statements, expressions and types @@ -26,6 +25,10 @@ module Cmm ( module CmmExpr, ) where +import GhcPrelude + +import Id +import CostCentre import CLabel import BlockId import CmmNode @@ -39,8 +42,6 @@ import Outputable import Data.Word ( Word8 ) -#include "HsVersions.h" - ----------------------------------------------------------------------------- -- Cmm, GenCmm ----------------------------------------------------------------------------- @@ -138,24 +139,28 @@ data CmmInfoTable cit_lbl :: CLabel, -- Info table label cit_rep :: SMRep, cit_prof :: ProfilingInfo, - cit_srt :: C_SRT + cit_srt :: Maybe CLabel, -- empty, or a closure address + cit_clo :: Maybe (Id, CostCentreStack) + -- Just (id,ccs) <=> build a static closure later + -- Nothing <=> don't build a static closure + -- + -- Static closures for FUNs and THUNKs are *not* generated by + -- the code generator, because we might want to add SRT + -- entries to them later (for FUNs at least; THUNKs are + -- treated the same for consistency). See Note [SRTs] in + -- CmmBuildInfoTables, in particular the [FUN] optimisation. + -- + -- This is strictly speaking not a part of the info table that + -- will be finally generated, but it's the only convenient + -- place to convey this information from the code generator to + -- where we build the static closures in + -- CmmBuildInfoTables.doSRTs. } data ProfilingInfo = NoProfilingInfo | ProfilingInfo [Word8] [Word8] -- closure_type, closure_desc --- C_SRT is what StgSyn.SRT gets translated to... --- we add a label for the table, and expect only the 'offset/length' form - -data C_SRT = NoC_SRT - | C_SRT !CLabel !WordOff !StgHalfWord {-bitmap or escape-} - deriving (Eq) - -needsSRT :: C_SRT -> Bool -needsSRT NoC_SRT = False -needsSRT (C_SRT _ _ _) = True - ----------------------------------------------------------------------------- -- Static Data ----------------------------------------------------------------------------- diff --git a/compiler/cmm/CmmBuildInfoTables.hs b/compiler/cmm/CmmBuildInfoTables.hs index 5dd8ee4ef2..a8f89a1a9c 100644 --- a/compiler/cmm/CmmBuildInfoTables.hs +++ b/compiler/cmm/CmmBuildInfoTables.hs @@ -1,49 +1,140 @@ -{-# LANGUAGE BangPatterns, CPP, GADTs #-} +{-# LANGUAGE GADTs, BangPatterns, RecordWildCards, + GeneralizedNewtypeDeriving, NondecreasingIndentation, TupleSections #-} module CmmBuildInfoTables - ( CAFSet, CAFEnv, cafAnal - , doSRTs, TopSRT, emptySRT, isEmptySRT, srtToData ) -where + ( CAFSet, CAFEnv, cafAnal + , doSRTs, ModuleSRTInfo, emptySRT + ) where -#include "HsVersions.h" +import GhcPrelude hiding (succ) +import Id +import BlockId import Hoopl.Block import Hoopl.Graph import Hoopl.Label import Hoopl.Collections import Hoopl.Dataflow +import Module +import Platform import Digraph -import Bitmap import CLabel import PprCmmDecl () import Cmm import CmmUtils -import CmmInfo -import Data.List import DynFlags import Maybes import Outputable import SMRep import UniqSupply -import Util +import CostCentre +import StgCmmHeap import PprCmm() +import Control.Monad import Data.Map (Map) import qualified Data.Map as Map import Data.Set (Set) import qualified Data.Set as Set -import Control.Monad +import Data.Tuple +import Control.Monad.Trans.State +import Control.Monad.Trans.Class + + +{- Note [SRTs] + +SRTs are the mechanism by which the garbage collector can determine +the live CAFs in the program. + +Representation +^^^^^^^^^^^^^^ + ++------+ +| info | +| | +-----+---+---+---+ +| -------->|SRT_2| | | | | 0 | +|------| +-----+-|-+-|-+---+ +| | | | +| code | | | +| | v v + +An SRT is simply an object in the program's data segment. It has the +same representation as a static constructor. There are 16 +pre-compiled SRT info tables: stg_SRT_1_info, .. stg_SRT_16_info, +representing SRT objects with 1-16 pointers, respectively. + +The entries of an SRT object point to static closures, which are either +- FUN_STATIC, THUNK_STATIC or CONSTR +- Another SRT (actually just a CONSTR) + +The final field of the SRT is the static link field, used by the +garbage collector to chain together static closures that it visits and +to determine whether a static closure has been visited or not. (see +Note [STATIC_LINK fields]) + +By traversing the transitive closure of an SRT, the GC will reach all +of the CAFs that are reachable from the code associated with this SRT. + +If we need to create an SRT with more than 16 entries, we build a +chain of SRT objects with all but the last having 16 entries. + ++-----+---+- -+---+---+ +|SRT16| | | | | | 0 | ++-----+-|-+- -+-|-+---+ + | | + v v + +----+---+---+---+ + |SRT2| | | | | 0 | + +----+-|-+-|-+---+ + | | + | | + v v -import qualified Prelude as P -import Prelude hiding (succ) +Referring to an SRT from the info table +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -foldSet :: (a -> b -> b) -> b -> Set a -> b -foldSet = Set.foldr +The following things have SRTs: ------------------------------------------------------------------------ --- SRTs +- Static functions (FUN) +- Static thunks (THUNK), ie. CAFs +- Continuations (RET_SMALL, etc.) -{- EXAMPLE +In each case, the info table points to the SRT. + +- info->srt is zero if there's no SRT, otherwise: +- info->srt == 1 and info->f.srt_offset points to the SRT + +e.g. for a FUN with an SRT: + +StgFunInfoTable +------+ + info->f.srt_offset | ------------> offset to SRT object +StgStdInfoTable +------+ + info->layout.ptrs | ... | + info->layout.nptrs | ... | + info->srt | 1 | + info->type | ... | + |------| + +On x86_64, we optimise the info table representation further. The +offset to the SRT can be stored in 32 bits (all code lives within a +2GB region in x86_64's small memory model), so we can save a word in +the info table by storing the srt_offset in the srt field, which is +half a word. + +On x86_64 with TABLES_NEXT_TO_CODE (except on MachO, due to #15169): + +- info->srt is zero if there's no SRT, otherwise: +- info->srt is an offset from the info pointer to the SRT object + +StgStdInfoTable +------+ + info->layout.ptrs | | + info->layout.nptrs | | + info->srt | ------------> offset to SRT object + |------| + + +EXAMPLE +^^^^^^^ f = \x. ... g ... where @@ -65,29 +156,254 @@ CmmDecls. e.g. for f_entry, we might end up with where f1_ret is a return point, and f2_proc is a proc-point. We have a CAFSet for each of these CmmDecls, let's suppose they are - [ f_entry{g_closure}, f1_ret{g_closure}, f2_proc{} ] - [ g_entry{h_closure, c1_closure} ] + [ f_entry{g_info}, f1_ret{g_info}, f2_proc{} ] + [ g_entry{h_info, c1_closure} ] [ h_entry{c2_closure} ] -Now, note that we cannot use g_closure and h_closure in an SRT, -because there are no static closures corresponding to these functions. -So we have to flatten out the structure, replacing g_closure and -h_closure with their contents: +Next, we make an SRT for each of these functions: - [ f_entry{c2_closure, c1_closure}, f1_ret{c2_closure,c1_closure}, f2_proc{} ] - [ g_entry{c2_closure, c1_closure} ] - [ h_entry{c2_closure} ] + f_srt : [g_info] + g_srt : [h_info, c1_closure] + h_srt : [c2_closure] + +Now, for g_info and h_info, we want to refer to the SRTs for g and h +respectively, which we'll label g_srt and h_srt: + + f_srt : [g_srt] + g_srt : [h_srt, c1_closure] + h_srt : [c2_closure] + +Now, when an SRT has a single entry, we don't actually generate an SRT +closure for it, instead we just replace references to it with its +single element. So, since h_srt == c2_closure, we have + + f_srt : [g_srt] + g_srt : [c2_closure, c1_closure] + h_srt : [c2_closure] + +and the only SRT closure we generate is -This is what flattenCAFSets is doing. + g_srt = SRT_2 [c2_closure, c1_closure] + +Optimisations +^^^^^^^^^^^^^ + +To reduce the code size overhead and the cost of traversing SRTs in +the GC, we want to simplify SRTs where possible. We therefore apply +the following optimisations. Each has a [keyword]; search for the +keyword in the code below to see where the optimisation is +implemented. + +1. [Inline] we never create an SRT with a single entry, instead we + point to the single entry directly from the info table. + + i.e. instead of + + +------+ + | info | + | | +-----+---+---+ + | -------->|SRT_1| | | 0 | + |------| +-----+-|-+---+ + | | | + | code | | + | | v + C + + we can point directly to the closure: + + +------+ + | info | + | | + | -------->C + |------| + | | + | code | + | | + + + Furthermore, the SRT for any code that refers to this info table + can point directly to C. + + The exception to this is when we're doing dynamic linking. In that + case, if the closure is not locally defined then we can't point to + it directly from the info table, because this is the text section + which cannot contain runtime relocations. In this case we skip this + optimisation and generate the singleton SRT, becase SRTs are in the + data section and *can* have relocatable references. + +2. [FUN] A static function closure can also be an SRT, we simply put + the SRT entries as fields in the static closure. This makes a lot + of sense: the static references are just like the free variables of + the FUN closure. + + i.e. instead of + + f_closure: + +-----+---+ + | | | 0 | + +- |--+---+ + | +------+ + | | info | f_srt: + | | | +-----+---+---+---+ + | | -------->|SRT_2| | | | + 0 | + `----------->|------| +-----+-|-+-|-+---+ + | | | | + | code | | | + | | v v + + + We can generate: + + f_closure: + +-----+---+---+---+ + | | | | | | | 0 | + +- |--+-|-+-|-+---+ + | | | +------+ + | v v | info | + | | | + | | 0 | + `----------->|------| + | | + | code | + | | + + + (note: we can't do this for THUNKs, because the thunk gets + overwritten when it is entered, so we wouldn't be able to share + this SRT with other info tables that want to refer to it (see + [Common] below). FUNs are immutable so don't have this problem.) + +3. [Common] Identical SRTs can be commoned up. + +4. [Filter] If an SRT A refers to an SRT B and a closure C, and B also + refers to C (perhaps transitively), then we can omit the reference + to C from A. + + +Note that there are many other optimisations that we could do, but +aren't implemented. In general, we could omit any reference from an +SRT if everything reachable from it is also reachable from the other +fields in the SRT. Our [Filter] optimisation is a special case of +this. + +Another opportunity we don't exploit is this: + +A = {X,Y,Z} +B = {Y,Z} +C = {X,B} + +Here we could use C = {A} and therefore [Inline] C = A. -} ------------------------------------------------------------------------ --- Finding the CAFs used by a procedure +-- --------------------------------------------------------------------- +{- Note [Invalid optimisation: shortcutting] + +You might think that if we have something like + +A's SRT = {B} +B's SRT = {X} + +that we could replace the reference to B in A's SRT with X. + +A's SRT = {X} +B's SRT = {X} -type CAFSet = Set CLabel +and thereby perhaps save a little work at runtime, because we don't +have to visit B. + +But this is NOT valid. + +Consider these cases: + +0. B can't be a constructor, because constructors don't have SRTs + +1. B is a CAF. This is the easy one. Obviously we want A's SRT to + point to B, so that it keeps B alive. + +2. B is a function. This is the tricky one. The reason we can't +shortcut in this case is that we aren't allowed to resurrect static +objects. + +== How does this cause a problem? == + +The particular case that cropped up when we tried this was #15544. +- A is a thunk +- B is a static function +- X is a CAF +- suppose we GC when A is alive, and B is not otherwise reachable. +- B is "collected", meaning that it doesn't make it onto the static + objects list during this GC, but nothing bad happens yet. +- Next, suppose we enter A, and then call B. (remember that A refers to B) + At the entry point to B, we GC. This puts B on the stack, as part of the + RET_FUN stack frame that gets pushed when we GC at a function entry point. +- This GC will now reach B +- But because B was previous "collected", it breaks the assumption + that static objects are never resurrected. See Note [STATIC_LINK + fields] in rts/sm/Storage.h for why this is bad. +- In practice, the GC thinks that B has already been visited, and so + doesn't visit X, and catastrophe ensues. + +== Isn't this caused by the RET_FUN business? == + +Maybe, but could you prove that RET_FUN is the only way that +resurrection can occur? + +So, no shortcutting. +-} + +-- --------------------------------------------------------------------- +-- Label types + +-- Labels that come from cafAnal can be: +-- - _closure labels for static functions or CAFs +-- - _info labels for dynamic functions, thunks, or continuations +-- - _entry labels for functions or thunks +-- +-- Meanwhile the labels on top-level blocks are _entry labels. +-- +-- To put everything in the same namespace we convert all labels to +-- closure labels using toClosureLbl. Note that some of these +-- labels will not actually exist; that's ok because we're going to +-- map them to SRTEntry later, which ranges over labels that do exist. +-- +newtype CAFLabel = CAFLabel CLabel + deriving (Eq,Ord,Outputable) + +type CAFSet = Set CAFLabel type CAFEnv = LabelMap CAFSet +mkCAFLabel :: CLabel -> CAFLabel +mkCAFLabel lbl = CAFLabel (toClosureLbl lbl) + +-- This is a label that we can put in an SRT. It *must* be a closure label, +-- pointing to either a FUN_STATIC, THUNK_STATIC, or CONSTR. +newtype SRTEntry = SRTEntry CLabel + deriving (Eq, Ord, Outputable) + +-- --------------------------------------------------------------------- +-- CAF analysis + +-- | +-- For each code block: +-- - collect the references reachable from this code block to FUN, +-- THUNK or RET labels for which hasCAF == True +-- +-- This gives us a `CAFEnv`: a mapping from code block to sets of labels +-- +cafAnal + :: LabelSet -- The blocks representing continuations, ie. those + -- that will get RET info tables. These labels will + -- get their own SRTs, so we don't aggregate CAFs from + -- references to these labels, we just use the label. + -> CLabel -- The top label of the proc + -> CmmGraph + -> CAFEnv +cafAnal contLbls topLbl cmmGraph = + analyzeCmmBwd cafLattice + (cafTransfers contLbls (g_entry cmmGraph) topLbl) cmmGraph mapEmpty + + cafLattice :: DataflowLattice CAFSet cafLattice = DataflowLattice Set.empty add where @@ -95,290 +411,486 @@ cafLattice = DataflowLattice Set.empty add let !new' = old `Set.union` new in changedIf (Set.size new' > Set.size old) new' -cafTransfers :: TransferFun CAFSet -cafTransfers (BlockCC eNode middle xNode) fBase = - let joined = cafsInNode xNode $! joinOutFacts cafLattice xNode fBase + +cafTransfers :: LabelSet -> Label -> CLabel -> TransferFun CAFSet +cafTransfers contLbls entry topLbl + (BlockCC eNode middle xNode) fBase = + let joined = cafsInNode xNode $! live' !result = foldNodesBwdOO cafsInNode middle joined + + facts = mapMaybe successorFact (successors xNode) + live' = joinFacts cafLattice facts + + successorFact s + -- If this is a loop back to the entry, we can refer to the + -- entry label. + | s == entry = Just (add topLbl Set.empty) + -- If this is a continuation, we want to refer to the + -- SRT for the continuation's info table + | s `setMember` contLbls + = Just (Set.singleton (mkCAFLabel (infoTblLbl s))) + -- Otherwise, takes the CAF references from the destination + | otherwise + = lookupFact s fBase + + cafsInNode :: CmmNode e x -> CAFSet -> CAFSet + cafsInNode node set = foldExpDeep addCaf node set + + addCaf expr !set = + case expr of + CmmLit (CmmLabel c) -> add c set + CmmLit (CmmLabelOff c _) -> add c set + CmmLit (CmmLabelDiffOff c1 c2 _ _) -> add c1 $! add c2 set + _ -> set + add l s | hasCAF l = Set.insert (mkCAFLabel l) s + | otherwise = s + in mapSingleton (entryLabel eNode) result -cafsInNode :: CmmNode e x -> CAFSet -> CAFSet -cafsInNode node set = foldExpDeep addCaf node set - where - addCaf expr !set = - case expr of - CmmLit (CmmLabel c) -> add c set - CmmLit (CmmLabelOff c _) -> add c set - CmmLit (CmmLabelDiffOff c1 c2 _) -> add c1 $! add c2 set - _ -> set - add l s | hasCAF l = Set.insert (toClosureLbl l) s - | otherwise = s - --- | An analysis to find live CAFs. -cafAnal :: CmmGraph -> CAFEnv -cafAnal cmmGraph = analyzeCmmBwd cafLattice cafTransfers cmmGraph mapEmpty - ------------------------------------------------------------------------ --- Building the SRTs - --- Description of the SRT for a given module. --- Note that this SRT may grow as we greedily add new CAFs to it. -data TopSRT = TopSRT { lbl :: CLabel - , next_elt :: Int -- the next entry in the table - , rev_elts :: [CLabel] - , elt_map :: Map CLabel Int } - -- map: CLabel -> its last entry in the table -instance Outputable TopSRT where - ppr (TopSRT lbl next elts eltmap) = - text "TopSRT:" <+> ppr lbl - <+> ppr next - <+> ppr elts - <+> ppr eltmap - -emptySRT :: MonadUnique m => m TopSRT -emptySRT = - do top_lbl <- getUniqueM >>= \ u -> return $ mkTopSRTLabel u - return TopSRT { lbl = top_lbl, next_elt = 0, rev_elts = [], elt_map = Map.empty } - -isEmptySRT :: TopSRT -> Bool -isEmptySRT srt = null (rev_elts srt) - -cafMember :: TopSRT -> CLabel -> Bool -cafMember srt lbl = Map.member lbl (elt_map srt) - -cafOffset :: TopSRT -> CLabel -> Maybe Int -cafOffset srt lbl = Map.lookup lbl (elt_map srt) - -addCAF :: CLabel -> TopSRT -> TopSRT -addCAF caf srt = - srt { next_elt = last + 1 - , rev_elts = caf : rev_elts srt - , elt_map = Map.insert caf last (elt_map srt) } - where last = next_elt srt - -srtToData :: TopSRT -> CmmGroup -srtToData srt = [CmmData sec (Statics (lbl srt) tbl)] - where tbl = map (CmmStaticLit . CmmLabel) (reverse (rev_elts srt)) - sec = Section RelocatableReadOnlyData (lbl srt) - --- Once we have found the CAFs, we need to do two things: --- 1. Build a table of all the CAFs used in the procedure. --- 2. Compute the C_SRT describing the subset of CAFs live at each procpoint. --- --- When building the local view of the SRT, we first make sure that all the CAFs are --- in the SRT. Then, if the number of CAFs is small enough to fit in a bitmap, --- we make sure they're all close enough to the bottom of the table that the --- bitmap will be able to cover all of them. -buildSRT :: DynFlags -> TopSRT -> CAFSet -> UniqSM (TopSRT, Maybe CmmDecl, C_SRT) -buildSRT dflags topSRT cafs = - do let - -- For each label referring to a function f without a static closure, - -- replace it with the CAFs that are reachable from f. - sub_srt topSRT localCafs = - let cafs = Set.elems localCafs - mkSRT topSRT = - do localSRTs <- procpointSRT dflags (lbl topSRT) (elt_map topSRT) cafs - return (topSRT, localSRTs) - in if cafs `lengthExceeds` maxBmpSize dflags then - mkSRT (foldl add_if_missing topSRT cafs) - else -- make sure all the cafs are near the bottom of the srt - mkSRT (add_if_too_far topSRT cafs) - add_if_missing srt caf = - if cafMember srt caf then srt else addCAF caf srt - -- If a CAF is more than maxBmpSize entries from the young end of the - -- SRT, then we add it to the SRT again. - -- (Note: Not in the SRT => infinitely far.) - add_if_too_far srt@(TopSRT {elt_map = m}) cafs = - add srt (sortBy farthestFst cafs) - where - farthestFst x y = case (Map.lookup x m, Map.lookup y m) of - (Nothing, Nothing) -> EQ - (Nothing, Just _) -> LT - (Just _, Nothing) -> GT - (Just d, Just d') -> compare d' d - add srt [] = srt - add srt@(TopSRT {next_elt = next}) (caf : rst) = - case cafOffset srt caf of - Just ix -> if next - ix > maxBmpSize dflags then - add (addCAF caf srt) rst - else srt - Nothing -> add (addCAF caf srt) rst - (topSRT, subSRTs) <- sub_srt topSRT cafs - let (sub_tbls, blockSRTs) = subSRTs - return (topSRT, sub_tbls, blockSRTs) - --- Construct an SRT bitmap. --- Adapted from simpleStg/SRT.hs, which expects Id's. -procpointSRT :: DynFlags -> CLabel -> Map CLabel Int -> [CLabel] -> - UniqSM (Maybe CmmDecl, C_SRT) -procpointSRT _ _ _ [] = - return (Nothing, NoC_SRT) -procpointSRT dflags top_srt top_table entries = - do (top, srt) <- bitmap `seq` to_SRT dflags top_srt offset len bitmap - return (top, srt) - where - ints = map (expectJust "constructSRT" . flip Map.lookup top_table) entries - sorted_ints = sort ints - offset = head sorted_ints - bitmap_entries = map (subtract offset) sorted_ints - len = P.last bitmap_entries + 1 - bitmap = intsToBitmap dflags len bitmap_entries - -maxBmpSize :: DynFlags -> Int -maxBmpSize dflags = widthInBits (wordWidth dflags) `div` 2 - --- Adapted from codeGen/StgCmmUtils, which converts from SRT to C_SRT. -to_SRT :: DynFlags -> CLabel -> Int -> Int -> Bitmap -> UniqSM (Maybe CmmDecl, C_SRT) -to_SRT dflags top_srt off len bmp - | len > maxBmpSize dflags || bmp == [toStgWord dflags (fromStgHalfWord (srtEscape dflags))] - = do id <- getUniqueM - let srt_desc_lbl = mkLargeSRTLabel id - section = Section RelocatableReadOnlyData srt_desc_lbl - tbl = CmmData section $ - Statics srt_desc_lbl $ map CmmStaticLit - ( cmmLabelOffW dflags top_srt off - : mkWordCLit dflags (fromIntegral len) - : map (mkStgWordCLit dflags) bmp) - return (Just tbl, C_SRT srt_desc_lbl 0 (srtEscape dflags)) - | otherwise - = return (Nothing, C_SRT top_srt off (toStgHalfWord dflags (fromStgWord (head bmp)))) - -- The fromIntegral converts to StgHalfWord - --- Gather CAF info for a procedure, but only if the procedure --- doesn't have a static closure. --- (If it has a static closure, it will already have an SRT to --- keep its CAFs live.) --- Any procedure referring to a non-static CAF c must keep live --- any CAF that is reachable from c. -localCAFInfo :: CAFEnv -> CmmDecl -> (CAFSet, Maybe CLabel) -localCAFInfo _ (CmmData _ _) = (Set.empty, Nothing) -localCAFInfo cafEnv proc@(CmmProc _ top_l _ (CmmGraph {g_entry=entry})) = - case topInfoTable proc of - Just (CmmInfoTable { cit_rep = rep }) - | not (isStaticRep rep) && not (isStackRep rep) - -> (cafs, Just (toClosureLbl top_l)) - _other -> (cafs, Nothing) - where - cafs = expectJust "maybeBindCAFs" $ mapLookup entry cafEnv - --- Once we have the local CAF sets for some (possibly) mutually --- recursive functions, we can create an environment mapping --- each function to its set of CAFs. Note that a CAF may --- be a reference to a function. If that function f does not have --- a static closure, then we need to refer specifically --- to the set of CAFs used by f. Of course, the set of CAFs --- used by f must be included in the local CAF sets that are input to --- this function. To minimize lookup time later, we return --- the environment with every reference to f replaced by its set of CAFs. --- To do this replacement efficiently, we gather strongly connected --- components, then we sort the components in topological order. -mkTopCAFInfo :: [(CAFSet, Maybe CLabel)] -> Map CLabel CAFSet -mkTopCAFInfo localCAFs = foldl addToTop Map.empty g - where - addToTop env (AcyclicSCC (l, cafset)) = - Map.insert l (flatten env cafset) env - addToTop env (CyclicSCC nodes) = - let (lbls, cafsets) = unzip nodes - cafset = foldr Set.delete (foldl Set.union Set.empty cafsets) lbls - in foldl (\env l -> Map.insert l (flatten env cafset) env) env lbls - - g = stronglyConnCompFromEdgedVerticesOrd - [ DigraphNode (l,cafs) l (Set.elems cafs) - | (cafs, Just l) <- localCAFs ] - -flatten :: Map CLabel CAFSet -> CAFSet -> CAFSet -flatten env cafset = foldSet (lookup env) Set.empty cafset - where - lookup env caf cafset' = - case Map.lookup caf env of - Just cafs -> foldSet Set.insert cafset' cafs - Nothing -> Set.insert caf cafset' - -bundle :: Map CLabel CAFSet - -> (CAFEnv, CmmDecl) - -> (CAFSet, Maybe CLabel) - -> (LabelMap CAFSet, CmmDecl) -bundle flatmap (env, decl@(CmmProc infos _lbl _ g)) (closure_cafs, mb_lbl) - = ( mapMapWithKey get_cafs (info_tbls infos), decl ) - where - entry = g_entry g - - entry_cafs - | Just l <- mb_lbl = expectJust "bundle" $ Map.lookup l flatmap - | otherwise = flatten flatmap closure_cafs - - get_cafs l _ - | l == entry = entry_cafs - | Just info <- mapLookup l env = flatten flatmap info - | otherwise = Set.empty - -- the label might not be in the env if the code corresponding to - -- this info table was optimised away (perhaps because it was - -- unreachable). In this case it doesn't matter what SRT we - -- infer, since the info table will not appear in the generated - -- code. See #9329. - -bundle _flatmap (_, decl) _ - = ( mapEmpty, decl ) - - -flattenCAFSets :: [(CAFEnv, [CmmDecl])] -> [(LabelMap CAFSet, CmmDecl)] -flattenCAFSets cpsdecls = zipWith (bundle flatmap) zipped localCAFs - where - zipped = [ (env,decl) | (env,decls) <- cpsdecls, decl <- decls ] - localCAFs = unzipWith localCAFInfo zipped - flatmap = mkTopCAFInfo localCAFs -- transitive closure of localCAFs - -doSRTs :: DynFlags - -> TopSRT - -> [(CAFEnv, [CmmDecl])] - -> IO (TopSRT, [CmmDecl]) - -doSRTs dflags topSRT tops - = do - let caf_decls = flattenCAFSets tops - us <- mkSplitUniqSupply 'u' - let (topSRT', gs') = initUs_ us $ foldM setSRT (topSRT, []) caf_decls - return (topSRT', reverse gs' {- Note [reverse gs] -}) - where - setSRT (topSRT, rst) (caf_map, decl@(CmmProc{})) = do - (topSRT, srt_tables, srt_env) <- buildSRTs dflags topSRT caf_map - let decl' = updInfoSRTs srt_env decl - return (topSRT, decl': srt_tables ++ rst) - setSRT (topSRT, rst) (_, decl) = - return (topSRT, decl : rst) - -buildSRTs :: DynFlags -> TopSRT -> LabelMap CAFSet - -> UniqSM (TopSRT, [CmmDecl], LabelMap C_SRT) -buildSRTs dflags top_srt caf_map - = foldM doOne (top_srt, [], mapEmpty) (mapToList caf_map) - where - doOne (top_srt, decls, srt_env) (l, cafs) - = do (top_srt, mb_decl, srt) <- buildSRT dflags top_srt cafs - return ( top_srt, maybeToList mb_decl ++ decls - , mapInsert l srt srt_env ) - -{- -- In each CmmDecl there is a mapping from BlockId -> CmmInfoTable -- The one corresponding to g_entry is the closure info table, the - rest are continuations. -- Each one needs an SRT. -- We get the CAFSet for each one from the CAFEnv -- flatten gives us - [(LabelMap CAFSet, CmmDecl)] -- --} +-- ----------------------------------------------------------------------------- +-- ModuleSRTInfo + +data ModuleSRTInfo = ModuleSRTInfo + { thisModule :: Module + -- ^ Current module being compiled. Required for calling labelDynamic. + , dedupSRTs :: Map (Set SRTEntry) SRTEntry + -- ^ previous SRTs we've emitted, so we can de-duplicate. + -- Used to implement the [Common] optimisation. + , flatSRTs :: Map SRTEntry (Set SRTEntry) + -- ^ The reverse mapping, so that we can remove redundant + -- entries. e.g. if we have an SRT [a,b,c], and we know that b + -- points to [c,d], we can omit c and emit [a,b]. + -- Used to implement the [Filter] optimisation. + } +instance Outputable ModuleSRTInfo where + ppr ModuleSRTInfo{..} = + text "ModuleSRTInfo:" <+> ppr dedupSRTs <+> ppr flatSRTs -{- Note [reverse gs] +emptySRT :: Module -> ModuleSRTInfo +emptySRT mod = + ModuleSRTInfo + { thisModule = mod + , dedupSRTs = Map.empty + , flatSRTs = Map.empty } + +-- ----------------------------------------------------------------------------- +-- Constructing SRTs + +{- Implementation notes + +- In each CmmDecl there is a mapping info_tbls from Label -> CmmInfoTable + +- The entry in info_tbls corresponding to g_entry is the closure info + table, the rest are continuations. + +- Each entry in info_tbls possibly needs an SRT. We need to make a + label for each of these. + +- We get the CAFSet for each entry from the CAFEnv - It is important to keep the code blocks in the same order, - otherwise binary sizes get slightly bigger. I'm not completely - sure why this is, perhaps the assembler generates bigger jump - instructions for forward refs. --SDM -} -updInfoSRTs :: LabelMap C_SRT -> CmmDecl -> CmmDecl -updInfoSRTs srt_env (CmmProc top_info top_l live g) = - CmmProc (top_info {info_tbls = mapMapWithKey updInfoTbl (info_tbls top_info)}) top_l live g - where updInfoTbl l info_tbl - = info_tbl { cit_srt = expectJust "updInfo" $ mapLookup l srt_env } -updInfoSRTs _ t = t +-- | Return a (Label,CLabel) pair for each labelled block of a CmmDecl, +-- where the label is +-- - the info label for a continuation or dynamic closure +-- - the closure label for a top-level function (not a CAF) +getLabelledBlocks :: CmmDecl -> [(Label, CAFLabel)] +getLabelledBlocks (CmmData _ _) = [] +getLabelledBlocks (CmmProc top_info _ _ _) = + [ (blockId, mkCAFLabel (cit_lbl info)) + | (blockId, info) <- mapToList (info_tbls top_info) + , let rep = cit_rep info + , not (isStaticRep rep) || not (isThunkRep rep) + ] + + +-- | Put the labelled blocks that we will be annotating with SRTs into +-- dependency order. This is so that we can process them one at a +-- time, resolving references to earlier blocks to point to their +-- SRTs. CAFs themselves are not included here; see getCAFs below. +depAnalSRTs + :: CAFEnv + -> [CmmDecl] + -> [SCC (Label, CAFLabel, Set CAFLabel)] +depAnalSRTs cafEnv decls = + srtTrace "depAnalSRTs" (ppr graph) graph + where + labelledBlocks = concatMap getLabelledBlocks decls + labelToBlock = Map.fromList (map swap labelledBlocks) + graph = stronglyConnCompFromEdgedVerticesOrd + [ let cafs' = Set.delete lbl cafs in + DigraphNode (l,lbl,cafs') l + (mapMaybe (flip Map.lookup labelToBlock) (Set.toList cafs')) + | (l, lbl) <- labelledBlocks + , Just cafs <- [mapLookup l cafEnv] ] + + +-- | Get (Label, CAFLabel, Set CAFLabel) for each block that represents a CAF. +-- These are treated differently from other labelled blocks: +-- - we never shortcut a reference to a CAF to the contents of its +-- SRT, since the point of SRTs is to keep CAFs alive. +-- - CAFs therefore don't take part in the dependency analysis in depAnalSRTs. +-- instead we generate their SRTs after everything else. +getCAFs :: CAFEnv -> [CmmDecl] -> [(Label, CAFLabel, Set CAFLabel)] +getCAFs cafEnv decls = + [ (g_entry g, mkCAFLabel topLbl, cafs) + | CmmProc top_info topLbl _ g <- decls + , Just info <- [mapLookup (g_entry g) (info_tbls top_info)] + , let rep = cit_rep info + , isStaticRep rep && isThunkRep rep + , Just cafs <- [mapLookup (g_entry g) cafEnv] + ] + + +-- | Get the list of blocks that correspond to the entry points for +-- FUN_STATIC closures. These are the blocks for which if we have an +-- SRT we can merge it with the static closure. [FUN] +getStaticFuns :: [CmmDecl] -> [(BlockId, CLabel)] +getStaticFuns decls = + [ (g_entry g, lbl) + | CmmProc top_info _ _ g <- decls + , Just info <- [mapLookup (g_entry g) (info_tbls top_info)] + , Just (id, _) <- [cit_clo info] + , let rep = cit_rep info + , isStaticRep rep && isFunRep rep + , let lbl = mkLocalClosureLabel (idName id) (idCafInfo id) + ] + + +-- | Maps labels from 'cafAnal' to the final CLabel that will appear +-- in the SRT. +-- - closures with singleton SRTs resolve to their single entry +-- - closures with larger SRTs map to the label for that SRT +-- - CAFs must not map to anything! +-- - if a labels maps to Nothing, we found that this label's SRT +-- is empty, so we don't need to refer to it from other SRTs. +type SRTMap = Map CAFLabel (Maybe SRTEntry) + +-- | resolve a CAFLabel to its SRTEntry using the SRTMap +resolveCAF :: SRTMap -> CAFLabel -> Maybe SRTEntry +resolveCAF srtMap lbl@(CAFLabel l) = + Map.findWithDefault (Just (SRTEntry (toClosureLbl l))) lbl srtMap + + +-- | Attach SRTs to all info tables in the CmmDecls, and add SRT +-- declarations to the ModuleSRTInfo. +-- +doSRTs + :: DynFlags + -> ModuleSRTInfo + -> [(CAFEnv, [CmmDecl])] + -> IO (ModuleSRTInfo, [CmmDecl]) + +doSRTs dflags moduleSRTInfo tops = do + us <- mkSplitUniqSupply 'u' + + -- Ignore the original grouping of decls, and combine all the + -- CAFEnvs into a single CAFEnv. + let (cafEnvs, declss) = unzip tops + cafEnv = mapUnions cafEnvs + decls = concat declss + staticFuns = mapFromList (getStaticFuns decls) + + -- Put the decls in dependency order. Why? So that we can implement + -- [Inline] and [Filter]. If we need to refer to an SRT that has + -- a single entry, we use the entry itself, which means that we + -- don't need to generate the singleton SRT in the first place. But + -- to do this we need to process blocks before things that depend on + -- them. + let + sccs = depAnalSRTs cafEnv decls + cafsWithSRTs = getCAFs cafEnv decls + + -- On each strongly-connected group of decls, construct the SRT + -- closures and the SRT fields for info tables. + let result :: + [ ( [CmmDecl] -- generated SRTs + , [(Label, CLabel)] -- SRT fields for info tables + , [(Label, [SRTEntry])] -- SRTs to attach to static functions + ) ] + ((result, _srtMap), moduleSRTInfo') = + initUs_ us $ + flip runStateT moduleSRTInfo $ + flip runStateT Map.empty $ do + nonCAFs <- mapM (doSCC dflags staticFuns) sccs + cAFs <- forM cafsWithSRTs $ \(l, cafLbl, cafs) -> + oneSRT dflags staticFuns [l] [cafLbl] True{-is a CAF-} cafs + return (nonCAFs ++ cAFs) + + (declss, pairs, funSRTs) = unzip3 result + + -- Next, update the info tables with the SRTs + let + srtFieldMap = mapFromList (concat pairs) + funSRTMap = mapFromList (concat funSRTs) + decls' = concatMap (updInfoSRTs dflags srtFieldMap funSRTMap) decls + + return (moduleSRTInfo', concat declss ++ decls') + + +-- | Build the SRT for a strongly-connected component of blocks +doSCC + :: DynFlags + -> LabelMap CLabel -- which blocks are static function entry points + -> SCC (Label, CAFLabel, Set CAFLabel) + -> StateT SRTMap + (StateT ModuleSRTInfo UniqSM) + ( [CmmDecl] -- generated SRTs + , [(Label, CLabel)] -- SRT fields for info tables + , [(Label, [SRTEntry])] -- SRTs to attach to static functions + ) + +doSCC dflags staticFuns (AcyclicSCC (l, cafLbl, cafs)) = + oneSRT dflags staticFuns [l] [cafLbl] False cafs + +doSCC dflags staticFuns (CyclicSCC nodes) = do + -- build a single SRT for the whole cycle, see Note [recursive SRTs] + let (blockids, lbls, cafsets) = unzip3 nodes + cafs = Set.unions cafsets + oneSRT dflags staticFuns blockids lbls False cafs + + +{- Note [recursive SRTs] + +If the dependency analyser has found us a recursive group of +declarations, then we build a single SRT for the whole group, on the +grounds that everything in the group is reachable from everything +else, so we lose nothing by having a single SRT. + +However, there are a couple of wrinkles to be aware of. + +* The Set CAFLabel for this SRT will contain labels in the group +itself. The SRTMap will therefore not contain entries for these labels +yet, so we can't turn them into SRTEntries using resolveCAF. BUT we +can just remove recursive references from the Set CAFLabel before +generating the SRT - the SRT will still contain all the CAFLabels that +we need to refer to from this group's SRT. + +* That is, EXCEPT for static function closures. For the same reason +described in Note [Invalid optimisation: shortcutting], we cannot omit +references to static function closures. + - But, since we will merge the SRT with one of the static function + closures (see [FUN]), we can omit references to *that* static + function closure from the SRT. +-} + +-- | Build an SRT for a set of blocks +oneSRT + :: DynFlags + -> LabelMap CLabel -- which blocks are static function entry points + -> [Label] -- blocks in this set + -> [CAFLabel] -- labels for those blocks + -> Bool -- True <=> this SRT is for a CAF + -> Set CAFLabel -- SRT for this set + -> StateT SRTMap + (StateT ModuleSRTInfo UniqSM) + ( [CmmDecl] -- SRT objects we built + , [(Label, CLabel)] -- SRT fields for these blocks' itbls + , [(Label, [SRTEntry])] -- SRTs to attach to static functions + ) + +oneSRT dflags staticFuns blockids lbls isCAF cafs = do + srtMap <- get + topSRT <- lift get + let + -- Can we merge this SRT with a FUN_STATIC closure? + (maybeFunClosure, otherFunLabels) = + case [ (l,b) | b <- blockids, Just l <- [mapLookup b staticFuns] ] of + [] -> (Nothing, []) + ((l,b):xs) -> (Just (l,b), map (mkCAFLabel . fst) xs) + + -- Remove recursive references from the SRT, except for (all but + -- one of the) static functions. See Note [recursive SRTs]. + nonRec = cafs `Set.difference` + Set.fromList lbls `Set.difference` Set.fromList otherFunLabels + + -- First resolve all the CAFLabels to SRTEntries + -- Implements the [Inline] optimisation. + resolved = + Set.fromList $ + catMaybes (map (resolveCAF srtMap) (Set.toList nonRec)) + + -- The set of all SRTEntries in SRTs that we refer to from here. + allBelow = + Set.unions [ lbls | caf <- Set.toList resolved + , Just lbls <- [Map.lookup caf (flatSRTs topSRT)] ] + + -- Remove SRTEntries that are also in an SRT that we refer to. + -- Implements the [Filter] optimisation. + filtered = Set.difference resolved allBelow + + srtTrace "oneSRT:" + (ppr cafs <+> ppr resolved <+> ppr allBelow <+> ppr filtered) $ return () + + let + isStaticFun = isJust maybeFunClosure + + -- For a label without a closure (e.g. a continuation), we must + -- update the SRTMap for the label to point to a closure. It's + -- important that we don't do this for static functions or CAFs, + -- see Note [Invalid optimisation: shortcutting]. + updateSRTMap srtEntry = + when (not isCAF && not isStaticFun) $ do + let newSRTMap = Map.fromList [(cafLbl, srtEntry) | cafLbl <- lbls] + put (Map.union newSRTMap srtMap) + + this_mod = thisModule topSRT + + case Set.toList filtered of + [] -> do + srtTrace "oneSRT: empty" (ppr lbls) $ return () + updateSRTMap Nothing + return ([], [], []) + + -- [Inline] - when we have only one entry there is no need to + -- build an SRT object at all, instead we put the singleton SRT + -- entry in the info table. + [one@(SRTEntry lbl)] + | -- Info tables refer to SRTs by offset (as noted in the section + -- "Referring to an SRT from the info table" of Note [SRTs]). However, + -- when dynamic linking is used we cannot guarantee that the offset + -- between the SRT and the info table will fit in the offset field. + -- Consequently we build a singleton SRT in in this case. + not (labelDynamic dflags this_mod lbl) + + -- MachO relocations can't express offsets between compilation units at + -- all, so we are always forced to build a singleton SRT in this case. + && (not (osMachOTarget $ platformOS $ targetPlatform dflags) + || isLocalCLabel this_mod lbl) -> do + + -- If we have a static function closure, then it becomes the + -- SRT object, and everything else points to it. (the only way + -- we could have multiple labels here is if this is a + -- recursive group, see Note [recursive SRTs]) + case maybeFunClosure of + Just (staticFunLbl,staticFunBlock) -> return ([], withLabels, []) + where + withLabels = + [ (b, if b == staticFunBlock then lbl else staticFunLbl) + | b <- blockids ] + Nothing -> do + updateSRTMap (Just one) + return ([], map (,lbl) blockids, []) + + cafList -> + -- Check whether an SRT with the same entries has been emitted already. + -- Implements the [Common] optimisation. + case Map.lookup filtered (dedupSRTs topSRT) of + Just srtEntry@(SRTEntry srtLbl) -> do + srtTrace "oneSRT [Common]" (ppr lbls <+> ppr srtLbl) $ return () + updateSRTMap (Just srtEntry) + return ([], map (,srtLbl) blockids, []) + Nothing -> do + -- No duplicates: we have to build a new SRT object + srtTrace "oneSRT: new" (ppr lbls <+> ppr filtered) $ return () + (decls, funSRTs, srtEntry) <- + case maybeFunClosure of + Just (fun,block) -> + return ( [], [(block, cafList)], SRTEntry fun ) + Nothing -> do + (decls, entry) <- lift . lift $ buildSRTChain dflags cafList + return (decls, [], entry) + updateSRTMap (Just srtEntry) + let allBelowThis = Set.union allBelow filtered + oldFlatSRTs = flatSRTs topSRT + newFlatSRTs = Map.insert srtEntry allBelowThis oldFlatSRTs + newDedupSRTs = Map.insert filtered srtEntry (dedupSRTs topSRT) + lift (put (topSRT { dedupSRTs = newDedupSRTs + , flatSRTs = newFlatSRTs })) + let SRTEntry lbl = srtEntry + return (decls, map (,lbl) blockids, funSRTs) + + +-- | build a static SRT object (or a chain of objects) from a list of +-- SRTEntries. +buildSRTChain + :: DynFlags + -> [SRTEntry] + -> UniqSM + ( [CmmDecl] -- The SRT object(s) + , SRTEntry -- label to use in the info table + ) +buildSRTChain _ [] = panic "buildSRT: empty" +buildSRTChain dflags cafSet = + case splitAt mAX_SRT_SIZE cafSet of + (these, []) -> do + (decl,lbl) <- buildSRT dflags these + return ([decl], lbl) + (these,those) -> do + (rest, rest_lbl) <- buildSRTChain dflags (head these : those) + (decl,lbl) <- buildSRT dflags (rest_lbl : tail these) + return (decl:rest, lbl) + where + mAX_SRT_SIZE = 16 + + +buildSRT :: DynFlags -> [SRTEntry] -> UniqSM (CmmDecl, SRTEntry) +buildSRT dflags refs = do + id <- getUniqueM + let + lbl = mkSRTLabel id + srt_n_info = mkSRTInfoLabel (length refs) + fields = + mkStaticClosure dflags srt_n_info dontCareCCS + [ CmmLabel lbl | SRTEntry lbl <- refs ] + [] -- no padding + [mkIntCLit dflags 0] -- link field + [] -- no saved info + return (mkDataLits (Section Data lbl) lbl fields, SRTEntry lbl) + + +-- | Update info tables with references to their SRTs. Also generate +-- static closures, splicing in SRT fields as necessary. +updInfoSRTs + :: DynFlags + -> LabelMap CLabel -- SRT labels for each block + -> LabelMap [SRTEntry] -- SRTs to merge into FUN_STATIC closures + -> CmmDecl + -> [CmmDecl] + +updInfoSRTs dflags srt_env funSRTEnv (CmmProc top_info top_l live g) + | Just (_,closure) <- maybeStaticClosure = [ proc, closure ] + | otherwise = [ proc ] + where + proc = CmmProc top_info { info_tbls = newTopInfo } top_l live g + newTopInfo = mapMapWithKey updInfoTbl (info_tbls top_info) + updInfoTbl l info_tbl + | l == g_entry g, Just (inf, _) <- maybeStaticClosure = inf + | otherwise = info_tbl { cit_srt = mapLookup l srt_env } + + -- Generate static closures [FUN]. Note that this also generates + -- static closures for thunks (CAFs), because it's easier to treat + -- them uniformly in the code generator. + maybeStaticClosure :: Maybe (CmmInfoTable, CmmDecl) + maybeStaticClosure + | Just info_tbl@CmmInfoTable{..} <- + mapLookup (g_entry g) (info_tbls top_info) + , Just (id, ccs) <- cit_clo + , isStaticRep cit_rep = + let + (newInfo, srtEntries) = case mapLookup (g_entry g) funSRTEnv of + Nothing -> + -- if we don't add SRT entries to this closure, then we + -- want to set the srt field in its info table as usual + (info_tbl { cit_srt = mapLookup (g_entry g) srt_env }, []) + Just srtEntries -> srtTrace "maybeStaticFun" (ppr res) + (info_tbl { cit_rep = new_rep }, res) + where res = [ CmmLabel lbl | SRTEntry lbl <- srtEntries ] + fields = mkStaticClosureFields dflags info_tbl ccs (idCafInfo id) + srtEntries + new_rep = case cit_rep of + HeapRep sta ptrs nptrs ty -> + HeapRep sta (ptrs + length srtEntries) nptrs ty + _other -> panic "maybeStaticFun" + lbl = mkLocalClosureLabel (idName id) (idCafInfo id) + in + Just (newInfo, mkDataLits (Section Data lbl) lbl fields) + | otherwise = Nothing + +updInfoSRTs _ _ _ t = [t] + + +srtTrace :: String -> SDoc -> b -> b +-- srtTrace = pprTrace +srtTrace _ _ b = b diff --git a/compiler/cmm/CmmCallConv.hs b/compiler/cmm/CmmCallConv.hs index 440ee5634f..e1067e9519 100644 --- a/compiler/cmm/CmmCallConv.hs +++ b/compiler/cmm/CmmCallConv.hs @@ -1,5 +1,3 @@ -{-# LANGUAGE CPP #-} - module CmmCallConv ( ParamLocation(..), assignArgumentsPos, @@ -7,7 +5,7 @@ module CmmCallConv ( realArgRegsCover ) where -#include "HsVersions.h" +import GhcPrelude import CmmExpr import SMRep @@ -129,9 +127,10 @@ assignStack dflags offset arg_ty args = assign_stk offset [] (reverse args) assign_stk offset assts (r:rs) = assign_stk off' ((r, StackParam off') : assts) rs where w = typeWidth (arg_ty r) - size = (((widthInBytes w - 1) `div` word_size) + 1) * word_size off' = offset + size - word_size = wORD_SIZE dflags + -- Stack arguments always take a whole number of words, we never + -- pack them unlike constructor fields. + size = roundUpToWords dflags (widthInBytes w) ----------------------------------------------------------------------------- -- Local information about the registers available diff --git a/compiler/cmm/CmmCommonBlockElim.hs b/compiler/cmm/CmmCommonBlockElim.hs index 3c23e70b8c..1af9a84028 100644 --- a/compiler/cmm/CmmCommonBlockElim.hs +++ b/compiler/cmm/CmmCommonBlockElim.hs @@ -1,17 +1,19 @@ -{-# LANGUAGE GADTs, BangPatterns #-} +{-# LANGUAGE GADTs, BangPatterns, ScopedTypeVariables #-} + module CmmCommonBlockElim ( elimCommonBlocks ) where +import GhcPrelude hiding (iterate, succ, unzip, zip) + import BlockId import Cmm import CmmUtils import CmmSwitch (eqSwitchTargetWith) import CmmContFlowOpt -- import PprCmm () -import Prelude hiding (iterate, succ, unzip, zip) import Hoopl.Block import Hoopl.Graph @@ -23,11 +25,11 @@ import qualified Data.List as List import Data.Word import qualified Data.Map as M import Outputable -import UniqFM -import UniqDFM import qualified TrieMap as TM +import UniqFM import Unique import Control.Arrow (first, second) +import Data.List (foldl') -- ----------------------------------------------------------------------------- -- Eliminate common blocks @@ -62,7 +64,11 @@ elimCommonBlocks :: CmmGraph -> CmmGraph elimCommonBlocks g = replaceLabels env $ copyTicks env g where env = iterate mapEmpty blocks_with_key - groups = groupByInt hash_block (postorderDfs g) + -- The order of blocks doesn't matter here. While we could use + -- revPostorder which drops unreachable blocks this is done in + -- ContFlowOpt already which runs before this pass. So we use + -- toBlockList since it is faster. + groups = groupByInt hash_block (toBlockList g) :: [[CmmBlock]] blocks_with_key = [ [ (successors b, [b]) | b <- bs] | bs <- groups] -- Invariant: The blocks in the list are pairwise distinct @@ -90,6 +96,8 @@ iterate subst blocks subst' = subst `mapUnion` new_substs updated_blocks = map (map (first (map (lookupBid subst')))) merged_blocks +-- Combine two lists of blocks. +-- While they are internally distinct they can still share common blocks. mergeBlocks :: Subst -> DistinctBlocks -> DistinctBlocks -> (Subst, DistinctBlocks) mergeBlocks subst existing new = go new where @@ -165,14 +173,14 @@ hash_block block = hash_lit (CmmVec ls) = hash_list hash_lit ls hash_lit (CmmLabel _) = 119 -- ugh hash_lit (CmmLabelOff _ i) = cvt $ 199 + i - hash_lit (CmmLabelDiffOff _ _ i) = cvt $ 299 + i + hash_lit (CmmLabelDiffOff _ _ i _) = cvt $ 299 + i hash_lit (CmmBlock _) = 191 -- ugh hash_lit (CmmHighStackMark) = cvt 313 hash_tgt (ForeignTarget e _) = hash_e e hash_tgt (PrimTarget _) = 31 -- lots of these - hash_list f = foldl (\z x -> f x + z) (0::Word32) + hash_list f = foldl' (\z x -> f x + z) (0::Word32) cvt = fromInteger . toInteger @@ -208,7 +216,7 @@ eqMiddleWith eqBid (CmmStore l1 r1) (CmmStore l2 r2) = eqExprWith eqBid l1 l2 && eqExprWith eqBid r1 r2 eqMiddleWith eqBid (CmmUnsafeForeignCall t1 r1 a1) (CmmUnsafeForeignCall t2 r2 a2) - = t1 == t2 && r1 == r2 && and (zipWith (eqExprWith eqBid) a1 a2) + = t1 == t2 && r1 == r2 && eqListWith (eqExprWith eqBid) a1 a2 eqMiddleWith _ _ _ = False eqExprWith :: (BlockId -> BlockId -> Bool) @@ -223,7 +231,7 @@ eqExprWith eqBid = eq CmmStackSlot a1 i1 `eq` CmmStackSlot a2 i2 = eqArea a1 a2 && i1==i2 _e1 `eq` _e2 = False - xs `eqs` ys = and (zipWith eq xs ys) + xs `eqs` ys = eqListWith eq xs ys eqLit (CmmBlock id1) (CmmBlock id2) = eqBid id1 id2 eqLit l1 l2 = l1 == l2 @@ -246,7 +254,7 @@ eqBlockBodyWith eqBid block block' (_,m',l') = blockSplit block' nodes' = filter (not . dont_care) (blockToList m') - equal = and (zipWith (eqMiddleWith eqBid) nodes nodes') && + equal = eqListWith (eqMiddleWith eqBid) nodes nodes' && eqLastWith eqBid l l' @@ -265,6 +273,11 @@ eqMaybeWith eltEq (Just e) (Just e') = eltEq e e' eqMaybeWith _ Nothing Nothing = True eqMaybeWith _ _ _ = False +eqListWith :: (a -> b -> Bool) -> [a] -> [b] -> Bool +eqListWith f (a : as) (b : bs) = f a b && eqListWith f as bs +eqListWith _ [] [] = True +eqListWith _ _ _ = False + -- | Given a block map, ensure that all "target" blocks are covered by -- the same ticks as the respective "source" blocks. This not only -- means copying ticks, but also adjusting tick scopes where @@ -275,8 +288,8 @@ copyTicks env g | otherwise = ofBlockMap (g_entry g) $ mapMap copyTo blockMap where -- Reverse block merge map blockMap = toBlockMap g - revEnv = mapFoldWithKey insertRev M.empty env - insertRev k x = M.insertWith (const (k:)) x [k] + revEnv = mapFoldlWithKey insertRev M.empty env + insertRev m k x = M.insertWith (const (k:)) x [k] m -- Copy ticks and scopes into the given block copyTo block = case M.lookup (entryLabel block) revEnv of Nothing -> block @@ -289,17 +302,21 @@ copyTicks env g foldr blockCons code (map CmmTick ticks) -- Group by [Label] -groupByLabel :: [(Key, a)] -> [(Key, [a])] -groupByLabel = go (TM.emptyTM :: TM.ListMap UniqDFM a) - where - go !m [] = TM.foldTM (:) m [] - go !m ((k,v) : entries) = go (TM.alterTM k' adjust m) entries - where k' = map getUnique k - adjust Nothing = Just (k,[v]) - adjust (Just (_,vs)) = Just (k,v:vs) - +-- See Note [Compressed TrieMap] in coreSyn/TrieMap about the usage of GenMap. +groupByLabel :: [(Key, DistinctBlocks)] -> [(Key, [DistinctBlocks])] +groupByLabel = + go (TM.emptyTM :: TM.ListMap (TM.GenMap LabelMap) (Key, [DistinctBlocks])) + where + go !m [] = TM.foldTM (:) m [] + go !m ((k,v) : entries) = go (TM.alterTM k adjust m) entries + where --k' = map (getKey . getUnique) k + adjust Nothing = Just (k,[v]) + adjust (Just (_,vs)) = Just (k,v:vs) groupByInt :: (a -> Int) -> [a] -> [[a]] groupByInt f xs = nonDetEltsUFM $ List.foldl' go emptyUFM xs - -- See Note [Unique Determinism and code generation] - where go m x = alterUFM (Just . maybe [x] (x:)) m (f x) + -- See Note [Unique Determinism and code generation] + where + go m x = alterUFM addEntry m (f x) + where + addEntry xs = Just $! maybe [x] (x:) xs diff --git a/compiler/cmm/CmmContFlowOpt.hs b/compiler/cmm/CmmContFlowOpt.hs index 219b68e42a..92dd7abba5 100644 --- a/compiler/cmm/CmmContFlowOpt.hs +++ b/compiler/cmm/CmmContFlowOpt.hs @@ -1,4 +1,5 @@ {-# LANGUAGE GADTs #-} +{-# LANGUAGE BangPatterns #-} {-# OPTIONS_GHC -fno-warn-incomplete-patterns #-} module CmmContFlowOpt ( cmmCfgOpts @@ -8,6 +9,8 @@ module CmmContFlowOpt ) where +import GhcPrelude hiding (succ, unzip, zip) + import Hoopl.Block import Hoopl.Collections import Hoopl.Graph @@ -21,7 +24,6 @@ import Panic import Util import Control.Monad -import Prelude hiding (succ, unzip, zip) -- Note [What is shortcutting] @@ -53,7 +55,7 @@ import Prelude hiding (succ, unzip, zip) -- -- This optimisation does three things: -- --- - If a block finishes in an unconditonal branch to another block +-- - If a block finishes in an unconditional branch to another block -- and that is the only jump to that block we concatenate the -- destination block at the end of the current one. -- @@ -171,11 +173,10 @@ blockConcat splitting_procs g@CmmGraph { g_entry = entry_id } | otherwise = (entry_id, shortcut_map) - -- blocks is a list of blocks in DFS postorder, while blockmap is - -- a map of blocks. We process each element from blocks and update - -- blockmap accordingly - blocks = postorderDfs g - blockmap = foldr addBlock emptyBody blocks + -- blocks are sorted in reverse postorder, but we want to go from the exit + -- towards beginning, so we use foldr below. + blocks = revPostorder g + blockmap = foldl' (flip addBlock) emptyBody blocks -- Accumulator contains three components: -- * map of blocks in a graph @@ -194,7 +195,7 @@ blockConcat splitting_procs g@CmmGraph { g_entry = entry_id } maybe_concat :: CmmBlock -> (LabelMap CmmBlock, LabelMap BlockId, LabelMap Int) -> (LabelMap CmmBlock, LabelMap BlockId, LabelMap Int) - maybe_concat block (blocks, shortcut_map, backEdges) + maybe_concat block (!blocks, !shortcut_map, !backEdges) -- If: -- (1) current block ends with unconditional branch to b' and -- (2) it has exactly one predecessor (namely, current block) @@ -252,8 +253,8 @@ blockConcat splitting_procs g@CmmGraph { g_entry = entry_id } -- unconditional jump to a block that can be shortcut. | Nothing <- callContinuation_maybe last = let oldSuccs = successors last - newSuccs = successors swapcond_last - in ( mapInsert bid (blockJoinTail head swapcond_last) blocks + newSuccs = successors rewrite_last + in ( mapInsert bid (blockJoinTail head rewrite_last) blocks , shortcut_map , if oldSuccs == newSuccs then backEdges @@ -281,34 +282,58 @@ blockConcat splitting_procs g@CmmGraph { g_entry = entry_id } Just b | Just dest <- canShortcut b -> dest _otherwise -> l - -- For a conditional, we invert the conditional if that would make it - -- more likely that the branch-not-taken case becomes a fallthrough. - -- This helps the native codegen a little bit, and probably has no - -- effect on LLVM. It's convenient to do it here, where we have the - -- information about predecessors. - swapcond_last + rewrite_last + -- Sometimes we can get rid of the conditional completely. + | CmmCondBranch _cond t f _l <- shortcut_last + , t == f + = CmmBranch t + + -- See Note [Invert Cmm conditionals] | CmmCondBranch cond t f l <- shortcut_last - , likelyFalse l - , numPreds f > 1 - , hasOnePredecessor t + , hasOnePredecessor t -- inverting will make t a fallthrough + , likelyTrue l || (numPreds f > 1) , Just cond' <- maybeInvertCmmExpr cond = CmmCondBranch cond' f t (invertLikeliness l) | otherwise = shortcut_last - likelyFalse (Just False) = True - likelyFalse Nothing = True - likelyFalse _ = False + likelyTrue (Just True) = True + likelyTrue _ = False - invertLikeliness (Just b) = Just (not b) - invertLikeliness Nothing = Nothing + invertLikeliness :: Maybe Bool -> Maybe Bool + invertLikeliness = fmap not -- Number of predecessors for a block numPreds bid = mapLookup bid backEdges `orElse` 0 hasOnePredecessor b = numPreds b == 1 +{- + Note [Invert Cmm conditionals] + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + The native code generator always produces jumps to the true branch. + Falling through to the false branch is however faster. So we try to + arrange for that to happen. + This means we invert the condition if: + * The likely path will become a fallthrough. + * We can't guarantee a fallthrough for the false branch but for the + true branch. + + In some cases it's faster to avoid inverting when the false branch is likely. + However determining when that is the case is neither easy nor cheap so for + now we always invert as this produces smaller binaries and code that is + equally fast on average. (On an i7-6700K) + + TODO: + There is also the edge case when both branches have multiple predecessors. + In this case we could assume that we will end up with a jump for BOTH + branches. In this case it might be best to put the likely path in the true + branch especially if there are large numbers of predecessors as this saves + us the jump thats not taken. However I haven't tested this and as of early + 2018 we almost never generate cmm where this would apply. +-} + -- Functions for incrementing and decrementing number of predecessors. If -- decrementing would set the predecessor count to 0, we remove entry from the -- map. @@ -406,14 +431,14 @@ removeUnreachableBlocksProc proc@(CmmProc info lbl live g) -- Remove any info_tbls for unreachable keep_used :: LabelMap CmmInfoTable -> LabelMap CmmInfoTable - keep_used bs = mapFoldWithKey keep mapEmpty bs + keep_used bs = mapFoldlWithKey keep mapEmpty bs - keep :: Label -> CmmInfoTable -> LabelMap CmmInfoTable -> LabelMap CmmInfoTable - keep l i env | l `setMember` used_lbls = mapInsert l i env + keep :: LabelMap CmmInfoTable -> Label -> CmmInfoTable -> LabelMap CmmInfoTable + keep env l i | l `setMember` used_lbls = mapInsert l i env | otherwise = env used_blocks :: [CmmBlock] - used_blocks = postorderDfs g + used_blocks = revPostorder g used_lbls :: LabelSet - used_lbls = foldr (setInsert . entryLabel) setEmpty used_blocks + used_lbls = setFromList $ map entryLabel used_blocks diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs index bb610a0b88..d129d601f4 100644 --- a/compiler/cmm/CmmExpr.hs +++ b/compiler/cmm/CmmExpr.hs @@ -1,5 +1,4 @@ {-# LANGUAGE BangPatterns #-} -{-# LANGUAGE CPP #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} @@ -10,7 +9,10 @@ module CmmExpr , CmmReg(..), cmmRegType , CmmLit(..), cmmLitType , LocalReg(..), localRegType - , GlobalReg(..), isArgReg, globalRegType, spReg, hpReg, spLimReg, nodeReg, node, baseReg + , GlobalReg(..), isArgReg, globalRegType + , spReg, hpReg, spLimReg, hpLimReg, nodeReg + , currentTSOReg, currentNurseryReg, hpAllocReg, cccsReg + , node, baseReg , VGcPtr(..) , DefinerOfRegs, UserOfRegs @@ -28,6 +30,8 @@ module CmmExpr ) where +import GhcPrelude + import BlockId import CLabel import CmmMachOp @@ -37,7 +41,6 @@ import Outputable (panic) import Unique import Data.Set (Set) -import Data.List import qualified Data.Set as Set ----------------------------------------------------------------------------- @@ -184,7 +187,14 @@ data CmmLit -- Don't use it at all unless tablesNextToCode. -- It is also used inside the NCG during when generating -- position-independent code. - | CmmLabelDiffOff CLabel CLabel Int -- label1 - label2 + offset + | CmmLabelDiffOff CLabel CLabel Int Width -- label1 - label2 + offset + -- In an expression, the width just has the effect of MO_SS_Conv + -- from wordWidth to the desired width. + -- + -- In a static literal, the supported Widths depend on the + -- architecture: wordWidth is supported on all + -- architectures. Additionally W32 is supported on x86_64 when + -- using the small memory model. | CmmBlock {-# UNPACK #-} !BlockId -- Code label -- Invariant: must be a continuation BlockId @@ -217,7 +227,7 @@ cmmLitType cflags (CmmVec (l:ls)) = let ty = cmmLitType cflags l else panic "cmmLitType: CmmVec" cmmLitType dflags (CmmLabel lbl) = cmmLabelType dflags lbl cmmLitType dflags (CmmLabelOff lbl _) = cmmLabelType dflags lbl -cmmLitType dflags (CmmLabelDiffOff {}) = bWord dflags +cmmLitType _ (CmmLabelDiffOff _ _ _ width) = cmmBits width cmmLitType dflags (CmmBlock _) = bWord dflags cmmLitType dflags (CmmHighStackMark) = bWord dflags @@ -549,12 +559,18 @@ instance Ord GlobalReg where compare _ EagerBlackholeInfo = GT -- convenient aliases -baseReg, spReg, hpReg, spLimReg, nodeReg :: CmmReg +baseReg, spReg, hpReg, spLimReg, hpLimReg, nodeReg, + currentTSOReg, currentNurseryReg, hpAllocReg, cccsReg :: CmmReg baseReg = CmmGlobal BaseReg spReg = CmmGlobal Sp hpReg = CmmGlobal Hp +hpLimReg = CmmGlobal HpLim spLimReg = CmmGlobal SpLim nodeReg = CmmGlobal node +currentTSOReg = CmmGlobal CurrentTSO +currentNurseryReg = CmmGlobal CurrentNursery +hpAllocReg = CmmGlobal HpAlloc +cccsReg = CmmGlobal CCCS node :: GlobalReg node = VanillaReg 1 VGcPtr diff --git a/compiler/cmm/CmmImplementSwitchPlans.hs b/compiler/cmm/CmmImplementSwitchPlans.hs index eda031e840..2e2da5d305 100644 --- a/compiler/cmm/CmmImplementSwitchPlans.hs +++ b/compiler/cmm/CmmImplementSwitchPlans.hs @@ -4,6 +4,8 @@ module CmmImplementSwitchPlans ) where +import GhcPrelude + import Hoopl.Block import BlockId import Cmm diff --git a/compiler/cmm/CmmInfo.hs b/compiler/cmm/CmmInfo.hs index e849c810ef..43cba2526d 100644 --- a/compiler/cmm/CmmInfo.hs +++ b/compiler/cmm/CmmInfo.hs @@ -34,6 +34,8 @@ module CmmInfo ( #include "HsVersions.h" +import GhcPrelude + import Cmm import CmmUtils import CLabel @@ -43,6 +45,7 @@ import Stream (Stream) import qualified Stream import Hoopl.Collections +import Platform import Maybes import DynFlags import Panic @@ -60,7 +63,8 @@ mkEmptyContInfoTable info_lbl = CmmInfoTable { cit_lbl = info_lbl , cit_rep = mkStackRep [] , cit_prof = NoProfilingInfo - , cit_srt = NoC_SRT } + , cit_srt = Nothing + , cit_clo = Nothing } cmmToRawCmm :: DynFlags -> Stream IO CmmGroup () -> IO (Stream IO RawCmmGroup ()) @@ -186,7 +190,7 @@ mkInfoTableContents dflags | StackRep frame <- smrep = do { (prof_lits, prof_data) <- mkProfLits dflags prof - ; let (srt_label, srt_bitmap) = mkSRTLit dflags srt + ; let (srt_label, srt_bitmap) = mkSRTLit dflags info_lbl srt ; (liveness_lit, liveness_data) <- mkLivenessBits dflags frame ; let std_info = mkStdInfoTable dflags prof_lits rts_tag srt_bitmap liveness_lit @@ -199,7 +203,7 @@ mkInfoTableContents dflags | HeapRep _ ptrs nonptrs closure_type <- smrep = do { let layout = packIntsCLit dflags ptrs nonptrs ; (prof_lits, prof_data) <- mkProfLits dflags prof - ; let (srt_label, srt_bitmap) = mkSRTLit dflags srt + ; let (srt_label, srt_bitmap) = mkSRTLit dflags info_lbl srt ; (mb_srt_field, mb_layout, extra_bits, ct_data) <- mk_pieces closure_type srt_label ; let std_info = mkStdInfoTable dflags prof_lits @@ -209,20 +213,22 @@ mkInfoTableContents dflags ; return (prof_data ++ ct_data, (std_info, extra_bits)) } where mk_pieces :: ClosureTypeInfo -> [CmmLit] - -> UniqSM ( Maybe StgHalfWord -- Override the SRT field with this - , Maybe CmmLit -- Override the layout field with this + -> UniqSM ( Maybe CmmLit -- Override the SRT field with this + , Maybe CmmLit -- Override the layout field with this , [CmmLit] -- "Extra bits" for info table , [RawCmmDecl]) -- Auxiliary data decls mk_pieces (Constr con_tag con_descr) _no_srt -- A data constructor = do { (descr_lit, decl) <- newStringLit con_descr - ; return ( Just (toStgHalfWord dflags (fromIntegral con_tag)) + ; return ( Just (CmmInt (fromIntegral con_tag) + (halfWordWidth dflags)) , Nothing, [descr_lit], [decl]) } mk_pieces Thunk srt_label = return (Nothing, Nothing, srt_label, []) mk_pieces (ThunkSelector offset) _no_srt - = return (Just (toStgHalfWord dflags 0), Just (mkWordCLit dflags (fromIntegral offset)), [], []) + = return (Just (CmmInt 0 (halfWordWidth dflags)), + Just (mkWordCLit dflags (fromIntegral offset)), [], []) -- Layout known (one free var); we use the layout field for offset mk_pieces (Fun arity (ArgSpec fun_type)) srt_label @@ -233,8 +239,9 @@ mkInfoTableContents dflags = do { (liveness_lit, liveness_data) <- mkLivenessBits dflags arg_bits ; let fun_type | null liveness_data = aRG_GEN | otherwise = aRG_GEN_BIG - extra_bits = [ packIntsCLit dflags fun_type arity - , srt_lit, liveness_lit, slow_entry ] + extra_bits = [ packIntsCLit dflags fun_type arity ] + ++ (if inlineSRT dflags then [] else [ srt_lit ]) + ++ [ liveness_lit, slow_entry ] ; return (Nothing, Nothing, extra_bits, liveness_data) } where slow_entry = CmmLabel (toSlowEntryLbl info_lbl) @@ -253,12 +260,24 @@ packIntsCLit dflags a b = packHalfWordsCLit dflags mkSRTLit :: DynFlags - -> C_SRT + -> CLabel + -> Maybe CLabel -> ([CmmLit], -- srt_label, if any - StgHalfWord) -- srt_bitmap -mkSRTLit dflags NoC_SRT = ([], toStgHalfWord dflags 0) -mkSRTLit dflags (C_SRT lbl off bitmap) = ([cmmLabelOffW dflags lbl off], bitmap) + CmmLit) -- srt_bitmap +mkSRTLit dflags info_lbl (Just lbl) + | inlineSRT dflags + = ([], CmmLabelDiffOff lbl info_lbl 0 (halfWordWidth dflags)) +mkSRTLit dflags _ Nothing = ([], CmmInt 0 (halfWordWidth dflags)) +mkSRTLit dflags _ (Just lbl) = ([CmmLabel lbl], CmmInt 1 (halfWordWidth dflags)) + +-- | Is the SRT offset field inline in the info table on this platform? +-- +-- See the section "Referring to an SRT from the info table" in +-- Note [SRTs] in CmmBuildInfoTables.hs +inlineSRT :: DynFlags -> Bool +inlineSRT dflags = platformArch (targetPlatform dflags) == ArchX86_64 + && tablesNextToCode dflags ------------------------------------------------------------------------- -- @@ -290,10 +309,10 @@ makeRelativeRefTo :: DynFlags -> CLabel -> CmmLit -> CmmLit makeRelativeRefTo dflags info_lbl (CmmLabel lbl) | tablesNextToCode dflags - = CmmLabelDiffOff lbl info_lbl 0 + = CmmLabelDiffOff lbl info_lbl 0 (wordWidth dflags) makeRelativeRefTo dflags info_lbl (CmmLabelOff lbl off) | tablesNextToCode dflags - = CmmLabelDiffOff lbl info_lbl off + = CmmLabelDiffOff lbl info_lbl off (wordWidth dflags) makeRelativeRefTo _ _ lit = lit @@ -366,23 +385,23 @@ mkStdInfoTable :: DynFlags -> (CmmLit,CmmLit) -- Closure type descr and closure descr (profiling) -> Int -- Closure RTS tag - -> StgHalfWord -- SRT length + -> CmmLit -- SRT length -> CmmLit -- layout field -> [CmmLit] -mkStdInfoTable dflags (type_descr, closure_descr) cl_type srt_len layout_lit +mkStdInfoTable dflags (type_descr, closure_descr) cl_type srt layout_lit = -- Parallel revertible-black hole field prof_info -- Ticky info (none at present) -- Debug info (none at present) - ++ [layout_lit, type_lit] + ++ [layout_lit, tag, srt] where prof_info | gopt Opt_SccProfilingOn dflags = [type_descr, closure_descr] | otherwise = [] - type_lit = packHalfWordsCLit dflags (toStgHalfWord dflags (fromIntegral cl_type)) srt_len + tag = CmmInt (fromIntegral cl_type) (halfWordWidth dflags) ------------------------------------------------------------------------- -- @@ -415,9 +434,19 @@ srtEscape dflags = toStgHalfWord dflags (-1) -- ------------------------------------------------------------------------- +-- | Wrap a 'CmmExpr' in an alignment check when @-falignment-sanitisation@ is +-- enabled. +wordAligned :: DynFlags -> CmmExpr -> CmmExpr +wordAligned dflags e + | gopt Opt_AlignmentSanitisation dflags + = CmmMachOp (MO_AlignmentCheck (wORD_SIZE dflags) (wordWidth dflags)) [e] + | otherwise + = e + closureInfoPtr :: DynFlags -> CmmExpr -> CmmExpr -- Takes a closure pointer and returns the info table pointer -closureInfoPtr dflags e = CmmLoad e (bWord dflags) +closureInfoPtr dflags e = + CmmLoad (wordAligned dflags e) (bWord dflags) entryCode :: DynFlags -> CmmExpr -> CmmExpr -- Takes an info pointer (the first word of a closure) diff --git a/compiler/cmm/CmmLayoutStack.hs b/compiler/cmm/CmmLayoutStack.hs index 4151aa0c4e..1d6c209953 100644 --- a/compiler/cmm/CmmLayoutStack.hs +++ b/compiler/cmm/CmmLayoutStack.hs @@ -1,8 +1,10 @@ -{-# LANGUAGE BangPatterns, CPP, RecordWildCards, GADTs #-} +{-# LANGUAGE BangPatterns, RecordWildCards, GADTs #-} module CmmLayoutStack ( cmmLayoutStack, setInfoTableStackMap ) where +import GhcPrelude hiding ((<*>)) + import StgCmmUtils ( callerSaveVolatileRegs ) -- XXX layering violation import StgCmmForeign ( saveThreadState, loadThreadState ) -- XXX layering violation @@ -35,11 +37,7 @@ import qualified Data.Set as Set import Control.Monad.Fix import Data.Array as Array import Data.Bits -import Data.List (nub) - -import Prelude hiding ((<*>)) - -#include "HsVersions.h" +import Data.List (nub, foldl') {- Note [Stack Layout] @@ -246,7 +244,7 @@ cmmLayoutStack dflags procpoints entry_args -- We need liveness info. Dead assignments are removed later -- by the sinking pass. let liveness = cmmLocalLiveness dflags graph - blocks = postorderDfs graph + blocks = revPostorder graph (final_stackmaps, _final_high_sp, new_blocks) <- mfix $ \ ~(rec_stackmaps, rec_high_sp, _new_blocks) -> @@ -324,7 +322,7 @@ layout dflags procpoints liveness entry entry_args final_stackmaps final_sp_high -- Sp = Sp + sp_off -- Sp adjustment goes here -- last1 -- the last node -- - let middle_pre = blockToList $ foldl blockSnoc middle0 middle1 + let middle_pre = blockToList $ foldl' blockSnoc middle0 middle1 let final_blocks = manifestSp dflags final_stackmaps stack0 sp0 final_sp_high @@ -579,15 +577,8 @@ makeFixupBlock dflags sp0 l stack tscope assigs | otherwise = do tmp_lbl <- newBlockId let sp_off = sp0 - sm_sp stack - maybeAddUnwind block - | debugLevel dflags > 0 - = block `blockSnoc` CmmUnwind [(Sp, Just unwind_val)] - | otherwise - = block - where unwind_val = cmmOffset dflags (CmmReg spReg) (sm_sp stack) block = blockJoin (CmmEntry tmp_lbl tscope) - ( maybeAddSpAdj dflags sp_off - $ maybeAddUnwind + ( maybeAddSpAdj dflags sp0 sp_off $ blockFromList assigs ) (CmmBranch l) return (tmp_lbl, [block]) @@ -853,28 +844,7 @@ manifestSp dflags stackmaps stack0 sp0 sp_high adj_pre_sp = mapExpDeep (areaToSp dflags sp0 sp_high area_off) adj_post_sp = mapExpDeep (areaToSp dflags (sp0 - sp_off) sp_high area_off) - -- Add unwind pseudo-instruction at the beginning of each block to - -- document Sp level for debugging - add_initial_unwind block - | debugLevel dflags > 0 - = CmmUnwind [(Sp, Just sp_unwind)] `blockCons` block - | otherwise - = block - where sp_unwind = CmmRegOff spReg (sp0 - wORD_SIZE dflags) - - -- Add unwind pseudo-instruction right before the Sp adjustment - -- if there is one. - add_adj_unwind block - | debugLevel dflags > 0 - , sp_off /= 0 - = block `blockSnoc` CmmUnwind [(Sp, Just sp_unwind)] - | otherwise - = block - where sp_unwind = CmmRegOff spReg (sp0 - wORD_SIZE dflags - sp_off) - - final_middle = maybeAddSpAdj dflags sp_off - . add_adj_unwind - . add_initial_unwind + final_middle = maybeAddSpAdj dflags sp0 sp_off . blockFromList . map adj_pre_sp . elimStackStores stack0 stackmaps area_off @@ -893,11 +863,33 @@ getAreaOff stackmaps (Young l) = Nothing -> pprPanic "getAreaOff" (ppr l) -maybeAddSpAdj :: DynFlags -> ByteOff -> Block CmmNode O O -> Block CmmNode O O -maybeAddSpAdj _ 0 block = block -maybeAddSpAdj dflags sp_off block = block `blockSnoc` adj +maybeAddSpAdj + :: DynFlags -> ByteOff -> ByteOff -> Block CmmNode O O -> Block CmmNode O O +maybeAddSpAdj dflags sp0 sp_off block = + add_initial_unwind $ add_adj_unwind $ adj block where - adj = CmmAssign spReg (cmmOffset dflags (CmmReg spReg) sp_off) + adj block + | sp_off /= 0 + = block `blockSnoc` CmmAssign spReg (cmmOffset dflags spExpr sp_off) + | otherwise = block + -- Add unwind pseudo-instruction at the beginning of each block to + -- document Sp level for debugging + add_initial_unwind block + | debugLevel dflags > 0 + = CmmUnwind [(Sp, Just sp_unwind)] `blockCons` block + | otherwise + = block + where sp_unwind = CmmRegOff spReg (sp0 - wORD_SIZE dflags) + + -- Add unwind pseudo-instruction right after the Sp adjustment + -- if there is one. + add_adj_unwind block + | debugLevel dflags > 0 + , sp_off /= 0 + = block `blockSnoc` CmmUnwind [(Sp, Just sp_unwind)] + | otherwise + = block + where sp_unwind = CmmRegOff spReg (sp0 - wORD_SIZE dflags - sp_off) {- Note [SP old/young offsets] @@ -920,7 +912,7 @@ arguments. areaToSp :: DynFlags -> ByteOff -> ByteOff -> (Area -> StackLoc) -> CmmExpr -> CmmExpr areaToSp dflags sp_old _sp_hwm area_off (CmmStackSlot area n) - = cmmOffset dflags (CmmReg spReg) (sp_old - area_off area - n) + = cmmOffset dflags spExpr (sp_old - area_off area - n) -- Replace (CmmStackSlot area n) with an offset from Sp areaToSp dflags _ sp_hwm _ (CmmLit CmmHighStackMark) @@ -1090,7 +1082,7 @@ insertReloads dflags stackmap live = [ CmmAssign (CmmLocal reg) -- This cmmOffset basically corresponds to manifesting -- @CmmStackSlot Old sp_off@, see Note [SP old/young offsets] - (CmmLoad (cmmOffset dflags (CmmReg spReg) (sp_off - reg_off)) + (CmmLoad (cmmOffset dflags spExpr (sp_off - reg_off)) (localRegType reg)) | (reg, reg_off) <- stackSlotRegs stackmap , reg `elemRegSet` live @@ -1143,7 +1135,7 @@ lowerSafeForeignCall dflags block -- Both 'id' and 'new_base' are KindNonPtr because they're -- RTS-only objects and are not subject to garbage collection id <- newTemp (bWord dflags) - new_base <- newTemp (cmmRegType dflags (CmmGlobal BaseReg)) + new_base <- newTemp (cmmRegType dflags baseReg) let (caller_save, caller_load) = callerSaveVolatileRegs dflags save_state_code <- saveThreadState dflags load_state_code <- loadThreadState dflags @@ -1154,7 +1146,7 @@ lowerSafeForeignCall dflags block resume = mkMiddle (callResumeThread new_base id) <*> -- Assign the result to BaseReg: we -- might now have a different Capability! - mkAssign (CmmGlobal BaseReg) (CmmReg (CmmLocal new_base)) <*> + mkAssign baseReg (CmmReg (CmmLocal new_base)) <*> caller_load <*> load_state_code @@ -1169,7 +1161,7 @@ lowerSafeForeignCall dflags block -- different. Hence we continue by jumping to the top stack frame, -- not by jumping to succ. jump = CmmCall { cml_target = entryCode dflags $ - CmmLoad (CmmReg spReg) (bWord dflags) + CmmLoad spExpr (bWord dflags) , cml_cont = Just succ , cml_args_regs = regs , cml_args = widthInBytes (wordWidth dflags) @@ -1199,7 +1191,7 @@ callSuspendThread dflags id intrbl = CmmUnsafeForeignCall (ForeignTarget (foreignLbl (fsLit "suspendThread")) (ForeignConvention CCallConv [AddrHint, NoHint] [AddrHint] CmmMayReturn)) - [id] [CmmReg (CmmGlobal BaseReg), mkIntExpr dflags (fromEnum intrbl)] + [id] [baseExpr, mkIntExpr dflags (fromEnum intrbl)] callResumeThread :: LocalReg -> LocalReg -> CmmNode O O callResumeThread new_base id = diff --git a/compiler/cmm/CmmLex.x b/compiler/cmm/CmmLex.x index 82f7bee965..691ca5eb28 100644 --- a/compiler/cmm/CmmLex.x +++ b/compiler/cmm/CmmLex.x @@ -22,6 +22,8 @@ module CmmLex ( CmmToken(..), cmmlex, ) where +import GhcPrelude + import CmmExpr import Lexer @@ -97,6 +99,10 @@ $white_no_nl+ ; "&&" { kw CmmT_BoolAnd } "||" { kw CmmT_BoolOr } + "True" { kw CmmT_True } + "False" { kw CmmT_False } + "likely" { kw CmmT_likely} + P@decimal { global_regN (\n -> VanillaReg n VGcPtr) } R@decimal { global_regN (\n -> VanillaReg n VNonGcPtr) } F@decimal { global_regN FloatReg } @@ -178,6 +184,9 @@ data CmmToken | CmmT_Int Integer | CmmT_Float Rational | CmmT_EOF + | CmmT_False + | CmmT_True + | CmmT_likely deriving (Show) -- ----------------------------------------------------------------------------- @@ -264,7 +273,10 @@ reservedWordsFM = listToUFM $ ( "b512", CmmT_bits512 ), ( "f32", CmmT_float32 ), ( "f64", CmmT_float64 ), - ( "gcptr", CmmT_gcptr ) + ( "gcptr", CmmT_gcptr ), + ( "likely", CmmT_likely), + ( "True", CmmT_True ), + ( "False", CmmT_False ) ] tok_decimal span buf len diff --git a/compiler/cmm/CmmLint.hs b/compiler/cmm/CmmLint.hs index 64b4400378..3224bb8cab 100644 --- a/compiler/cmm/CmmLint.hs +++ b/compiler/cmm/CmmLint.hs @@ -10,6 +10,8 @@ module CmmLint ( cmmLint, cmmLintGraph ) where +import GhcPrelude + import Hoopl.Block import Hoopl.Collections import Hoopl.Graph diff --git a/compiler/cmm/CmmLive.hs b/compiler/cmm/CmmLive.hs index 944a9e394e..f340c32c8a 100644 --- a/compiler/cmm/CmmLive.hs +++ b/compiler/cmm/CmmLive.hs @@ -12,6 +12,8 @@ module CmmLive ) where +import GhcPrelude + import DynFlags import BlockId import Cmm diff --git a/compiler/cmm/CmmMachOp.hs b/compiler/cmm/CmmMachOp.hs index d736f14bfc..c5e9d9bf27 100644 --- a/compiler/cmm/CmmMachOp.hs +++ b/compiler/cmm/CmmMachOp.hs @@ -1,5 +1,3 @@ -{-# LANGUAGE CPP #-} - module CmmMachOp ( MachOp(..) , pprMachOp, isCommutableMachOp, isAssociativeMachOp @@ -28,7 +26,7 @@ module CmmMachOp ) where -#include "HsVersions.h" +import GhcPrelude import CmmType import Outputable @@ -136,9 +134,12 @@ data MachOp -- Floating point vector operations | MO_VF_Add Length Width | MO_VF_Sub Length Width - | MO_VF_Neg Length Width -- unary - + | MO_VF_Neg Length Width -- unary negation | MO_VF_Mul Length Width | MO_VF_Quot Length Width + + -- Alignment check (for -falignment-sanitisation) + | MO_AlignmentCheck Int Width deriving (Eq, Show) pprMachOp :: MachOp -> SDoc @@ -417,6 +418,8 @@ machOpResultType dflags mop tys = MO_VF_Mul l w -> cmmVec l (cmmFloat w) MO_VF_Quot l w -> cmmVec l (cmmFloat w) MO_VF_Neg l w -> cmmVec l (cmmFloat w) + + MO_AlignmentCheck _ _ -> ty1 where (ty1:_) = tys @@ -507,6 +510,8 @@ machOpArgReps dflags op = MO_VF_Quot _ r -> [r,r] MO_VF_Neg _ r -> [r] + MO_AlignmentCheck _ r -> [r] + ----------------------------------------------------------------------------- -- CallishMachOp ----------------------------------------------------------------------------- @@ -526,6 +531,9 @@ data CallishMachOp | MO_F64_Asin | MO_F64_Acos | MO_F64_Atan + | MO_F64_Asinh + | MO_F64_Acosh + | MO_F64_Atanh | MO_F64_Log | MO_F64_Exp | MO_F64_Fabs @@ -540,6 +548,9 @@ data CallishMachOp | MO_F32_Asin | MO_F32_Acos | MO_F32_Atan + | MO_F32_Asinh + | MO_F32_Acosh + | MO_F32_Atanh | MO_F32_Log | MO_F32_Exp | MO_F32_Fabs @@ -551,6 +562,7 @@ data CallishMachOp | MO_U_QuotRem Width | MO_U_QuotRem2 Width | MO_Add2 Width + | MO_AddWordC Width | MO_SubWordC Width | MO_AddIntC Width | MO_SubIntC Width @@ -575,8 +587,11 @@ data CallishMachOp | MO_Memcpy Int | MO_Memset Int | MO_Memmove Int + | MO_Memcmp Int | MO_PopCnt Width + | MO_Pdep Width + | MO_Pext Width | MO_Clz Width | MO_Ctz Width @@ -607,6 +622,7 @@ callishMachOpHints op = case op of MO_Memcpy _ -> ([], [AddrHint,AddrHint,NoHint]) MO_Memset _ -> ([], [AddrHint,NoHint,NoHint]) MO_Memmove _ -> ([], [AddrHint,AddrHint,NoHint]) + MO_Memcmp _ -> ([], [AddrHint, AddrHint, NoHint]) _ -> ([],[]) -- empty lists indicate NoHint @@ -616,4 +632,5 @@ machOpMemcpyishAlign op = case op of MO_Memcpy align -> Just align MO_Memset align -> Just align MO_Memmove align -> Just align + MO_Memcmp align -> Just align _ -> Nothing diff --git a/compiler/cmm/CmmMonad.hs b/compiler/cmm/CmmMonad.hs index fc66bf5928..f3b4441a9b 100644 --- a/compiler/cmm/CmmMonad.hs +++ b/compiler/cmm/CmmMonad.hs @@ -7,16 +7,15 @@ -- The parser for C-- requires access to a lot more of the 'DynFlags', -- so 'PD' provides access to 'DynFlags' via a 'HasDynFlags' instance. ----------------------------------------------------------------------------- -{-# LANGUAGE CPP #-} module CmmMonad ( PD(..) , liftP ) where +import GhcPrelude + import Control.Monad -#if __GLASGOW_HASKELL__ > 710 import qualified Control.Monad.Fail as MonadFail -#endif import DynFlags import Lexer @@ -32,12 +31,10 @@ instance Applicative PD where instance Monad PD where (>>=) = thenPD - fail = failPD + fail = MonadFail.fail -#if __GLASGOW_HASKELL__ > 710 instance MonadFail.MonadFail PD where fail = failPD -#endif liftP :: P a -> PD a liftP (P f) = PD $ \_ s -> f s diff --git a/compiler/cmm/CmmNode.hs b/compiler/cmm/CmmNode.hs index f452b0b3f5..286b1e306c 100644 --- a/compiler/cmm/CmmNode.hs +++ b/compiler/cmm/CmmNode.hs @@ -22,6 +22,8 @@ module CmmNode ( CmmTickScope(..), isTickSubScope, combineTickScopes, ) where +import GhcPrelude hiding (succ) + import CodeGen.Platform import CmmExpr import CmmSwitch @@ -38,7 +40,6 @@ import Hoopl.Graph import Hoopl.Label import Data.Maybe import Data.List (tails,sortBy) -import Prelude hiding (succ) import Unique (nonDetCmpUnique) import Util diff --git a/compiler/cmm/CmmOpt.hs b/compiler/cmm/CmmOpt.hs index 3cb28217f2..e837d29783 100644 --- a/compiler/cmm/CmmOpt.hs +++ b/compiler/cmm/CmmOpt.hs @@ -1,10 +1,6 @@ -{-# LANGUAGE CPP #-} - -- The default iteration limit is a bit too low for the definitions -- in this module. -#if __GLASGOW_HASKELL__ >= 800 {-# OPTIONS_GHC -fmax-pmcheck-iterations=10000000 #-} -#endif ----------------------------------------------------------------------------- -- @@ -21,7 +17,7 @@ module CmmOpt ( cmmMachOpFoldM ) where -#include "HsVersions.h" +import GhcPrelude import CmmUtils import Cmm @@ -357,35 +353,51 @@ cmmMachOpFoldM dflags mop [x, (CmmLit (CmmInt n _))] MO_U_Quot rep | Just p <- exactLog2 n -> Just (cmmMachOpFold dflags (MO_U_Shr rep) [x, CmmLit (CmmInt p rep)]) + MO_U_Rem rep + | Just _ <- exactLog2 n -> + Just (cmmMachOpFold dflags (MO_And rep) [x, CmmLit (CmmInt (n - 1) rep)]) MO_S_Quot rep | Just p <- exactLog2 n, - CmmReg _ <- x -> -- We duplicate x below, hence require + CmmReg _ <- x -> -- We duplicate x in signedQuotRemHelper, hence require + -- it is a reg. FIXME: remove this restriction. + Just (cmmMachOpFold dflags (MO_S_Shr rep) + [signedQuotRemHelper rep p, CmmLit (CmmInt p rep)]) + MO_S_Rem rep + | Just p <- exactLog2 n, + CmmReg _ <- x -> -- We duplicate x in signedQuotRemHelper, hence require -- it is a reg. FIXME: remove this restriction. - -- shift right is not the same as quot, because it rounds - -- to minus infinity, whereasq quot rounds toward zero. - -- To fix this up, we add one less than the divisor to the - -- dividend if it is a negative number. - -- - -- to avoid a test/jump, we use the following sequence: - -- x1 = x >> word_size-1 (all 1s if -ve, all 0s if +ve) - -- x2 = y & (divisor-1) - -- result = (x+x2) >>= log2(divisor) - -- this could be done a bit more simply using conditional moves, - -- but we're processor independent here. - -- - -- we optimise the divide by 2 case slightly, generating - -- x1 = x >> word_size-1 (unsigned) - -- return = (x + x1) >>= log2(divisor) - let - bits = fromIntegral (widthInBits rep) - 1 - shr = if p == 1 then MO_U_Shr rep else MO_S_Shr rep - x1 = CmmMachOp shr [x, CmmLit (CmmInt bits rep)] - x2 = if p == 1 then x1 else - CmmMachOp (MO_And rep) [x1, CmmLit (CmmInt (n-1) rep)] - x3 = CmmMachOp (MO_Add rep) [x, x2] - in - Just (cmmMachOpFold dflags (MO_S_Shr rep) [x3, CmmLit (CmmInt p rep)]) + -- We replace (x `rem` 2^p) by (x - (x `quot` 2^p) * 2^p). + -- Moreover, we fuse MO_S_Shr (last operation of MO_S_Quot) + -- and MO_S_Shl (multiplication by 2^p) into a single MO_And operation. + Just (cmmMachOpFold dflags (MO_Sub rep) + [x, cmmMachOpFold dflags (MO_And rep) + [signedQuotRemHelper rep p, CmmLit (CmmInt (- n) rep)]]) _ -> Nothing + where + -- In contrast with unsigned integers, for signed ones + -- shift right is not the same as quot, because it rounds + -- to minus infinity, whereas quot rounds toward zero. + -- To fix this up, we add one less than the divisor to the + -- dividend if it is a negative number. + -- + -- to avoid a test/jump, we use the following sequence: + -- x1 = x >> word_size-1 (all 1s if -ve, all 0s if +ve) + -- x2 = y & (divisor-1) + -- result = x + x2 + -- this could be done a bit more simply using conditional moves, + -- but we're processor independent here. + -- + -- we optimise the divide by 2 case slightly, generating + -- x1 = x >> word_size-1 (unsigned) + -- return = x + x1 + signedQuotRemHelper :: Width -> Integer -> CmmExpr + signedQuotRemHelper rep p = CmmMachOp (MO_Add rep) [x, x2] + where + bits = fromIntegral (widthInBits rep) - 1 + shr = if p == 1 then MO_U_Shr rep else MO_S_Shr rep + x1 = CmmMachOp shr [x, CmmLit (CmmInt bits rep)] + x2 = if p == 1 then x1 else + CmmMachOp (MO_And rep) [x1, CmmLit (CmmInt (n-1) rep)] -- ToDo (#7116): optimise floating-point multiplication, e.g. x*2.0 -> x+x -- Unfortunately this needs a unique supply because x might not be a @@ -410,14 +422,6 @@ That's what the constant-folding operations on comparison operators do above. -- ----------------------------------------------------------------------------- -- Utils -isLit :: CmmExpr -> Bool -isLit (CmmLit _) = True -isLit _ = False - -isComparisonExpr :: CmmExpr -> Bool -isComparisonExpr (CmmMachOp op _) = isComparisonMachOp op -isComparisonExpr _ = False - isPicReg :: CmmExpr -> Bool isPicReg (CmmReg (CmmGlobal PicBaseReg)) = True isPicReg _ = False diff --git a/compiler/cmm/CmmParse.y b/compiler/cmm/CmmParse.y index e2fe593b5d..8cc988383e 100644 --- a/compiler/cmm/CmmParse.y +++ b/compiler/cmm/CmmParse.y @@ -200,6 +200,8 @@ necessary to the stack to accommodate it (e.g. 2). { module CmmParse ( parseCmmFile ) where +import GhcPrelude + import StgCmmExtCode import CmmCallConv import StgCmmProf @@ -297,6 +299,10 @@ import qualified Data.Map as M '&&' { L _ (CmmT_BoolAnd) } '||' { L _ (CmmT_BoolOr) } + 'True' { L _ (CmmT_True ) } + 'False' { L _ (CmmT_False) } + 'likely'{ L _ (CmmT_likely)} + 'CLOSURE' { L _ (CmmT_CLOSURE) } 'INFO_TABLE' { L _ (CmmT_INFO_TABLE) } 'INFO_TABLE_RET'{ L _ (CmmT_INFO_TABLE_RET) } @@ -396,8 +402,6 @@ statics :: { [CmmParse [CmmStatic]] } : {- empty -} { [] } | static statics { $1 : $2 } --- Strings aren't used much in the RTS HC code, so it doesn't seem --- worth allowing inline strings. C-- doesn't allow them anyway. static :: { CmmParse [CmmStatic] } : type expr ';' { do e <- $2; return [CmmStaticLit (getLit e)] } @@ -466,7 +470,7 @@ info :: { CmmParse (CLabel, Maybe CmmInfoTable, [LocalReg]) } return (mkCmmEntryLabel pkg $3, Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3 , cit_rep = rep - , cit_prof = prof, cit_srt = NoC_SRT }, + , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing }, []) } | 'INFO_TABLE_FUN' '(' NAME ',' INT ',' INT ',' INT ',' STRING ',' STRING ',' INT ')' @@ -482,7 +486,7 @@ info :: { CmmParse (CLabel, Maybe CmmInfoTable, [LocalReg]) } return (mkCmmEntryLabel pkg $3, Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3 , cit_rep = rep - , cit_prof = prof, cit_srt = NoC_SRT }, + , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing }, []) } -- we leave most of the fields zero here. This is only used -- to generate the BCO info table in the RTS at the moment. @@ -500,7 +504,7 @@ info :: { CmmParse (CLabel, Maybe CmmInfoTable, [LocalReg]) } return (mkCmmEntryLabel pkg $3, Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3 , cit_rep = rep - , cit_prof = prof, cit_srt = NoC_SRT }, + , cit_prof = prof, cit_srt = Nothing,cit_clo = Nothing }, []) } -- If profiling is on, this string gets duplicated, @@ -517,7 +521,7 @@ info :: { CmmParse (CLabel, Maybe CmmInfoTable, [LocalReg]) } return (mkCmmEntryLabel pkg $3, Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3 , cit_rep = rep - , cit_prof = prof, cit_srt = NoC_SRT }, + , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing }, []) } | 'INFO_TABLE_RET' '(' NAME ',' INT ')' @@ -528,7 +532,7 @@ info :: { CmmParse (CLabel, Maybe CmmInfoTable, [LocalReg]) } return (mkCmmRetLabel pkg $3, Just $ CmmInfoTable { cit_lbl = mkCmmRetInfoLabel pkg $3 , cit_rep = rep - , cit_prof = prof, cit_srt = NoC_SRT }, + , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing }, []) } | 'INFO_TABLE_RET' '(' NAME ',' INT ',' formals0 ')' @@ -538,12 +542,12 @@ info :: { CmmParse (CLabel, Maybe CmmInfoTable, [LocalReg]) } live <- sequence $7 let prof = NoProfilingInfo -- drop one for the info pointer - bitmap = mkLiveness dflags (map Just (drop 1 live)) + bitmap = mkLiveness dflags (drop 1 live) rep = mkRTSRep (fromIntegral $5) $ mkStackRep bitmap return (mkCmmRetLabel pkg $3, Just $ CmmInfoTable { cit_lbl = mkCmmRetInfoLabel pkg $3 , cit_rep = rep - , cit_prof = prof, cit_srt = NoC_SRT }, + , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing }, live) } body :: { CmmParse () } @@ -627,10 +631,10 @@ stmt :: { CmmParse () } { doCall $2 [] $4 } | '(' formals ')' '=' 'call' expr '(' exprs0 ')' ';' { doCall $6 $2 $8 } - | 'if' bool_expr 'goto' NAME - { do l <- lookupLabel $4; cmmRawIf $2 l } - | 'if' bool_expr '{' body '}' else - { cmmIfThenElse $2 (withSourceNote $3 $5 $4) $6 } + | 'if' bool_expr cond_likely 'goto' NAME + { do l <- lookupLabel $5; cmmRawIf $2 l $3 } + | 'if' bool_expr cond_likely '{' body '}' else + { cmmIfThenElse $2 (withSourceNote $4 $6 $5) $7 $3 } | 'push' '(' exprs0 ')' maybe_body { pushStackFrame $3 $5 } | 'reserve' expr '=' lreg maybe_body @@ -719,6 +723,12 @@ else :: { CmmParse () } : {- empty -} { return () } | 'else' '{' body '}' { withSourceNote $2 $4 $3 } +cond_likely :: { Maybe Bool } + : '(' 'likely' ':' 'True' ')' { Just True } + | '(' 'likely' ':' 'False' ')' { Just False } + | {- empty -} { Nothing } + + -- we have to write this out longhand so that Happy's precedence rules -- can kick in. expr :: { CmmParse CmmExpr } @@ -992,6 +1002,7 @@ callishMachOps = listToUFM $ ( "memcpy", memcpyLikeTweakArgs MO_Memcpy ), ( "memset", memcpyLikeTweakArgs MO_Memset ), ( "memmove", memcpyLikeTweakArgs MO_Memmove ), + ( "memcmp", memcpyLikeTweakArgs MO_Memcmp ), ("prefetch0", (,) $ MO_Prefetch_Data 0), ("prefetch1", (,) $ MO_Prefetch_Data 1), @@ -1003,6 +1014,16 @@ callishMachOps = listToUFM $ ( "popcnt32", (,) $ MO_PopCnt W32 ), ( "popcnt64", (,) $ MO_PopCnt W64 ), + ( "pdep8", (,) $ MO_Pdep W8 ), + ( "pdep16", (,) $ MO_Pdep W16 ), + ( "pdep32", (,) $ MO_Pdep W32 ), + ( "pdep64", (,) $ MO_Pdep W64 ), + + ( "pext8", (,) $ MO_Pext W8 ), + ( "pext16", (,) $ MO_Pext W16 ), + ( "pext32", (,) $ MO_Pext W32 ), + ( "pext64", (,) $ MO_Pext W64 ), + ( "cmpxchg8", (,) $ MO_Cmpxchg W8 ), ( "cmpxchg16", (,) $ MO_Cmpxchg W16 ), ( "cmpxchg32", (,) $ MO_Cmpxchg W32 ), @@ -1276,11 +1297,11 @@ data BoolExpr -- ToDo: smart constructors which simplify the boolean expression. -cmmIfThenElse cond then_part else_part = do +cmmIfThenElse cond then_part else_part likely = do then_id <- newBlockId join_id <- newBlockId c <- cond - emitCond c then_id + emitCond c then_id likely else_part emit (mkBranch join_id) emitLabel then_id @@ -1288,38 +1309,38 @@ cmmIfThenElse cond then_part else_part = do -- fall through to join emitLabel join_id -cmmRawIf cond then_id = do +cmmRawIf cond then_id likely = do c <- cond - emitCond c then_id + emitCond c then_id likely -- 'emitCond cond true_id' emits code to test whether the cond is true, -- branching to true_id if so, and falling through otherwise. -emitCond (BoolTest e) then_id = do +emitCond (BoolTest e) then_id likely = do else_id <- newBlockId - emit (mkCbranch e then_id else_id Nothing) + emit (mkCbranch e then_id else_id likely) emitLabel else_id -emitCond (BoolNot (BoolTest (CmmMachOp op args))) then_id +emitCond (BoolNot (BoolTest (CmmMachOp op args))) then_id likely | Just op' <- maybeInvertComparison op - = emitCond (BoolTest (CmmMachOp op' args)) then_id -emitCond (BoolNot e) then_id = do + = emitCond (BoolTest (CmmMachOp op' args)) then_id (not <$> likely) +emitCond (BoolNot e) then_id likely = do else_id <- newBlockId - emitCond e else_id + emitCond e else_id likely emit (mkBranch then_id) emitLabel else_id -emitCond (e1 `BoolOr` e2) then_id = do - emitCond e1 then_id - emitCond e2 then_id -emitCond (e1 `BoolAnd` e2) then_id = do +emitCond (e1 `BoolOr` e2) then_id likely = do + emitCond e1 then_id likely + emitCond e2 then_id likely +emitCond (e1 `BoolAnd` e2) then_id likely = do -- we'd like to invert one of the conditionals here to avoid an -- extra branch instruction, but we can't use maybeInvertComparison -- here because we can't look too closely at the expression since -- we're in a loop. and_id <- newBlockId else_id <- newBlockId - emitCond e1 and_id + emitCond e1 and_id likely emit (mkBranch else_id) emitLabel and_id - emitCond e2 then_id + emitCond e2 then_id likely emitLabel else_id -- ----------------------------------------------------------------------------- diff --git a/compiler/cmm/CmmPipeline.hs b/compiler/cmm/CmmPipeline.hs index bc827dfe87..77598a4b09 100644 --- a/compiler/cmm/CmmPipeline.hs +++ b/compiler/cmm/CmmPipeline.hs @@ -7,6 +7,8 @@ module CmmPipeline ( cmmPipeline ) where +import GhcPrelude + import Cmm import CmmLint import CmmBuildInfoTables @@ -30,21 +32,22 @@ import Platform -- | Top level driver for C-- pipeline ----------------------------------------------------------------------------- -cmmPipeline :: HscEnv -- Compilation env including - -- dynamic flags: -dcmm-lint -ddump-cmm-cps - -> TopSRT -- SRT table and accumulating list of compiled procs - -> CmmGroup -- Input C-- with Procedures - -> IO (TopSRT, CmmGroup) -- Output CPS transformed C-- +cmmPipeline + :: HscEnv -- Compilation env including + -- dynamic flags: -dcmm-lint -ddump-cmm-cps + -> ModuleSRTInfo -- Info about SRTs generated so far + -> CmmGroup -- Input C-- with Procedures + -> IO (ModuleSRTInfo, CmmGroup) -- Output CPS transformed C-- -cmmPipeline hsc_env topSRT prog = +cmmPipeline hsc_env srtInfo prog = do let dflags = hsc_dflags hsc_env tops <- {-# SCC "tops" #-} mapM (cpsTop hsc_env) prog - (topSRT, cmms) <- {-# SCC "doSRTs" #-} doSRTs dflags topSRT tops + (srtInfo, cmms) <- {-# SCC "doSRTs" #-} doSRTs dflags srtInfo tops dumpWith dflags Opt_D_dump_cmm_cps "Post CPS Cmm" (ppr cmms) - return (topSRT, cmms) + return (srtInfo, cmms) cpsTop :: HscEnv -> CmmDecl -> IO (CAFEnv, [CmmDecl]) @@ -103,7 +106,7 @@ cpsTop hsc_env proc = Opt_D_dump_cmm_sink "Sink assignments" ------------- CAF analysis ---------------------------------------------- - let cafEnv = {-# SCC "cafAnal" #-} cafAnal g + let cafEnv = {-# SCC "cafAnal" #-} cafAnal call_pps l g dumpWith dflags Opt_D_dump_cmm_caf "CAFEnv" (ppr cafEnv) g <- if splitting_proc_points @@ -163,7 +166,7 @@ cpsTop hsc_env proc = || -- Note [inconsistent-pic-reg] usingInconsistentPicReg usingInconsistentPicReg - = case (platformArch platform, platformOS platform, gopt Opt_PIC dflags) + = case (platformArch platform, platformOS platform, positionIndependent dflags) of (ArchX86, OSDarwin, pic) -> pic (ArchPPC, OSDarwin, pic) -> pic _ -> False diff --git a/compiler/cmm/CmmProcPoint.hs b/compiler/cmm/CmmProcPoint.hs index 2e2c22c10d..bef8f384b8 100644 --- a/compiler/cmm/CmmProcPoint.hs +++ b/compiler/cmm/CmmProcPoint.hs @@ -8,7 +8,7 @@ module CmmProcPoint ) where -import Prelude hiding (last, unzip, succ, zip) +import GhcPrelude hiding (last, unzip, succ, zip) import DynFlags import BlockId @@ -19,7 +19,7 @@ import CmmUtils import CmmInfo import CmmLive import CmmSwitch -import Data.List (sortBy) +import Data.List (sortBy, foldl') import Maybes import Control.Monad import Outputable @@ -178,9 +178,9 @@ procPointLattice = DataflowLattice unreached add_to -- -- Extract the set of Continuation BlockIds, see Note [Continuation BlockIds]. callProcPoints :: CmmGraph -> ProcPointSet -callProcPoints g = foldGraphBlocks add (setSingleton (g_entry g)) g - where add :: CmmBlock -> LabelSet -> LabelSet - add b set = case lastNode b of +callProcPoints g = foldlGraphBlocks add (setSingleton (g_entry g)) g + where add :: LabelSet -> CmmBlock -> LabelSet + add set b = case lastNode b of CmmCall {cml_cont = Just k} -> setInsert k set CmmForeignCall {succ=k} -> setInsert k set _ -> set @@ -190,17 +190,17 @@ minimalProcPointSet :: Platform -> ProcPointSet -> CmmGraph -- Given the set of successors of calls (which must be proc-points) -- figure out the minimal set of necessary proc-points minimalProcPointSet platform callProcPoints g - = extendPPSet platform g (postorderDfs g) callProcPoints + = extendPPSet platform g (revPostorder g) callProcPoints extendPPSet :: Platform -> CmmGraph -> [CmmBlock] -> ProcPointSet -> UniqSM ProcPointSet extendPPSet platform g blocks procPoints = let env = procPointAnalysis procPoints g - add block pps = let id = entryLabel block + add pps block = let id = entryLabel block in case mapLookup id env of Just ProcPoint -> setInsert id pps _ -> pps - procPoints' = foldGraphBlocks add setEmpty g + procPoints' = foldlGraphBlocks add setEmpty g newPoints = mapMaybe ppSuccessor blocks newPoint = listToMaybe newPoints ppSuccessor b = @@ -242,11 +242,11 @@ splitAtProcPoints dflags entry_label callPPs procPoints procMap (CmmProc (TopInfo {info_tbls = info_tbls}) top_l _ g@(CmmGraph {g_entry=entry})) = do -- Build a map from procpoints to the blocks they reach - let addBlock - :: CmmBlock + let add_block + :: LabelMap (LabelMap CmmBlock) + -> CmmBlock -> LabelMap (LabelMap CmmBlock) - -> LabelMap (LabelMap CmmBlock) - addBlock b graphEnv = + add_block graphEnv b = case mapLookup bid procMap of Just ProcPoint -> add graphEnv bid bid b Just (ReachedBy set) -> @@ -265,7 +265,7 @@ splitAtProcPoints dflags entry_label callPPs procPoints procMap regSetToList $ expectJust "ppLiveness" $ mapLookup pp liveness - graphEnv <- return $ foldGraphBlocks addBlock mapEmpty g + graphEnv <- return $ foldlGraphBlocks add_block mapEmpty g -- Build a map from proc point BlockId to pairs of: -- * Labels for their new procedures @@ -275,12 +275,13 @@ splitAtProcPoints dflags entry_label callPPs procPoints procMap let add_label map pp = mapInsert pp lbls map where lbls | pp == entry = (entry_label, fmap cit_lbl (mapLookup entry info_tbls)) | otherwise = (block_lbl, guard (setMember pp callPPs) >> - Just (toInfoLbl block_lbl)) - where block_lbl = blockLbl pp + Just info_table_lbl) + where block_lbl = blockLbl pp + info_table_lbl = infoTblLbl pp procLabels :: LabelMap (CLabel, Maybe CLabel) - procLabels = foldl add_label mapEmpty - (filter (flip mapMember (toBlockMap g)) (setElems procPoints)) + procLabels = foldl' add_label mapEmpty + (filter (flip mapMember (toBlockMap g)) (setElems procPoints)) -- In each new graph, add blocks jumping off to the new procedures, -- and replace branches to procpoints with branches to the jump-off blocks @@ -301,7 +302,7 @@ splitAtProcPoints dflags entry_label callPPs procPoints procMap -> UniqSM (LabelMap CmmGraph) add_jumps newGraphEnv (ppId, blockEnv) = do let needed_jumps = -- find which procpoints we currently branch to - mapFold add_if_branch_to_pp [] blockEnv + mapFoldr add_if_branch_to_pp [] blockEnv add_if_branch_to_pp :: CmmBlock -> [(BlockId, CLabel)] -> [(BlockId, CLabel)] add_if_branch_to_pp block rst = case lastNode block of @@ -329,7 +330,7 @@ splitAtProcPoints dflags entry_label callPPs procPoints procMap -- replace branches to procpoints with branches to jumps blockEnv'' = toBlockMap $ replaceBranches jumpEnv $ ofBlockMap ppId blockEnv' -- add the jump blocks to the graph - blockEnv''' = foldl (flip insertBlock) blockEnv'' jumpBlocks + blockEnv''' = foldl' (flip addBlock) blockEnv'' jumpBlocks let g' = ofBlockMap ppId blockEnv''' -- pprTrace "g' pre jumps" (ppr g') $ do return (mapInsert ppId g' newGraphEnv) @@ -372,9 +373,10 @@ splitAtProcPoints dflags entry_label callPPs procPoints procMap -- call sites. Here, we sort them in reverse order -- it gets -- reversed later. let (_, block_order) = - foldl add_block_num (0::Int, mapEmpty :: LabelMap Int) - (postorderDfs g) - add_block_num (i, map) block = (i+1, mapInsert (entryLabel block) i map) + foldl' add_block_num (0::Int, mapEmpty :: LabelMap Int) + (revPostorder g) + add_block_num (i, map) block = + (i + 1, mapInsert (entryLabel block) i map) sort_fn (bid, _) (bid', _) = compare (expectJust "block_order" $ mapLookup bid block_order) (expectJust "block_order" $ mapLookup bid' block_order) diff --git a/compiler/cmm/CmmSink.hs b/compiler/cmm/CmmSink.hs index 517605b9ff..6317cfe929 100644 --- a/compiler/cmm/CmmSink.hs +++ b/compiler/cmm/CmmSink.hs @@ -3,6 +3,8 @@ module CmmSink ( cmmSink ) where +import GhcPrelude + import Cmm import CmmOpt import CmmLive @@ -15,13 +17,31 @@ import CodeGen.Platform import Platform (isARM, platformArch) import DynFlags +import Unique import UniqFM import PprCmm () +import qualified Data.IntSet as IntSet import Data.List (partition) import qualified Data.Set as Set import Data.Maybe +-- Compact sets for membership tests of local variables. + +type LRegSet = IntSet.IntSet + +emptyLRegSet :: LRegSet +emptyLRegSet = IntSet.empty + +nullLRegSet :: LRegSet -> Bool +nullLRegSet = IntSet.null + +insertLRegSet :: LocalReg -> LRegSet -> LRegSet +insertLRegSet l = IntSet.insert (getKey (getUnique l)) + +elemLRegSet :: LocalReg -> LRegSet -> Bool +elemLRegSet l = IntSet.member (getKey (getUnique l)) + -- ----------------------------------------------------------------------------- -- Sinking and inlining @@ -152,7 +172,7 @@ cmmSink dflags graph = ofBlockList (g_entry graph) $ sink mapEmpty $ blocks liveness = cmmLocalLiveness dflags graph getLive l = mapFindWithDefault Set.empty l liveness - blocks = postorderDfs graph + blocks = revPostorder graph join_pts = findJoinPoints blocks @@ -213,7 +233,7 @@ cmmSink dflags graph = ofBlockList (g_entry graph) $ sink mapEmpty $ blocks live_rhs = foldRegsUsed dflags extendRegSet emptyRegSet rhs - final_middle = foldl blockSnoc middle' dropped_last + final_middle = foldl' blockSnoc middle' dropped_last sunk' = mapUnion sunk $ mapFromList [ (l, filterAssignments dflags (getLive l) assigs'') @@ -323,7 +343,7 @@ walk dflags nodes assigs = go nodes emptyBlock assigs (dropped, as') = dropAssignmentsSimple dflags (\a -> conflicts dflags a node2) as1 - block' = foldl blockSnoc block dropped `blockSnoc` node2 + block' = foldl' blockSnoc block dropped `blockSnoc` node2 -- @@ -397,7 +417,7 @@ tryToInline , Assignments -- Remaining assignments ) -tryToInline dflags live node assigs = go usages node [] assigs +tryToInline dflags live node assigs = go usages node emptyLRegSet assigs where usages :: UniqFM Int -- Maps each LocalReg to a count of how often it is used usages = foldLocalRegsUsed dflags addUsage emptyUFM node @@ -420,7 +440,7 @@ tryToInline dflags live node assigs = go usages node [] assigs inline_and_keep = keep inl_node -- inline the assignment, keep it keep node' = (final_node, a : rest') - where (final_node, rest') = go usages' node' (l:skipped) rest + where (final_node, rest') = go usages' node' (insertLRegSet l skipped) rest usages' = foldLocalRegsUsed dflags (\m r -> addToUFM m r 2) usages rhs -- we must not inline anything that is mentioned in the RHS @@ -428,7 +448,7 @@ tryToInline dflags live node assigs = go usages node [] assigs -- usages of the regs on the RHS to 2. cannot_inline = skipped `regsUsedIn` rhs -- Note [dependent assignments] - || l `elem` skipped + || l `elemLRegSet` skipped || not (okToInline dflags rhs node) l_usages = lookupUFM usages l @@ -437,13 +457,7 @@ tryToInline dflags live node assigs = go usages node [] assigs occurs_once = not l_live && l_usages == Just 1 occurs_none = not l_live && l_usages == Nothing - inl_node = case mapExpDeep inl_exp node of - -- See Note [Improving conditionals] - CmmCondBranch (CmmMachOp (MO_Ne w) args) - ti fi l - -> CmmCondBranch (cmmMachOpFold dflags (MO_Eq w) args) - fi ti l - node' -> node' + inl_node = improveConditional (mapExpDeep inl_exp node) inl_exp :: CmmExpr -> CmmExpr -- inl_exp is where the inlining actually takes place! @@ -454,22 +468,43 @@ tryToInline dflags live node assigs = go usages node [] assigs inl_exp (CmmMachOp op args) = cmmMachOpFold dflags op args inl_exp other = other -{- Note [Improving conditionals] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Given - CmmCondBranch ((a >## b) != 1) t f -where a,b, are Floats, the constant folder /cannot/ turn it into - CmmCondBranch (a <=## b) t f -because comparison on floats are not invertible -(see CmmMachOp.maybeInvertComparison). -What we want instead is simply to reverse the true/false branches thus +{- Note [improveConditional] + +cmmMachOpFold tries to simplify conditionals to turn things like + (a == b) != 1 +into + (a != b) +but there's one case it can't handle: when the comparison is over +floating-point values, we can't invert it, because floating-point +comparisons aren't invertible (because of NaNs). + +But we *can* optimise this conditional by swapping the true and false +branches. Given CmmCondBranch ((a >## b) != 1) t f ---> +we can turn it into CmmCondBranch (a >## b) f t -And we do that right here in tryToInline, just as we do cmmMachOpFold. +So here we catch conditionals that weren't optimised by cmmMachOpFold, +and apply above transformation to eliminate the comparison against 1. + +It's tempting to just turn every != into == and then let cmmMachOpFold +do its thing, but that risks changing a nice fall-through conditional +into one that requires two jumps. (see swapcond_last in +CmmContFlowOpt), so instead we carefully look for just the cases where +we can eliminate a comparison. -} +improveConditional :: CmmNode O x -> CmmNode O x +improveConditional + (CmmCondBranch (CmmMachOp mop [x, CmmLit (CmmInt 1 _)]) t f l) + | neLike mop, isComparisonExpr x + = CmmCondBranch x f t (fmap not l) + where + neLike (MO_Ne _) = True + neLike (MO_U_Lt _) = True -- (x<y) < 1 behaves like (x<y) != 1 + neLike (MO_S_Lt _) = True -- (x<y) < 1 behaves like (x<y) != 1 + neLike _ = False +improveConditional other = other -- Note [dependent assignments] -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -519,11 +554,11 @@ And we do that right here in tryToInline, just as we do cmmMachOpFold. addUsage :: UniqFM Int -> LocalReg -> UniqFM Int addUsage m r = addToUFM_C (+) m r 1 -regsUsedIn :: [LocalReg] -> CmmExpr -> Bool -regsUsedIn [] _ = False +regsUsedIn :: LRegSet -> CmmExpr -> Bool +regsUsedIn ls _ | nullLRegSet ls = False regsUsedIn ls e = wrapRecExpf f e False - where f (CmmReg (CmmLocal l)) _ | l `elem` ls = True - f (CmmRegOff (CmmLocal l) _) _ | l `elem` ls = True + where f (CmmReg (CmmLocal l)) _ | l `elemLRegSet` ls = True + f (CmmRegOff (CmmLocal l) _) _ | l `elemLRegSet` ls = True f _ z = z -- we don't inline into CmmUnsafeForeignCall if the expression refers @@ -721,7 +756,7 @@ loadAddr dflags e w = case e of CmmReg r -> regAddr dflags r 0 w CmmRegOff r i -> regAddr dflags r i w - _other | regUsedIn dflags (CmmGlobal Sp) e -> StackMem + _other | regUsedIn dflags spReg e -> StackMem | otherwise -> AnyMem regAddr :: DynFlags -> CmmReg -> Int -> Width -> AbsMem diff --git a/compiler/cmm/CmmSwitch.hs b/compiler/cmm/CmmSwitch.hs index b0ca4be762..ce779465e3 100644 --- a/compiler/cmm/CmmSwitch.hs +++ b/compiler/cmm/CmmSwitch.hs @@ -11,6 +11,8 @@ module CmmSwitch ( createSwitchPlan, ) where +import GhcPrelude + import Outputable import DynFlags import Hoopl.Label (Label) @@ -107,7 +109,7 @@ data SwitchTargets = (M.Map Integer Label) -- The branches deriving (Show, Eq) --- | The smart constructr mkSwitchTargets normalises the map a bit: +-- | The smart constructor mkSwitchTargets normalises the map a bit: -- * No entries outside the range -- * No entries equal to the default -- * No default if all elements have explicit values @@ -249,6 +251,68 @@ data SwitchPlan -- findSingleValues -- 5. The thus collected pieces are assembled to a balanced binary tree. +{- + Note [Two alts + default] + ~~~~~~~~~~~~~~~~~~~~~~~~~ + +Discussion and a bit more info at #14644 + +When dealing with a switch of the form: +switch(e) { + case 1: goto l1; + case 3000: goto l2; + default: goto ldef; +} + +If we treat it as a sparse jump table we would generate: + +if (e > 3000) //Check if value is outside of the jump table. + goto ldef; +else { + if (e < 3000) { //Compare to upper value + if(e != 1) //Compare to remaining value + goto ldef; + else + goto l2; + } + else + goto l1; +} + +Instead we special case this to : + +if (e==1) goto l1; +else if (e==3000) goto l2; +else goto l3; + +This means we have: +* Less comparisons for: 1,<3000 +* Unchanged for 3000 +* One more for >3000 + +This improves code in a few ways: +* One comparison less means smaller code which helps with cache. +* It exchanges a taken jump for two jumps no taken in the >range case. + Jumps not taken are cheaper (See Agner guides) making this about as fast. +* For all other cases the first range check is removed making it faster. + +The end result is that the change is not measurably slower for the case +>3000 and faster for the other cases. + +This makes running this kind of match in an inner loop cheaper by 10-20% +depending on the data. +In nofib this improves wheel-sieve1 by 4-9% depending on problem +size. + +We could also add a second conditional jump after the comparison to +keep the range check like this: + cmp 3000, rArgument + jg <default> + je <branch 2> +While this is fairly cheap it made no big difference for the >3000 case +and slowed down all other cases making it not worthwhile. +-} + -- | Does the target support switch out of the box? Then leave this to the -- target! @@ -264,13 +328,16 @@ createSwitchPlan :: SwitchTargets -> SwitchPlan createSwitchPlan (SwitchTargets _signed _range (Just defLabel) m) | [(x, l)] <- M.toList m = IfEqual x l (Unconditionally defLabel) --- And another common case, matching booleans +-- And another common case, matching "booleans" createSwitchPlan (SwitchTargets _signed (lo,hi) Nothing m) - | [(x1, l1), (x2,l2)] <- M.toAscList m - , x1 == lo - , x2 == hi - , x1 + 1 == x2 + | [(x1, l1), (_x2,l2)] <- M.toAscList m + --Checking If |range| = 2 is enough if we have two unique literals + , hi - lo == 1 = IfEqual x1 l1 (Unconditionally l2) +-- See Note [Two alts + default] +createSwitchPlan (SwitchTargets _signed _range (Just defLabel) m) + | [(x1, l1), (x2,l2)] <- M.toAscList m + = IfEqual x1 l1 (IfEqual x2 l2 (Unconditionally defLabel)) createSwitchPlan (SwitchTargets signed range mbdef m) = -- pprTrace "createSwitchPlan" (text (show ids) $$ text (show (range,m)) $$ text (show pieces) $$ text (show flatPlan) $$ text (show plan)) $ plan diff --git a/compiler/cmm/CmmType.hs b/compiler/cmm/CmmType.hs index 4abbeaf0c1..97b181a243 100644 --- a/compiler/cmm/CmmType.hs +++ b/compiler/cmm/CmmType.hs @@ -1,5 +1,3 @@ -{-# LANGUAGE CPP #-} - module CmmType ( CmmType -- Abstract , b8, b16, b32, b64, b128, b256, b512, f32, f64, bWord, bHalfWord, gcWord @@ -29,7 +27,8 @@ module CmmType ) where -#include "HsVersions.h" + +import GhcPrelude import DynFlags import FastString @@ -71,7 +70,7 @@ instance Outputable CmmCat where -- Why is CmmType stratified? For native code generation, -- most of the time you just want to know what sort of register -- to put the thing in, and for this you need to know how --- many bits thing has and whether it goes in a floating-point +-- many bits thing has, and whether it goes in a floating-point -- register. By contrast, the distinction between GcPtr and -- GcNonPtr is of interest to only a few parts of the code generator. diff --git a/compiler/cmm/CmmUtils.hs b/compiler/cmm/CmmUtils.hs index 74524c997f..42d64842e2 100644 --- a/compiler/cmm/CmmUtils.hs +++ b/compiler/cmm/CmmUtils.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE CPP, GADTs, RankNTypes #-} +{-# LANGUAGE GADTs, RankNTypes #-} ----------------------------------------------------------------------------- -- @@ -35,7 +35,10 @@ module CmmUtils( cmmSubWord, cmmAddWord, cmmMulWord, cmmQuotWord, cmmToWord, - isTrivialCmmExpr, hasNoGlobalRegs, + isTrivialCmmExpr, hasNoGlobalRegs, isLit, isComparisonExpr, + + baseExpr, spExpr, hpExpr, spLimExpr, hpLimExpr, + currentTSOExpr, currentNurseryExpr, cccsExpr, -- Statics blankWord, @@ -53,16 +56,16 @@ module CmmUtils( -- * Operations that probably don't belong here modifyGraph, - ofBlockMap, toBlockMap, insertBlock, + ofBlockMap, toBlockMap, ofBlockList, toBlockList, bodyToBlockList, toBlockListEntryFirst, toBlockListEntryFirstFalseFallthrough, - foldGraphBlocks, mapGraphNodes, postorderDfs, mapGraphNodes1, + foldlGraphBlocks, mapGraphNodes, revPostorder, mapGraphNodes1, -- * Ticks blockTicks ) where -#include "HsVersions.h" +import GhcPrelude import TyCon ( PrimRep(..), PrimElemRep(..) ) import RepType ( UnaryType, SlotTy (..), typePrimRep1 ) @@ -73,11 +76,9 @@ import BlockId import CLabel import Outputable import DynFlags -import Util import CodeGen.Platform import Data.Word -import Data.Maybe import Data.Bits import Hoopl.Graph import Hoopl.Label @@ -252,8 +253,8 @@ cmmRegOff reg byte_off = CmmRegOff reg byte_off cmmOffsetLit :: CmmLit -> Int -> CmmLit cmmOffsetLit (CmmLabel l) byte_off = cmmLabelOff l byte_off cmmOffsetLit (CmmLabelOff l m) byte_off = cmmLabelOff l (m+byte_off) -cmmOffsetLit (CmmLabelDiffOff l1 l2 m) byte_off - = CmmLabelDiffOff l1 l2 (m+byte_off) +cmmOffsetLit (CmmLabelDiffOff l1 l2 m w) byte_off + = CmmLabelDiffOff l1 l2 (m+byte_off) w cmmOffsetLit (CmmInt m rep) byte_off = CmmInt (m + fromIntegral byte_off) rep cmmOffsetLit _ byte_off = pprPanic "cmmOffsetLit" (ppr byte_off) @@ -340,7 +341,6 @@ cmmEqWord dflags e1 e2 = CmmMachOp (mo_wordEq dflags) [e1, e2] cmmULtWord dflags e1 e2 = CmmMachOp (mo_wordULt dflags) [e1, e2] cmmUGeWord dflags e1 e2 = CmmMachOp (mo_wordUGe dflags) [e1, e2] cmmUGtWord dflags e1 e2 = CmmMachOp (mo_wordUGt dflags) [e1, e2] ---cmmShlWord dflags e1 e2 = CmmMachOp (mo_wordShl dflags) [e1, e2] cmmSLtWord dflags e1 e2 = CmmMachOp (mo_wordSLt dflags) [e1, e2] cmmUShrWord dflags e1 e2 = CmmMachOp (mo_wordUShr dflags) [e1, e2] cmmAddWord dflags e1 e2 = CmmMachOp (mo_wordAdd dflags) [e1, e2] @@ -385,6 +385,14 @@ hasNoGlobalRegs (CmmReg (CmmLocal _)) = True hasNoGlobalRegs (CmmRegOff (CmmLocal _) _) = True hasNoGlobalRegs _ = False +isLit :: CmmExpr -> Bool +isLit (CmmLit _) = True +isLit _ = False + +isComparisonExpr :: CmmExpr -> Bool +isComparisonExpr (CmmMachOp op _) = isComparisonMachOp op +isComparisonExpr _ = False + --------------------------------------------------- -- -- Tagging @@ -392,23 +400,20 @@ hasNoGlobalRegs _ = False --------------------------------------------------- -- Tag bits mask ---cmmTagBits = CmmLit (mkIntCLit tAG_BITS) cmmTagMask, cmmPointerMask :: DynFlags -> CmmExpr cmmTagMask dflags = mkIntExpr dflags (tAG_MASK dflags) cmmPointerMask dflags = mkIntExpr dflags (complement (tAG_MASK dflags)) -- Used to untag a possibly tagged pointer -- A static label need not be untagged -cmmUntag :: DynFlags -> CmmExpr -> CmmExpr +cmmUntag, cmmIsTagged, cmmConstrTag1 :: DynFlags -> CmmExpr -> CmmExpr cmmUntag _ e@(CmmLit (CmmLabel _)) = e -- Default case cmmUntag dflags e = cmmAndWord dflags e (cmmPointerMask dflags) -- Test if a closure pointer is untagged -cmmIsTagged :: DynFlags -> CmmExpr -> CmmExpr cmmIsTagged dflags e = cmmNeWord dflags (cmmAndWord dflags e (cmmTagMask dflags)) (zeroExpr dflags) -cmmConstrTag1 :: DynFlags -> CmmExpr -> CmmExpr -- Get constructor tag, but one based. cmmConstrTag1 dflags e = cmmAndWord dflags e (cmmTagMask dflags) @@ -451,20 +456,17 @@ regUsedIn dflags = regUsedIn_ where -- --------------------------------------------- -mkLiveness :: DynFlags -> [Maybe LocalReg] -> Liveness +mkLiveness :: DynFlags -> [LocalReg] -> Liveness mkLiveness _ [] = [] mkLiveness dflags (reg:regs) - = take sizeW bits ++ mkLiveness dflags regs + = bits ++ mkLiveness dflags regs where - sizeW = case reg of - Nothing -> 1 - Just r -> (widthInBytes (typeWidth (localRegType r)) + wORD_SIZE dflags - 1) - `quot` wORD_SIZE dflags - -- number of words, rounded up - bits = repeat $ is_non_ptr reg -- True <=> Non Ptr + sizeW = (widthInBytes (typeWidth (localRegType reg)) + wORD_SIZE dflags - 1) + `quot` wORD_SIZE dflags + -- number of words, rounded up + bits = replicate sizeW is_non_ptr -- True <=> Non Ptr - is_non_ptr Nothing = True - is_non_ptr (Just reg) = not $ isGcPtrType (localRegType reg) + is_non_ptr = not $ isGcPtrType (localRegType reg) -- ============================================== - @@ -486,12 +488,6 @@ toBlockMap (CmmGraph {g_graph=GMany NothingO body NothingO}) = body ofBlockMap :: BlockId -> LabelMap CmmBlock -> CmmGraph ofBlockMap entry bodyMap = CmmGraph {g_entry=entry, g_graph=GMany NothingO bodyMap NothingO} -insertBlock :: CmmBlock -> LabelMap CmmBlock -> LabelMap CmmBlock -insertBlock block map = - ASSERT(isNothing $ mapLookup id map) - mapInsert id block map - where id = entryLabel block - toBlockList :: CmmGraph -> [CmmBlock] toBlockList g = mapElems $ toBlockMap g @@ -554,11 +550,12 @@ mapGraphNodes1 :: (forall e x. CmmNode e x -> CmmNode e x) -> CmmGraph -> CmmGra mapGraphNodes1 f = modifyGraph (mapGraph f) -foldGraphBlocks :: (CmmBlock -> a -> a) -> a -> CmmGraph -> a -foldGraphBlocks k z g = mapFold k z $ toBlockMap g +foldlGraphBlocks :: (a -> CmmBlock -> a) -> a -> CmmGraph -> a +foldlGraphBlocks k z g = mapFoldl k z $ toBlockMap g -postorderDfs :: CmmGraph -> [CmmBlock] -postorderDfs g = {-# SCC "postorderDfs" #-} postorder_dfs_from (toBlockMap g) (g_entry g) +revPostorder :: CmmGraph -> [CmmBlock] +revPostorder g = {-# SCC "revPostorder" #-} + revPostorderFrom (toBlockMap g) (g_entry g) ------------------------------------------------- -- Tick utilities @@ -569,3 +566,18 @@ blockTicks b = reverse $ foldBlockNodesF goStmt b [] where goStmt :: CmmNode e x -> [CmmTickish] -> [CmmTickish] goStmt (CmmTick t) ts = t:ts goStmt _other ts = ts + + +-- ----------------------------------------------------------------------------- +-- Access to common global registers + +baseExpr, spExpr, hpExpr, currentTSOExpr, currentNurseryExpr, + spLimExpr, hpLimExpr, cccsExpr :: CmmExpr +baseExpr = CmmReg baseReg +spExpr = CmmReg spReg +spLimExpr = CmmReg spLimReg +hpExpr = CmmReg hpReg +hpLimExpr = CmmReg hpLimReg +currentTSOExpr = CmmReg currentTSOReg +currentNurseryExpr = CmmReg currentNurseryReg +cccsExpr = CmmReg cccsReg diff --git a/compiler/cmm/Debug.hs b/compiler/cmm/Debug.hs index 33595d8987..da37495530 100644 --- a/compiler/cmm/Debug.hs +++ b/compiler/cmm/Debug.hs @@ -22,6 +22,8 @@ module Debug ( UnwindExpr(..), toUnwindExpr ) where +import GhcPrelude + import BlockId import CLabel import Cmm @@ -33,7 +35,7 @@ import Outputable import PprCore () import PprCmmExpr ( pprExpr ) import SrcLoc -import Util +import Util ( seqList ) import Hoopl.Block import Hoopl.Collections @@ -44,6 +46,7 @@ import Data.Maybe import Data.List ( minimumBy, nubBy ) import Data.Ord ( comparing ) import qualified Data.Map as Map +import Data.Either ( partitionEithers ) -- | Debug information about a block of code. Ticks scope over nested -- blocks. @@ -98,7 +101,7 @@ cmmDebugGen modLoc decls = map (blocksForScope Nothing) topScopes -- Analyse tick scope structure: Each one is either a top-level -- tick scope, or the child of another. (topScopes, childScopes) - = splitEithers $ map (\a -> findP a a) $ Map.keys blockCtxs + = partitionEithers $ map (\a -> findP a a) $ Map.keys blockCtxs findP tsc GlobalScope = Left tsc -- top scope findP tsc scp | scp' `Map.member` blockCtxs = Right (scp', tsc) | otherwise = findP tsc scp' @@ -328,7 +331,7 @@ code, v :: P64 = R2; if ((Sp + 8) - 32 < SpLim) (likely: False) goto c2ff; else goto c2fg; -After c2fe we we may pass to either c2ff or c2fg; let's first consider the +After c2fe we may pass to either c2ff or c2fg; let's first consider the former. In this case there is nothing in particular that we need to do other than reiterate what we already know about Sp, @@ -346,8 +349,8 @@ in addition to the usual beginning-of-block statement, unwind Sp = Just Sp + 0; I64[Sp - 8] = c2dD; R1 = v :: P64; - unwind Sp = Just Sp + 8; Sp = Sp - 8; + unwind Sp = Just Sp + 8; if (R1 & 7 != 0) goto c2dD; else goto c2dE; The remaining blocks are simple, @@ -389,10 +392,95 @@ The flow of unwinding information through the compiler is a bit convoluted: * This unwind information is converted to DebugBlocks by Debug.cmmDebugGen - * These DebugBlcosk are then converted to, e.g., DWARF unwinding tables + * These DebugBlocks are then converted to, e.g., DWARF unwinding tables (by the Dwarf module) and emitted in the final object. -See also: Note [Unwinding information in the NCG] in AsmCodeGen. +See also: + Note [Unwinding information in the NCG] in AsmCodeGen, + Note [Unwind pseudo-instruction in Cmm], + Note [Debugging DWARF unwinding info]. + + +Note [Debugging DWARF unwinding info] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For debugging generated unwinding info I've found it most useful to dump the +disassembled binary with objdump -D and dump the debug info with +readelf --debug-dump=frames-interp. + +You should get something like this: + + 0000000000000010 <stg_catch_frame_info>: + 10: 48 83 c5 18 add $0x18,%rbp + 14: ff 65 00 jmpq *0x0(%rbp) + +and: + + Contents of the .debug_frame section: + + 00000000 0000000000000014 ffffffff CIE "" cf=1 df=-8 ra=16 + LOC CFA rbp rsp ra + 0000000000000000 rbp+0 v+0 s c+0 + + 00000018 0000000000000024 00000000 FDE cie=00000000 pc=000000000000000f..0000000000000017 + LOC CFA rbp rsp ra + 000000000000000f rbp+0 v+0 s c+0 + 000000000000000f rbp+24 v+0 s c+0 + +To read it http://www.dwarfstd.org/doc/dwarf-2.0.0.pdf has a nice example in +Appendix 5 (page 101 of the pdf) and more details in the relevant section. + +The key thing to keep in mind is that the value at LOC is the value from +*before* the instruction at LOC executes. In other words it answers the +question: if my $rip is at LOC, how do I get the relevant values given the +values obtained through unwinding so far. + +If the readelf --debug-dump=frames-interp output looks wrong, it may also be +useful to look at readelf --debug-dump=frames, which is closer to the +information that GHC generated. + +It's also useful to dump the relevant Cmm with -ddump-cmm -ddump-opt-cmm +-ddump-cmm-proc -ddump-cmm-verbose. Note [Unwind pseudo-instruction in Cmm] +explains how to interpret it. + +Inside gdb there are a couple useful commands for inspecting frames. +For example: + + gdb> info frame <num> + +It shows the values of registers obtained through unwinding. + +Another useful thing to try when debugging the DWARF unwinding is to enable +extra debugging output in GDB: + + gdb> set debug frame 1 + +This makes GDB produce a trace of its internal workings. Having gone this far, +it's just a tiny step to run GDB in GDB. Make sure you install debugging +symbols for gdb if you obtain it through a package manager. + +Keep in mind that the current release of GDB has an instruction pointer handling +heuristic that works well for C-like languages, but doesn't always work for +Haskell. See Note [Info Offset] in Dwarf.Types for more details. + +Note [Unwind pseudo-instruction in Cmm] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +One of the possible CmmNodes is a CmmUnwind pseudo-instruction. It doesn't +generate any assembly, but controls what DWARF unwinding information gets +generated. + +It's important to understand what ranges of code the unwind pseudo-instruction +refers to. +For a sequence of CmmNodes like: + + A // starts at addr X and ends at addr Y-1 + unwind Sp = Just Sp + 16; + B // starts at addr Y and ends at addr Z + +the unwind statement reflects the state after A has executed, but before B +has executed. If you consult the Note [Debugging DWARF unwinding info], the +LOC this information will end up in is Y. -} -- | A label associated with an 'UnwindTable' diff --git a/compiler/cmm/Hoopl/Block.hs b/compiler/cmm/Hoopl/Block.hs index 3623fcd242..c4ff1794e8 100644 --- a/compiler/cmm/Hoopl/Block.hs +++ b/compiler/cmm/Hoopl/Block.hs @@ -33,6 +33,7 @@ module Hoopl.Block , replaceLastNode ) where +import GhcPrelude -- ----------------------------------------------------------------------------- -- Shapes: Open and Closed diff --git a/compiler/cmm/Hoopl/Collections.hs b/compiler/cmm/Hoopl/Collections.hs index 679057626b..f8bdfda3d1 100644 --- a/compiler/cmm/Hoopl/Collections.hs +++ b/compiler/cmm/Hoopl/Collections.hs @@ -1,11 +1,22 @@ +{-# LANGUAGE DeriveFunctor #-} +{-# LANGUAGE DeriveTraversable #-} +{-# LANGUAGE DeriveFoldable #-} {-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} + module Hoopl.Collections ( IsSet(..) , setInsertList, setDeleteList, setUnions , IsMap(..) , mapInsertList, mapDeleteList, mapUnions + , UniqueMap, UniqueSet ) where +import GhcPrelude + +import qualified Data.IntMap.Strict as M +import qualified Data.IntSet as S + import Data.List (foldl', foldl1') class IsSet set where @@ -25,7 +36,8 @@ class IsSet set where setIntersection :: set -> set -> set setIsSubsetOf :: set -> set -> Bool - setFold :: (ElemOf set -> b -> b) -> b -> set -> b + setFoldl :: (b -> ElemOf set -> b) -> b -> set -> b + setFoldr :: (ElemOf set -> b -> b) -> b -> set -> b setElems :: set -> [ElemOf set] setFromList :: [ElemOf set] -> set @@ -56,6 +68,7 @@ class IsMap map where mapInsert :: KeyOf map -> a -> map a -> map a mapInsertWith :: (a -> a -> a) -> KeyOf map -> a -> map a -> map a mapDelete :: KeyOf map -> map a -> map a + mapAlter :: (Maybe a -> Maybe a) -> KeyOf map -> map a -> map a mapUnion :: map a -> map a -> map a mapUnionWithKey :: (KeyOf map -> a -> a -> a) -> map a -> map a -> map a @@ -65,8 +78,9 @@ class IsMap map where mapMap :: (a -> b) -> map a -> map b mapMapWithKey :: (KeyOf map -> a -> b) -> map a -> map b - mapFold :: (a -> b -> b) -> b -> map a -> b - mapFoldWithKey :: (KeyOf map -> a -> b -> b) -> b -> map a -> b + mapFoldl :: (b -> a -> b) -> b -> map a -> b + mapFoldr :: (a -> b -> b) -> b -> map a -> b + mapFoldlWithKey :: (b -> KeyOf map -> a -> b) -> b -> map a -> b mapFilter :: (a -> Bool) -> map a -> map a mapElems :: map a -> [a] @@ -85,3 +99,70 @@ mapDeleteList keys map = foldl' (flip mapDelete) map keys mapUnions :: IsMap map => [map a] -> map a mapUnions [] = mapEmpty mapUnions maps = foldl1' mapUnion maps + +----------------------------------------------------------------------------- +-- Basic instances +----------------------------------------------------------------------------- + +newtype UniqueSet = US S.IntSet deriving (Eq, Ord, Show) + +instance IsSet UniqueSet where + type ElemOf UniqueSet = Int + + setNull (US s) = S.null s + setSize (US s) = S.size s + setMember k (US s) = S.member k s + + setEmpty = US S.empty + setSingleton k = US (S.singleton k) + setInsert k (US s) = US (S.insert k s) + setDelete k (US s) = US (S.delete k s) + + setUnion (US x) (US y) = US (S.union x y) + setDifference (US x) (US y) = US (S.difference x y) + setIntersection (US x) (US y) = US (S.intersection x y) + setIsSubsetOf (US x) (US y) = S.isSubsetOf x y + + setFoldl k z (US s) = S.foldl' k z s + setFoldr k z (US s) = S.foldr k z s + + setElems (US s) = S.elems s + setFromList ks = US (S.fromList ks) + +newtype UniqueMap v = UM (M.IntMap v) + deriving (Eq, Ord, Show, Functor, Foldable, Traversable) + +instance IsMap UniqueMap where + type KeyOf UniqueMap = Int + + mapNull (UM m) = M.null m + mapSize (UM m) = M.size m + mapMember k (UM m) = M.member k m + mapLookup k (UM m) = M.lookup k m + mapFindWithDefault def k (UM m) = M.findWithDefault def k m + + mapEmpty = UM M.empty + mapSingleton k v = UM (M.singleton k v) + mapInsert k v (UM m) = UM (M.insert k v m) + mapInsertWith f k v (UM m) = UM (M.insertWith f k v m) + mapDelete k (UM m) = UM (M.delete k m) + mapAlter f k (UM m) = UM (M.alter f k m) + + mapUnion (UM x) (UM y) = UM (M.union x y) + mapUnionWithKey f (UM x) (UM y) = UM (M.unionWithKey f x y) + mapDifference (UM x) (UM y) = UM (M.difference x y) + mapIntersection (UM x) (UM y) = UM (M.intersection x y) + mapIsSubmapOf (UM x) (UM y) = M.isSubmapOf x y + + mapMap f (UM m) = UM (M.map f m) + mapMapWithKey f (UM m) = UM (M.mapWithKey f m) + mapFoldl k z (UM m) = M.foldl' k z m + mapFoldr k z (UM m) = M.foldr k z m + mapFoldlWithKey k z (UM m) = M.foldlWithKey' k z m + mapFilter f (UM m) = UM (M.filter f m) + + mapElems (UM m) = M.elems m + mapKeys (UM m) = M.keys m + mapToList (UM m) = M.toList m + mapFromList assocs = UM (M.fromList assocs) + mapFromListWith f assocs = UM (M.fromListWith f assocs) diff --git a/compiler/cmm/Hoopl/Dataflow.hs b/compiler/cmm/Hoopl/Dataflow.hs index c2ace502b3..bf12b3f6a1 100644 --- a/compiler/cmm/Hoopl/Dataflow.hs +++ b/compiler/cmm/Hoopl/Dataflow.hs @@ -30,14 +30,16 @@ module Hoopl.Dataflow , rewriteCmmBwd , changedIf , joinOutFacts + , joinFacts ) where +import GhcPrelude + import Cmm import UniqSupply import Data.Array -import Data.List import Data.Maybe import Data.IntSet (IntSet) import qualified Data.IntSet as IntSet @@ -109,8 +111,7 @@ analyzeCmm dir lattice transfer cmmGraph initFact = blockMap = case hooplGraph of GMany NothingO bm NothingO -> bm - entries = if mapNull initFact then [entry] else mapKeys initFact - in fixpointAnalysis dir lattice transfer entries blockMap initFact + in fixpointAnalysis dir lattice transfer entry blockMap initFact -- Fixpoint algorithm. fixpointAnalysis @@ -118,19 +119,20 @@ fixpointAnalysis Direction -> DataflowLattice f -> TransferFun f - -> [Label] + -> Label -> LabelMap CmmBlock -> FactBase f -> FactBase f -fixpointAnalysis direction lattice do_block entries blockmap = loop start +fixpointAnalysis direction lattice do_block entry blockmap = loop start where -- Sorting the blocks helps to minimize the number of times we need to -- process blocks. For instance, for forward analysis we want to look at -- blocks in reverse postorder. Also, see comments for sortBlocks. - blocks = sortBlocks direction entries blockmap + blocks = sortBlocks direction entry blockmap num_blocks = length blocks block_arr = {-# SCC "block_arr" #-} listArray (0, num_blocks - 1) blocks - start = {-# SCC "start" #-} [0 .. num_blocks - 1] + start = {-# SCC "start" #-} IntSet.fromDistinctAscList + [0 .. num_blocks - 1] dep_blocks = {-# SCC "dep_blocks" #-} mkDepBlocks direction blocks join = fact_join lattice @@ -138,17 +140,17 @@ fixpointAnalysis direction lattice do_block entries blockmap = loop start :: IntHeap -- ^ Worklist, i.e., blocks to process -> FactBase f -- ^ Current result (increases monotonically) -> FactBase f - loop [] !fbase1 = fbase1 - loop (index : todo1) !fbase1 = + loop todo !fbase1 | Just (index, todo1) <- IntSet.minView todo = let block = block_arr ! index out_facts = {-# SCC "do_block" #-} do_block block fbase1 -- For each of the outgoing edges, we join it with the current -- information in fbase1 and (if something changed) we update it -- and add the affected blocks to the worklist. (todo2, fbase2) = {-# SCC "mapFoldWithKey" #-} - mapFoldWithKey + mapFoldlWithKey (updateFact join dep_blocks) (todo1, fbase1) out_facts in loop todo2 fbase2 + loop _ !fbase1 = fbase1 rewriteCmmBwd :: DataflowLattice f @@ -171,9 +173,8 @@ rewriteCmm dir lattice rwFun cmmGraph initFact = do blockMap1 = case hooplGraph of GMany NothingO bm NothingO -> bm - entries = if mapNull initFact then [entry] else mapKeys initFact (blockMap2, facts) <- - fixpointRewrite dir lattice rwFun entries blockMap1 initFact + fixpointRewrite dir lattice rwFun entry blockMap1 initFact return (cmmGraph {g_graph = GMany NothingO blockMap2 NothingO}, facts) fixpointRewrite @@ -181,20 +182,21 @@ fixpointRewrite Direction -> DataflowLattice f -> RewriteFun f - -> [Label] + -> Label -> LabelMap CmmBlock -> FactBase f -> UniqSM (LabelMap CmmBlock, FactBase f) -fixpointRewrite dir lattice do_block entries blockmap = loop start blockmap +fixpointRewrite dir lattice do_block entry blockmap = loop start blockmap where -- Sorting the blocks helps to minimize the number of times we need to -- process blocks. For instance, for forward analysis we want to look at -- blocks in reverse postorder. Also, see comments for sortBlocks. - blocks = sortBlocks dir entries blockmap + blocks = sortBlocks dir entry blockmap num_blocks = length blocks block_arr = {-# SCC "block_arr_rewrite" #-} listArray (0, num_blocks - 1) blocks - start = {-# SCC "start_rewrite" #-} [0 .. num_blocks - 1] + start = {-# SCC "start_rewrite" #-} + IntSet.fromDistinctAscList [0 .. num_blocks - 1] dep_blocks = {-# SCC "dep_blocks_rewrite" #-} mkDepBlocks dir blocks join = fact_join lattice @@ -203,8 +205,8 @@ fixpointRewrite dir lattice do_block entries blockmap = loop start blockmap -> LabelMap CmmBlock -- ^ Rewritten blocks. -> FactBase f -- ^ Current facts. -> UniqSM (LabelMap CmmBlock, FactBase f) - loop [] !blocks1 !fbase1 = return (blocks1, fbase1) - loop (index : todo1) !blocks1 !fbase1 = do + loop todo !blocks1 !fbase1 + | Just (index, todo1) <- IntSet.minView todo = do -- Note that we use the *original* block here. This is important. -- We're optimistically rewriting blocks even before reaching the fixed -- point, which means that the rewrite might be incorrect. So if the @@ -215,9 +217,10 @@ fixpointRewrite dir lattice do_block entries blockmap = loop start blockmap do_block block fbase1 let blocks2 = mapInsert (entryLabel new_block) new_block blocks1 (todo2, fbase2) = {-# SCC "mapFoldWithKey_rewrite" #-} - mapFoldWithKey + mapFoldlWithKey (updateFact join dep_blocks) (todo1, fbase1) out_facts loop todo2 blocks2 fbase2 + loop _ !blocks1 !fbase1 = return (blocks1, fbase1) {- @@ -263,20 +266,15 @@ we'll propagate (x=4) to L4, and nuke the otherwise-good rewriting of L4. -- | Sort the blocks into the right order for analysis. This means reverse -- postorder for a forward analysis. For the backward one, we simply reverse -- that (see Note [Backward vs forward analysis]). --- --- Note: We're using Hoopl's confusingly named `postorder_dfs_from` but AFAICS --- it returns the *reverse* postorder of the blocks (it visits blocks in the --- postorder and uses (:) to collect them, which gives the reverse of the --- visitation order). sortBlocks :: NonLocal n - => Direction -> [Label] -> LabelMap (Block n C C) -> [Block n C C] -sortBlocks direction entries blockmap = + => Direction -> Label -> LabelMap (Block n C C) -> [Block n C C] +sortBlocks direction entry blockmap = case direction of Fwd -> fwd Bwd -> reverse fwd where - fwd = postorder_dfs_from blockmap entries + fwd = revPostorderFrom blockmap entry -- Note [Backward vs forward analysis] -- @@ -328,11 +326,11 @@ mkDepBlocks Bwd blocks = go blocks 0 mapEmpty updateFact :: JoinFun f -> LabelMap IntSet + -> (IntHeap, FactBase f) -> Label -> f -- out fact -> (IntHeap, FactBase f) - -> (IntHeap, FactBase f) -updateFact fact_join dep_blocks lbl new_fact (todo, fbase) +updateFact fact_join dep_blocks (todo, fbase) lbl new_fact = case lookupFact lbl fbase of Nothing -> -- Note [No old fact] @@ -342,7 +340,7 @@ updateFact fact_join dep_blocks lbl new_fact (todo, fbase) (NotChanged _) -> (todo, fbase) (Changed f) -> let !z = mapInsert lbl f fbase in (changed, z) where - changed = IntSet.foldr insertIntHeap todo $ + changed = todo `IntSet.union` mapFindWithDefault IntSet.empty lbl dep_blocks {- @@ -376,6 +374,11 @@ joinOutFacts lattice nonLocal fact_base = foldl' join (fact_bot lattice) facts , isJust fact ] +joinFacts :: DataflowLattice f -> [f] -> f +joinFacts lattice facts = foldl' join (fact_bot lattice) facts + where + join new old = getJoined $ fact_join lattice (OldFact old) (NewFact new) + -- | Returns the joined facts for each label. mkFactBase :: DataflowLattice f -> [(Label, f)] -> FactBase f mkFactBase lattice = foldl' add mapEmpty @@ -434,19 +437,4 @@ joinBlocksOO (BMiddle n) b = blockCons n b joinBlocksOO b (BMiddle n) = blockSnoc b n joinBlocksOO b1 b2 = BCat b1 b2 --- ----------------------------------------------------------------------------- --- a Heap of Int - --- We should really use a proper Heap here, but my attempts to make --- one have not succeeded in beating the simple ordered list. Another --- alternative is IntSet (using deleteFindMin), but that was also --- slower than the ordered list in my experiments --SDM 25/1/2012 - -type IntHeap = [Int] -- ordered - -insertIntHeap :: Int -> [Int] -> [Int] -insertIntHeap x [] = [x] -insertIntHeap x (y:ys) - | x < y = x : y : ys - | x == y = x : ys - | otherwise = y : insertIntHeap x ys +type IntHeap = IntSet diff --git a/compiler/cmm/Hoopl/Graph.hs b/compiler/cmm/Hoopl/Graph.hs index 87da072458..0142f70c76 100644 --- a/compiler/cmm/Hoopl/Graph.hs +++ b/compiler/cmm/Hoopl/Graph.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE BangPatterns #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE RankNTypes #-} @@ -14,10 +15,13 @@ module Hoopl.Graph , labelsDefined , mapGraph , mapGraphBlocks - , postorder_dfs_from + , revPostorderFrom ) where +import GhcPrelude +import Util + import Hoopl.Label import Hoopl.Block import Hoopl.Collections @@ -49,13 +53,14 @@ emptyBody = mapEmpty bodyList :: Body' block n -> [(Label,block n C C)] bodyList body = mapToList body -addBlock :: NonLocal thing - => thing C C -> LabelMap (thing C C) - -> LabelMap (thing C C) -addBlock b body - | mapMember lbl body = error $ "duplicate label " ++ show lbl ++ " in graph" - | otherwise = mapInsert lbl b body - where lbl = entryLabel b +addBlock + :: (NonLocal block, HasDebugCallStack) + => block C C -> LabelMap (block C C) -> LabelMap (block C C) +addBlock block body = mapAlter add lbl body + where + lbl = entryLabel block + add Nothing = Just block + add _ = error $ "duplicate label " ++ show lbl ++ " in graph" -- --------------------------------------------------------------------------- @@ -107,9 +112,9 @@ labelsDefined :: forall block n e x . NonLocal (block n) => Graph' block n e x -> LabelSet labelsDefined GNil = setEmpty labelsDefined (GUnit{}) = setEmpty -labelsDefined (GMany _ body x) = mapFoldWithKey addEntry (exitLabel x) body - where addEntry :: forall a. ElemOf LabelSet -> a -> LabelSet -> LabelSet - addEntry label _ labels = setInsert label labels +labelsDefined (GMany _ body x) = mapFoldlWithKey addEntry (exitLabel x) body + where addEntry :: forall a. LabelSet -> ElemOf LabelSet -> a -> LabelSet + addEntry labels label _ = setInsert label labels exitLabel :: MaybeO x (block n C O) -> LabelSet exitLabel NothingO = setEmpty exitLabel (JustO b) = setSingleton (entryLabel b) @@ -117,22 +122,10 @@ labelsDefined (GMany _ body x) = mapFoldWithKey addEntry (exitLabel x) body ---------------------------------------------------------------- -class LabelsPtr l where - targetLabels :: l -> [Label] - -instance NonLocal n => LabelsPtr (n e C) where - targetLabels n = successors n - -instance LabelsPtr Label where - targetLabels l = [l] - -instance LabelsPtr LabelSet where - targetLabels = setElems - -instance LabelsPtr l => LabelsPtr [l] where - targetLabels = concatMap targetLabels - --- | This is the most important traversal over this data structure. It drops +-- | Returns a list of blocks reachable from the provided Labels in the reverse +-- postorder. +-- +-- This is the most important traversal over this data structure. It drops -- unreachable code and puts blocks in an order that is good for solving forward -- dataflow problems quickly. The reverse order is good for solving backward -- dataflow problems quickly. The forward order is also reasonably good for @@ -141,59 +134,52 @@ instance LabelsPtr l => LabelsPtr [l] where -- that you would need a more serious analysis, probably based on dominators, to -- identify loop headers. -- --- The ubiquity of 'postorder_dfs' is one reason for the ubiquity of the 'LGraph' --- representation, when for most purposes the plain 'Graph' representation is --- more mathematically elegant (but results in more complicated code). --- --- Here's an easy way to go wrong! Consider +-- For forward analyses we want reverse postorder visitation, consider: -- @ -- A -> [B,C] -- B -> D -- C -> D -- @ --- Then ordinary dfs would give [A,B,D,C] which has a back ref from C to D. --- Better to get [A,B,C,D] - - --- | Traversal: 'postorder_dfs' returns a list of blocks reachable --- from the entry of enterable graph. The entry and exit are *not* included. --- The list has the following property: --- --- Say a "back reference" exists if one of a block's --- control-flow successors precedes it in the output list --- --- Then there are as few back references as possible --- --- The output is suitable for use in --- a forward dataflow problem. For a backward problem, simply reverse --- the list. ('postorder_dfs' is sufficiently tricky to implement that --- one doesn't want to try and maintain both forward and backward --- versions.) - -postorder_dfs_from_except :: forall block e . (NonLocal block, LabelsPtr e) - => LabelMap (block C C) -> e -> LabelSet -> [block C C] -postorder_dfs_from_except blocks b visited = - vchildren (get_children b) (\acc _visited -> acc) [] visited - where - vnode :: block C C -> ([block C C] -> LabelSet -> a) -> [block C C] -> LabelSet -> a - vnode block cont acc visited = - if setMember id visited then - cont acc visited - else - let cont' acc visited = cont (block:acc) visited in - vchildren (get_children block) cont' acc (setInsert id visited) - where id = entryLabel block - vchildren :: forall a. [block C C] -> ([block C C] -> LabelSet -> a) -> [block C C] -> LabelSet -> a - vchildren bs cont acc visited = next bs acc visited - where next children acc visited = - case children of [] -> cont acc visited - (b:bs) -> vnode b (next bs) acc visited - get_children :: forall l. LabelsPtr l => l -> [block C C] - get_children block = foldr add_id [] $ targetLabels block - add_id id rst = case lookupFact id blocks of - Just b -> b : rst - Nothing -> rst - -postorder_dfs_from - :: (NonLocal block, LabelsPtr b) => LabelMap (block C C) -> b -> [block C C] -postorder_dfs_from blocks b = postorder_dfs_from_except blocks b setEmpty +-- Postorder: [D, C, B, A] (or [D, B, C, A]) +-- Reverse postorder: [A, B, C, D] (or [A, C, B, D]) +-- This matters for, e.g., forward analysis, because we want to analyze *both* +-- B and C before we analyze D. +revPostorderFrom + :: forall block. (NonLocal block) + => LabelMap (block C C) -> Label -> [block C C] +revPostorderFrom graph start = go start_worklist setEmpty [] + where + start_worklist = lookup_for_descend start Nil + + -- To compute the postorder we need to "visit" a block (mark as done) + -- *after* visiting all its successors. So we need to know whether we + -- already processed all successors of each block (and @NonLocal@ allows + -- arbitrary many successors). So we use an explicit stack with an extra bit + -- of information: + -- * @ConsTodo@ means to explore the block if it wasn't visited before + -- * @ConsMark@ means that all successors were already done and we can add + -- the block to the result. + -- + -- NOTE: We add blocks to the result list in postorder, but we *prepend* + -- them (i.e., we use @(:)@), which means that the final list is in reverse + -- postorder. + go :: DfsStack (block C C) -> LabelSet -> [block C C] -> [block C C] + go Nil !_ !result = result + go (ConsMark block rest) !wip_or_done !result = + go rest wip_or_done (block : result) + go (ConsTodo block rest) !wip_or_done !result + | entryLabel block `setMember` wip_or_done = go rest wip_or_done result + | otherwise = + let new_worklist = + foldr lookup_for_descend + (ConsMark block rest) + (successors block) + in go new_worklist (setInsert (entryLabel block) wip_or_done) result + + lookup_for_descend :: Label -> DfsStack (block C C) -> DfsStack (block C C) + lookup_for_descend label wl + | Just b <- mapLookup label graph = ConsTodo b wl + | otherwise = + error $ "Label that doesn't have a block?! " ++ show label + +data DfsStack a = ConsTodo a (DfsStack a) | ConsMark a (DfsStack a) | Nil diff --git a/compiler/cmm/Hoopl/Label.hs b/compiler/cmm/Hoopl/Label.hs index 5ee4f72fc3..7fddbf4c3f 100644 --- a/compiler/cmm/Hoopl/Label.hs +++ b/compiler/cmm/Hoopl/Label.hs @@ -2,32 +2,37 @@ {-# LANGUAGE DeriveFunctor #-} {-# LANGUAGE DeriveTraversable #-} {-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} + module Hoopl.Label ( Label , LabelMap , LabelSet , FactBase , lookupFact - , uniqueToLbl + , mkHooplLabel ) where +import GhcPrelude + import Outputable -import Hoopl.Collections -- TODO: This should really just use GHC's Unique and Uniq{Set,FM} -import Hoopl.Unique +import Hoopl.Collections import Unique (Uniquable(..)) +import TrieMap + ----------------------------------------------------------------------------- -- Label ----------------------------------------------------------------------------- -newtype Label = Label { lblToUnique :: Unique } +newtype Label = Label { lblToUnique :: Int } deriving (Eq, Ord) -uniqueToLbl :: Unique -> Label -uniqueToLbl = Label +mkHooplLabel :: Int -> Label +mkHooplLabel = Label instance Show Label where show (Label n) = "L" ++ show n @@ -60,9 +65,10 @@ instance IsSet LabelSet where setIntersection (LS x) (LS y) = LS (setIntersection x y) setIsSubsetOf (LS x) (LS y) = setIsSubsetOf x y - setFold k z (LS s) = setFold (k . uniqueToLbl) z s + setFoldl k z (LS s) = setFoldl (\a v -> k a (mkHooplLabel v)) z s + setFoldr k z (LS s) = setFoldr (\v a -> k (mkHooplLabel v) a) z s - setElems (LS s) = map uniqueToLbl (setElems s) + setElems (LS s) = map mkHooplLabel (setElems s) setFromList ks = LS (setFromList (map lblToUnique ks)) ----------------------------------------------------------------------------- @@ -85,22 +91,25 @@ instance IsMap LabelMap where mapInsert (Label k) v (LM m) = LM (mapInsert k v m) mapInsertWith f (Label k) v (LM m) = LM (mapInsertWith f k v m) mapDelete (Label k) (LM m) = LM (mapDelete k m) + mapAlter f (Label k) (LM m) = LM (mapAlter f k m) mapUnion (LM x) (LM y) = LM (mapUnion x y) - mapUnionWithKey f (LM x) (LM y) = LM (mapUnionWithKey (f . uniqueToLbl) x y) + mapUnionWithKey f (LM x) (LM y) = LM (mapUnionWithKey (f . mkHooplLabel) x y) mapDifference (LM x) (LM y) = LM (mapDifference x y) mapIntersection (LM x) (LM y) = LM (mapIntersection x y) mapIsSubmapOf (LM x) (LM y) = mapIsSubmapOf x y mapMap f (LM m) = LM (mapMap f m) - mapMapWithKey f (LM m) = LM (mapMapWithKey (f . uniqueToLbl) m) - mapFold k z (LM m) = mapFold k z m - mapFoldWithKey k z (LM m) = mapFoldWithKey (k . uniqueToLbl) z m + mapMapWithKey f (LM m) = LM (mapMapWithKey (f . mkHooplLabel) m) + mapFoldl k z (LM m) = mapFoldl k z m + mapFoldr k z (LM m) = mapFoldr k z m + mapFoldlWithKey k z (LM m) = + mapFoldlWithKey (\a v -> k a (mkHooplLabel v)) z m mapFilter f (LM m) = LM (mapFilter f m) mapElems (LM m) = mapElems m - mapKeys (LM m) = map uniqueToLbl (mapKeys m) - mapToList (LM m) = [(uniqueToLbl k, v) | (k, v) <- mapToList m] + mapKeys (LM m) = map mkHooplLabel (mapKeys m) + mapToList (LM m) = [(mkHooplLabel k, v) | (k, v) <- mapToList m] mapFromList assocs = LM (mapFromList [(lblToUnique k, v) | (k, v) <- assocs]) mapFromListWith f assocs = LM (mapFromListWith f [(lblToUnique k, v) | (k, v) <- assocs]) @@ -113,6 +122,14 @@ instance Outputable LabelSet where instance Outputable a => Outputable (LabelMap a) where ppr = ppr . mapToList +instance TrieMap LabelMap where + type Key LabelMap = Label + emptyTM = mapEmpty + lookupTM k m = mapLookup k m + alterTM k f m = mapAlter f k m + foldTM k m z = mapFoldr k z m + mapTM f m = mapMap f m + ----------------------------------------------------------------------------- -- FactBase diff --git a/compiler/cmm/Hoopl/Unique.hs b/compiler/cmm/Hoopl/Unique.hs deleted file mode 100644 index f27961bb28..0000000000 --- a/compiler/cmm/Hoopl/Unique.hs +++ /dev/null @@ -1,91 +0,0 @@ -{-# LANGUAGE DeriveFunctor #-} -{-# LANGUAGE DeriveTraversable #-} -{-# LANGUAGE DeriveFoldable #-} -{-# LANGUAGE TypeFamilies #-} -module Hoopl.Unique - ( Unique - , UniqueMap - , UniqueSet - , intToUnique - ) where - -import qualified Data.IntMap as M -import qualified Data.IntSet as S - -import Hoopl.Collections - - ------------------------------------------------------------------------------ --- Unique ------------------------------------------------------------------------------ - -type Unique = Int - -intToUnique :: Int -> Unique -intToUnique = id - ------------------------------------------------------------------------------ --- UniqueSet - -newtype UniqueSet = US S.IntSet deriving (Eq, Ord, Show) - -instance IsSet UniqueSet where - type ElemOf UniqueSet = Unique - - setNull (US s) = S.null s - setSize (US s) = S.size s - setMember k (US s) = S.member k s - - setEmpty = US S.empty - setSingleton k = US (S.singleton k) - setInsert k (US s) = US (S.insert k s) - setDelete k (US s) = US (S.delete k s) - - setUnion (US x) (US y) = US (S.union x y) - setDifference (US x) (US y) = US (S.difference x y) - setIntersection (US x) (US y) = US (S.intersection x y) - setIsSubsetOf (US x) (US y) = S.isSubsetOf x y - - setFold k z (US s) = S.foldr k z s - - setElems (US s) = S.elems s - setFromList ks = US (S.fromList ks) - ------------------------------------------------------------------------------ --- UniqueMap - -newtype UniqueMap v = UM (M.IntMap v) - deriving (Eq, Ord, Show, Functor, Foldable, Traversable) - -instance IsMap UniqueMap where - type KeyOf UniqueMap = Unique - - mapNull (UM m) = M.null m - mapSize (UM m) = M.size m - mapMember k (UM m) = M.member k m - mapLookup k (UM m) = M.lookup k m - mapFindWithDefault def k (UM m) = M.findWithDefault def k m - - mapEmpty = UM M.empty - mapSingleton k v = UM (M.singleton k v) - mapInsert k v (UM m) = UM (M.insert k v m) - mapInsertWith f k v (UM m) = UM (M.insertWith f k v m) - mapDelete k (UM m) = UM (M.delete k m) - - mapUnion (UM x) (UM y) = UM (M.union x y) - mapUnionWithKey f (UM x) (UM y) = UM (M.unionWithKey (f . intToUnique) x y) - mapDifference (UM x) (UM y) = UM (M.difference x y) - mapIntersection (UM x) (UM y) = UM (M.intersection x y) - mapIsSubmapOf (UM x) (UM y) = M.isSubmapOf x y - - mapMap f (UM m) = UM (M.map f m) - mapMapWithKey f (UM m) = UM (M.mapWithKey (f . intToUnique) m) - mapFold k z (UM m) = M.foldr k z m - mapFoldWithKey k z (UM m) = M.foldrWithKey (k . intToUnique) z m - mapFilter f (UM m) = UM (M.filter f m) - - mapElems (UM m) = M.elems m - mapKeys (UM m) = M.keys m - mapToList (UM m) = M.toList m - mapFromList assocs = UM (M.fromList assocs) - mapFromListWith f assocs = UM (M.fromListWith f assocs) diff --git a/compiler/cmm/MkGraph.hs b/compiler/cmm/MkGraph.hs index 62dfd34da3..70229d067d 100644 --- a/compiler/cmm/MkGraph.hs +++ b/compiler/cmm/MkGraph.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE BangPatterns, CPP, GADTs #-} +{-# LANGUAGE BangPatterns, GADTs #-} module MkGraph ( CmmAGraph, CmmAGraphScoped, CgStmt(..) @@ -21,6 +21,8 @@ module MkGraph ) where +import GhcPrelude hiding ( (<*>) ) -- avoid importing (<*>) + import BlockId import Cmm import CmmCallConv @@ -35,13 +37,7 @@ import ForeignCall import OrdList import SMRep (ByteOff) import UniqSupply - -import Control.Monad -import Data.List -import Data.Maybe -import Prelude (($),Int,Bool,Eq(..)) -- avoid importing (<*>) - -#include "HsVersions.h" +import Util ----------------------------------------------------------------------------- @@ -185,12 +181,10 @@ mkNop :: CmmAGraph mkNop = nilOL mkComment :: FastString -> CmmAGraph -#if defined(DEBUG) --- SDM: generating all those comments takes time, this saved about 4% for me -mkComment fs = mkMiddle $ CmmComment fs -#else -mkComment _ = nilOL -#endif +mkComment fs + -- SDM: generating all those comments takes time, this saved about 4% for me + | debugIsOn = mkMiddle $ CmmComment fs + | otherwise = nilOL ---------- Assignment and store mkAssign :: CmmReg -> CmmExpr -> CmmAGraph diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs index 7d36c120b0..a979d49501 100644 --- a/compiler/cmm/PprC.hs +++ b/compiler/cmm/PprC.hs @@ -26,6 +26,8 @@ module PprC ( #include "HsVersions.h" -- Cmm stuff +import GhcPrelude + import BlockId import CLabel import ForeignCall @@ -377,14 +379,10 @@ pprExpr e = case e of CmmReg reg -> pprCastReg reg CmmRegOff reg 0 -> pprCastReg reg - CmmRegOff reg i - | i < 0 && negate_ok -> pprRegOff (char '-') (-i) - | otherwise -> pprRegOff (char '+') i - where - pprRegOff op i' = pprCastReg reg <> op <> int i' - negate_ok = negate (fromIntegral i :: Integer) < - fromIntegral (maxBound::Int) - -- overflow is undefined; see #7620 + -- CmmRegOff is an alias of MO_Add + CmmRegOff reg i -> sdocWithDynFlags $ \dflags -> + pprCastReg reg <> char '+' <> + pprHexVal (fromIntegral i) (wordWidth dflags) CmmMachOp mop args -> pprMachOpApp mop args @@ -493,7 +491,7 @@ pprLit lit = case lit of CmmHighStackMark -> panic "PprC printing high stack mark" CmmLabel clbl -> mkW_ <> pprCLabelAddr clbl CmmLabelOff clbl i -> mkW_ <> pprCLabelAddr clbl <> char '+' <> int i - CmmLabelDiffOff clbl1 _ i + CmmLabelDiffOff clbl1 _ i _ -- non-word widths not supported via C -- WARNING: -- * the lit must occur in the info table clbl2 -- * clbl1 must be an SRT, a slow entry point or a large bitmap @@ -504,7 +502,7 @@ pprLit lit = case lit of pprLit1 :: CmmLit -> SDoc pprLit1 lit@(CmmLabelOff _ _) = parens (pprLit lit) -pprLit1 lit@(CmmLabelDiffOff _ _ _) = parens (pprLit lit) +pprLit1 lit@(CmmLabelDiffOff _ _ _ _) = parens (pprLit lit) pprLit1 lit@(CmmFloat _ _) = parens (pprLit lit) pprLit1 other = pprLit other @@ -536,13 +534,29 @@ pprStatics dflags (CmmStaticLit (CmmInt i W64) : rest) CmmStaticLit (CmmInt q W32) : rest) where r = i .&. 0xffffffff q = i `shiftR` 32 +pprStatics dflags (CmmStaticLit (CmmInt a W32) : + CmmStaticLit (CmmInt b W32) : rest) + | wordWidth dflags == W64 + = if wORDS_BIGENDIAN dflags + then pprStatics dflags (CmmStaticLit (CmmInt ((shiftL a 32) .|. b) W64) : + rest) + else pprStatics dflags (CmmStaticLit (CmmInt ((shiftL b 32) .|. a) W64) : + rest) +pprStatics dflags (CmmStaticLit (CmmInt a W16) : + CmmStaticLit (CmmInt b W16) : rest) + | wordWidth dflags == W32 + = if wORDS_BIGENDIAN dflags + then pprStatics dflags (CmmStaticLit (CmmInt ((shiftL a 16) .|. b) W32) : + rest) + else pprStatics dflags (CmmStaticLit (CmmInt ((shiftL b 16) .|. a) W32) : + rest) pprStatics dflags (CmmStaticLit (CmmInt _ w) : _) | w /= wordWidth dflags - = panic "pprStatics: cannot emit a non-word-sized static literal" + = pprPanic "pprStatics: cannot emit a non-word-sized static literal" (ppr w) pprStatics dflags (CmmStaticLit lit : rest) = pprLit1 lit : pprStatics dflags rest pprStatics _ (other : _) - = pprPanic "pprWord" (pprStatic other) + = pprPanic "pprStatics: other" (pprStatic other) pprStatic :: CmmStatic -> SDoc pprStatic s = case s of @@ -721,6 +735,8 @@ pprMachOp_for_C mop = case mop of (panic $ "PprC.pprMachOp_for_C: MO_VF_Quot" ++ " should have been handled earlier!") + MO_AlignmentCheck {} -> panic "-falignment-santisation not supported by unregisterised backend" + signedOp :: MachOp -> Bool -- Argument type(s) are signed ints signedOp (MO_S_Quot _) = True signedOp (MO_S_Rem _) = True @@ -759,6 +775,9 @@ pprCallishMachOp_for_C mop MO_F64_Tanh -> text "tanh" MO_F64_Asin -> text "asin" MO_F64_Acos -> text "acos" + MO_F64_Atanh -> text "atanh" + MO_F64_Asinh -> text "asinh" + MO_F64_Acosh -> text "acosh" MO_F64_Atan -> text "atan" MO_F64_Log -> text "log" MO_F64_Exp -> text "exp" @@ -774,6 +793,9 @@ pprCallishMachOp_for_C mop MO_F32_Asin -> text "asinf" MO_F32_Acos -> text "acosf" MO_F32_Atan -> text "atanf" + MO_F32_Asinh -> text "asinhf" + MO_F32_Acosh -> text "acoshf" + MO_F32_Atanh -> text "atanhf" MO_F32_Log -> text "logf" MO_F32_Exp -> text "expf" MO_F32_Sqrt -> text "sqrtf" @@ -782,8 +804,11 @@ pprCallishMachOp_for_C mop MO_Memcpy _ -> text "memcpy" MO_Memset _ -> text "memset" MO_Memmove _ -> text "memmove" + MO_Memcmp _ -> text "memcmp" (MO_BSwap w) -> ptext (sLit $ bSwapLabel w) (MO_PopCnt w) -> ptext (sLit $ popCntLabel w) + (MO_Pext w) -> ptext (sLit $ pextLabel w) + (MO_Pdep w) -> ptext (sLit $ pdepLabel w) (MO_Clz w) -> ptext (sLit $ clzLabel w) (MO_Ctz w) -> ptext (sLit $ ctzLabel w) (MO_AtomicRMW w amop) -> ptext (sLit $ atomicRMWLabel w amop) @@ -796,6 +821,7 @@ pprCallishMachOp_for_C mop MO_U_QuotRem {} -> unsupported MO_U_QuotRem2 {} -> unsupported MO_Add2 {} -> unsupported + MO_AddWordC {} -> unsupported MO_SubWordC {} -> unsupported MO_AddIntC {} -> unsupported MO_SubIntC {} -> unsupported @@ -1075,7 +1101,7 @@ te_BB block = mapM_ te_Stmt (blockToList mid) >> te_Stmt last te_Lit :: CmmLit -> TE () te_Lit (CmmLabel l) = te_lbl l te_Lit (CmmLabelOff l _) = te_lbl l -te_Lit (CmmLabelDiffOff l1 _ _) = te_lbl l1 +te_Lit (CmmLabelDiffOff l1 _ _ _) = te_lbl l1 te_Lit _ = return () te_Stmt :: CmmNode e x -> TE () diff --git a/compiler/cmm/PprCmm.hs b/compiler/cmm/PprCmm.hs index dbd4619416..90f26e4247 100644 --- a/compiler/cmm/PprCmm.hs +++ b/compiler/cmm/PprCmm.hs @@ -15,8 +15,8 @@ -- -- As such, this should be a well-defined syntax: we want it to look nice. -- Thus, we try wherever possible to use syntax defined in [1], --- "The C-- Reference Manual", http://www.cminusminus.org/. We differ --- slightly, in some cases. For one, we use I8 .. I64 for types, rather +-- "The C-- Reference Manual", http://www.cs.tufts.edu/~nr/c--/index.html. We +-- differ slightly, in some cases. For one, we use I8 .. I64 for types, rather -- than C--'s bits8 .. bits64. -- -- We try to ensure that all information available in the abstract @@ -39,6 +39,8 @@ module PprCmm ) where +import GhcPrelude hiding (succ) + import BlockId () import CLabel import Cmm @@ -55,8 +57,6 @@ import PprCore () import BasicTypes import Hoopl.Block import Hoopl.Graph -import Data.List -import Prelude hiding (succ) ------------------------------------------------- -- Outputable instances @@ -108,7 +108,7 @@ pprStackInfo (StackInfo {arg_space=arg_space, updfr_space=updfr_space}) = pprTopInfo :: CmmTopInfo -> SDoc pprTopInfo (TopInfo {info_tbls=info_tbl, stack_info=stack_info}) = - vcat [text "info_tbl: " <> ppr info_tbl, + vcat [text "info_tbls: " <> ppr info_tbl, text "stack_info: " <> ppr stack_info] ---------------------------------------------------------- @@ -141,8 +141,8 @@ pprCmmGraph g = text "{" <> text "offset" $$ nest 2 (vcat $ map ppr blocks) $$ text "}" - where blocks = postorderDfs g - -- postorderDfs has the side-effect of discarding unreachable code, + where blocks = revPostorder g + -- revPostorder has the side-effect of discarding unreachable code, -- so pretty-printed Cmm will omit any unreachable blocks. This can -- sometimes be confusing. @@ -185,9 +185,13 @@ pprNode node = pp_node <+> pp_debug pp_node :: SDoc pp_node = sdocWithDynFlags $ \dflags -> case node of -- label: - CmmEntry id tscope -> ppr id <> colon <+> + CmmEntry id tscope -> lbl <> colon <+> (sdocWithDynFlags $ \dflags -> ppUnless (gopt Opt_SuppressTicks dflags) (text "//" <+> ppr tscope)) + where + lbl = if gopt Opt_SuppressUniques dflags + then text "_lbl_" + else ppr id -- // text CmmComment s -> text "//" <+> ftext s @@ -252,8 +256,8 @@ pprNode node = pp_node <+> pp_debug , ppr l <> semi ] def | Just l <- mbdef = hsep - [ text "default: goto" - , ppr l <> semi + [ text "default:" + , braces (text "goto" <+> ppr l <> semi) ] | otherwise = empty diff --git a/compiler/cmm/PprCmmDecl.hs b/compiler/cmm/PprCmmDecl.hs index ce8fb0dc5d..c4ee6fd068 100644 --- a/compiler/cmm/PprCmmDecl.hs +++ b/compiler/cmm/PprCmmDecl.hs @@ -1,5 +1,3 @@ -{-# LANGUAGE CPP #-} - ---------------------------------------------------------------------------- -- -- Pretty-printing of common Cmm types @@ -15,8 +13,8 @@ -- -- As such, this should be a well-defined syntax: we want it to look nice. -- Thus, we try wherever possible to use syntax defined in [1], --- "The C-- Reference Manual", http://www.cminusminus.org/. We differ --- slightly, in some cases. For one, we use I8 .. I64 for types, rather +-- "The C-- Reference Manual", http://www.cs.tufts.edu/~nr/c--/index.html. We +-- differ slightly, in some cases. For one, we use I8 .. I64 for types, rather -- than C--'s bits8 .. bits64. -- -- We try to ensure that all information available in the abstract @@ -40,6 +38,8 @@ module PprCmmDecl ) where +import GhcPrelude + import PprCmmExpr import Cmm @@ -52,7 +52,6 @@ import System.IO -- Temp Jan08 import SMRep -#include "../includes/rts/storage/FunTypes.h" pprCmms :: (Outputable info, Outputable g) @@ -116,18 +115,15 @@ pprTop (CmmData section ds) = pprInfoTable :: CmmInfoTable -> SDoc pprInfoTable (CmmInfoTable { cit_lbl = lbl, cit_rep = rep , cit_prof = prof_info - , cit_srt = _srt }) - = vcat [ text "label:" <+> ppr lbl - , text "rep:" <> ppr rep + , cit_srt = srt }) + = vcat [ text "label: " <> ppr lbl + , text "rep: " <> ppr rep , case prof_info of NoProfilingInfo -> empty - ProfilingInfo ct cd -> vcat [ text "type:" <+> pprWord8String ct - , text "desc: " <> pprWord8String cd ] ] - -instance Outputable C_SRT where - ppr NoC_SRT = text "_no_srt_" - ppr (C_SRT label off bitmap) - = parens (ppr label <> comma <> ppr off <> comma <> ppr bitmap) + ProfilingInfo ct cd -> + vcat [ text "type: " <> pprWord8String ct + , text "desc: " <> pprWord8String cd ] + , text "srt: " <> ppr srt ] instance Outputable ForeignHint where ppr NoHint = empty diff --git a/compiler/cmm/PprCmmExpr.hs b/compiler/cmm/PprCmmExpr.hs index 77c92407bc..7bf73f1ca6 100644 --- a/compiler/cmm/PprCmmExpr.hs +++ b/compiler/cmm/PprCmmExpr.hs @@ -13,8 +13,8 @@ -- -- As such, this should be a well-defined syntax: we want it to look nice. -- Thus, we try wherever possible to use syntax defined in [1], --- "The C-- Reference Manual", http://www.cminusminus.org/. We differ --- slightly, in some cases. For one, we use I8 .. I64 for types, rather +-- "The C-- Reference Manual", http://www.cs.tufts.edu/~nr/c--/index.html. We +-- differ slightly, in some cases. For one, we use I8 .. I64 for types, rather -- than C--'s bits8 .. bits64. -- -- We try to ensure that all information available in the abstract @@ -38,9 +38,12 @@ module PprCmmExpr ) where +import GhcPrelude + import CmmExpr import Outputable +import DynFlags import Data.Maybe import Numeric ( fromRat ) @@ -196,7 +199,7 @@ pprLit lit = sdocWithDynFlags $ \dflags -> CmmVec lits -> char '<' <> commafy (map pprLit lits) <> char '>' CmmLabel clbl -> ppr clbl CmmLabelOff clbl i -> ppr clbl <> ppr_offset i - CmmLabelDiffOff clbl1 clbl2 i -> ppr clbl1 <> char '-' + CmmLabelDiffOff clbl1 clbl2 i _ -> ppr clbl1 <> char '-' <> ppr clbl2 <> ppr_offset i CmmBlock id -> ppr id CmmHighStackMark -> text "<highSp>" @@ -224,14 +227,18 @@ pprReg r -- We only print the type of the local reg if it isn't wordRep -- pprLocalReg :: LocalReg -> SDoc -pprLocalReg (LocalReg uniq rep) +pprLocalReg (LocalReg uniq rep) = sdocWithDynFlags $ \dflags -> -- = ppr rep <> char '_' <> ppr uniq -- Temp Jan08 - = char '_' <> ppr uniq <> + char '_' <> pprUnique dflags uniq <> (if isWord32 rep -- && not (isGcPtrType rep) -- Temp Jan08 -- sigh then dcolon <> ptr <> ppr rep else dcolon <> ptr <> ppr rep) where + pprUnique dflags unique = + if gopt Opt_SuppressUniques dflags + then text "_locVar_" + else ppr unique ptr = empty --if isGcPtrType rep -- then doubleQuotes (text "ptr") diff --git a/compiler/cmm/SMRep.hs b/compiler/cmm/SMRep.hs index d40af4ff1c..743631527e 100644 --- a/compiler/cmm/SMRep.hs +++ b/compiler/cmm/SMRep.hs @@ -9,7 +9,7 @@ module SMRep ( -- * Words and bytes WordOff, ByteOff, wordsToBytes, bytesToWordsRoundUp, - roundUpToWords, + roundUpToWords, roundUpTo, StgWord, fromStgWord, toStgWord, StgHalfWord, fromStgHalfWord, toStgHalfWord, @@ -47,8 +47,7 @@ module SMRep ( pprWord8String, stringToWord8s ) where -#include "../HsVersions.h" -#include "../includes/MachDeps.h" +import GhcPrelude import BasicTypes( ConTagZ ) import DynFlags @@ -77,8 +76,11 @@ type ByteOff = Int -- | Round up the given byte count to the next byte count that's a -- multiple of the machine's word size. roundUpToWords :: DynFlags -> ByteOff -> ByteOff -roundUpToWords dflags n = - (n + (wORD_SIZE dflags - 1)) .&. (complement (wORD_SIZE dflags - 1)) +roundUpToWords dflags n = roundUpTo n (wORD_SIZE dflags) + +-- | Round up @base@ to a multiple of @size@. +roundUpTo :: ByteOff -> ByteOff -> ByteOff +roundUpTo base size = (base + (size - 1)) .&. (complement (size - 1)) -- | Convert the given number of words to a number of bytes. -- @@ -277,10 +279,10 @@ isConRep (HeapRep _ _ _ Constr{}) = True isConRep _ = False isThunkRep :: SMRep -> Bool -isThunkRep (HeapRep _ _ _ Thunk{}) = True +isThunkRep (HeapRep _ _ _ Thunk) = True isThunkRep (HeapRep _ _ _ ThunkSelector{}) = True -isThunkRep (HeapRep _ _ _ BlackHole{}) = True -isThunkRep (HeapRep _ _ _ IndStatic{}) = True +isThunkRep (HeapRep _ _ _ BlackHole) = True +isThunkRep (HeapRep _ _ _ IndStatic) = True isThunkRep _ = False isFunRep :: SMRep -> Bool @@ -384,10 +386,10 @@ heapClosureSizeW _ _ = panic "SMRep.heapClosureSize" closureTypeHdrSize :: DynFlags -> ClosureTypeInfo -> WordOff closureTypeHdrSize dflags ty = case ty of - Thunk{} -> thunkHdrSize dflags + Thunk -> thunkHdrSize dflags ThunkSelector{} -> thunkHdrSize dflags - BlackHole{} -> thunkHdrSize dflags - IndStatic{} -> thunkHdrSize dflags + BlackHole -> thunkHdrSize dflags + IndStatic -> thunkHdrSize dflags _ -> fixedHdrSizeW dflags -- All thunks use thunkHdrSize, even if they are non-updatable. -- this is because we don't have separate closure types for @@ -446,21 +448,19 @@ rtsClosureType rep HeapRep False 0 2 Fun{} -> FUN_0_2 HeapRep False _ _ Fun{} -> FUN - HeapRep False 1 0 Thunk{} -> THUNK_1_0 - HeapRep False 0 1 Thunk{} -> THUNK_0_1 - HeapRep False 2 0 Thunk{} -> THUNK_2_0 - HeapRep False 1 1 Thunk{} -> THUNK_1_1 - HeapRep False 0 2 Thunk{} -> THUNK_0_2 - HeapRep False _ _ Thunk{} -> THUNK + HeapRep False 1 0 Thunk -> THUNK_1_0 + HeapRep False 0 1 Thunk -> THUNK_0_1 + HeapRep False 2 0 Thunk -> THUNK_2_0 + HeapRep False 1 1 Thunk -> THUNK_1_1 + HeapRep False 0 2 Thunk -> THUNK_0_2 + HeapRep False _ _ Thunk -> THUNK HeapRep False _ _ ThunkSelector{} -> THUNK_SELECTOR - HeapRep True _ _ Fun{} -> FUN_STATIC - HeapRep True _ _ Thunk{} -> THUNK_STATIC - - HeapRep False _ _ BlackHole{} -> BLACKHOLE - - HeapRep False _ _ IndStatic{} -> IND_STATIC + HeapRep True _ _ Fun{} -> FUN_STATIC + HeapRep True _ _ Thunk -> THUNK_STATIC + HeapRep False _ _ BlackHole -> BLACKHOLE + HeapRep False _ _ IndStatic -> IND_STATIC _ -> panic "rtsClosureType" |