summaryrefslogtreecommitdiff
path: root/compiler/cmm
diff options
context:
space:
mode:
authorKavon Farvardin <kavon@farvard.in>2018-09-23 15:29:37 -0500
committerKavon Farvardin <kavon@farvard.in>2018-09-23 15:29:37 -0500
commit84c2ad99582391005b5e873198b15e9e9eb4f78d (patch)
treecaa8c2f2ec7e97fbb4977263c6817c9af5025cf4 /compiler/cmm
parent8ddb47cfcf5776e9a3c55fd37947c8a95e00fa12 (diff)
parente68b439fe5de61b9a2ca51af472185c62ccb8b46 (diff)
downloadhaskell-wip/T13904.tar.gz
update to current master againwip/T13904
Diffstat (limited to 'compiler/cmm')
-rw-r--r--compiler/cmm/Bitmap.hs5
-rw-r--r--compiler/cmm/BlockId.hs12
-rw-r--r--compiler/cmm/BlockId.hs-boot8
-rw-r--r--compiler/cmm/CLabel.hs490
-rw-r--r--compiler/cmm/Cmm.hs37
-rw-r--r--compiler/cmm/CmmBuildInfoTables.hs1130
-rw-r--r--compiler/cmm/CmmCallConv.hs9
-rw-r--r--compiler/cmm/CmmCommonBlockElim.hs63
-rw-r--r--compiler/cmm/CmmContFlowOpt.hs83
-rw-r--r--compiler/cmm/CmmExpr.hs28
-rw-r--r--compiler/cmm/CmmImplementSwitchPlans.hs2
-rw-r--r--compiler/cmm/CmmInfo.hs69
-rw-r--r--compiler/cmm/CmmLayoutStack.hs88
-rw-r--r--compiler/cmm/CmmLex.x14
-rw-r--r--compiler/cmm/CmmLint.hs2
-rw-r--r--compiler/cmm/CmmLive.hs2
-rw-r--r--compiler/cmm/CmmMachOp.hs25
-rw-r--r--compiler/cmm/CmmMonad.hs9
-rw-r--r--compiler/cmm/CmmNode.hs3
-rw-r--r--compiler/cmm/CmmOpt.hs80
-rw-r--r--compiler/cmm/CmmParse.y79
-rw-r--r--compiler/cmm/CmmPipeline.hs23
-rw-r--r--compiler/cmm/CmmProcPoint.hs46
-rw-r--r--compiler/cmm/CmmSink.hs93
-rw-r--r--compiler/cmm/CmmSwitch.hs79
-rw-r--r--compiler/cmm/CmmType.hs7
-rw-r--r--compiler/cmm/CmmUtils.hs80
-rw-r--r--compiler/cmm/Debug.hs100
-rw-r--r--compiler/cmm/Hoopl/Block.hs1
-rw-r--r--compiler/cmm/Hoopl/Collections.hs87
-rw-r--r--compiler/cmm/Hoopl/Dataflow.hs80
-rw-r--r--compiler/cmm/Hoopl/Graph.hs142
-rw-r--r--compiler/cmm/Hoopl/Label.hs45
-rw-r--r--compiler/cmm/Hoopl/Unique.hs91
-rw-r--r--compiler/cmm/MkGraph.hs22
-rw-r--r--compiler/cmm/PprC.hs52
-rw-r--r--compiler/cmm/PprCmm.hs24
-rw-r--r--compiler/cmm/PprCmmDecl.hs26
-rw-r--r--compiler/cmm/PprCmmExpr.hs17
-rw-r--r--compiler/cmm/SMRep.hs46
40 files changed, 2134 insertions, 1165 deletions
diff --git a/compiler/cmm/Bitmap.hs b/compiler/cmm/Bitmap.hs
index a5cff38a98..e6ac15f4a8 100644
--- a/compiler/cmm/Bitmap.hs
+++ b/compiler/cmm/Bitmap.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE CPP, BangPatterns #-}
+{-# LANGUAGE BangPatterns #-}
--
-- (c) The University of Glasgow 2003-2006
@@ -15,8 +15,7 @@ module Bitmap (
seqBitmap,
) where
-#include "HsVersions.h"
-#include "../includes/MachDeps.h"
+import GhcPrelude
import SMRep
import DynFlags
diff --git a/compiler/cmm/BlockId.hs b/compiler/cmm/BlockId.hs
index 8f11ad194b..4f4e0e8c53 100644
--- a/compiler/cmm/BlockId.hs
+++ b/compiler/cmm/BlockId.hs
@@ -8,14 +8,15 @@ module BlockId
, blockLbl, infoTblLbl
) where
+import GhcPrelude
+
import CLabel
import IdInfo
import Name
import Unique
import UniqSupply
-import Hoopl.Label (Label, uniqueToLbl)
-import Hoopl.Unique (intToUnique)
+import Hoopl.Label (Label, mkHooplLabel)
----------------------------------------------------------------
--- Block Ids, their environments, and their sets
@@ -32,13 +33,14 @@ compilation unit in which it appears.
type BlockId = Label
mkBlockId :: Unique -> BlockId
-mkBlockId unique = uniqueToLbl $ intToUnique $ getKey unique
+mkBlockId unique = mkHooplLabel $ getKey unique
newBlockId :: MonadUnique m => m BlockId
newBlockId = mkBlockId <$> getUniqueM
blockLbl :: BlockId -> CLabel
-blockLbl label = mkEntryLabel (mkFCallName (getUnique label) "block") NoCafRefs
+blockLbl label = mkLocalBlockLabel (getUnique label)
infoTblLbl :: BlockId -> CLabel
-infoTblLbl label = mkInfoTableLabel (mkFCallName (getUnique label) "block") NoCafRefs
+infoTblLbl label
+ = mkBlockInfoTableLabel (mkFCallName (getUnique label) "block") NoCafRefs
diff --git a/compiler/cmm/BlockId.hs-boot b/compiler/cmm/BlockId.hs-boot
new file mode 100644
index 0000000000..3ad4141184
--- /dev/null
+++ b/compiler/cmm/BlockId.hs-boot
@@ -0,0 +1,8 @@
+module BlockId (BlockId, mkBlockId) where
+
+import Hoopl.Label (Label)
+import Unique (Unique)
+
+type BlockId = Label
+
+mkBlockId :: Unique -> BlockId
diff --git a/compiler/cmm/CLabel.hs b/compiler/cmm/CLabel.hs
index 62c8037e9c..12c3357e47 100644
--- a/compiler/cmm/CLabel.hs
+++ b/compiler/cmm/CLabel.hs
@@ -15,30 +15,22 @@ module CLabel (
mkClosureLabel,
mkSRTLabel,
- mkTopSRTLabel,
mkInfoTableLabel,
mkEntryLabel,
- mkSlowEntryLabel,
- mkConEntryLabel,
mkRednCountsLabel,
mkConInfoTableLabel,
- mkLargeSRTLabel,
mkApEntryLabel,
mkApInfoTableLabel,
mkClosureTableLabel,
mkBytesLabel,
+ mkLocalBlockLabel,
mkLocalClosureLabel,
mkLocalInfoTableLabel,
- mkLocalEntryLabel,
- mkLocalConEntryLabel,
- mkLocalConInfoTableLabel,
mkLocalClosureTableLabel,
- mkReturnPtLabel,
- mkReturnInfoLabel,
- mkAltLabel,
- mkDefaultLabel,
+ mkBlockInfoTableLabel,
+
mkBitmapLabel,
mkStringLitLabel,
@@ -53,18 +45,18 @@ module CLabel (
mkBHUpdInfoLabel,
mkIndStaticInfoLabel,
mkMainCapabilityLabel,
- mkMAP_FROZEN_infoLabel,
- mkMAP_FROZEN0_infoLabel,
+ mkMAP_FROZEN_CLEAN_infoLabel,
+ mkMAP_FROZEN_DIRTY_infoLabel,
mkMAP_DIRTY_infoLabel,
- mkSMAP_FROZEN_infoLabel,
- mkSMAP_FROZEN0_infoLabel,
+ mkSMAP_FROZEN_CLEAN_infoLabel,
+ mkSMAP_FROZEN_DIRTY_infoLabel,
mkSMAP_DIRTY_infoLabel,
- mkEMPTY_MVAR_infoLabel,
+ mkBadAlignmentLabel,
mkArrWords_infoLabel,
+ mkSRTInfoLabel,
mkTopTickyCtrLabel,
mkCAFBlackHoleInfoTableLabel,
- mkCAFBlackHoleEntryLabel,
mkRtsPrimOpLabel,
mkRtsSlowFastTickyCtrLabel,
@@ -102,21 +94,28 @@ module CLabel (
mkHpcTicksLabel,
+ -- * Predicates
hasCAF,
- needsCDecl, maybeAsmTemp, externallyVisibleCLabel,
+ needsCDecl, maybeLocalBlockLabel, externallyVisibleCLabel,
isMathFun,
isCFunctionLabel, isGcPtrLabel, labelDynamic,
+ isLocalCLabel,
-- * Conversions
- toClosureLbl, toSlowEntryLbl, toEntryLbl, toInfoLbl, toRednCountsLbl, hasHaskellName,
+ toClosureLbl, toSlowEntryLbl, toEntryLbl, toInfoLbl, hasHaskellName,
- pprCLabel
+ pprCLabel,
+ isInfoTableLabel,
+ isConInfoTableLabel
) where
#include "HsVersions.h"
+import GhcPrelude
+
import IdInfo
import BasicTypes
+import {-# SOURCE #-} BlockId (BlockId, mkBlockId)
import Packages
import Module
import Name
@@ -135,8 +134,8 @@ import PprCore ( {- instances -} )
-- -----------------------------------------------------------------------------
-- The CLabel type
-{-
- | CLabel is an abstract type that supports the following operations:
+{- |
+ 'CLabel' is an abstract type that supports the following operations:
- Pretty printing
@@ -155,6 +154,25 @@ import PprCore ( {- instances -} )
more than one declaration for any given label).
- Converting an info table label into an entry label.
+
+ CLabel usage is a bit messy in GHC as they are used in a number of different
+ contexts:
+
+ - By the C-- AST to identify labels
+
+ - By the unregisterised C code generator ("PprC") for naming functions (hence
+ the name 'CLabel')
+
+ - By the native and LLVM code generators to identify labels
+
+ For extra fun, each of these uses a slightly different subset of constructors
+ (e.g. 'AsmTempLabel' and 'AsmTempDerivedLabel' are used only in the NCG and
+ LLVM backends).
+
+ In general, we use 'IdLabel' to represent Haskell things early in the
+ pipeline. However, later optimization passes will often represent blocks they
+ create with 'LocalBlockLabel' where there is no obvious 'Name' to hang off the
+ label.
-}
data CLabel
@@ -177,6 +195,14 @@ data CLabel
| RtsLabel
RtsLabelInfo
+ -- | A label associated with a block. These aren't visible outside of the
+ -- compilation unit in which they are defined. These are generally used to
+ -- name blocks produced by Cmm-to-Cmm passes and the native code generator,
+ -- where we don't have a 'Name' to associate the label to and therefore can't
+ -- use 'IdLabel'.
+ | LocalBlockLabel
+ {-# UNPACK #-} !Unique
+
-- | A 'C' (or otherwise foreign) label.
--
| ForeignLabel
@@ -190,14 +216,13 @@ data CLabel
FunctionOrData
- -- | A family of labels related to a particular case expression.
- | CaseLabel
- {-# UNPACK #-} !Unique -- Unique says which case expression
- CaseLabelInfo
-
+ -- | Local temporary label used for native (or LLVM) code generation; must not
+ -- appear outside of these contexts. Use primarily for debug information
| AsmTempLabel
{-# UNPACK #-} !Unique
+ -- | A label \"derived\" from another 'CLabel' by the addition of a suffix.
+ -- Must not occur outside of the NCG or LLVM code generators.
| AsmTempDerivedLabel
CLabel
FastString -- suffix
@@ -229,10 +254,7 @@ data CLabel
| HpcTicksLabel Module
-- | Static reference table
- | SRTLabel !Unique
-
- -- | Label of an StgLargeSRT
- | LargeSRTLabel
+ | SRTLabel
{-# UNPACK #-} !Unique
-- | A bitmap (function or case return)
@@ -256,14 +278,12 @@ instance Ord CLabel where
compare b1 b2 `thenCmp`
compare c1 c2
compare (RtsLabel a1) (RtsLabel a2) = compare a1 a2
+ compare (LocalBlockLabel u1) (LocalBlockLabel u2) = nonDetCmpUnique u1 u2
compare (ForeignLabel a1 b1 c1 d1) (ForeignLabel a2 b2 c2 d2) =
compare a1 a2 `thenCmp`
compare b1 b2 `thenCmp`
compare c1 c2 `thenCmp`
compare d1 d2
- compare (CaseLabel u1 a1) (CaseLabel u2 a2) =
- nonDetCmpUnique u1 u2 `thenCmp`
- compare a1 a2
compare (AsmTempLabel u1) (AsmTempLabel u2) = nonDetCmpUnique u1 u2
compare (AsmTempDerivedLabel a1 b1) (AsmTempDerivedLabel a2 b2) =
compare a1 a2 `thenCmp`
@@ -284,8 +304,6 @@ instance Ord CLabel where
compare a1 a2
compare (SRTLabel u1) (SRTLabel u2) =
nonDetCmpUnique u1 u2
- compare (LargeSRTLabel u1) (LargeSRTLabel u2) =
- nonDetCmpUnique u1 u2
compare (LargeBitmapLabel u1) (LargeBitmapLabel u2) =
nonDetCmpUnique u1 u2
compare IdLabel{} _ = LT
@@ -294,10 +312,10 @@ instance Ord CLabel where
compare _ CmmLabel{} = GT
compare RtsLabel{} _ = LT
compare _ RtsLabel{} = GT
+ compare LocalBlockLabel{} _ = LT
+ compare _ LocalBlockLabel{} = GT
compare ForeignLabel{} _ = LT
compare _ ForeignLabel{} = GT
- compare CaseLabel{} _ = LT
- compare _ CaseLabel{} = GT
compare AsmTempLabel{} _ = LT
compare _ AsmTempLabel{} = GT
compare AsmTempDerivedLabel{} _ = LT
@@ -318,8 +336,6 @@ instance Ord CLabel where
compare _ HpcTicksLabel{} = GT
compare SRTLabel{} _ = LT
compare _ SRTLabel{} = GT
- compare LargeSRTLabel{} _ = LT
- compare _ LargeSRTLabel{} = GT
-- | Record where a foreign label is stored.
data ForeignLabelSource
@@ -350,7 +366,8 @@ data ForeignLabelSource
pprDebugCLabel :: CLabel -> SDoc
pprDebugCLabel lbl
= case lbl of
- IdLabel{} -> ppr lbl <> (parens $ text "IdLabel")
+ IdLabel _ _ info-> ppr lbl <> (parens $ text "IdLabel"
+ <> whenPprDebug (text ":" <> text (show info)))
CmmLabel pkg _name _info
-> ppr lbl <> (parens $ text "CmmLabel" <+> ppr pkg)
@@ -367,9 +384,6 @@ pprDebugCLabel lbl
data IdLabelInfo
= Closure -- ^ Label for closure
- | SRT -- ^ Static reference table (TODO: could be removed
- -- with the old code generator, but might be needed
- -- when we implement the New SRT Plan)
| InfoTable -- ^ Info tables for closures; always read-only
| Entry -- ^ Entry point
| Slow -- ^ Slow entry point
@@ -386,16 +400,11 @@ data IdLabelInfo
| Bytes -- ^ Content of a string literal. See
-- Note [Bytes label].
+ | BlockInfoTable -- ^ Like LocalInfoTable but for a proc-point block
+ -- instead of a closure entry-point.
+ -- See Note [Proc-point local block entry-point].
- deriving (Eq, Ord)
-
-
-data CaseLabelInfo
- = CaseReturnPt
- | CaseReturnInfo
- | CaseAlt ConTag
- | CaseDefault
- deriving (Eq, Ord)
+ deriving (Eq, Ord, Show)
data RtsLabelInfo
@@ -443,73 +452,88 @@ data DynamicLinkerLabelInfo
-- Constructing IdLabels
-- These are always local:
-mkSlowEntryLabel :: Name -> CafInfo -> CLabel
-mkSlowEntryLabel name c = IdLabel name c Slow
-mkTopSRTLabel :: Unique -> CLabel
-mkTopSRTLabel u = SRTLabel u
+mkSRTLabel :: Unique -> CLabel
+mkSRTLabel u = SRTLabel u
-mkSRTLabel :: Name -> CafInfo -> CLabel
mkRednCountsLabel :: Name -> CLabel
-mkSRTLabel name c = IdLabel name c SRT
mkRednCountsLabel name =
IdLabel name NoCafRefs RednCounts -- Note [ticky for LNE]
-- These have local & (possibly) external variants:
mkLocalClosureLabel :: Name -> CafInfo -> CLabel
mkLocalInfoTableLabel :: Name -> CafInfo -> CLabel
-mkLocalEntryLabel :: Name -> CafInfo -> CLabel
mkLocalClosureTableLabel :: Name -> CafInfo -> CLabel
mkLocalClosureLabel name c = IdLabel name c Closure
mkLocalInfoTableLabel name c = IdLabel name c LocalInfoTable
-mkLocalEntryLabel name c = IdLabel name c LocalEntry
mkLocalClosureTableLabel name c = IdLabel name c ClosureTable
mkClosureLabel :: Name -> CafInfo -> CLabel
mkInfoTableLabel :: Name -> CafInfo -> CLabel
mkEntryLabel :: Name -> CafInfo -> CLabel
mkClosureTableLabel :: Name -> CafInfo -> CLabel
-mkLocalConInfoTableLabel :: CafInfo -> Name -> CLabel
-mkLocalConEntryLabel :: CafInfo -> Name -> CLabel
mkConInfoTableLabel :: Name -> CafInfo -> CLabel
mkBytesLabel :: Name -> CLabel
mkClosureLabel name c = IdLabel name c Closure
mkInfoTableLabel name c = IdLabel name c InfoTable
mkEntryLabel name c = IdLabel name c Entry
mkClosureTableLabel name c = IdLabel name c ClosureTable
-mkLocalConInfoTableLabel c con = IdLabel con c ConInfoTable
-mkLocalConEntryLabel c con = IdLabel con c ConEntry
mkConInfoTableLabel name c = IdLabel name c ConInfoTable
mkBytesLabel name = IdLabel name NoCafRefs Bytes
-mkConEntryLabel :: Name -> CafInfo -> CLabel
-mkConEntryLabel name c = IdLabel name c ConEntry
+mkBlockInfoTableLabel :: Name -> CafInfo -> CLabel
+mkBlockInfoTableLabel name c = IdLabel name c BlockInfoTable
+ -- See Note [Proc-point local block entry-point].
-- Constructing Cmm Labels
mkDirty_MUT_VAR_Label, mkSplitMarkerLabel, mkUpdInfoLabel,
mkBHUpdInfoLabel, mkIndStaticInfoLabel, mkMainCapabilityLabel,
- mkMAP_FROZEN_infoLabel, mkMAP_FROZEN0_infoLabel, mkMAP_DIRTY_infoLabel,
- mkEMPTY_MVAR_infoLabel, mkTopTickyCtrLabel,
- mkCAFBlackHoleInfoTableLabel, mkCAFBlackHoleEntryLabel,
- mkArrWords_infoLabel, mkSMAP_FROZEN_infoLabel, mkSMAP_FROZEN0_infoLabel,
- mkSMAP_DIRTY_infoLabel :: CLabel
+ mkMAP_FROZEN_CLEAN_infoLabel, mkMAP_FROZEN_DIRTY_infoLabel,
+ mkMAP_DIRTY_infoLabel,
+ mkArrWords_infoLabel,
+ mkTopTickyCtrLabel,
+ mkCAFBlackHoleInfoTableLabel,
+ mkSMAP_FROZEN_CLEAN_infoLabel, mkSMAP_FROZEN_DIRTY_infoLabel,
+ mkSMAP_DIRTY_infoLabel, mkBadAlignmentLabel :: CLabel
mkDirty_MUT_VAR_Label = mkForeignLabel (fsLit "dirty_MUT_VAR") Nothing ForeignLabelInExternalPackage IsFunction
mkSplitMarkerLabel = CmmLabel rtsUnitId (fsLit "__stg_split_marker") CmmCode
mkUpdInfoLabel = CmmLabel rtsUnitId (fsLit "stg_upd_frame") CmmInfo
mkBHUpdInfoLabel = CmmLabel rtsUnitId (fsLit "stg_bh_upd_frame" ) CmmInfo
mkIndStaticInfoLabel = CmmLabel rtsUnitId (fsLit "stg_IND_STATIC") CmmInfo
mkMainCapabilityLabel = CmmLabel rtsUnitId (fsLit "MainCapability") CmmData
-mkMAP_FROZEN_infoLabel = CmmLabel rtsUnitId (fsLit "stg_MUT_ARR_PTRS_FROZEN") CmmInfo
-mkMAP_FROZEN0_infoLabel = CmmLabel rtsUnitId (fsLit "stg_MUT_ARR_PTRS_FROZEN0") CmmInfo
+mkMAP_FROZEN_CLEAN_infoLabel = CmmLabel rtsUnitId (fsLit "stg_MUT_ARR_PTRS_FROZEN_CLEAN") CmmInfo
+mkMAP_FROZEN_DIRTY_infoLabel = CmmLabel rtsUnitId (fsLit "stg_MUT_ARR_PTRS_FROZEN_DIRTY") CmmInfo
mkMAP_DIRTY_infoLabel = CmmLabel rtsUnitId (fsLit "stg_MUT_ARR_PTRS_DIRTY") CmmInfo
-mkEMPTY_MVAR_infoLabel = CmmLabel rtsUnitId (fsLit "stg_EMPTY_MVAR") CmmInfo
mkTopTickyCtrLabel = CmmLabel rtsUnitId (fsLit "top_ct") CmmData
mkCAFBlackHoleInfoTableLabel = CmmLabel rtsUnitId (fsLit "stg_CAF_BLACKHOLE") CmmInfo
-mkCAFBlackHoleEntryLabel = CmmLabel rtsUnitId (fsLit "stg_CAF_BLACKHOLE") CmmEntry
mkArrWords_infoLabel = CmmLabel rtsUnitId (fsLit "stg_ARR_WORDS") CmmInfo
-mkSMAP_FROZEN_infoLabel = CmmLabel rtsUnitId (fsLit "stg_SMALL_MUT_ARR_PTRS_FROZEN") CmmInfo
-mkSMAP_FROZEN0_infoLabel = CmmLabel rtsUnitId (fsLit "stg_SMALL_MUT_ARR_PTRS_FROZEN0") CmmInfo
+mkSMAP_FROZEN_CLEAN_infoLabel = CmmLabel rtsUnitId (fsLit "stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN") CmmInfo
+mkSMAP_FROZEN_DIRTY_infoLabel = CmmLabel rtsUnitId (fsLit "stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY") CmmInfo
mkSMAP_DIRTY_infoLabel = CmmLabel rtsUnitId (fsLit "stg_SMALL_MUT_ARR_PTRS_DIRTY") CmmInfo
+mkBadAlignmentLabel = CmmLabel rtsUnitId (fsLit "stg_badAlignment") CmmEntry
+
+mkSRTInfoLabel :: Int -> CLabel
+mkSRTInfoLabel n = CmmLabel rtsUnitId lbl CmmInfo
+ where
+ lbl =
+ case n of
+ 1 -> fsLit "stg_SRT_1"
+ 2 -> fsLit "stg_SRT_2"
+ 3 -> fsLit "stg_SRT_3"
+ 4 -> fsLit "stg_SRT_4"
+ 5 -> fsLit "stg_SRT_5"
+ 6 -> fsLit "stg_SRT_6"
+ 7 -> fsLit "stg_SRT_7"
+ 8 -> fsLit "stg_SRT_8"
+ 9 -> fsLit "stg_SRT_9"
+ 10 -> fsLit "stg_SRT_10"
+ 11 -> fsLit "stg_SRT_11"
+ 12 -> fsLit "stg_SRT_12"
+ 13 -> fsLit "stg_SRT_13"
+ 14 -> fsLit "stg_SRT_14"
+ 15 -> fsLit "stg_SRT_15"
+ 16 -> fsLit "stg_SRT_16"
+ _ -> panic "mkSRTInfoLabel"
-----
mkCmmInfoLabel, mkCmmEntryLabel, mkCmmRetInfoLabel, mkCmmRetLabel,
@@ -524,6 +548,8 @@ mkCmmCodeLabel pkg str = CmmLabel pkg str CmmCode
mkCmmDataLabel pkg str = CmmLabel pkg str CmmData
mkCmmClosureLabel pkg str = CmmLabel pkg str CmmClosure
+mkLocalBlockLabel :: Unique -> CLabel
+mkLocalBlockLabel u = LocalBlockLabel u
-- Constructing RtsLabels
mkRtsPrimOpLabel :: PrimOp -> CLabel
@@ -592,13 +618,24 @@ isSomeRODataLabel (IdLabel _ _ ClosureTable) = True
isSomeRODataLabel (IdLabel _ _ ConInfoTable) = True
isSomeRODataLabel (IdLabel _ _ InfoTable) = True
isSomeRODataLabel (IdLabel _ _ LocalInfoTable) = True
--- static reference tables defined in haskell (.hs)
-isSomeRODataLabel (IdLabel _ _ SRT) = True
-isSomeRODataLabel (SRTLabel _) = True
+isSomeRODataLabel (IdLabel _ _ BlockInfoTable) = True
-- info table defined in cmm (.cmm)
isSomeRODataLabel (CmmLabel _ _ CmmInfo) = True
isSomeRODataLabel _lbl = False
+-- | Whether label is points to some kind of info table
+isInfoTableLabel :: CLabel -> Bool
+isInfoTableLabel (IdLabel _ _ InfoTable) = True
+isInfoTableLabel (IdLabel _ _ LocalInfoTable) = True
+isInfoTableLabel (IdLabel _ _ ConInfoTable) = True
+isInfoTableLabel (IdLabel _ _ BlockInfoTable) = True
+isInfoTableLabel _ = False
+
+-- | Whether label is points to constructor info table
+isConInfoTableLabel :: CLabel -> Bool
+isConInfoTableLabel (IdLabel _ _ ConInfoTable) = True
+isConInfoTableLabel _ = False
+
-- | Get the label size field from a ForeignLabel
foreignLabelStdcallInfo :: CLabel -> Maybe Int
foreignLabelStdcallInfo (ForeignLabel _ info _ _) = info
@@ -606,22 +643,9 @@ foreignLabelStdcallInfo _lbl = Nothing
-- Constructing Large*Labels
-mkLargeSRTLabel :: Unique -> CLabel
mkBitmapLabel :: Unique -> CLabel
-mkLargeSRTLabel uniq = LargeSRTLabel uniq
mkBitmapLabel uniq = LargeBitmapLabel uniq
-
--- Constructin CaseLabels
-mkReturnPtLabel :: Unique -> CLabel
-mkReturnInfoLabel :: Unique -> CLabel
-mkAltLabel :: Unique -> ConTag -> CLabel
-mkDefaultLabel :: Unique -> CLabel
-mkReturnPtLabel uniq = CaseLabel uniq CaseReturnPt
-mkReturnInfoLabel uniq = CaseLabel uniq CaseReturnInfo
-mkAltLabel uniq tag = CaseLabel uniq (CaseAlt tag)
-mkDefaultLabel uniq = CaseLabel uniq CaseDefault
-
-- Constructing Cost Center Labels
mkCCLabel :: CostCentre -> CLabel
mkCCSLabel :: CostCentreStack -> CLabel
@@ -682,31 +706,29 @@ toClosureLbl (CmmLabel m str _) = CmmLabel m str CmmClosure
toClosureLbl l = pprPanic "toClosureLbl" (ppr l)
toSlowEntryLbl :: CLabel -> CLabel
+toSlowEntryLbl (IdLabel n _ BlockInfoTable)
+ = pprPanic "toSlowEntryLbl" (ppr n)
toSlowEntryLbl (IdLabel n c _) = IdLabel n c Slow
toSlowEntryLbl l = pprPanic "toSlowEntryLbl" (ppr l)
toEntryLbl :: CLabel -> CLabel
toEntryLbl (IdLabel n c LocalInfoTable) = IdLabel n c LocalEntry
toEntryLbl (IdLabel n c ConInfoTable) = IdLabel n c ConEntry
+toEntryLbl (IdLabel n _ BlockInfoTable) = mkLocalBlockLabel (nameUnique n)
+ -- See Note [Proc-point local block entry-point].
toEntryLbl (IdLabel n c _) = IdLabel n c Entry
-toEntryLbl (CaseLabel n CaseReturnInfo) = CaseLabel n CaseReturnPt
toEntryLbl (CmmLabel m str CmmInfo) = CmmLabel m str CmmEntry
toEntryLbl (CmmLabel m str CmmRetInfo) = CmmLabel m str CmmRet
toEntryLbl l = pprPanic "toEntryLbl" (ppr l)
toInfoLbl :: CLabel -> CLabel
-toInfoLbl (IdLabel n c Entry) = IdLabel n c InfoTable
toInfoLbl (IdLabel n c LocalEntry) = IdLabel n c LocalInfoTable
toInfoLbl (IdLabel n c ConEntry) = IdLabel n c ConInfoTable
toInfoLbl (IdLabel n c _) = IdLabel n c InfoTable
-toInfoLbl (CaseLabel n CaseReturnPt) = CaseLabel n CaseReturnInfo
toInfoLbl (CmmLabel m str CmmEntry) = CmmLabel m str CmmInfo
toInfoLbl (CmmLabel m str CmmRet) = CmmLabel m str CmmRetInfo
toInfoLbl l = pprPanic "CLabel.toInfoLbl" (ppr l)
-toRednCountsLbl :: CLabel -> Maybe CLabel
-toRednCountsLbl = fmap mkRednCountsLabel . hasHaskellName
-
hasHaskellName :: CLabel -> Maybe Name
hasHaskellName (IdLabel n _ _) = Just n
hasHaskellName _ = Nothing
@@ -747,10 +769,9 @@ needsCDecl :: CLabel -> Bool
-- don't bother declaring Bitmap labels, we always make sure
-- they are defined before use.
needsCDecl (SRTLabel _) = True
-needsCDecl (LargeSRTLabel _) = False
needsCDecl (LargeBitmapLabel _) = False
needsCDecl (IdLabel _ _ _) = True
-needsCDecl (CaseLabel _ _) = True
+needsCDecl (LocalBlockLabel _) = True
needsCDecl (StringLitLabel _) = False
needsCDecl (AsmTempLabel _) = False
@@ -773,11 +794,11 @@ needsCDecl (DynamicLinkerLabel {}) = panic "needsCDecl DynamicLinkerLabel"
needsCDecl PicBaseLabel = panic "needsCDecl PicBaseLabel"
needsCDecl (DeadStripPreventer {}) = panic "needsCDecl DeadStripPreventer"
--- | If a label is a local temporary used for native code generation
--- then return just its unique, otherwise nothing.
-maybeAsmTemp :: CLabel -> Maybe Unique
-maybeAsmTemp (AsmTempLabel uq) = Just uq
-maybeAsmTemp _ = Nothing
+-- | If a label is a local block label then return just its 'BlockId', otherwise
+-- 'Nothing'.
+maybeLocalBlockLabel :: CLabel -> Maybe BlockId
+maybeLocalBlockLabel (LocalBlockLabel uq) = Just $ mkBlockId uq
+maybeLocalBlockLabel _ = Nothing
-- | Check whether a label corresponds to a C function that has
@@ -880,11 +901,11 @@ math_funs = mkUniqSet [
-- externally visible if it has to be declared as exported
-- in the .o file's symbol table; that is, made non-static.
externallyVisibleCLabel :: CLabel -> Bool -- not C "static"
-externallyVisibleCLabel (CaseLabel _ _) = False
externallyVisibleCLabel (StringLitLabel _) = False
externallyVisibleCLabel (AsmTempLabel _) = False
externallyVisibleCLabel (AsmTempDerivedLabel _ _)= False
externallyVisibleCLabel (RtsLabel _) = True
+externallyVisibleCLabel (LocalBlockLabel _) = False
externallyVisibleCLabel (CmmLabel _ _ _) = True
externallyVisibleCLabel (ForeignLabel{}) = True
externallyVisibleCLabel (IdLabel name _ info) = isExternalName name && externallyVisibleIdLabel info
@@ -894,14 +915,13 @@ externallyVisibleCLabel (DynamicLinkerLabel _ _) = False
externallyVisibleCLabel (HpcTicksLabel _) = True
externallyVisibleCLabel (LargeBitmapLabel _) = False
externallyVisibleCLabel (SRTLabel _) = False
-externallyVisibleCLabel (LargeSRTLabel _) = False
externallyVisibleCLabel (PicBaseLabel {}) = panic "externallyVisibleCLabel PicBaseLabel"
externallyVisibleCLabel (DeadStripPreventer {}) = panic "externallyVisibleCLabel DeadStripPreventer"
externallyVisibleIdLabel :: IdLabelInfo -> Bool
-externallyVisibleIdLabel SRT = False
externallyVisibleIdLabel LocalInfoTable = False
externallyVisibleIdLabel LocalEntry = False
+externallyVisibleIdLabel BlockInfoTable = False
externallyVisibleIdLabel _ = True
-- -----------------------------------------------------------------------------
@@ -928,6 +948,7 @@ isGcPtrLabel lbl = case labelType lbl of
-- | Work out the general type of data at the address of this label
-- whether it be code, data, or static GC object.
labelType :: CLabel -> CLabelType
+labelType (IdLabel _ _ info) = idInfoLabelType info
labelType (CmmLabel _ _ CmmData) = DataLabel
labelType (CmmLabel _ _ CmmClosure) = GcPtrLabel
labelType (CmmLabel _ _ CmmCode) = CodeLabel
@@ -939,20 +960,28 @@ labelType (CmmLabel _ _ CmmRet) = CodeLabel
labelType (RtsLabel (RtsSelectorInfoTable _ _)) = DataLabel
labelType (RtsLabel (RtsApInfoTable _ _)) = DataLabel
labelType (RtsLabel (RtsApFast _)) = CodeLabel
-labelType (CaseLabel _ CaseReturnInfo) = DataLabel
-labelType (CaseLabel _ _) = CodeLabel
+labelType (RtsLabel _) = DataLabel
+labelType (LocalBlockLabel _) = CodeLabel
labelType (SRTLabel _) = DataLabel
-labelType (LargeSRTLabel _) = DataLabel
-labelType (LargeBitmapLabel _) = DataLabel
labelType (ForeignLabel _ _ _ IsFunction) = CodeLabel
-labelType (IdLabel _ _ info) = idInfoLabelType info
-labelType _ = DataLabel
+labelType (ForeignLabel _ _ _ IsData) = DataLabel
+labelType (AsmTempLabel _) = panic "labelType(AsmTempLabel)"
+labelType (AsmTempDerivedLabel _ _) = panic "labelType(AsmTempDerivedLabel)"
+labelType (StringLitLabel _) = DataLabel
+labelType (CC_Label _) = DataLabel
+labelType (CCS_Label _) = DataLabel
+labelType (DynamicLinkerLabel _ _) = DataLabel -- Is this right?
+labelType PicBaseLabel = DataLabel
+labelType (DeadStripPreventer _) = DataLabel
+labelType (HpcTicksLabel _) = DataLabel
+labelType (LargeBitmapLabel _) = DataLabel
idInfoLabelType :: IdLabelInfo -> CLabelType
idInfoLabelType info =
case info of
InfoTable -> DataLabel
LocalInfoTable -> DataLabel
+ BlockInfoTable -> DataLabel
Closure -> GcPtrLabel
ConInfoTable -> DataLabel
ClosureTable -> DataLabel
@@ -962,28 +991,48 @@ idInfoLabelType info =
-- -----------------------------------------------------------------------------
--- Does a CLabel need dynamic linkage?
+-- | Is a 'CLabel' defined in the current module being compiled?
+--
+-- Sometimes we can optimise references within a compilation unit in ways that
+-- we couldn't for inter-module references. This provides a conservative
+-- estimate of whether a 'CLabel' lives in the current module.
+isLocalCLabel :: Module -> CLabel -> Bool
+isLocalCLabel this_mod lbl =
+ case lbl of
+ IdLabel name _ _
+ | isInternalName name -> True
+ | otherwise -> nameModule name == this_mod
+ LocalBlockLabel _ -> True
+ _ -> False
+
+-- -----------------------------------------------------------------------------
+
+-- | Does a 'CLabel' need dynamic linkage?
+--
-- When referring to data in code, we need to know whether
-- that data resides in a DLL or not. [Win32 only.]
-- @labelDynamic@ returns @True@ if the label is located
-- in a DLL, be it a data reference or not.
-
labelDynamic :: DynFlags -> Module -> CLabel -> Bool
labelDynamic dflags this_mod lbl =
case lbl of
-- is the RTS in a DLL or not?
- RtsLabel _ -> (WayDyn `elem` ways dflags) && (this_pkg /= rtsUnitId)
+ RtsLabel _ ->
+ (gopt Opt_ExternalDynamicRefs dflags) && (this_pkg /= rtsUnitId)
- IdLabel n _ _ -> isDllName dflags this_mod n
+ IdLabel n _ _ ->
+ isDllName dflags this_mod n
-- When compiling in the "dyn" way, each package is to be linked into
-- its own shared library.
CmmLabel pkg _ _
| os == OSMinGW32 ->
- (WayDyn `elem` ways dflags) && (this_pkg /= pkg)
+ (gopt Opt_ExternalDynamicRefs dflags) && (this_pkg /= pkg)
| otherwise ->
- True
+ gopt Opt_ExternalDynamicRefs dflags
+
+ LocalBlockLabel _ -> False
ForeignLabel _ _ source _ ->
if os == OSMinGW32
@@ -999,14 +1048,15 @@ labelDynamic dflags this_mod lbl =
-- When compiling in the "dyn" way, each package is to be
-- linked into its own DLL.
ForeignLabelInPackage pkgId ->
- (WayDyn `elem` ways dflags) && (this_pkg /= pkgId)
+ (gopt Opt_ExternalDynamicRefs dflags) && (this_pkg /= pkgId)
else -- On Mac OS X and on ELF platforms, false positives are OK,
-- so we claim that all foreign imports come from dynamic
-- libraries
True
- HpcTicksLabel m -> (WayDyn `elem` ways dflags) && this_mod /= m
+ HpcTicksLabel m ->
+ (gopt Opt_ExternalDynamicRefs dflags) && this_mod /= m
-- Note that DynamicLinkerLabels do NOT require dynamic linking themselves.
_ -> False
@@ -1028,7 +1078,6 @@ internal names. <type> is one of the following:
info Info table
srt Static reference table
- srtd Static reference table descriptor
entry Entry code (function, closure)
slow Slow entry code (if any)
ret Direct return address
@@ -1082,6 +1131,18 @@ Note [Bytes label]
~~~~~~~~~~~~~~~~~~
For a top-level string literal 'foo', we have just one symbol 'foo_bytes', which
points to a static data block containing the content of the literal.
+
+Note [Proc-point local block entry-points]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+A label for a proc-point local block entry-point has no "_entry" suffix. With
+`infoTblLbl` we derive an info table label from a proc-point block ID. If
+we convert such an info table label into an entry label we must produce
+the label without an "_entry" suffix. So an info table label records
+the fact that it was derived from a block ID in `IdLabelInfo` as
+`BlockInfoTable`.
+
+The info table label and the local block label are both local labels
+and are not externally visible.
-}
instance Outputable CLabel where
@@ -1089,19 +1150,19 @@ instance Outputable CLabel where
pprCLabel :: Platform -> CLabel -> SDoc
+pprCLabel _ (LocalBlockLabel u)
+ = tempLabelPrefixOrUnderscore <> pprUniqueAlways u
+
pprCLabel platform (AsmTempLabel u)
- | cGhcWithNativeCodeGen == "YES"
- = getPprStyle $ \ sty ->
- if asmStyle sty then
- ptext (asmTempLabelPrefix platform) <> pprUniqueAlways u
- else
- char '_' <> pprUniqueAlways u
+ | not (platformUnregisterised platform)
+ = tempLabelPrefixOrUnderscore <> pprUniqueAlways u
pprCLabel platform (AsmTempDerivedLabel l suf)
| cGhcWithNativeCodeGen == "YES"
= ptext (asmTempLabelPrefix platform)
- <> case l of AsmTempLabel u -> pprUniqueAlways u
- _other -> pprCLabel platform l
+ <> case l of AsmTempLabel u -> pprUniqueAlways u
+ LocalBlockLabel u -> pprUniqueAlways u
+ _other -> pprCLabel platform l
<> ftext suf
pprCLabel platform (DynamicLinkerLabel info lbl)
@@ -1114,7 +1175,15 @@ pprCLabel _ PicBaseLabel
pprCLabel platform (DeadStripPreventer lbl)
| cGhcWithNativeCodeGen == "YES"
- = pprCLabel platform lbl <> text "_dsp"
+ =
+ {-
+ `lbl` can be temp one but we need to ensure that dsp label will stay
+ in the final binary so we prepend non-temp prefix ("dsp_") and
+ optional `_` (underscore) because this is how you mark non-temp symbols
+ on some platforms (Darwin)
+ -}
+ maybe_underscore $ text "dsp_"
+ <> pprCLabel platform lbl <> text "_dsp"
pprCLabel _ (StringLitLabel u)
| cGhcWithNativeCodeGen == "YES"
@@ -1144,29 +1213,24 @@ pprCLbl :: CLabel -> SDoc
pprCLbl (StringLitLabel u)
= pprUniqueAlways u <> text "_str"
-pprCLbl (CaseLabel u CaseReturnPt)
- = hcat [pprUniqueAlways u, text "_ret"]
-pprCLbl (CaseLabel u CaseReturnInfo)
- = hcat [pprUniqueAlways u, text "_info"]
-pprCLbl (CaseLabel u (CaseAlt tag))
- = hcat [pprUniqueAlways u, pp_cSEP, int tag, text "_alt"]
-pprCLbl (CaseLabel u CaseDefault)
- = hcat [pprUniqueAlways u, text "_dflt"]
-
pprCLbl (SRTLabel u)
- = pprUniqueAlways u <> pp_cSEP <> text "srt"
+ = tempLabelPrefixOrUnderscore <> pprUniqueAlways u <> pp_cSEP <> text "srt"
-pprCLbl (LargeSRTLabel u) = pprUniqueAlways u <> pp_cSEP <> text "srtd"
-pprCLbl (LargeBitmapLabel u) = text "b" <> pprUniqueAlways u <> pp_cSEP <> text "btm"
+pprCLbl (LargeBitmapLabel u) =
+ tempLabelPrefixOrUnderscore
+ <> char 'b' <> pprUniqueAlways u <> pp_cSEP <> text "btm"
-- Some bitsmaps for tuple constructors have a numeric tag (e.g. '7')
-- until that gets resolved we'll just force them to start
--- with a letter so the label will be legal assmbly code.
+-- with a letter so the label will be legal assembly code.
pprCLbl (CmmLabel _ str CmmCode) = ftext str
pprCLbl (CmmLabel _ str CmmData) = ftext str
pprCLbl (CmmLabel _ str CmmPrimCall) = ftext str
+pprCLbl (LocalBlockLabel u) =
+ tempLabelPrefixOrUnderscore <> text "blk_" <> pprUniqueAlways u
+
pprCLbl (RtsLabel (RtsApFast str)) = ftext str <> text "_fast"
pprCLbl (RtsLabel (RtsSelectorInfoTable upd_reqd offset))
@@ -1229,7 +1293,8 @@ pprCLbl (RtsLabel (RtsSlowFastTickyCtr pat))
pprCLbl (ForeignLabel str _ _ _)
= ftext str
-pprCLbl (IdLabel name _cafs flavor) = ppr name <> ppIdFlavor flavor
+pprCLbl (IdLabel name _cafs flavor) =
+ internalNamePrefix name <> ppr name <> ppIdFlavor flavor
pprCLbl (CC_Label cc) = ppr cc
pprCLbl (CCS_Label ccs) = ppr ccs
@@ -1247,7 +1312,6 @@ ppIdFlavor :: IdLabelInfo -> SDoc
ppIdFlavor x = pp_cSEP <>
(case x of
Closure -> text "closure"
- SRT -> text "srt"
InfoTable -> text "info"
LocalInfoTable -> text "info"
Entry -> text "entry"
@@ -1258,6 +1322,7 @@ ppIdFlavor x = pp_cSEP <>
ConInfoTable -> text "con_info"
ClosureTable -> text "closure_tbl"
Bytes -> text "bytes"
+ BlockInfoTable -> text "info"
)
@@ -1272,6 +1337,24 @@ instance Outputable ForeignLabelSource where
ForeignLabelInThisPackage -> parens $ text "this package"
ForeignLabelInExternalPackage -> parens $ text "external package"
+internalNamePrefix :: Name -> SDoc
+internalNamePrefix name = getPprStyle $ \ sty ->
+ if codeStyle sty && isRandomGenerated then
+ sdocWithPlatform $ \platform ->
+ ptext (asmTempLabelPrefix platform)
+ else
+ empty
+ where
+ isRandomGenerated = not $ isExternalName name
+
+tempLabelPrefixOrUnderscore :: SDoc
+tempLabelPrefixOrUnderscore = sdocWithPlatform $ \platform ->
+ getPprStyle $ \ sty ->
+ if asmStyle sty then
+ ptext (asmTempLabelPrefix platform)
+ else
+ char '_'
+
-- -----------------------------------------------------------------------------
-- Machine-dependent knowledge about labels.
@@ -1285,53 +1368,62 @@ asmTempLabelPrefix platform = case platformOS platform of
_ -> sLit ".L"
pprDynamicLinkerAsmLabel :: Platform -> DynamicLinkerLabelInfo -> CLabel -> SDoc
-pprDynamicLinkerAsmLabel platform dllInfo lbl
- = if platformOS platform == OSDarwin
- then if platformArch platform == ArchX86_64
- then case dllInfo of
- CodeStub -> char 'L' <> ppr lbl <> text "$stub"
- SymbolPtr -> char 'L' <> ppr lbl <> text "$non_lazy_ptr"
- GotSymbolPtr -> ppr lbl <> text "@GOTPCREL"
- GotSymbolOffset -> ppr lbl
- else case dllInfo of
- CodeStub -> char 'L' <> ppr lbl <> text "$stub"
- SymbolPtr -> char 'L' <> ppr lbl <> text "$non_lazy_ptr"
- _ -> panic "pprDynamicLinkerAsmLabel"
-
- else if platformOS platform == OSAIX
- then case dllInfo of
- SymbolPtr -> text "LC.." <> ppr lbl -- GCC's naming convention
- _ -> panic "pprDynamicLinkerAsmLabel"
-
- else if osElfTarget (platformOS platform)
- then if platformArch platform == ArchPPC
- then case dllInfo of
- CodeStub -> -- See Note [.LCTOC1 in PPC PIC code]
- ppr lbl <> text "+32768@plt"
- SymbolPtr -> text ".LC_" <> ppr lbl
- _ -> panic "pprDynamicLinkerAsmLabel"
- else if platformArch platform == ArchX86_64
- then case dllInfo of
- CodeStub -> ppr lbl <> text "@plt"
- GotSymbolPtr -> ppr lbl <> text "@gotpcrel"
- GotSymbolOffset -> ppr lbl
- SymbolPtr -> text ".LC_" <> ppr lbl
- else if platformArch platform == ArchPPC_64 ELF_V1
- || platformArch platform == ArchPPC_64 ELF_V2
- then case dllInfo of
- GotSymbolPtr -> text ".LC_" <> ppr lbl
- <> text "@toc"
- GotSymbolOffset -> ppr lbl
- SymbolPtr -> text ".LC_" <> ppr lbl
- _ -> panic "pprDynamicLinkerAsmLabel"
- else case dllInfo of
- CodeStub -> ppr lbl <> text "@plt"
- SymbolPtr -> text ".LC_" <> ppr lbl
- GotSymbolPtr -> ppr lbl <> text "@got"
- GotSymbolOffset -> ppr lbl <> text "@gotoff"
- else if platformOS platform == OSMinGW32
- then case dllInfo of
- SymbolPtr -> text "__imp_" <> ppr lbl
- _ -> panic "pprDynamicLinkerAsmLabel"
- else panic "pprDynamicLinkerAsmLabel"
-
+pprDynamicLinkerAsmLabel platform dllInfo lbl =
+ case platformOS platform of
+ OSDarwin
+ | platformArch platform == ArchX86_64 ->
+ case dllInfo of
+ CodeStub -> char 'L' <> ppr lbl <> text "$stub"
+ SymbolPtr -> char 'L' <> ppr lbl <> text "$non_lazy_ptr"
+ GotSymbolPtr -> ppr lbl <> text "@GOTPCREL"
+ GotSymbolOffset -> ppr lbl
+ | otherwise ->
+ case dllInfo of
+ CodeStub -> char 'L' <> ppr lbl <> text "$stub"
+ SymbolPtr -> char 'L' <> ppr lbl <> text "$non_lazy_ptr"
+ _ -> panic "pprDynamicLinkerAsmLabel"
+
+ OSAIX ->
+ case dllInfo of
+ SymbolPtr -> text "LC.." <> ppr lbl -- GCC's naming convention
+ _ -> panic "pprDynamicLinkerAsmLabel"
+
+ _ | osElfTarget (platformOS platform) -> elfLabel
+
+ OSMinGW32 ->
+ case dllInfo of
+ SymbolPtr -> text "__imp_" <> ppr lbl
+ _ -> panic "pprDynamicLinkerAsmLabel"
+
+ _ -> panic "pprDynamicLinkerAsmLabel"
+ where
+ elfLabel
+ | platformArch platform == ArchPPC
+ = case dllInfo of
+ CodeStub -> -- See Note [.LCTOC1 in PPC PIC code]
+ ppr lbl <> text "+32768@plt"
+ SymbolPtr -> text ".LC_" <> ppr lbl
+ _ -> panic "pprDynamicLinkerAsmLabel"
+
+ | platformArch platform == ArchX86_64
+ = case dllInfo of
+ CodeStub -> ppr lbl <> text "@plt"
+ GotSymbolPtr -> ppr lbl <> text "@gotpcrel"
+ GotSymbolOffset -> ppr lbl
+ SymbolPtr -> text ".LC_" <> ppr lbl
+
+ | platformArch platform == ArchPPC_64 ELF_V1
+ || platformArch platform == ArchPPC_64 ELF_V2
+ = case dllInfo of
+ GotSymbolPtr -> text ".LC_" <> ppr lbl
+ <> text "@toc"
+ GotSymbolOffset -> ppr lbl
+ SymbolPtr -> text ".LC_" <> ppr lbl
+ _ -> panic "pprDynamicLinkerAsmLabel"
+
+ | otherwise
+ = case dllInfo of
+ CodeStub -> ppr lbl <> text "@plt"
+ SymbolPtr -> text ".LC_" <> ppr lbl
+ GotSymbolPtr -> ppr lbl <> text "@got"
+ GotSymbolOffset -> ppr lbl <> text "@gotoff"
diff --git a/compiler/cmm/Cmm.hs b/compiler/cmm/Cmm.hs
index dbd54236f5..eb34618e38 100644
--- a/compiler/cmm/Cmm.hs
+++ b/compiler/cmm/Cmm.hs
@@ -1,5 +1,5 @@
-- Cmm representations using Hoopl's Graph CmmNode e x.
-{-# LANGUAGE CPP, GADTs #-}
+{-# LANGUAGE GADTs #-}
module Cmm (
-- * Cmm top-level datatypes
@@ -18,7 +18,6 @@ module Cmm (
-- * Info Tables
CmmTopInfo(..), CmmStackInfo(..), CmmInfoTable(..), topInfoTable,
ClosureTypeInfo(..),
- C_SRT(..), needsSRT,
ProfilingInfo(..), ConstrDescription,
-- * Statements, expressions and types
@@ -26,6 +25,10 @@ module Cmm (
module CmmExpr,
) where
+import GhcPrelude
+
+import Id
+import CostCentre
import CLabel
import BlockId
import CmmNode
@@ -39,8 +42,6 @@ import Outputable
import Data.Word ( Word8 )
-#include "HsVersions.h"
-
-----------------------------------------------------------------------------
-- Cmm, GenCmm
-----------------------------------------------------------------------------
@@ -138,24 +139,28 @@ data CmmInfoTable
cit_lbl :: CLabel, -- Info table label
cit_rep :: SMRep,
cit_prof :: ProfilingInfo,
- cit_srt :: C_SRT
+ cit_srt :: Maybe CLabel, -- empty, or a closure address
+ cit_clo :: Maybe (Id, CostCentreStack)
+ -- Just (id,ccs) <=> build a static closure later
+ -- Nothing <=> don't build a static closure
+ --
+ -- Static closures for FUNs and THUNKs are *not* generated by
+ -- the code generator, because we might want to add SRT
+ -- entries to them later (for FUNs at least; THUNKs are
+ -- treated the same for consistency). See Note [SRTs] in
+ -- CmmBuildInfoTables, in particular the [FUN] optimisation.
+ --
+ -- This is strictly speaking not a part of the info table that
+ -- will be finally generated, but it's the only convenient
+ -- place to convey this information from the code generator to
+ -- where we build the static closures in
+ -- CmmBuildInfoTables.doSRTs.
}
data ProfilingInfo
= NoProfilingInfo
| ProfilingInfo [Word8] [Word8] -- closure_type, closure_desc
--- C_SRT is what StgSyn.SRT gets translated to...
--- we add a label for the table, and expect only the 'offset/length' form
-
-data C_SRT = NoC_SRT
- | C_SRT !CLabel !WordOff !StgHalfWord {-bitmap or escape-}
- deriving (Eq)
-
-needsSRT :: C_SRT -> Bool
-needsSRT NoC_SRT = False
-needsSRT (C_SRT _ _ _) = True
-
-----------------------------------------------------------------------------
-- Static Data
-----------------------------------------------------------------------------
diff --git a/compiler/cmm/CmmBuildInfoTables.hs b/compiler/cmm/CmmBuildInfoTables.hs
index 5dd8ee4ef2..a8f89a1a9c 100644
--- a/compiler/cmm/CmmBuildInfoTables.hs
+++ b/compiler/cmm/CmmBuildInfoTables.hs
@@ -1,49 +1,140 @@
-{-# LANGUAGE BangPatterns, CPP, GADTs #-}
+{-# LANGUAGE GADTs, BangPatterns, RecordWildCards,
+ GeneralizedNewtypeDeriving, NondecreasingIndentation, TupleSections #-}
module CmmBuildInfoTables
- ( CAFSet, CAFEnv, cafAnal
- , doSRTs, TopSRT, emptySRT, isEmptySRT, srtToData )
-where
+ ( CAFSet, CAFEnv, cafAnal
+ , doSRTs, ModuleSRTInfo, emptySRT
+ ) where
-#include "HsVersions.h"
+import GhcPrelude hiding (succ)
+import Id
+import BlockId
import Hoopl.Block
import Hoopl.Graph
import Hoopl.Label
import Hoopl.Collections
import Hoopl.Dataflow
+import Module
+import Platform
import Digraph
-import Bitmap
import CLabel
import PprCmmDecl ()
import Cmm
import CmmUtils
-import CmmInfo
-import Data.List
import DynFlags
import Maybes
import Outputable
import SMRep
import UniqSupply
-import Util
+import CostCentre
+import StgCmmHeap
import PprCmm()
+import Control.Monad
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Set (Set)
import qualified Data.Set as Set
-import Control.Monad
+import Data.Tuple
+import Control.Monad.Trans.State
+import Control.Monad.Trans.Class
+
+
+{- Note [SRTs]
+
+SRTs are the mechanism by which the garbage collector can determine
+the live CAFs in the program.
+
+Representation
+^^^^^^^^^^^^^^
+
++------+
+| info |
+| | +-----+---+---+---+
+| -------->|SRT_2| | | | | 0 |
+|------| +-----+-|-+-|-+---+
+| | | |
+| code | | |
+| | v v
+
+An SRT is simply an object in the program's data segment. It has the
+same representation as a static constructor. There are 16
+pre-compiled SRT info tables: stg_SRT_1_info, .. stg_SRT_16_info,
+representing SRT objects with 1-16 pointers, respectively.
+
+The entries of an SRT object point to static closures, which are either
+- FUN_STATIC, THUNK_STATIC or CONSTR
+- Another SRT (actually just a CONSTR)
+
+The final field of the SRT is the static link field, used by the
+garbage collector to chain together static closures that it visits and
+to determine whether a static closure has been visited or not. (see
+Note [STATIC_LINK fields])
+
+By traversing the transitive closure of an SRT, the GC will reach all
+of the CAFs that are reachable from the code associated with this SRT.
+
+If we need to create an SRT with more than 16 entries, we build a
+chain of SRT objects with all but the last having 16 entries.
+
++-----+---+- -+---+---+
+|SRT16| | | | | | 0 |
++-----+-|-+- -+-|-+---+
+ | |
+ v v
+ +----+---+---+---+
+ |SRT2| | | | | 0 |
+ +----+-|-+-|-+---+
+ | |
+ | |
+ v v
-import qualified Prelude as P
-import Prelude hiding (succ)
+Referring to an SRT from the info table
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-foldSet :: (a -> b -> b) -> b -> Set a -> b
-foldSet = Set.foldr
+The following things have SRTs:
------------------------------------------------------------------------
--- SRTs
+- Static functions (FUN)
+- Static thunks (THUNK), ie. CAFs
+- Continuations (RET_SMALL, etc.)
-{- EXAMPLE
+In each case, the info table points to the SRT.
+
+- info->srt is zero if there's no SRT, otherwise:
+- info->srt == 1 and info->f.srt_offset points to the SRT
+
+e.g. for a FUN with an SRT:
+
+StgFunInfoTable +------+
+ info->f.srt_offset | ------------> offset to SRT object
+StgStdInfoTable +------+
+ info->layout.ptrs | ... |
+ info->layout.nptrs | ... |
+ info->srt | 1 |
+ info->type | ... |
+ |------|
+
+On x86_64, we optimise the info table representation further. The
+offset to the SRT can be stored in 32 bits (all code lives within a
+2GB region in x86_64's small memory model), so we can save a word in
+the info table by storing the srt_offset in the srt field, which is
+half a word.
+
+On x86_64 with TABLES_NEXT_TO_CODE (except on MachO, due to #15169):
+
+- info->srt is zero if there's no SRT, otherwise:
+- info->srt is an offset from the info pointer to the SRT object
+
+StgStdInfoTable +------+
+ info->layout.ptrs | |
+ info->layout.nptrs | |
+ info->srt | ------------> offset to SRT object
+ |------|
+
+
+EXAMPLE
+^^^^^^^
f = \x. ... g ...
where
@@ -65,29 +156,254 @@ CmmDecls. e.g. for f_entry, we might end up with
where f1_ret is a return point, and f2_proc is a proc-point. We have
a CAFSet for each of these CmmDecls, let's suppose they are
- [ f_entry{g_closure}, f1_ret{g_closure}, f2_proc{} ]
- [ g_entry{h_closure, c1_closure} ]
+ [ f_entry{g_info}, f1_ret{g_info}, f2_proc{} ]
+ [ g_entry{h_info, c1_closure} ]
[ h_entry{c2_closure} ]
-Now, note that we cannot use g_closure and h_closure in an SRT,
-because there are no static closures corresponding to these functions.
-So we have to flatten out the structure, replacing g_closure and
-h_closure with their contents:
+Next, we make an SRT for each of these functions:
- [ f_entry{c2_closure, c1_closure}, f1_ret{c2_closure,c1_closure}, f2_proc{} ]
- [ g_entry{c2_closure, c1_closure} ]
- [ h_entry{c2_closure} ]
+ f_srt : [g_info]
+ g_srt : [h_info, c1_closure]
+ h_srt : [c2_closure]
+
+Now, for g_info and h_info, we want to refer to the SRTs for g and h
+respectively, which we'll label g_srt and h_srt:
+
+ f_srt : [g_srt]
+ g_srt : [h_srt, c1_closure]
+ h_srt : [c2_closure]
+
+Now, when an SRT has a single entry, we don't actually generate an SRT
+closure for it, instead we just replace references to it with its
+single element. So, since h_srt == c2_closure, we have
+
+ f_srt : [g_srt]
+ g_srt : [c2_closure, c1_closure]
+ h_srt : [c2_closure]
+
+and the only SRT closure we generate is
-This is what flattenCAFSets is doing.
+ g_srt = SRT_2 [c2_closure, c1_closure]
+
+Optimisations
+^^^^^^^^^^^^^
+
+To reduce the code size overhead and the cost of traversing SRTs in
+the GC, we want to simplify SRTs where possible. We therefore apply
+the following optimisations. Each has a [keyword]; search for the
+keyword in the code below to see where the optimisation is
+implemented.
+
+1. [Inline] we never create an SRT with a single entry, instead we
+ point to the single entry directly from the info table.
+
+ i.e. instead of
+
+ +------+
+ | info |
+ | | +-----+---+---+
+ | -------->|SRT_1| | | 0 |
+ |------| +-----+-|-+---+
+ | | |
+ | code | |
+ | | v
+ C
+
+ we can point directly to the closure:
+
+ +------+
+ | info |
+ | |
+ | -------->C
+ |------|
+ | |
+ | code |
+ | |
+
+
+ Furthermore, the SRT for any code that refers to this info table
+ can point directly to C.
+
+ The exception to this is when we're doing dynamic linking. In that
+ case, if the closure is not locally defined then we can't point to
+ it directly from the info table, because this is the text section
+ which cannot contain runtime relocations. In this case we skip this
+ optimisation and generate the singleton SRT, becase SRTs are in the
+ data section and *can* have relocatable references.
+
+2. [FUN] A static function closure can also be an SRT, we simply put
+ the SRT entries as fields in the static closure. This makes a lot
+ of sense: the static references are just like the free variables of
+ the FUN closure.
+
+ i.e. instead of
+
+ f_closure:
+ +-----+---+
+ | | | 0 |
+ +- |--+---+
+ | +------+
+ | | info | f_srt:
+ | | | +-----+---+---+---+
+ | | -------->|SRT_2| | | | + 0 |
+ `----------->|------| +-----+-|-+-|-+---+
+ | | | |
+ | code | | |
+ | | v v
+
+
+ We can generate:
+
+ f_closure:
+ +-----+---+---+---+
+ | | | | | | | 0 |
+ +- |--+-|-+-|-+---+
+ | | | +------+
+ | v v | info |
+ | | |
+ | | 0 |
+ `----------->|------|
+ | |
+ | code |
+ | |
+
+
+ (note: we can't do this for THUNKs, because the thunk gets
+ overwritten when it is entered, so we wouldn't be able to share
+ this SRT with other info tables that want to refer to it (see
+ [Common] below). FUNs are immutable so don't have this problem.)
+
+3. [Common] Identical SRTs can be commoned up.
+
+4. [Filter] If an SRT A refers to an SRT B and a closure C, and B also
+ refers to C (perhaps transitively), then we can omit the reference
+ to C from A.
+
+
+Note that there are many other optimisations that we could do, but
+aren't implemented. In general, we could omit any reference from an
+SRT if everything reachable from it is also reachable from the other
+fields in the SRT. Our [Filter] optimisation is a special case of
+this.
+
+Another opportunity we don't exploit is this:
+
+A = {X,Y,Z}
+B = {Y,Z}
+C = {X,B}
+
+Here we could use C = {A} and therefore [Inline] C = A.
-}
------------------------------------------------------------------------
--- Finding the CAFs used by a procedure
+-- ---------------------------------------------------------------------
+{- Note [Invalid optimisation: shortcutting]
+
+You might think that if we have something like
+
+A's SRT = {B}
+B's SRT = {X}
+
+that we could replace the reference to B in A's SRT with X.
+
+A's SRT = {X}
+B's SRT = {X}
-type CAFSet = Set CLabel
+and thereby perhaps save a little work at runtime, because we don't
+have to visit B.
+
+But this is NOT valid.
+
+Consider these cases:
+
+0. B can't be a constructor, because constructors don't have SRTs
+
+1. B is a CAF. This is the easy one. Obviously we want A's SRT to
+ point to B, so that it keeps B alive.
+
+2. B is a function. This is the tricky one. The reason we can't
+shortcut in this case is that we aren't allowed to resurrect static
+objects.
+
+== How does this cause a problem? ==
+
+The particular case that cropped up when we tried this was #15544.
+- A is a thunk
+- B is a static function
+- X is a CAF
+- suppose we GC when A is alive, and B is not otherwise reachable.
+- B is "collected", meaning that it doesn't make it onto the static
+ objects list during this GC, but nothing bad happens yet.
+- Next, suppose we enter A, and then call B. (remember that A refers to B)
+ At the entry point to B, we GC. This puts B on the stack, as part of the
+ RET_FUN stack frame that gets pushed when we GC at a function entry point.
+- This GC will now reach B
+- But because B was previous "collected", it breaks the assumption
+ that static objects are never resurrected. See Note [STATIC_LINK
+ fields] in rts/sm/Storage.h for why this is bad.
+- In practice, the GC thinks that B has already been visited, and so
+ doesn't visit X, and catastrophe ensues.
+
+== Isn't this caused by the RET_FUN business? ==
+
+Maybe, but could you prove that RET_FUN is the only way that
+resurrection can occur?
+
+So, no shortcutting.
+-}
+
+-- ---------------------------------------------------------------------
+-- Label types
+
+-- Labels that come from cafAnal can be:
+-- - _closure labels for static functions or CAFs
+-- - _info labels for dynamic functions, thunks, or continuations
+-- - _entry labels for functions or thunks
+--
+-- Meanwhile the labels on top-level blocks are _entry labels.
+--
+-- To put everything in the same namespace we convert all labels to
+-- closure labels using toClosureLbl. Note that some of these
+-- labels will not actually exist; that's ok because we're going to
+-- map them to SRTEntry later, which ranges over labels that do exist.
+--
+newtype CAFLabel = CAFLabel CLabel
+ deriving (Eq,Ord,Outputable)
+
+type CAFSet = Set CAFLabel
type CAFEnv = LabelMap CAFSet
+mkCAFLabel :: CLabel -> CAFLabel
+mkCAFLabel lbl = CAFLabel (toClosureLbl lbl)
+
+-- This is a label that we can put in an SRT. It *must* be a closure label,
+-- pointing to either a FUN_STATIC, THUNK_STATIC, or CONSTR.
+newtype SRTEntry = SRTEntry CLabel
+ deriving (Eq, Ord, Outputable)
+
+-- ---------------------------------------------------------------------
+-- CAF analysis
+
+-- |
+-- For each code block:
+-- - collect the references reachable from this code block to FUN,
+-- THUNK or RET labels for which hasCAF == True
+--
+-- This gives us a `CAFEnv`: a mapping from code block to sets of labels
+--
+cafAnal
+ :: LabelSet -- The blocks representing continuations, ie. those
+ -- that will get RET info tables. These labels will
+ -- get their own SRTs, so we don't aggregate CAFs from
+ -- references to these labels, we just use the label.
+ -> CLabel -- The top label of the proc
+ -> CmmGraph
+ -> CAFEnv
+cafAnal contLbls topLbl cmmGraph =
+ analyzeCmmBwd cafLattice
+ (cafTransfers contLbls (g_entry cmmGraph) topLbl) cmmGraph mapEmpty
+
+
cafLattice :: DataflowLattice CAFSet
cafLattice = DataflowLattice Set.empty add
where
@@ -95,290 +411,486 @@ cafLattice = DataflowLattice Set.empty add
let !new' = old `Set.union` new
in changedIf (Set.size new' > Set.size old) new'
-cafTransfers :: TransferFun CAFSet
-cafTransfers (BlockCC eNode middle xNode) fBase =
- let joined = cafsInNode xNode $! joinOutFacts cafLattice xNode fBase
+
+cafTransfers :: LabelSet -> Label -> CLabel -> TransferFun CAFSet
+cafTransfers contLbls entry topLbl
+ (BlockCC eNode middle xNode) fBase =
+ let joined = cafsInNode xNode $! live'
!result = foldNodesBwdOO cafsInNode middle joined
+
+ facts = mapMaybe successorFact (successors xNode)
+ live' = joinFacts cafLattice facts
+
+ successorFact s
+ -- If this is a loop back to the entry, we can refer to the
+ -- entry label.
+ | s == entry = Just (add topLbl Set.empty)
+ -- If this is a continuation, we want to refer to the
+ -- SRT for the continuation's info table
+ | s `setMember` contLbls
+ = Just (Set.singleton (mkCAFLabel (infoTblLbl s)))
+ -- Otherwise, takes the CAF references from the destination
+ | otherwise
+ = lookupFact s fBase
+
+ cafsInNode :: CmmNode e x -> CAFSet -> CAFSet
+ cafsInNode node set = foldExpDeep addCaf node set
+
+ addCaf expr !set =
+ case expr of
+ CmmLit (CmmLabel c) -> add c set
+ CmmLit (CmmLabelOff c _) -> add c set
+ CmmLit (CmmLabelDiffOff c1 c2 _ _) -> add c1 $! add c2 set
+ _ -> set
+ add l s | hasCAF l = Set.insert (mkCAFLabel l) s
+ | otherwise = s
+
in mapSingleton (entryLabel eNode) result
-cafsInNode :: CmmNode e x -> CAFSet -> CAFSet
-cafsInNode node set = foldExpDeep addCaf node set
- where
- addCaf expr !set =
- case expr of
- CmmLit (CmmLabel c) -> add c set
- CmmLit (CmmLabelOff c _) -> add c set
- CmmLit (CmmLabelDiffOff c1 c2 _) -> add c1 $! add c2 set
- _ -> set
- add l s | hasCAF l = Set.insert (toClosureLbl l) s
- | otherwise = s
-
--- | An analysis to find live CAFs.
-cafAnal :: CmmGraph -> CAFEnv
-cafAnal cmmGraph = analyzeCmmBwd cafLattice cafTransfers cmmGraph mapEmpty
-
------------------------------------------------------------------------
--- Building the SRTs
-
--- Description of the SRT for a given module.
--- Note that this SRT may grow as we greedily add new CAFs to it.
-data TopSRT = TopSRT { lbl :: CLabel
- , next_elt :: Int -- the next entry in the table
- , rev_elts :: [CLabel]
- , elt_map :: Map CLabel Int }
- -- map: CLabel -> its last entry in the table
-instance Outputable TopSRT where
- ppr (TopSRT lbl next elts eltmap) =
- text "TopSRT:" <+> ppr lbl
- <+> ppr next
- <+> ppr elts
- <+> ppr eltmap
-
-emptySRT :: MonadUnique m => m TopSRT
-emptySRT =
- do top_lbl <- getUniqueM >>= \ u -> return $ mkTopSRTLabel u
- return TopSRT { lbl = top_lbl, next_elt = 0, rev_elts = [], elt_map = Map.empty }
-
-isEmptySRT :: TopSRT -> Bool
-isEmptySRT srt = null (rev_elts srt)
-
-cafMember :: TopSRT -> CLabel -> Bool
-cafMember srt lbl = Map.member lbl (elt_map srt)
-
-cafOffset :: TopSRT -> CLabel -> Maybe Int
-cafOffset srt lbl = Map.lookup lbl (elt_map srt)
-
-addCAF :: CLabel -> TopSRT -> TopSRT
-addCAF caf srt =
- srt { next_elt = last + 1
- , rev_elts = caf : rev_elts srt
- , elt_map = Map.insert caf last (elt_map srt) }
- where last = next_elt srt
-
-srtToData :: TopSRT -> CmmGroup
-srtToData srt = [CmmData sec (Statics (lbl srt) tbl)]
- where tbl = map (CmmStaticLit . CmmLabel) (reverse (rev_elts srt))
- sec = Section RelocatableReadOnlyData (lbl srt)
-
--- Once we have found the CAFs, we need to do two things:
--- 1. Build a table of all the CAFs used in the procedure.
--- 2. Compute the C_SRT describing the subset of CAFs live at each procpoint.
---
--- When building the local view of the SRT, we first make sure that all the CAFs are
--- in the SRT. Then, if the number of CAFs is small enough to fit in a bitmap,
--- we make sure they're all close enough to the bottom of the table that the
--- bitmap will be able to cover all of them.
-buildSRT :: DynFlags -> TopSRT -> CAFSet -> UniqSM (TopSRT, Maybe CmmDecl, C_SRT)
-buildSRT dflags topSRT cafs =
- do let
- -- For each label referring to a function f without a static closure,
- -- replace it with the CAFs that are reachable from f.
- sub_srt topSRT localCafs =
- let cafs = Set.elems localCafs
- mkSRT topSRT =
- do localSRTs <- procpointSRT dflags (lbl topSRT) (elt_map topSRT) cafs
- return (topSRT, localSRTs)
- in if cafs `lengthExceeds` maxBmpSize dflags then
- mkSRT (foldl add_if_missing topSRT cafs)
- else -- make sure all the cafs are near the bottom of the srt
- mkSRT (add_if_too_far topSRT cafs)
- add_if_missing srt caf =
- if cafMember srt caf then srt else addCAF caf srt
- -- If a CAF is more than maxBmpSize entries from the young end of the
- -- SRT, then we add it to the SRT again.
- -- (Note: Not in the SRT => infinitely far.)
- add_if_too_far srt@(TopSRT {elt_map = m}) cafs =
- add srt (sortBy farthestFst cafs)
- where
- farthestFst x y = case (Map.lookup x m, Map.lookup y m) of
- (Nothing, Nothing) -> EQ
- (Nothing, Just _) -> LT
- (Just _, Nothing) -> GT
- (Just d, Just d') -> compare d' d
- add srt [] = srt
- add srt@(TopSRT {next_elt = next}) (caf : rst) =
- case cafOffset srt caf of
- Just ix -> if next - ix > maxBmpSize dflags then
- add (addCAF caf srt) rst
- else srt
- Nothing -> add (addCAF caf srt) rst
- (topSRT, subSRTs) <- sub_srt topSRT cafs
- let (sub_tbls, blockSRTs) = subSRTs
- return (topSRT, sub_tbls, blockSRTs)
-
--- Construct an SRT bitmap.
--- Adapted from simpleStg/SRT.hs, which expects Id's.
-procpointSRT :: DynFlags -> CLabel -> Map CLabel Int -> [CLabel] ->
- UniqSM (Maybe CmmDecl, C_SRT)
-procpointSRT _ _ _ [] =
- return (Nothing, NoC_SRT)
-procpointSRT dflags top_srt top_table entries =
- do (top, srt) <- bitmap `seq` to_SRT dflags top_srt offset len bitmap
- return (top, srt)
- where
- ints = map (expectJust "constructSRT" . flip Map.lookup top_table) entries
- sorted_ints = sort ints
- offset = head sorted_ints
- bitmap_entries = map (subtract offset) sorted_ints
- len = P.last bitmap_entries + 1
- bitmap = intsToBitmap dflags len bitmap_entries
-
-maxBmpSize :: DynFlags -> Int
-maxBmpSize dflags = widthInBits (wordWidth dflags) `div` 2
-
--- Adapted from codeGen/StgCmmUtils, which converts from SRT to C_SRT.
-to_SRT :: DynFlags -> CLabel -> Int -> Int -> Bitmap -> UniqSM (Maybe CmmDecl, C_SRT)
-to_SRT dflags top_srt off len bmp
- | len > maxBmpSize dflags || bmp == [toStgWord dflags (fromStgHalfWord (srtEscape dflags))]
- = do id <- getUniqueM
- let srt_desc_lbl = mkLargeSRTLabel id
- section = Section RelocatableReadOnlyData srt_desc_lbl
- tbl = CmmData section $
- Statics srt_desc_lbl $ map CmmStaticLit
- ( cmmLabelOffW dflags top_srt off
- : mkWordCLit dflags (fromIntegral len)
- : map (mkStgWordCLit dflags) bmp)
- return (Just tbl, C_SRT srt_desc_lbl 0 (srtEscape dflags))
- | otherwise
- = return (Nothing, C_SRT top_srt off (toStgHalfWord dflags (fromStgWord (head bmp))))
- -- The fromIntegral converts to StgHalfWord
-
--- Gather CAF info for a procedure, but only if the procedure
--- doesn't have a static closure.
--- (If it has a static closure, it will already have an SRT to
--- keep its CAFs live.)
--- Any procedure referring to a non-static CAF c must keep live
--- any CAF that is reachable from c.
-localCAFInfo :: CAFEnv -> CmmDecl -> (CAFSet, Maybe CLabel)
-localCAFInfo _ (CmmData _ _) = (Set.empty, Nothing)
-localCAFInfo cafEnv proc@(CmmProc _ top_l _ (CmmGraph {g_entry=entry})) =
- case topInfoTable proc of
- Just (CmmInfoTable { cit_rep = rep })
- | not (isStaticRep rep) && not (isStackRep rep)
- -> (cafs, Just (toClosureLbl top_l))
- _other -> (cafs, Nothing)
- where
- cafs = expectJust "maybeBindCAFs" $ mapLookup entry cafEnv
-
--- Once we have the local CAF sets for some (possibly) mutually
--- recursive functions, we can create an environment mapping
--- each function to its set of CAFs. Note that a CAF may
--- be a reference to a function. If that function f does not have
--- a static closure, then we need to refer specifically
--- to the set of CAFs used by f. Of course, the set of CAFs
--- used by f must be included in the local CAF sets that are input to
--- this function. To minimize lookup time later, we return
--- the environment with every reference to f replaced by its set of CAFs.
--- To do this replacement efficiently, we gather strongly connected
--- components, then we sort the components in topological order.
-mkTopCAFInfo :: [(CAFSet, Maybe CLabel)] -> Map CLabel CAFSet
-mkTopCAFInfo localCAFs = foldl addToTop Map.empty g
- where
- addToTop env (AcyclicSCC (l, cafset)) =
- Map.insert l (flatten env cafset) env
- addToTop env (CyclicSCC nodes) =
- let (lbls, cafsets) = unzip nodes
- cafset = foldr Set.delete (foldl Set.union Set.empty cafsets) lbls
- in foldl (\env l -> Map.insert l (flatten env cafset) env) env lbls
-
- g = stronglyConnCompFromEdgedVerticesOrd
- [ DigraphNode (l,cafs) l (Set.elems cafs)
- | (cafs, Just l) <- localCAFs ]
-
-flatten :: Map CLabel CAFSet -> CAFSet -> CAFSet
-flatten env cafset = foldSet (lookup env) Set.empty cafset
- where
- lookup env caf cafset' =
- case Map.lookup caf env of
- Just cafs -> foldSet Set.insert cafset' cafs
- Nothing -> Set.insert caf cafset'
-
-bundle :: Map CLabel CAFSet
- -> (CAFEnv, CmmDecl)
- -> (CAFSet, Maybe CLabel)
- -> (LabelMap CAFSet, CmmDecl)
-bundle flatmap (env, decl@(CmmProc infos _lbl _ g)) (closure_cafs, mb_lbl)
- = ( mapMapWithKey get_cafs (info_tbls infos), decl )
- where
- entry = g_entry g
-
- entry_cafs
- | Just l <- mb_lbl = expectJust "bundle" $ Map.lookup l flatmap
- | otherwise = flatten flatmap closure_cafs
-
- get_cafs l _
- | l == entry = entry_cafs
- | Just info <- mapLookup l env = flatten flatmap info
- | otherwise = Set.empty
- -- the label might not be in the env if the code corresponding to
- -- this info table was optimised away (perhaps because it was
- -- unreachable). In this case it doesn't matter what SRT we
- -- infer, since the info table will not appear in the generated
- -- code. See #9329.
-
-bundle _flatmap (_, decl) _
- = ( mapEmpty, decl )
-
-
-flattenCAFSets :: [(CAFEnv, [CmmDecl])] -> [(LabelMap CAFSet, CmmDecl)]
-flattenCAFSets cpsdecls = zipWith (bundle flatmap) zipped localCAFs
- where
- zipped = [ (env,decl) | (env,decls) <- cpsdecls, decl <- decls ]
- localCAFs = unzipWith localCAFInfo zipped
- flatmap = mkTopCAFInfo localCAFs -- transitive closure of localCAFs
-
-doSRTs :: DynFlags
- -> TopSRT
- -> [(CAFEnv, [CmmDecl])]
- -> IO (TopSRT, [CmmDecl])
-
-doSRTs dflags topSRT tops
- = do
- let caf_decls = flattenCAFSets tops
- us <- mkSplitUniqSupply 'u'
- let (topSRT', gs') = initUs_ us $ foldM setSRT (topSRT, []) caf_decls
- return (topSRT', reverse gs' {- Note [reverse gs] -})
- where
- setSRT (topSRT, rst) (caf_map, decl@(CmmProc{})) = do
- (topSRT, srt_tables, srt_env) <- buildSRTs dflags topSRT caf_map
- let decl' = updInfoSRTs srt_env decl
- return (topSRT, decl': srt_tables ++ rst)
- setSRT (topSRT, rst) (_, decl) =
- return (topSRT, decl : rst)
-
-buildSRTs :: DynFlags -> TopSRT -> LabelMap CAFSet
- -> UniqSM (TopSRT, [CmmDecl], LabelMap C_SRT)
-buildSRTs dflags top_srt caf_map
- = foldM doOne (top_srt, [], mapEmpty) (mapToList caf_map)
- where
- doOne (top_srt, decls, srt_env) (l, cafs)
- = do (top_srt, mb_decl, srt) <- buildSRT dflags top_srt cafs
- return ( top_srt, maybeToList mb_decl ++ decls
- , mapInsert l srt srt_env )
-
-{-
-- In each CmmDecl there is a mapping from BlockId -> CmmInfoTable
-- The one corresponding to g_entry is the closure info table, the
- rest are continuations.
-- Each one needs an SRT.
-- We get the CAFSet for each one from the CAFEnv
-- flatten gives us
- [(LabelMap CAFSet, CmmDecl)]
--
--}
+-- -----------------------------------------------------------------------------
+-- ModuleSRTInfo
+
+data ModuleSRTInfo = ModuleSRTInfo
+ { thisModule :: Module
+ -- ^ Current module being compiled. Required for calling labelDynamic.
+ , dedupSRTs :: Map (Set SRTEntry) SRTEntry
+ -- ^ previous SRTs we've emitted, so we can de-duplicate.
+ -- Used to implement the [Common] optimisation.
+ , flatSRTs :: Map SRTEntry (Set SRTEntry)
+ -- ^ The reverse mapping, so that we can remove redundant
+ -- entries. e.g. if we have an SRT [a,b,c], and we know that b
+ -- points to [c,d], we can omit c and emit [a,b].
+ -- Used to implement the [Filter] optimisation.
+ }
+instance Outputable ModuleSRTInfo where
+ ppr ModuleSRTInfo{..} =
+ text "ModuleSRTInfo:" <+> ppr dedupSRTs <+> ppr flatSRTs
-{- Note [reverse gs]
+emptySRT :: Module -> ModuleSRTInfo
+emptySRT mod =
+ ModuleSRTInfo
+ { thisModule = mod
+ , dedupSRTs = Map.empty
+ , flatSRTs = Map.empty }
+
+-- -----------------------------------------------------------------------------
+-- Constructing SRTs
+
+{- Implementation notes
+
+- In each CmmDecl there is a mapping info_tbls from Label -> CmmInfoTable
+
+- The entry in info_tbls corresponding to g_entry is the closure info
+ table, the rest are continuations.
+
+- Each entry in info_tbls possibly needs an SRT. We need to make a
+ label for each of these.
+
+- We get the CAFSet for each entry from the CAFEnv
- It is important to keep the code blocks in the same order,
- otherwise binary sizes get slightly bigger. I'm not completely
- sure why this is, perhaps the assembler generates bigger jump
- instructions for forward refs. --SDM
-}
-updInfoSRTs :: LabelMap C_SRT -> CmmDecl -> CmmDecl
-updInfoSRTs srt_env (CmmProc top_info top_l live g) =
- CmmProc (top_info {info_tbls = mapMapWithKey updInfoTbl (info_tbls top_info)}) top_l live g
- where updInfoTbl l info_tbl
- = info_tbl { cit_srt = expectJust "updInfo" $ mapLookup l srt_env }
-updInfoSRTs _ t = t
+-- | Return a (Label,CLabel) pair for each labelled block of a CmmDecl,
+-- where the label is
+-- - the info label for a continuation or dynamic closure
+-- - the closure label for a top-level function (not a CAF)
+getLabelledBlocks :: CmmDecl -> [(Label, CAFLabel)]
+getLabelledBlocks (CmmData _ _) = []
+getLabelledBlocks (CmmProc top_info _ _ _) =
+ [ (blockId, mkCAFLabel (cit_lbl info))
+ | (blockId, info) <- mapToList (info_tbls top_info)
+ , let rep = cit_rep info
+ , not (isStaticRep rep) || not (isThunkRep rep)
+ ]
+
+
+-- | Put the labelled blocks that we will be annotating with SRTs into
+-- dependency order. This is so that we can process them one at a
+-- time, resolving references to earlier blocks to point to their
+-- SRTs. CAFs themselves are not included here; see getCAFs below.
+depAnalSRTs
+ :: CAFEnv
+ -> [CmmDecl]
+ -> [SCC (Label, CAFLabel, Set CAFLabel)]
+depAnalSRTs cafEnv decls =
+ srtTrace "depAnalSRTs" (ppr graph) graph
+ where
+ labelledBlocks = concatMap getLabelledBlocks decls
+ labelToBlock = Map.fromList (map swap labelledBlocks)
+ graph = stronglyConnCompFromEdgedVerticesOrd
+ [ let cafs' = Set.delete lbl cafs in
+ DigraphNode (l,lbl,cafs') l
+ (mapMaybe (flip Map.lookup labelToBlock) (Set.toList cafs'))
+ | (l, lbl) <- labelledBlocks
+ , Just cafs <- [mapLookup l cafEnv] ]
+
+
+-- | Get (Label, CAFLabel, Set CAFLabel) for each block that represents a CAF.
+-- These are treated differently from other labelled blocks:
+-- - we never shortcut a reference to a CAF to the contents of its
+-- SRT, since the point of SRTs is to keep CAFs alive.
+-- - CAFs therefore don't take part in the dependency analysis in depAnalSRTs.
+-- instead we generate their SRTs after everything else.
+getCAFs :: CAFEnv -> [CmmDecl] -> [(Label, CAFLabel, Set CAFLabel)]
+getCAFs cafEnv decls =
+ [ (g_entry g, mkCAFLabel topLbl, cafs)
+ | CmmProc top_info topLbl _ g <- decls
+ , Just info <- [mapLookup (g_entry g) (info_tbls top_info)]
+ , let rep = cit_rep info
+ , isStaticRep rep && isThunkRep rep
+ , Just cafs <- [mapLookup (g_entry g) cafEnv]
+ ]
+
+
+-- | Get the list of blocks that correspond to the entry points for
+-- FUN_STATIC closures. These are the blocks for which if we have an
+-- SRT we can merge it with the static closure. [FUN]
+getStaticFuns :: [CmmDecl] -> [(BlockId, CLabel)]
+getStaticFuns decls =
+ [ (g_entry g, lbl)
+ | CmmProc top_info _ _ g <- decls
+ , Just info <- [mapLookup (g_entry g) (info_tbls top_info)]
+ , Just (id, _) <- [cit_clo info]
+ , let rep = cit_rep info
+ , isStaticRep rep && isFunRep rep
+ , let lbl = mkLocalClosureLabel (idName id) (idCafInfo id)
+ ]
+
+
+-- | Maps labels from 'cafAnal' to the final CLabel that will appear
+-- in the SRT.
+-- - closures with singleton SRTs resolve to their single entry
+-- - closures with larger SRTs map to the label for that SRT
+-- - CAFs must not map to anything!
+-- - if a labels maps to Nothing, we found that this label's SRT
+-- is empty, so we don't need to refer to it from other SRTs.
+type SRTMap = Map CAFLabel (Maybe SRTEntry)
+
+-- | resolve a CAFLabel to its SRTEntry using the SRTMap
+resolveCAF :: SRTMap -> CAFLabel -> Maybe SRTEntry
+resolveCAF srtMap lbl@(CAFLabel l) =
+ Map.findWithDefault (Just (SRTEntry (toClosureLbl l))) lbl srtMap
+
+
+-- | Attach SRTs to all info tables in the CmmDecls, and add SRT
+-- declarations to the ModuleSRTInfo.
+--
+doSRTs
+ :: DynFlags
+ -> ModuleSRTInfo
+ -> [(CAFEnv, [CmmDecl])]
+ -> IO (ModuleSRTInfo, [CmmDecl])
+
+doSRTs dflags moduleSRTInfo tops = do
+ us <- mkSplitUniqSupply 'u'
+
+ -- Ignore the original grouping of decls, and combine all the
+ -- CAFEnvs into a single CAFEnv.
+ let (cafEnvs, declss) = unzip tops
+ cafEnv = mapUnions cafEnvs
+ decls = concat declss
+ staticFuns = mapFromList (getStaticFuns decls)
+
+ -- Put the decls in dependency order. Why? So that we can implement
+ -- [Inline] and [Filter]. If we need to refer to an SRT that has
+ -- a single entry, we use the entry itself, which means that we
+ -- don't need to generate the singleton SRT in the first place. But
+ -- to do this we need to process blocks before things that depend on
+ -- them.
+ let
+ sccs = depAnalSRTs cafEnv decls
+ cafsWithSRTs = getCAFs cafEnv decls
+
+ -- On each strongly-connected group of decls, construct the SRT
+ -- closures and the SRT fields for info tables.
+ let result ::
+ [ ( [CmmDecl] -- generated SRTs
+ , [(Label, CLabel)] -- SRT fields for info tables
+ , [(Label, [SRTEntry])] -- SRTs to attach to static functions
+ ) ]
+ ((result, _srtMap), moduleSRTInfo') =
+ initUs_ us $
+ flip runStateT moduleSRTInfo $
+ flip runStateT Map.empty $ do
+ nonCAFs <- mapM (doSCC dflags staticFuns) sccs
+ cAFs <- forM cafsWithSRTs $ \(l, cafLbl, cafs) ->
+ oneSRT dflags staticFuns [l] [cafLbl] True{-is a CAF-} cafs
+ return (nonCAFs ++ cAFs)
+
+ (declss, pairs, funSRTs) = unzip3 result
+
+ -- Next, update the info tables with the SRTs
+ let
+ srtFieldMap = mapFromList (concat pairs)
+ funSRTMap = mapFromList (concat funSRTs)
+ decls' = concatMap (updInfoSRTs dflags srtFieldMap funSRTMap) decls
+
+ return (moduleSRTInfo', concat declss ++ decls')
+
+
+-- | Build the SRT for a strongly-connected component of blocks
+doSCC
+ :: DynFlags
+ -> LabelMap CLabel -- which blocks are static function entry points
+ -> SCC (Label, CAFLabel, Set CAFLabel)
+ -> StateT SRTMap
+ (StateT ModuleSRTInfo UniqSM)
+ ( [CmmDecl] -- generated SRTs
+ , [(Label, CLabel)] -- SRT fields for info tables
+ , [(Label, [SRTEntry])] -- SRTs to attach to static functions
+ )
+
+doSCC dflags staticFuns (AcyclicSCC (l, cafLbl, cafs)) =
+ oneSRT dflags staticFuns [l] [cafLbl] False cafs
+
+doSCC dflags staticFuns (CyclicSCC nodes) = do
+ -- build a single SRT for the whole cycle, see Note [recursive SRTs]
+ let (blockids, lbls, cafsets) = unzip3 nodes
+ cafs = Set.unions cafsets
+ oneSRT dflags staticFuns blockids lbls False cafs
+
+
+{- Note [recursive SRTs]
+
+If the dependency analyser has found us a recursive group of
+declarations, then we build a single SRT for the whole group, on the
+grounds that everything in the group is reachable from everything
+else, so we lose nothing by having a single SRT.
+
+However, there are a couple of wrinkles to be aware of.
+
+* The Set CAFLabel for this SRT will contain labels in the group
+itself. The SRTMap will therefore not contain entries for these labels
+yet, so we can't turn them into SRTEntries using resolveCAF. BUT we
+can just remove recursive references from the Set CAFLabel before
+generating the SRT - the SRT will still contain all the CAFLabels that
+we need to refer to from this group's SRT.
+
+* That is, EXCEPT for static function closures. For the same reason
+described in Note [Invalid optimisation: shortcutting], we cannot omit
+references to static function closures.
+ - But, since we will merge the SRT with one of the static function
+ closures (see [FUN]), we can omit references to *that* static
+ function closure from the SRT.
+-}
+
+-- | Build an SRT for a set of blocks
+oneSRT
+ :: DynFlags
+ -> LabelMap CLabel -- which blocks are static function entry points
+ -> [Label] -- blocks in this set
+ -> [CAFLabel] -- labels for those blocks
+ -> Bool -- True <=> this SRT is for a CAF
+ -> Set CAFLabel -- SRT for this set
+ -> StateT SRTMap
+ (StateT ModuleSRTInfo UniqSM)
+ ( [CmmDecl] -- SRT objects we built
+ , [(Label, CLabel)] -- SRT fields for these blocks' itbls
+ , [(Label, [SRTEntry])] -- SRTs to attach to static functions
+ )
+
+oneSRT dflags staticFuns blockids lbls isCAF cafs = do
+ srtMap <- get
+ topSRT <- lift get
+ let
+ -- Can we merge this SRT with a FUN_STATIC closure?
+ (maybeFunClosure, otherFunLabels) =
+ case [ (l,b) | b <- blockids, Just l <- [mapLookup b staticFuns] ] of
+ [] -> (Nothing, [])
+ ((l,b):xs) -> (Just (l,b), map (mkCAFLabel . fst) xs)
+
+ -- Remove recursive references from the SRT, except for (all but
+ -- one of the) static functions. See Note [recursive SRTs].
+ nonRec = cafs `Set.difference`
+ Set.fromList lbls `Set.difference` Set.fromList otherFunLabels
+
+ -- First resolve all the CAFLabels to SRTEntries
+ -- Implements the [Inline] optimisation.
+ resolved =
+ Set.fromList $
+ catMaybes (map (resolveCAF srtMap) (Set.toList nonRec))
+
+ -- The set of all SRTEntries in SRTs that we refer to from here.
+ allBelow =
+ Set.unions [ lbls | caf <- Set.toList resolved
+ , Just lbls <- [Map.lookup caf (flatSRTs topSRT)] ]
+
+ -- Remove SRTEntries that are also in an SRT that we refer to.
+ -- Implements the [Filter] optimisation.
+ filtered = Set.difference resolved allBelow
+
+ srtTrace "oneSRT:"
+ (ppr cafs <+> ppr resolved <+> ppr allBelow <+> ppr filtered) $ return ()
+
+ let
+ isStaticFun = isJust maybeFunClosure
+
+ -- For a label without a closure (e.g. a continuation), we must
+ -- update the SRTMap for the label to point to a closure. It's
+ -- important that we don't do this for static functions or CAFs,
+ -- see Note [Invalid optimisation: shortcutting].
+ updateSRTMap srtEntry =
+ when (not isCAF && not isStaticFun) $ do
+ let newSRTMap = Map.fromList [(cafLbl, srtEntry) | cafLbl <- lbls]
+ put (Map.union newSRTMap srtMap)
+
+ this_mod = thisModule topSRT
+
+ case Set.toList filtered of
+ [] -> do
+ srtTrace "oneSRT: empty" (ppr lbls) $ return ()
+ updateSRTMap Nothing
+ return ([], [], [])
+
+ -- [Inline] - when we have only one entry there is no need to
+ -- build an SRT object at all, instead we put the singleton SRT
+ -- entry in the info table.
+ [one@(SRTEntry lbl)]
+ | -- Info tables refer to SRTs by offset (as noted in the section
+ -- "Referring to an SRT from the info table" of Note [SRTs]). However,
+ -- when dynamic linking is used we cannot guarantee that the offset
+ -- between the SRT and the info table will fit in the offset field.
+ -- Consequently we build a singleton SRT in in this case.
+ not (labelDynamic dflags this_mod lbl)
+
+ -- MachO relocations can't express offsets between compilation units at
+ -- all, so we are always forced to build a singleton SRT in this case.
+ && (not (osMachOTarget $ platformOS $ targetPlatform dflags)
+ || isLocalCLabel this_mod lbl) -> do
+
+ -- If we have a static function closure, then it becomes the
+ -- SRT object, and everything else points to it. (the only way
+ -- we could have multiple labels here is if this is a
+ -- recursive group, see Note [recursive SRTs])
+ case maybeFunClosure of
+ Just (staticFunLbl,staticFunBlock) -> return ([], withLabels, [])
+ where
+ withLabels =
+ [ (b, if b == staticFunBlock then lbl else staticFunLbl)
+ | b <- blockids ]
+ Nothing -> do
+ updateSRTMap (Just one)
+ return ([], map (,lbl) blockids, [])
+
+ cafList ->
+ -- Check whether an SRT with the same entries has been emitted already.
+ -- Implements the [Common] optimisation.
+ case Map.lookup filtered (dedupSRTs topSRT) of
+ Just srtEntry@(SRTEntry srtLbl) -> do
+ srtTrace "oneSRT [Common]" (ppr lbls <+> ppr srtLbl) $ return ()
+ updateSRTMap (Just srtEntry)
+ return ([], map (,srtLbl) blockids, [])
+ Nothing -> do
+ -- No duplicates: we have to build a new SRT object
+ srtTrace "oneSRT: new" (ppr lbls <+> ppr filtered) $ return ()
+ (decls, funSRTs, srtEntry) <-
+ case maybeFunClosure of
+ Just (fun,block) ->
+ return ( [], [(block, cafList)], SRTEntry fun )
+ Nothing -> do
+ (decls, entry) <- lift . lift $ buildSRTChain dflags cafList
+ return (decls, [], entry)
+ updateSRTMap (Just srtEntry)
+ let allBelowThis = Set.union allBelow filtered
+ oldFlatSRTs = flatSRTs topSRT
+ newFlatSRTs = Map.insert srtEntry allBelowThis oldFlatSRTs
+ newDedupSRTs = Map.insert filtered srtEntry (dedupSRTs topSRT)
+ lift (put (topSRT { dedupSRTs = newDedupSRTs
+ , flatSRTs = newFlatSRTs }))
+ let SRTEntry lbl = srtEntry
+ return (decls, map (,lbl) blockids, funSRTs)
+
+
+-- | build a static SRT object (or a chain of objects) from a list of
+-- SRTEntries.
+buildSRTChain
+ :: DynFlags
+ -> [SRTEntry]
+ -> UniqSM
+ ( [CmmDecl] -- The SRT object(s)
+ , SRTEntry -- label to use in the info table
+ )
+buildSRTChain _ [] = panic "buildSRT: empty"
+buildSRTChain dflags cafSet =
+ case splitAt mAX_SRT_SIZE cafSet of
+ (these, []) -> do
+ (decl,lbl) <- buildSRT dflags these
+ return ([decl], lbl)
+ (these,those) -> do
+ (rest, rest_lbl) <- buildSRTChain dflags (head these : those)
+ (decl,lbl) <- buildSRT dflags (rest_lbl : tail these)
+ return (decl:rest, lbl)
+ where
+ mAX_SRT_SIZE = 16
+
+
+buildSRT :: DynFlags -> [SRTEntry] -> UniqSM (CmmDecl, SRTEntry)
+buildSRT dflags refs = do
+ id <- getUniqueM
+ let
+ lbl = mkSRTLabel id
+ srt_n_info = mkSRTInfoLabel (length refs)
+ fields =
+ mkStaticClosure dflags srt_n_info dontCareCCS
+ [ CmmLabel lbl | SRTEntry lbl <- refs ]
+ [] -- no padding
+ [mkIntCLit dflags 0] -- link field
+ [] -- no saved info
+ return (mkDataLits (Section Data lbl) lbl fields, SRTEntry lbl)
+
+
+-- | Update info tables with references to their SRTs. Also generate
+-- static closures, splicing in SRT fields as necessary.
+updInfoSRTs
+ :: DynFlags
+ -> LabelMap CLabel -- SRT labels for each block
+ -> LabelMap [SRTEntry] -- SRTs to merge into FUN_STATIC closures
+ -> CmmDecl
+ -> [CmmDecl]
+
+updInfoSRTs dflags srt_env funSRTEnv (CmmProc top_info top_l live g)
+ | Just (_,closure) <- maybeStaticClosure = [ proc, closure ]
+ | otherwise = [ proc ]
+ where
+ proc = CmmProc top_info { info_tbls = newTopInfo } top_l live g
+ newTopInfo = mapMapWithKey updInfoTbl (info_tbls top_info)
+ updInfoTbl l info_tbl
+ | l == g_entry g, Just (inf, _) <- maybeStaticClosure = inf
+ | otherwise = info_tbl { cit_srt = mapLookup l srt_env }
+
+ -- Generate static closures [FUN]. Note that this also generates
+ -- static closures for thunks (CAFs), because it's easier to treat
+ -- them uniformly in the code generator.
+ maybeStaticClosure :: Maybe (CmmInfoTable, CmmDecl)
+ maybeStaticClosure
+ | Just info_tbl@CmmInfoTable{..} <-
+ mapLookup (g_entry g) (info_tbls top_info)
+ , Just (id, ccs) <- cit_clo
+ , isStaticRep cit_rep =
+ let
+ (newInfo, srtEntries) = case mapLookup (g_entry g) funSRTEnv of
+ Nothing ->
+ -- if we don't add SRT entries to this closure, then we
+ -- want to set the srt field in its info table as usual
+ (info_tbl { cit_srt = mapLookup (g_entry g) srt_env }, [])
+ Just srtEntries -> srtTrace "maybeStaticFun" (ppr res)
+ (info_tbl { cit_rep = new_rep }, res)
+ where res = [ CmmLabel lbl | SRTEntry lbl <- srtEntries ]
+ fields = mkStaticClosureFields dflags info_tbl ccs (idCafInfo id)
+ srtEntries
+ new_rep = case cit_rep of
+ HeapRep sta ptrs nptrs ty ->
+ HeapRep sta (ptrs + length srtEntries) nptrs ty
+ _other -> panic "maybeStaticFun"
+ lbl = mkLocalClosureLabel (idName id) (idCafInfo id)
+ in
+ Just (newInfo, mkDataLits (Section Data lbl) lbl fields)
+ | otherwise = Nothing
+
+updInfoSRTs _ _ _ t = [t]
+
+
+srtTrace :: String -> SDoc -> b -> b
+-- srtTrace = pprTrace
+srtTrace _ _ b = b
diff --git a/compiler/cmm/CmmCallConv.hs b/compiler/cmm/CmmCallConv.hs
index 440ee5634f..e1067e9519 100644
--- a/compiler/cmm/CmmCallConv.hs
+++ b/compiler/cmm/CmmCallConv.hs
@@ -1,5 +1,3 @@
-{-# LANGUAGE CPP #-}
-
module CmmCallConv (
ParamLocation(..),
assignArgumentsPos,
@@ -7,7 +5,7 @@ module CmmCallConv (
realArgRegsCover
) where
-#include "HsVersions.h"
+import GhcPrelude
import CmmExpr
import SMRep
@@ -129,9 +127,10 @@ assignStack dflags offset arg_ty args = assign_stk offset [] (reverse args)
assign_stk offset assts (r:rs)
= assign_stk off' ((r, StackParam off') : assts) rs
where w = typeWidth (arg_ty r)
- size = (((widthInBytes w - 1) `div` word_size) + 1) * word_size
off' = offset + size
- word_size = wORD_SIZE dflags
+ -- Stack arguments always take a whole number of words, we never
+ -- pack them unlike constructor fields.
+ size = roundUpToWords dflags (widthInBytes w)
-----------------------------------------------------------------------------
-- Local information about the registers available
diff --git a/compiler/cmm/CmmCommonBlockElim.hs b/compiler/cmm/CmmCommonBlockElim.hs
index 3c23e70b8c..1af9a84028 100644
--- a/compiler/cmm/CmmCommonBlockElim.hs
+++ b/compiler/cmm/CmmCommonBlockElim.hs
@@ -1,17 +1,19 @@
-{-# LANGUAGE GADTs, BangPatterns #-}
+{-# LANGUAGE GADTs, BangPatterns, ScopedTypeVariables #-}
+
module CmmCommonBlockElim
( elimCommonBlocks
)
where
+import GhcPrelude hiding (iterate, succ, unzip, zip)
+
import BlockId
import Cmm
import CmmUtils
import CmmSwitch (eqSwitchTargetWith)
import CmmContFlowOpt
-- import PprCmm ()
-import Prelude hiding (iterate, succ, unzip, zip)
import Hoopl.Block
import Hoopl.Graph
@@ -23,11 +25,11 @@ import qualified Data.List as List
import Data.Word
import qualified Data.Map as M
import Outputable
-import UniqFM
-import UniqDFM
import qualified TrieMap as TM
+import UniqFM
import Unique
import Control.Arrow (first, second)
+import Data.List (foldl')
-- -----------------------------------------------------------------------------
-- Eliminate common blocks
@@ -62,7 +64,11 @@ elimCommonBlocks :: CmmGraph -> CmmGraph
elimCommonBlocks g = replaceLabels env $ copyTicks env g
where
env = iterate mapEmpty blocks_with_key
- groups = groupByInt hash_block (postorderDfs g)
+ -- The order of blocks doesn't matter here. While we could use
+ -- revPostorder which drops unreachable blocks this is done in
+ -- ContFlowOpt already which runs before this pass. So we use
+ -- toBlockList since it is faster.
+ groups = groupByInt hash_block (toBlockList g) :: [[CmmBlock]]
blocks_with_key = [ [ (successors b, [b]) | b <- bs] | bs <- groups]
-- Invariant: The blocks in the list are pairwise distinct
@@ -90,6 +96,8 @@ iterate subst blocks
subst' = subst `mapUnion` new_substs
updated_blocks = map (map (first (map (lookupBid subst')))) merged_blocks
+-- Combine two lists of blocks.
+-- While they are internally distinct they can still share common blocks.
mergeBlocks :: Subst -> DistinctBlocks -> DistinctBlocks -> (Subst, DistinctBlocks)
mergeBlocks subst existing new = go new
where
@@ -165,14 +173,14 @@ hash_block block =
hash_lit (CmmVec ls) = hash_list hash_lit ls
hash_lit (CmmLabel _) = 119 -- ugh
hash_lit (CmmLabelOff _ i) = cvt $ 199 + i
- hash_lit (CmmLabelDiffOff _ _ i) = cvt $ 299 + i
+ hash_lit (CmmLabelDiffOff _ _ i _) = cvt $ 299 + i
hash_lit (CmmBlock _) = 191 -- ugh
hash_lit (CmmHighStackMark) = cvt 313
hash_tgt (ForeignTarget e _) = hash_e e
hash_tgt (PrimTarget _) = 31 -- lots of these
- hash_list f = foldl (\z x -> f x + z) (0::Word32)
+ hash_list f = foldl' (\z x -> f x + z) (0::Word32)
cvt = fromInteger . toInteger
@@ -208,7 +216,7 @@ eqMiddleWith eqBid (CmmStore l1 r1) (CmmStore l2 r2)
= eqExprWith eqBid l1 l2 && eqExprWith eqBid r1 r2
eqMiddleWith eqBid (CmmUnsafeForeignCall t1 r1 a1)
(CmmUnsafeForeignCall t2 r2 a2)
- = t1 == t2 && r1 == r2 && and (zipWith (eqExprWith eqBid) a1 a2)
+ = t1 == t2 && r1 == r2 && eqListWith (eqExprWith eqBid) a1 a2
eqMiddleWith _ _ _ = False
eqExprWith :: (BlockId -> BlockId -> Bool)
@@ -223,7 +231,7 @@ eqExprWith eqBid = eq
CmmStackSlot a1 i1 `eq` CmmStackSlot a2 i2 = eqArea a1 a2 && i1==i2
_e1 `eq` _e2 = False
- xs `eqs` ys = and (zipWith eq xs ys)
+ xs `eqs` ys = eqListWith eq xs ys
eqLit (CmmBlock id1) (CmmBlock id2) = eqBid id1 id2
eqLit l1 l2 = l1 == l2
@@ -246,7 +254,7 @@ eqBlockBodyWith eqBid block block'
(_,m',l') = blockSplit block'
nodes' = filter (not . dont_care) (blockToList m')
- equal = and (zipWith (eqMiddleWith eqBid) nodes nodes') &&
+ equal = eqListWith (eqMiddleWith eqBid) nodes nodes' &&
eqLastWith eqBid l l'
@@ -265,6 +273,11 @@ eqMaybeWith eltEq (Just e) (Just e') = eltEq e e'
eqMaybeWith _ Nothing Nothing = True
eqMaybeWith _ _ _ = False
+eqListWith :: (a -> b -> Bool) -> [a] -> [b] -> Bool
+eqListWith f (a : as) (b : bs) = f a b && eqListWith f as bs
+eqListWith _ [] [] = True
+eqListWith _ _ _ = False
+
-- | Given a block map, ensure that all "target" blocks are covered by
-- the same ticks as the respective "source" blocks. This not only
-- means copying ticks, but also adjusting tick scopes where
@@ -275,8 +288,8 @@ copyTicks env g
| otherwise = ofBlockMap (g_entry g) $ mapMap copyTo blockMap
where -- Reverse block merge map
blockMap = toBlockMap g
- revEnv = mapFoldWithKey insertRev M.empty env
- insertRev k x = M.insertWith (const (k:)) x [k]
+ revEnv = mapFoldlWithKey insertRev M.empty env
+ insertRev m k x = M.insertWith (const (k:)) x [k] m
-- Copy ticks and scopes into the given block
copyTo block = case M.lookup (entryLabel block) revEnv of
Nothing -> block
@@ -289,17 +302,21 @@ copyTicks env g
foldr blockCons code (map CmmTick ticks)
-- Group by [Label]
-groupByLabel :: [(Key, a)] -> [(Key, [a])]
-groupByLabel = go (TM.emptyTM :: TM.ListMap UniqDFM a)
- where
- go !m [] = TM.foldTM (:) m []
- go !m ((k,v) : entries) = go (TM.alterTM k' adjust m) entries
- where k' = map getUnique k
- adjust Nothing = Just (k,[v])
- adjust (Just (_,vs)) = Just (k,v:vs)
-
+-- See Note [Compressed TrieMap] in coreSyn/TrieMap about the usage of GenMap.
+groupByLabel :: [(Key, DistinctBlocks)] -> [(Key, [DistinctBlocks])]
+groupByLabel =
+ go (TM.emptyTM :: TM.ListMap (TM.GenMap LabelMap) (Key, [DistinctBlocks]))
+ where
+ go !m [] = TM.foldTM (:) m []
+ go !m ((k,v) : entries) = go (TM.alterTM k adjust m) entries
+ where --k' = map (getKey . getUnique) k
+ adjust Nothing = Just (k,[v])
+ adjust (Just (_,vs)) = Just (k,v:vs)
groupByInt :: (a -> Int) -> [a] -> [[a]]
groupByInt f xs = nonDetEltsUFM $ List.foldl' go emptyUFM xs
- -- See Note [Unique Determinism and code generation]
- where go m x = alterUFM (Just . maybe [x] (x:)) m (f x)
+ -- See Note [Unique Determinism and code generation]
+ where
+ go m x = alterUFM addEntry m (f x)
+ where
+ addEntry xs = Just $! maybe [x] (x:) xs
diff --git a/compiler/cmm/CmmContFlowOpt.hs b/compiler/cmm/CmmContFlowOpt.hs
index 219b68e42a..92dd7abba5 100644
--- a/compiler/cmm/CmmContFlowOpt.hs
+++ b/compiler/cmm/CmmContFlowOpt.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE GADTs #-}
+{-# LANGUAGE BangPatterns #-}
{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
module CmmContFlowOpt
( cmmCfgOpts
@@ -8,6 +9,8 @@ module CmmContFlowOpt
)
where
+import GhcPrelude hiding (succ, unzip, zip)
+
import Hoopl.Block
import Hoopl.Collections
import Hoopl.Graph
@@ -21,7 +24,6 @@ import Panic
import Util
import Control.Monad
-import Prelude hiding (succ, unzip, zip)
-- Note [What is shortcutting]
@@ -53,7 +55,7 @@ import Prelude hiding (succ, unzip, zip)
--
-- This optimisation does three things:
--
--- - If a block finishes in an unconditonal branch to another block
+-- - If a block finishes in an unconditional branch to another block
-- and that is the only jump to that block we concatenate the
-- destination block at the end of the current one.
--
@@ -171,11 +173,10 @@ blockConcat splitting_procs g@CmmGraph { g_entry = entry_id }
| otherwise
= (entry_id, shortcut_map)
- -- blocks is a list of blocks in DFS postorder, while blockmap is
- -- a map of blocks. We process each element from blocks and update
- -- blockmap accordingly
- blocks = postorderDfs g
- blockmap = foldr addBlock emptyBody blocks
+ -- blocks are sorted in reverse postorder, but we want to go from the exit
+ -- towards beginning, so we use foldr below.
+ blocks = revPostorder g
+ blockmap = foldl' (flip addBlock) emptyBody blocks
-- Accumulator contains three components:
-- * map of blocks in a graph
@@ -194,7 +195,7 @@ blockConcat splitting_procs g@CmmGraph { g_entry = entry_id }
maybe_concat :: CmmBlock
-> (LabelMap CmmBlock, LabelMap BlockId, LabelMap Int)
-> (LabelMap CmmBlock, LabelMap BlockId, LabelMap Int)
- maybe_concat block (blocks, shortcut_map, backEdges)
+ maybe_concat block (!blocks, !shortcut_map, !backEdges)
-- If:
-- (1) current block ends with unconditional branch to b' and
-- (2) it has exactly one predecessor (namely, current block)
@@ -252,8 +253,8 @@ blockConcat splitting_procs g@CmmGraph { g_entry = entry_id }
-- unconditional jump to a block that can be shortcut.
| Nothing <- callContinuation_maybe last
= let oldSuccs = successors last
- newSuccs = successors swapcond_last
- in ( mapInsert bid (blockJoinTail head swapcond_last) blocks
+ newSuccs = successors rewrite_last
+ in ( mapInsert bid (blockJoinTail head rewrite_last) blocks
, shortcut_map
, if oldSuccs == newSuccs
then backEdges
@@ -281,34 +282,58 @@ blockConcat splitting_procs g@CmmGraph { g_entry = entry_id }
Just b | Just dest <- canShortcut b -> dest
_otherwise -> l
- -- For a conditional, we invert the conditional if that would make it
- -- more likely that the branch-not-taken case becomes a fallthrough.
- -- This helps the native codegen a little bit, and probably has no
- -- effect on LLVM. It's convenient to do it here, where we have the
- -- information about predecessors.
- swapcond_last
+ rewrite_last
+ -- Sometimes we can get rid of the conditional completely.
+ | CmmCondBranch _cond t f _l <- shortcut_last
+ , t == f
+ = CmmBranch t
+
+ -- See Note [Invert Cmm conditionals]
| CmmCondBranch cond t f l <- shortcut_last
- , likelyFalse l
- , numPreds f > 1
- , hasOnePredecessor t
+ , hasOnePredecessor t -- inverting will make t a fallthrough
+ , likelyTrue l || (numPreds f > 1)
, Just cond' <- maybeInvertCmmExpr cond
= CmmCondBranch cond' f t (invertLikeliness l)
| otherwise
= shortcut_last
- likelyFalse (Just False) = True
- likelyFalse Nothing = True
- likelyFalse _ = False
+ likelyTrue (Just True) = True
+ likelyTrue _ = False
- invertLikeliness (Just b) = Just (not b)
- invertLikeliness Nothing = Nothing
+ invertLikeliness :: Maybe Bool -> Maybe Bool
+ invertLikeliness = fmap not
-- Number of predecessors for a block
numPreds bid = mapLookup bid backEdges `orElse` 0
hasOnePredecessor b = numPreds b == 1
+{-
+ Note [Invert Cmm conditionals]
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ The native code generator always produces jumps to the true branch.
+ Falling through to the false branch is however faster. So we try to
+ arrange for that to happen.
+ This means we invert the condition if:
+ * The likely path will become a fallthrough.
+ * We can't guarantee a fallthrough for the false branch but for the
+ true branch.
+
+ In some cases it's faster to avoid inverting when the false branch is likely.
+ However determining when that is the case is neither easy nor cheap so for
+ now we always invert as this produces smaller binaries and code that is
+ equally fast on average. (On an i7-6700K)
+
+ TODO:
+ There is also the edge case when both branches have multiple predecessors.
+ In this case we could assume that we will end up with a jump for BOTH
+ branches. In this case it might be best to put the likely path in the true
+ branch especially if there are large numbers of predecessors as this saves
+ us the jump thats not taken. However I haven't tested this and as of early
+ 2018 we almost never generate cmm where this would apply.
+-}
+
-- Functions for incrementing and decrementing number of predecessors. If
-- decrementing would set the predecessor count to 0, we remove entry from the
-- map.
@@ -406,14 +431,14 @@ removeUnreachableBlocksProc proc@(CmmProc info lbl live g)
-- Remove any info_tbls for unreachable
keep_used :: LabelMap CmmInfoTable -> LabelMap CmmInfoTable
- keep_used bs = mapFoldWithKey keep mapEmpty bs
+ keep_used bs = mapFoldlWithKey keep mapEmpty bs
- keep :: Label -> CmmInfoTable -> LabelMap CmmInfoTable -> LabelMap CmmInfoTable
- keep l i env | l `setMember` used_lbls = mapInsert l i env
+ keep :: LabelMap CmmInfoTable -> Label -> CmmInfoTable -> LabelMap CmmInfoTable
+ keep env l i | l `setMember` used_lbls = mapInsert l i env
| otherwise = env
used_blocks :: [CmmBlock]
- used_blocks = postorderDfs g
+ used_blocks = revPostorder g
used_lbls :: LabelSet
- used_lbls = foldr (setInsert . entryLabel) setEmpty used_blocks
+ used_lbls = setFromList $ map entryLabel used_blocks
diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs
index bb610a0b88..d129d601f4 100644
--- a/compiler/cmm/CmmExpr.hs
+++ b/compiler/cmm/CmmExpr.hs
@@ -1,5 +1,4 @@
{-# LANGUAGE BangPatterns #-}
-{-# LANGUAGE CPP #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
@@ -10,7 +9,10 @@ module CmmExpr
, CmmReg(..), cmmRegType
, CmmLit(..), cmmLitType
, LocalReg(..), localRegType
- , GlobalReg(..), isArgReg, globalRegType, spReg, hpReg, spLimReg, nodeReg, node, baseReg
+ , GlobalReg(..), isArgReg, globalRegType
+ , spReg, hpReg, spLimReg, hpLimReg, nodeReg
+ , currentTSOReg, currentNurseryReg, hpAllocReg, cccsReg
+ , node, baseReg
, VGcPtr(..)
, DefinerOfRegs, UserOfRegs
@@ -28,6 +30,8 @@ module CmmExpr
)
where
+import GhcPrelude
+
import BlockId
import CLabel
import CmmMachOp
@@ -37,7 +41,6 @@ import Outputable (panic)
import Unique
import Data.Set (Set)
-import Data.List
import qualified Data.Set as Set
-----------------------------------------------------------------------------
@@ -184,7 +187,14 @@ data CmmLit
-- Don't use it at all unless tablesNextToCode.
-- It is also used inside the NCG during when generating
-- position-independent code.
- | CmmLabelDiffOff CLabel CLabel Int -- label1 - label2 + offset
+ | CmmLabelDiffOff CLabel CLabel Int Width -- label1 - label2 + offset
+ -- In an expression, the width just has the effect of MO_SS_Conv
+ -- from wordWidth to the desired width.
+ --
+ -- In a static literal, the supported Widths depend on the
+ -- architecture: wordWidth is supported on all
+ -- architectures. Additionally W32 is supported on x86_64 when
+ -- using the small memory model.
| CmmBlock {-# UNPACK #-} !BlockId -- Code label
-- Invariant: must be a continuation BlockId
@@ -217,7 +227,7 @@ cmmLitType cflags (CmmVec (l:ls)) = let ty = cmmLitType cflags l
else panic "cmmLitType: CmmVec"
cmmLitType dflags (CmmLabel lbl) = cmmLabelType dflags lbl
cmmLitType dflags (CmmLabelOff lbl _) = cmmLabelType dflags lbl
-cmmLitType dflags (CmmLabelDiffOff {}) = bWord dflags
+cmmLitType _ (CmmLabelDiffOff _ _ _ width) = cmmBits width
cmmLitType dflags (CmmBlock _) = bWord dflags
cmmLitType dflags (CmmHighStackMark) = bWord dflags
@@ -549,12 +559,18 @@ instance Ord GlobalReg where
compare _ EagerBlackholeInfo = GT
-- convenient aliases
-baseReg, spReg, hpReg, spLimReg, nodeReg :: CmmReg
+baseReg, spReg, hpReg, spLimReg, hpLimReg, nodeReg,
+ currentTSOReg, currentNurseryReg, hpAllocReg, cccsReg :: CmmReg
baseReg = CmmGlobal BaseReg
spReg = CmmGlobal Sp
hpReg = CmmGlobal Hp
+hpLimReg = CmmGlobal HpLim
spLimReg = CmmGlobal SpLim
nodeReg = CmmGlobal node
+currentTSOReg = CmmGlobal CurrentTSO
+currentNurseryReg = CmmGlobal CurrentNursery
+hpAllocReg = CmmGlobal HpAlloc
+cccsReg = CmmGlobal CCCS
node :: GlobalReg
node = VanillaReg 1 VGcPtr
diff --git a/compiler/cmm/CmmImplementSwitchPlans.hs b/compiler/cmm/CmmImplementSwitchPlans.hs
index eda031e840..2e2da5d305 100644
--- a/compiler/cmm/CmmImplementSwitchPlans.hs
+++ b/compiler/cmm/CmmImplementSwitchPlans.hs
@@ -4,6 +4,8 @@ module CmmImplementSwitchPlans
)
where
+import GhcPrelude
+
import Hoopl.Block
import BlockId
import Cmm
diff --git a/compiler/cmm/CmmInfo.hs b/compiler/cmm/CmmInfo.hs
index e849c810ef..43cba2526d 100644
--- a/compiler/cmm/CmmInfo.hs
+++ b/compiler/cmm/CmmInfo.hs
@@ -34,6 +34,8 @@ module CmmInfo (
#include "HsVersions.h"
+import GhcPrelude
+
import Cmm
import CmmUtils
import CLabel
@@ -43,6 +45,7 @@ import Stream (Stream)
import qualified Stream
import Hoopl.Collections
+import Platform
import Maybes
import DynFlags
import Panic
@@ -60,7 +63,8 @@ mkEmptyContInfoTable info_lbl
= CmmInfoTable { cit_lbl = info_lbl
, cit_rep = mkStackRep []
, cit_prof = NoProfilingInfo
- , cit_srt = NoC_SRT }
+ , cit_srt = Nothing
+ , cit_clo = Nothing }
cmmToRawCmm :: DynFlags -> Stream IO CmmGroup ()
-> IO (Stream IO RawCmmGroup ())
@@ -186,7 +190,7 @@ mkInfoTableContents dflags
| StackRep frame <- smrep
= do { (prof_lits, prof_data) <- mkProfLits dflags prof
- ; let (srt_label, srt_bitmap) = mkSRTLit dflags srt
+ ; let (srt_label, srt_bitmap) = mkSRTLit dflags info_lbl srt
; (liveness_lit, liveness_data) <- mkLivenessBits dflags frame
; let
std_info = mkStdInfoTable dflags prof_lits rts_tag srt_bitmap liveness_lit
@@ -199,7 +203,7 @@ mkInfoTableContents dflags
| HeapRep _ ptrs nonptrs closure_type <- smrep
= do { let layout = packIntsCLit dflags ptrs nonptrs
; (prof_lits, prof_data) <- mkProfLits dflags prof
- ; let (srt_label, srt_bitmap) = mkSRTLit dflags srt
+ ; let (srt_label, srt_bitmap) = mkSRTLit dflags info_lbl srt
; (mb_srt_field, mb_layout, extra_bits, ct_data)
<- mk_pieces closure_type srt_label
; let std_info = mkStdInfoTable dflags prof_lits
@@ -209,20 +213,22 @@ mkInfoTableContents dflags
; return (prof_data ++ ct_data, (std_info, extra_bits)) }
where
mk_pieces :: ClosureTypeInfo -> [CmmLit]
- -> UniqSM ( Maybe StgHalfWord -- Override the SRT field with this
- , Maybe CmmLit -- Override the layout field with this
+ -> UniqSM ( Maybe CmmLit -- Override the SRT field with this
+ , Maybe CmmLit -- Override the layout field with this
, [CmmLit] -- "Extra bits" for info table
, [RawCmmDecl]) -- Auxiliary data decls
mk_pieces (Constr con_tag con_descr) _no_srt -- A data constructor
= do { (descr_lit, decl) <- newStringLit con_descr
- ; return ( Just (toStgHalfWord dflags (fromIntegral con_tag))
+ ; return ( Just (CmmInt (fromIntegral con_tag)
+ (halfWordWidth dflags))
, Nothing, [descr_lit], [decl]) }
mk_pieces Thunk srt_label
= return (Nothing, Nothing, srt_label, [])
mk_pieces (ThunkSelector offset) _no_srt
- = return (Just (toStgHalfWord dflags 0), Just (mkWordCLit dflags (fromIntegral offset)), [], [])
+ = return (Just (CmmInt 0 (halfWordWidth dflags)),
+ Just (mkWordCLit dflags (fromIntegral offset)), [], [])
-- Layout known (one free var); we use the layout field for offset
mk_pieces (Fun arity (ArgSpec fun_type)) srt_label
@@ -233,8 +239,9 @@ mkInfoTableContents dflags
= do { (liveness_lit, liveness_data) <- mkLivenessBits dflags arg_bits
; let fun_type | null liveness_data = aRG_GEN
| otherwise = aRG_GEN_BIG
- extra_bits = [ packIntsCLit dflags fun_type arity
- , srt_lit, liveness_lit, slow_entry ]
+ extra_bits = [ packIntsCLit dflags fun_type arity ]
+ ++ (if inlineSRT dflags then [] else [ srt_lit ])
+ ++ [ liveness_lit, slow_entry ]
; return (Nothing, Nothing, extra_bits, liveness_data) }
where
slow_entry = CmmLabel (toSlowEntryLbl info_lbl)
@@ -253,12 +260,24 @@ packIntsCLit dflags a b = packHalfWordsCLit dflags
mkSRTLit :: DynFlags
- -> C_SRT
+ -> CLabel
+ -> Maybe CLabel
-> ([CmmLit], -- srt_label, if any
- StgHalfWord) -- srt_bitmap
-mkSRTLit dflags NoC_SRT = ([], toStgHalfWord dflags 0)
-mkSRTLit dflags (C_SRT lbl off bitmap) = ([cmmLabelOffW dflags lbl off], bitmap)
+ CmmLit) -- srt_bitmap
+mkSRTLit dflags info_lbl (Just lbl)
+ | inlineSRT dflags
+ = ([], CmmLabelDiffOff lbl info_lbl 0 (halfWordWidth dflags))
+mkSRTLit dflags _ Nothing = ([], CmmInt 0 (halfWordWidth dflags))
+mkSRTLit dflags _ (Just lbl) = ([CmmLabel lbl], CmmInt 1 (halfWordWidth dflags))
+
+-- | Is the SRT offset field inline in the info table on this platform?
+--
+-- See the section "Referring to an SRT from the info table" in
+-- Note [SRTs] in CmmBuildInfoTables.hs
+inlineSRT :: DynFlags -> Bool
+inlineSRT dflags = platformArch (targetPlatform dflags) == ArchX86_64
+ && tablesNextToCode dflags
-------------------------------------------------------------------------
--
@@ -290,10 +309,10 @@ makeRelativeRefTo :: DynFlags -> CLabel -> CmmLit -> CmmLit
makeRelativeRefTo dflags info_lbl (CmmLabel lbl)
| tablesNextToCode dflags
- = CmmLabelDiffOff lbl info_lbl 0
+ = CmmLabelDiffOff lbl info_lbl 0 (wordWidth dflags)
makeRelativeRefTo dflags info_lbl (CmmLabelOff lbl off)
| tablesNextToCode dflags
- = CmmLabelDiffOff lbl info_lbl off
+ = CmmLabelDiffOff lbl info_lbl off (wordWidth dflags)
makeRelativeRefTo _ _ lit = lit
@@ -366,23 +385,23 @@ mkStdInfoTable
:: DynFlags
-> (CmmLit,CmmLit) -- Closure type descr and closure descr (profiling)
-> Int -- Closure RTS tag
- -> StgHalfWord -- SRT length
+ -> CmmLit -- SRT length
-> CmmLit -- layout field
-> [CmmLit]
-mkStdInfoTable dflags (type_descr, closure_descr) cl_type srt_len layout_lit
+mkStdInfoTable dflags (type_descr, closure_descr) cl_type srt layout_lit
= -- Parallel revertible-black hole field
prof_info
-- Ticky info (none at present)
-- Debug info (none at present)
- ++ [layout_lit, type_lit]
+ ++ [layout_lit, tag, srt]
where
prof_info
| gopt Opt_SccProfilingOn dflags = [type_descr, closure_descr]
| otherwise = []
- type_lit = packHalfWordsCLit dflags (toStgHalfWord dflags (fromIntegral cl_type)) srt_len
+ tag = CmmInt (fromIntegral cl_type) (halfWordWidth dflags)
-------------------------------------------------------------------------
--
@@ -415,9 +434,19 @@ srtEscape dflags = toStgHalfWord dflags (-1)
--
-------------------------------------------------------------------------
+-- | Wrap a 'CmmExpr' in an alignment check when @-falignment-sanitisation@ is
+-- enabled.
+wordAligned :: DynFlags -> CmmExpr -> CmmExpr
+wordAligned dflags e
+ | gopt Opt_AlignmentSanitisation dflags
+ = CmmMachOp (MO_AlignmentCheck (wORD_SIZE dflags) (wordWidth dflags)) [e]
+ | otherwise
+ = e
+
closureInfoPtr :: DynFlags -> CmmExpr -> CmmExpr
-- Takes a closure pointer and returns the info table pointer
-closureInfoPtr dflags e = CmmLoad e (bWord dflags)
+closureInfoPtr dflags e =
+ CmmLoad (wordAligned dflags e) (bWord dflags)
entryCode :: DynFlags -> CmmExpr -> CmmExpr
-- Takes an info pointer (the first word of a closure)
diff --git a/compiler/cmm/CmmLayoutStack.hs b/compiler/cmm/CmmLayoutStack.hs
index 4151aa0c4e..1d6c209953 100644
--- a/compiler/cmm/CmmLayoutStack.hs
+++ b/compiler/cmm/CmmLayoutStack.hs
@@ -1,8 +1,10 @@
-{-# LANGUAGE BangPatterns, CPP, RecordWildCards, GADTs #-}
+{-# LANGUAGE BangPatterns, RecordWildCards, GADTs #-}
module CmmLayoutStack (
cmmLayoutStack, setInfoTableStackMap
) where
+import GhcPrelude hiding ((<*>))
+
import StgCmmUtils ( callerSaveVolatileRegs ) -- XXX layering violation
import StgCmmForeign ( saveThreadState, loadThreadState ) -- XXX layering violation
@@ -35,11 +37,7 @@ import qualified Data.Set as Set
import Control.Monad.Fix
import Data.Array as Array
import Data.Bits
-import Data.List (nub)
-
-import Prelude hiding ((<*>))
-
-#include "HsVersions.h"
+import Data.List (nub, foldl')
{- Note [Stack Layout]
@@ -246,7 +244,7 @@ cmmLayoutStack dflags procpoints entry_args
-- We need liveness info. Dead assignments are removed later
-- by the sinking pass.
let liveness = cmmLocalLiveness dflags graph
- blocks = postorderDfs graph
+ blocks = revPostorder graph
(final_stackmaps, _final_high_sp, new_blocks) <-
mfix $ \ ~(rec_stackmaps, rec_high_sp, _new_blocks) ->
@@ -324,7 +322,7 @@ layout dflags procpoints liveness entry entry_args final_stackmaps final_sp_high
-- Sp = Sp + sp_off -- Sp adjustment goes here
-- last1 -- the last node
--
- let middle_pre = blockToList $ foldl blockSnoc middle0 middle1
+ let middle_pre = blockToList $ foldl' blockSnoc middle0 middle1
let final_blocks =
manifestSp dflags final_stackmaps stack0 sp0 final_sp_high
@@ -579,15 +577,8 @@ makeFixupBlock dflags sp0 l stack tscope assigs
| otherwise = do
tmp_lbl <- newBlockId
let sp_off = sp0 - sm_sp stack
- maybeAddUnwind block
- | debugLevel dflags > 0
- = block `blockSnoc` CmmUnwind [(Sp, Just unwind_val)]
- | otherwise
- = block
- where unwind_val = cmmOffset dflags (CmmReg spReg) (sm_sp stack)
block = blockJoin (CmmEntry tmp_lbl tscope)
- ( maybeAddSpAdj dflags sp_off
- $ maybeAddUnwind
+ ( maybeAddSpAdj dflags sp0 sp_off
$ blockFromList assigs )
(CmmBranch l)
return (tmp_lbl, [block])
@@ -853,28 +844,7 @@ manifestSp dflags stackmaps stack0 sp0 sp_high
adj_pre_sp = mapExpDeep (areaToSp dflags sp0 sp_high area_off)
adj_post_sp = mapExpDeep (areaToSp dflags (sp0 - sp_off) sp_high area_off)
- -- Add unwind pseudo-instruction at the beginning of each block to
- -- document Sp level for debugging
- add_initial_unwind block
- | debugLevel dflags > 0
- = CmmUnwind [(Sp, Just sp_unwind)] `blockCons` block
- | otherwise
- = block
- where sp_unwind = CmmRegOff spReg (sp0 - wORD_SIZE dflags)
-
- -- Add unwind pseudo-instruction right before the Sp adjustment
- -- if there is one.
- add_adj_unwind block
- | debugLevel dflags > 0
- , sp_off /= 0
- = block `blockSnoc` CmmUnwind [(Sp, Just sp_unwind)]
- | otherwise
- = block
- where sp_unwind = CmmRegOff spReg (sp0 - wORD_SIZE dflags - sp_off)
-
- final_middle = maybeAddSpAdj dflags sp_off
- . add_adj_unwind
- . add_initial_unwind
+ final_middle = maybeAddSpAdj dflags sp0 sp_off
. blockFromList
. map adj_pre_sp
. elimStackStores stack0 stackmaps area_off
@@ -893,11 +863,33 @@ getAreaOff stackmaps (Young l) =
Nothing -> pprPanic "getAreaOff" (ppr l)
-maybeAddSpAdj :: DynFlags -> ByteOff -> Block CmmNode O O -> Block CmmNode O O
-maybeAddSpAdj _ 0 block = block
-maybeAddSpAdj dflags sp_off block = block `blockSnoc` adj
+maybeAddSpAdj
+ :: DynFlags -> ByteOff -> ByteOff -> Block CmmNode O O -> Block CmmNode O O
+maybeAddSpAdj dflags sp0 sp_off block =
+ add_initial_unwind $ add_adj_unwind $ adj block
where
- adj = CmmAssign spReg (cmmOffset dflags (CmmReg spReg) sp_off)
+ adj block
+ | sp_off /= 0
+ = block `blockSnoc` CmmAssign spReg (cmmOffset dflags spExpr sp_off)
+ | otherwise = block
+ -- Add unwind pseudo-instruction at the beginning of each block to
+ -- document Sp level for debugging
+ add_initial_unwind block
+ | debugLevel dflags > 0
+ = CmmUnwind [(Sp, Just sp_unwind)] `blockCons` block
+ | otherwise
+ = block
+ where sp_unwind = CmmRegOff spReg (sp0 - wORD_SIZE dflags)
+
+ -- Add unwind pseudo-instruction right after the Sp adjustment
+ -- if there is one.
+ add_adj_unwind block
+ | debugLevel dflags > 0
+ , sp_off /= 0
+ = block `blockSnoc` CmmUnwind [(Sp, Just sp_unwind)]
+ | otherwise
+ = block
+ where sp_unwind = CmmRegOff spReg (sp0 - wORD_SIZE dflags - sp_off)
{- Note [SP old/young offsets]
@@ -920,7 +912,7 @@ arguments.
areaToSp :: DynFlags -> ByteOff -> ByteOff -> (Area -> StackLoc) -> CmmExpr -> CmmExpr
areaToSp dflags sp_old _sp_hwm area_off (CmmStackSlot area n)
- = cmmOffset dflags (CmmReg spReg) (sp_old - area_off area - n)
+ = cmmOffset dflags spExpr (sp_old - area_off area - n)
-- Replace (CmmStackSlot area n) with an offset from Sp
areaToSp dflags _ sp_hwm _ (CmmLit CmmHighStackMark)
@@ -1090,7 +1082,7 @@ insertReloads dflags stackmap live =
[ CmmAssign (CmmLocal reg)
-- This cmmOffset basically corresponds to manifesting
-- @CmmStackSlot Old sp_off@, see Note [SP old/young offsets]
- (CmmLoad (cmmOffset dflags (CmmReg spReg) (sp_off - reg_off))
+ (CmmLoad (cmmOffset dflags spExpr (sp_off - reg_off))
(localRegType reg))
| (reg, reg_off) <- stackSlotRegs stackmap
, reg `elemRegSet` live
@@ -1143,7 +1135,7 @@ lowerSafeForeignCall dflags block
-- Both 'id' and 'new_base' are KindNonPtr because they're
-- RTS-only objects and are not subject to garbage collection
id <- newTemp (bWord dflags)
- new_base <- newTemp (cmmRegType dflags (CmmGlobal BaseReg))
+ new_base <- newTemp (cmmRegType dflags baseReg)
let (caller_save, caller_load) = callerSaveVolatileRegs dflags
save_state_code <- saveThreadState dflags
load_state_code <- loadThreadState dflags
@@ -1154,7 +1146,7 @@ lowerSafeForeignCall dflags block
resume = mkMiddle (callResumeThread new_base id) <*>
-- Assign the result to BaseReg: we
-- might now have a different Capability!
- mkAssign (CmmGlobal BaseReg) (CmmReg (CmmLocal new_base)) <*>
+ mkAssign baseReg (CmmReg (CmmLocal new_base)) <*>
caller_load <*>
load_state_code
@@ -1169,7 +1161,7 @@ lowerSafeForeignCall dflags block
-- different. Hence we continue by jumping to the top stack frame,
-- not by jumping to succ.
jump = CmmCall { cml_target = entryCode dflags $
- CmmLoad (CmmReg spReg) (bWord dflags)
+ CmmLoad spExpr (bWord dflags)
, cml_cont = Just succ
, cml_args_regs = regs
, cml_args = widthInBytes (wordWidth dflags)
@@ -1199,7 +1191,7 @@ callSuspendThread dflags id intrbl =
CmmUnsafeForeignCall
(ForeignTarget (foreignLbl (fsLit "suspendThread"))
(ForeignConvention CCallConv [AddrHint, NoHint] [AddrHint] CmmMayReturn))
- [id] [CmmReg (CmmGlobal BaseReg), mkIntExpr dflags (fromEnum intrbl)]
+ [id] [baseExpr, mkIntExpr dflags (fromEnum intrbl)]
callResumeThread :: LocalReg -> LocalReg -> CmmNode O O
callResumeThread new_base id =
diff --git a/compiler/cmm/CmmLex.x b/compiler/cmm/CmmLex.x
index 82f7bee965..691ca5eb28 100644
--- a/compiler/cmm/CmmLex.x
+++ b/compiler/cmm/CmmLex.x
@@ -22,6 +22,8 @@ module CmmLex (
CmmToken(..), cmmlex,
) where
+import GhcPrelude
+
import CmmExpr
import Lexer
@@ -97,6 +99,10 @@ $white_no_nl+ ;
"&&" { kw CmmT_BoolAnd }
"||" { kw CmmT_BoolOr }
+ "True" { kw CmmT_True }
+ "False" { kw CmmT_False }
+ "likely" { kw CmmT_likely}
+
P@decimal { global_regN (\n -> VanillaReg n VGcPtr) }
R@decimal { global_regN (\n -> VanillaReg n VNonGcPtr) }
F@decimal { global_regN FloatReg }
@@ -178,6 +184,9 @@ data CmmToken
| CmmT_Int Integer
| CmmT_Float Rational
| CmmT_EOF
+ | CmmT_False
+ | CmmT_True
+ | CmmT_likely
deriving (Show)
-- -----------------------------------------------------------------------------
@@ -264,7 +273,10 @@ reservedWordsFM = listToUFM $
( "b512", CmmT_bits512 ),
( "f32", CmmT_float32 ),
( "f64", CmmT_float64 ),
- ( "gcptr", CmmT_gcptr )
+ ( "gcptr", CmmT_gcptr ),
+ ( "likely", CmmT_likely),
+ ( "True", CmmT_True ),
+ ( "False", CmmT_False )
]
tok_decimal span buf len
diff --git a/compiler/cmm/CmmLint.hs b/compiler/cmm/CmmLint.hs
index 64b4400378..3224bb8cab 100644
--- a/compiler/cmm/CmmLint.hs
+++ b/compiler/cmm/CmmLint.hs
@@ -10,6 +10,8 @@ module CmmLint (
cmmLint, cmmLintGraph
) where
+import GhcPrelude
+
import Hoopl.Block
import Hoopl.Collections
import Hoopl.Graph
diff --git a/compiler/cmm/CmmLive.hs b/compiler/cmm/CmmLive.hs
index 944a9e394e..f340c32c8a 100644
--- a/compiler/cmm/CmmLive.hs
+++ b/compiler/cmm/CmmLive.hs
@@ -12,6 +12,8 @@ module CmmLive
)
where
+import GhcPrelude
+
import DynFlags
import BlockId
import Cmm
diff --git a/compiler/cmm/CmmMachOp.hs b/compiler/cmm/CmmMachOp.hs
index d736f14bfc..c5e9d9bf27 100644
--- a/compiler/cmm/CmmMachOp.hs
+++ b/compiler/cmm/CmmMachOp.hs
@@ -1,5 +1,3 @@
-{-# LANGUAGE CPP #-}
-
module CmmMachOp
( MachOp(..)
, pprMachOp, isCommutableMachOp, isAssociativeMachOp
@@ -28,7 +26,7 @@ module CmmMachOp
)
where
-#include "HsVersions.h"
+import GhcPrelude
import CmmType
import Outputable
@@ -136,9 +134,12 @@ data MachOp
-- Floating point vector operations
| MO_VF_Add Length Width
| MO_VF_Sub Length Width
- | MO_VF_Neg Length Width -- unary -
+ | MO_VF_Neg Length Width -- unary negation
| MO_VF_Mul Length Width
| MO_VF_Quot Length Width
+
+ -- Alignment check (for -falignment-sanitisation)
+ | MO_AlignmentCheck Int Width
deriving (Eq, Show)
pprMachOp :: MachOp -> SDoc
@@ -417,6 +418,8 @@ machOpResultType dflags mop tys =
MO_VF_Mul l w -> cmmVec l (cmmFloat w)
MO_VF_Quot l w -> cmmVec l (cmmFloat w)
MO_VF_Neg l w -> cmmVec l (cmmFloat w)
+
+ MO_AlignmentCheck _ _ -> ty1
where
(ty1:_) = tys
@@ -507,6 +510,8 @@ machOpArgReps dflags op =
MO_VF_Quot _ r -> [r,r]
MO_VF_Neg _ r -> [r]
+ MO_AlignmentCheck _ r -> [r]
+
-----------------------------------------------------------------------------
-- CallishMachOp
-----------------------------------------------------------------------------
@@ -526,6 +531,9 @@ data CallishMachOp
| MO_F64_Asin
| MO_F64_Acos
| MO_F64_Atan
+ | MO_F64_Asinh
+ | MO_F64_Acosh
+ | MO_F64_Atanh
| MO_F64_Log
| MO_F64_Exp
| MO_F64_Fabs
@@ -540,6 +548,9 @@ data CallishMachOp
| MO_F32_Asin
| MO_F32_Acos
| MO_F32_Atan
+ | MO_F32_Asinh
+ | MO_F32_Acosh
+ | MO_F32_Atanh
| MO_F32_Log
| MO_F32_Exp
| MO_F32_Fabs
@@ -551,6 +562,7 @@ data CallishMachOp
| MO_U_QuotRem Width
| MO_U_QuotRem2 Width
| MO_Add2 Width
+ | MO_AddWordC Width
| MO_SubWordC Width
| MO_AddIntC Width
| MO_SubIntC Width
@@ -575,8 +587,11 @@ data CallishMachOp
| MO_Memcpy Int
| MO_Memset Int
| MO_Memmove Int
+ | MO_Memcmp Int
| MO_PopCnt Width
+ | MO_Pdep Width
+ | MO_Pext Width
| MO_Clz Width
| MO_Ctz Width
@@ -607,6 +622,7 @@ callishMachOpHints op = case op of
MO_Memcpy _ -> ([], [AddrHint,AddrHint,NoHint])
MO_Memset _ -> ([], [AddrHint,NoHint,NoHint])
MO_Memmove _ -> ([], [AddrHint,AddrHint,NoHint])
+ MO_Memcmp _ -> ([], [AddrHint, AddrHint, NoHint])
_ -> ([],[])
-- empty lists indicate NoHint
@@ -616,4 +632,5 @@ machOpMemcpyishAlign op = case op of
MO_Memcpy align -> Just align
MO_Memset align -> Just align
MO_Memmove align -> Just align
+ MO_Memcmp align -> Just align
_ -> Nothing
diff --git a/compiler/cmm/CmmMonad.hs b/compiler/cmm/CmmMonad.hs
index fc66bf5928..f3b4441a9b 100644
--- a/compiler/cmm/CmmMonad.hs
+++ b/compiler/cmm/CmmMonad.hs
@@ -7,16 +7,15 @@
-- The parser for C-- requires access to a lot more of the 'DynFlags',
-- so 'PD' provides access to 'DynFlags' via a 'HasDynFlags' instance.
-----------------------------------------------------------------------------
-{-# LANGUAGE CPP #-}
module CmmMonad (
PD(..)
, liftP
) where
+import GhcPrelude
+
import Control.Monad
-#if __GLASGOW_HASKELL__ > 710
import qualified Control.Monad.Fail as MonadFail
-#endif
import DynFlags
import Lexer
@@ -32,12 +31,10 @@ instance Applicative PD where
instance Monad PD where
(>>=) = thenPD
- fail = failPD
+ fail = MonadFail.fail
-#if __GLASGOW_HASKELL__ > 710
instance MonadFail.MonadFail PD where
fail = failPD
-#endif
liftP :: P a -> PD a
liftP (P f) = PD $ \_ s -> f s
diff --git a/compiler/cmm/CmmNode.hs b/compiler/cmm/CmmNode.hs
index f452b0b3f5..286b1e306c 100644
--- a/compiler/cmm/CmmNode.hs
+++ b/compiler/cmm/CmmNode.hs
@@ -22,6 +22,8 @@ module CmmNode (
CmmTickScope(..), isTickSubScope, combineTickScopes,
) where
+import GhcPrelude hiding (succ)
+
import CodeGen.Platform
import CmmExpr
import CmmSwitch
@@ -38,7 +40,6 @@ import Hoopl.Graph
import Hoopl.Label
import Data.Maybe
import Data.List (tails,sortBy)
-import Prelude hiding (succ)
import Unique (nonDetCmpUnique)
import Util
diff --git a/compiler/cmm/CmmOpt.hs b/compiler/cmm/CmmOpt.hs
index 3cb28217f2..e837d29783 100644
--- a/compiler/cmm/CmmOpt.hs
+++ b/compiler/cmm/CmmOpt.hs
@@ -1,10 +1,6 @@
-{-# LANGUAGE CPP #-}
-
-- The default iteration limit is a bit too low for the definitions
-- in this module.
-#if __GLASGOW_HASKELL__ >= 800
{-# OPTIONS_GHC -fmax-pmcheck-iterations=10000000 #-}
-#endif
-----------------------------------------------------------------------------
--
@@ -21,7 +17,7 @@ module CmmOpt (
cmmMachOpFoldM
) where
-#include "HsVersions.h"
+import GhcPrelude
import CmmUtils
import Cmm
@@ -357,35 +353,51 @@ cmmMachOpFoldM dflags mop [x, (CmmLit (CmmInt n _))]
MO_U_Quot rep
| Just p <- exactLog2 n ->
Just (cmmMachOpFold dflags (MO_U_Shr rep) [x, CmmLit (CmmInt p rep)])
+ MO_U_Rem rep
+ | Just _ <- exactLog2 n ->
+ Just (cmmMachOpFold dflags (MO_And rep) [x, CmmLit (CmmInt (n - 1) rep)])
MO_S_Quot rep
| Just p <- exactLog2 n,
- CmmReg _ <- x -> -- We duplicate x below, hence require
+ CmmReg _ <- x -> -- We duplicate x in signedQuotRemHelper, hence require
+ -- it is a reg. FIXME: remove this restriction.
+ Just (cmmMachOpFold dflags (MO_S_Shr rep)
+ [signedQuotRemHelper rep p, CmmLit (CmmInt p rep)])
+ MO_S_Rem rep
+ | Just p <- exactLog2 n,
+ CmmReg _ <- x -> -- We duplicate x in signedQuotRemHelper, hence require
-- it is a reg. FIXME: remove this restriction.
- -- shift right is not the same as quot, because it rounds
- -- to minus infinity, whereasq quot rounds toward zero.
- -- To fix this up, we add one less than the divisor to the
- -- dividend if it is a negative number.
- --
- -- to avoid a test/jump, we use the following sequence:
- -- x1 = x >> word_size-1 (all 1s if -ve, all 0s if +ve)
- -- x2 = y & (divisor-1)
- -- result = (x+x2) >>= log2(divisor)
- -- this could be done a bit more simply using conditional moves,
- -- but we're processor independent here.
- --
- -- we optimise the divide by 2 case slightly, generating
- -- x1 = x >> word_size-1 (unsigned)
- -- return = (x + x1) >>= log2(divisor)
- let
- bits = fromIntegral (widthInBits rep) - 1
- shr = if p == 1 then MO_U_Shr rep else MO_S_Shr rep
- x1 = CmmMachOp shr [x, CmmLit (CmmInt bits rep)]
- x2 = if p == 1 then x1 else
- CmmMachOp (MO_And rep) [x1, CmmLit (CmmInt (n-1) rep)]
- x3 = CmmMachOp (MO_Add rep) [x, x2]
- in
- Just (cmmMachOpFold dflags (MO_S_Shr rep) [x3, CmmLit (CmmInt p rep)])
+ -- We replace (x `rem` 2^p) by (x - (x `quot` 2^p) * 2^p).
+ -- Moreover, we fuse MO_S_Shr (last operation of MO_S_Quot)
+ -- and MO_S_Shl (multiplication by 2^p) into a single MO_And operation.
+ Just (cmmMachOpFold dflags (MO_Sub rep)
+ [x, cmmMachOpFold dflags (MO_And rep)
+ [signedQuotRemHelper rep p, CmmLit (CmmInt (- n) rep)]])
_ -> Nothing
+ where
+ -- In contrast with unsigned integers, for signed ones
+ -- shift right is not the same as quot, because it rounds
+ -- to minus infinity, whereas quot rounds toward zero.
+ -- To fix this up, we add one less than the divisor to the
+ -- dividend if it is a negative number.
+ --
+ -- to avoid a test/jump, we use the following sequence:
+ -- x1 = x >> word_size-1 (all 1s if -ve, all 0s if +ve)
+ -- x2 = y & (divisor-1)
+ -- result = x + x2
+ -- this could be done a bit more simply using conditional moves,
+ -- but we're processor independent here.
+ --
+ -- we optimise the divide by 2 case slightly, generating
+ -- x1 = x >> word_size-1 (unsigned)
+ -- return = x + x1
+ signedQuotRemHelper :: Width -> Integer -> CmmExpr
+ signedQuotRemHelper rep p = CmmMachOp (MO_Add rep) [x, x2]
+ where
+ bits = fromIntegral (widthInBits rep) - 1
+ shr = if p == 1 then MO_U_Shr rep else MO_S_Shr rep
+ x1 = CmmMachOp shr [x, CmmLit (CmmInt bits rep)]
+ x2 = if p == 1 then x1 else
+ CmmMachOp (MO_And rep) [x1, CmmLit (CmmInt (n-1) rep)]
-- ToDo (#7116): optimise floating-point multiplication, e.g. x*2.0 -> x+x
-- Unfortunately this needs a unique supply because x might not be a
@@ -410,14 +422,6 @@ That's what the constant-folding operations on comparison operators do above.
-- -----------------------------------------------------------------------------
-- Utils
-isLit :: CmmExpr -> Bool
-isLit (CmmLit _) = True
-isLit _ = False
-
-isComparisonExpr :: CmmExpr -> Bool
-isComparisonExpr (CmmMachOp op _) = isComparisonMachOp op
-isComparisonExpr _ = False
-
isPicReg :: CmmExpr -> Bool
isPicReg (CmmReg (CmmGlobal PicBaseReg)) = True
isPicReg _ = False
diff --git a/compiler/cmm/CmmParse.y b/compiler/cmm/CmmParse.y
index e2fe593b5d..8cc988383e 100644
--- a/compiler/cmm/CmmParse.y
+++ b/compiler/cmm/CmmParse.y
@@ -200,6 +200,8 @@ necessary to the stack to accommodate it (e.g. 2).
{
module CmmParse ( parseCmmFile ) where
+import GhcPrelude
+
import StgCmmExtCode
import CmmCallConv
import StgCmmProf
@@ -297,6 +299,10 @@ import qualified Data.Map as M
'&&' { L _ (CmmT_BoolAnd) }
'||' { L _ (CmmT_BoolOr) }
+ 'True' { L _ (CmmT_True ) }
+ 'False' { L _ (CmmT_False) }
+ 'likely'{ L _ (CmmT_likely)}
+
'CLOSURE' { L _ (CmmT_CLOSURE) }
'INFO_TABLE' { L _ (CmmT_INFO_TABLE) }
'INFO_TABLE_RET'{ L _ (CmmT_INFO_TABLE_RET) }
@@ -396,8 +402,6 @@ statics :: { [CmmParse [CmmStatic]] }
: {- empty -} { [] }
| static statics { $1 : $2 }
--- Strings aren't used much in the RTS HC code, so it doesn't seem
--- worth allowing inline strings. C-- doesn't allow them anyway.
static :: { CmmParse [CmmStatic] }
: type expr ';' { do e <- $2;
return [CmmStaticLit (getLit e)] }
@@ -466,7 +470,7 @@ info :: { CmmParse (CLabel, Maybe CmmInfoTable, [LocalReg]) }
return (mkCmmEntryLabel pkg $3,
Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3
, cit_rep = rep
- , cit_prof = prof, cit_srt = NoC_SRT },
+ , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing },
[]) }
| 'INFO_TABLE_FUN' '(' NAME ',' INT ',' INT ',' INT ',' STRING ',' STRING ',' INT ')'
@@ -482,7 +486,7 @@ info :: { CmmParse (CLabel, Maybe CmmInfoTable, [LocalReg]) }
return (mkCmmEntryLabel pkg $3,
Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3
, cit_rep = rep
- , cit_prof = prof, cit_srt = NoC_SRT },
+ , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing },
[]) }
-- we leave most of the fields zero here. This is only used
-- to generate the BCO info table in the RTS at the moment.
@@ -500,7 +504,7 @@ info :: { CmmParse (CLabel, Maybe CmmInfoTable, [LocalReg]) }
return (mkCmmEntryLabel pkg $3,
Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3
, cit_rep = rep
- , cit_prof = prof, cit_srt = NoC_SRT },
+ , cit_prof = prof, cit_srt = Nothing,cit_clo = Nothing },
[]) }
-- If profiling is on, this string gets duplicated,
@@ -517,7 +521,7 @@ info :: { CmmParse (CLabel, Maybe CmmInfoTable, [LocalReg]) }
return (mkCmmEntryLabel pkg $3,
Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3
, cit_rep = rep
- , cit_prof = prof, cit_srt = NoC_SRT },
+ , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing },
[]) }
| 'INFO_TABLE_RET' '(' NAME ',' INT ')'
@@ -528,7 +532,7 @@ info :: { CmmParse (CLabel, Maybe CmmInfoTable, [LocalReg]) }
return (mkCmmRetLabel pkg $3,
Just $ CmmInfoTable { cit_lbl = mkCmmRetInfoLabel pkg $3
, cit_rep = rep
- , cit_prof = prof, cit_srt = NoC_SRT },
+ , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing },
[]) }
| 'INFO_TABLE_RET' '(' NAME ',' INT ',' formals0 ')'
@@ -538,12 +542,12 @@ info :: { CmmParse (CLabel, Maybe CmmInfoTable, [LocalReg]) }
live <- sequence $7
let prof = NoProfilingInfo
-- drop one for the info pointer
- bitmap = mkLiveness dflags (map Just (drop 1 live))
+ bitmap = mkLiveness dflags (drop 1 live)
rep = mkRTSRep (fromIntegral $5) $ mkStackRep bitmap
return (mkCmmRetLabel pkg $3,
Just $ CmmInfoTable { cit_lbl = mkCmmRetInfoLabel pkg $3
, cit_rep = rep
- , cit_prof = prof, cit_srt = NoC_SRT },
+ , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing },
live) }
body :: { CmmParse () }
@@ -627,10 +631,10 @@ stmt :: { CmmParse () }
{ doCall $2 [] $4 }
| '(' formals ')' '=' 'call' expr '(' exprs0 ')' ';'
{ doCall $6 $2 $8 }
- | 'if' bool_expr 'goto' NAME
- { do l <- lookupLabel $4; cmmRawIf $2 l }
- | 'if' bool_expr '{' body '}' else
- { cmmIfThenElse $2 (withSourceNote $3 $5 $4) $6 }
+ | 'if' bool_expr cond_likely 'goto' NAME
+ { do l <- lookupLabel $5; cmmRawIf $2 l $3 }
+ | 'if' bool_expr cond_likely '{' body '}' else
+ { cmmIfThenElse $2 (withSourceNote $4 $6 $5) $7 $3 }
| 'push' '(' exprs0 ')' maybe_body
{ pushStackFrame $3 $5 }
| 'reserve' expr '=' lreg maybe_body
@@ -719,6 +723,12 @@ else :: { CmmParse () }
: {- empty -} { return () }
| 'else' '{' body '}' { withSourceNote $2 $4 $3 }
+cond_likely :: { Maybe Bool }
+ : '(' 'likely' ':' 'True' ')' { Just True }
+ | '(' 'likely' ':' 'False' ')' { Just False }
+ | {- empty -} { Nothing }
+
+
-- we have to write this out longhand so that Happy's precedence rules
-- can kick in.
expr :: { CmmParse CmmExpr }
@@ -992,6 +1002,7 @@ callishMachOps = listToUFM $
( "memcpy", memcpyLikeTweakArgs MO_Memcpy ),
( "memset", memcpyLikeTweakArgs MO_Memset ),
( "memmove", memcpyLikeTweakArgs MO_Memmove ),
+ ( "memcmp", memcpyLikeTweakArgs MO_Memcmp ),
("prefetch0", (,) $ MO_Prefetch_Data 0),
("prefetch1", (,) $ MO_Prefetch_Data 1),
@@ -1003,6 +1014,16 @@ callishMachOps = listToUFM $
( "popcnt32", (,) $ MO_PopCnt W32 ),
( "popcnt64", (,) $ MO_PopCnt W64 ),
+ ( "pdep8", (,) $ MO_Pdep W8 ),
+ ( "pdep16", (,) $ MO_Pdep W16 ),
+ ( "pdep32", (,) $ MO_Pdep W32 ),
+ ( "pdep64", (,) $ MO_Pdep W64 ),
+
+ ( "pext8", (,) $ MO_Pext W8 ),
+ ( "pext16", (,) $ MO_Pext W16 ),
+ ( "pext32", (,) $ MO_Pext W32 ),
+ ( "pext64", (,) $ MO_Pext W64 ),
+
( "cmpxchg8", (,) $ MO_Cmpxchg W8 ),
( "cmpxchg16", (,) $ MO_Cmpxchg W16 ),
( "cmpxchg32", (,) $ MO_Cmpxchg W32 ),
@@ -1276,11 +1297,11 @@ data BoolExpr
-- ToDo: smart constructors which simplify the boolean expression.
-cmmIfThenElse cond then_part else_part = do
+cmmIfThenElse cond then_part else_part likely = do
then_id <- newBlockId
join_id <- newBlockId
c <- cond
- emitCond c then_id
+ emitCond c then_id likely
else_part
emit (mkBranch join_id)
emitLabel then_id
@@ -1288,38 +1309,38 @@ cmmIfThenElse cond then_part else_part = do
-- fall through to join
emitLabel join_id
-cmmRawIf cond then_id = do
+cmmRawIf cond then_id likely = do
c <- cond
- emitCond c then_id
+ emitCond c then_id likely
-- 'emitCond cond true_id' emits code to test whether the cond is true,
-- branching to true_id if so, and falling through otherwise.
-emitCond (BoolTest e) then_id = do
+emitCond (BoolTest e) then_id likely = do
else_id <- newBlockId
- emit (mkCbranch e then_id else_id Nothing)
+ emit (mkCbranch e then_id else_id likely)
emitLabel else_id
-emitCond (BoolNot (BoolTest (CmmMachOp op args))) then_id
+emitCond (BoolNot (BoolTest (CmmMachOp op args))) then_id likely
| Just op' <- maybeInvertComparison op
- = emitCond (BoolTest (CmmMachOp op' args)) then_id
-emitCond (BoolNot e) then_id = do
+ = emitCond (BoolTest (CmmMachOp op' args)) then_id (not <$> likely)
+emitCond (BoolNot e) then_id likely = do
else_id <- newBlockId
- emitCond e else_id
+ emitCond e else_id likely
emit (mkBranch then_id)
emitLabel else_id
-emitCond (e1 `BoolOr` e2) then_id = do
- emitCond e1 then_id
- emitCond e2 then_id
-emitCond (e1 `BoolAnd` e2) then_id = do
+emitCond (e1 `BoolOr` e2) then_id likely = do
+ emitCond e1 then_id likely
+ emitCond e2 then_id likely
+emitCond (e1 `BoolAnd` e2) then_id likely = do
-- we'd like to invert one of the conditionals here to avoid an
-- extra branch instruction, but we can't use maybeInvertComparison
-- here because we can't look too closely at the expression since
-- we're in a loop.
and_id <- newBlockId
else_id <- newBlockId
- emitCond e1 and_id
+ emitCond e1 and_id likely
emit (mkBranch else_id)
emitLabel and_id
- emitCond e2 then_id
+ emitCond e2 then_id likely
emitLabel else_id
-- -----------------------------------------------------------------------------
diff --git a/compiler/cmm/CmmPipeline.hs b/compiler/cmm/CmmPipeline.hs
index bc827dfe87..77598a4b09 100644
--- a/compiler/cmm/CmmPipeline.hs
+++ b/compiler/cmm/CmmPipeline.hs
@@ -7,6 +7,8 @@ module CmmPipeline (
cmmPipeline
) where
+import GhcPrelude
+
import Cmm
import CmmLint
import CmmBuildInfoTables
@@ -30,21 +32,22 @@ import Platform
-- | Top level driver for C-- pipeline
-----------------------------------------------------------------------------
-cmmPipeline :: HscEnv -- Compilation env including
- -- dynamic flags: -dcmm-lint -ddump-cmm-cps
- -> TopSRT -- SRT table and accumulating list of compiled procs
- -> CmmGroup -- Input C-- with Procedures
- -> IO (TopSRT, CmmGroup) -- Output CPS transformed C--
+cmmPipeline
+ :: HscEnv -- Compilation env including
+ -- dynamic flags: -dcmm-lint -ddump-cmm-cps
+ -> ModuleSRTInfo -- Info about SRTs generated so far
+ -> CmmGroup -- Input C-- with Procedures
+ -> IO (ModuleSRTInfo, CmmGroup) -- Output CPS transformed C--
-cmmPipeline hsc_env topSRT prog =
+cmmPipeline hsc_env srtInfo prog =
do let dflags = hsc_dflags hsc_env
tops <- {-# SCC "tops" #-} mapM (cpsTop hsc_env) prog
- (topSRT, cmms) <- {-# SCC "doSRTs" #-} doSRTs dflags topSRT tops
+ (srtInfo, cmms) <- {-# SCC "doSRTs" #-} doSRTs dflags srtInfo tops
dumpWith dflags Opt_D_dump_cmm_cps "Post CPS Cmm" (ppr cmms)
- return (topSRT, cmms)
+ return (srtInfo, cmms)
cpsTop :: HscEnv -> CmmDecl -> IO (CAFEnv, [CmmDecl])
@@ -103,7 +106,7 @@ cpsTop hsc_env proc =
Opt_D_dump_cmm_sink "Sink assignments"
------------- CAF analysis ----------------------------------------------
- let cafEnv = {-# SCC "cafAnal" #-} cafAnal g
+ let cafEnv = {-# SCC "cafAnal" #-} cafAnal call_pps l g
dumpWith dflags Opt_D_dump_cmm_caf "CAFEnv" (ppr cafEnv)
g <- if splitting_proc_points
@@ -163,7 +166,7 @@ cpsTop hsc_env proc =
|| -- Note [inconsistent-pic-reg]
usingInconsistentPicReg
usingInconsistentPicReg
- = case (platformArch platform, platformOS platform, gopt Opt_PIC dflags)
+ = case (platformArch platform, platformOS platform, positionIndependent dflags)
of (ArchX86, OSDarwin, pic) -> pic
(ArchPPC, OSDarwin, pic) -> pic
_ -> False
diff --git a/compiler/cmm/CmmProcPoint.hs b/compiler/cmm/CmmProcPoint.hs
index 2e2c22c10d..bef8f384b8 100644
--- a/compiler/cmm/CmmProcPoint.hs
+++ b/compiler/cmm/CmmProcPoint.hs
@@ -8,7 +8,7 @@ module CmmProcPoint
)
where
-import Prelude hiding (last, unzip, succ, zip)
+import GhcPrelude hiding (last, unzip, succ, zip)
import DynFlags
import BlockId
@@ -19,7 +19,7 @@ import CmmUtils
import CmmInfo
import CmmLive
import CmmSwitch
-import Data.List (sortBy)
+import Data.List (sortBy, foldl')
import Maybes
import Control.Monad
import Outputable
@@ -178,9 +178,9 @@ procPointLattice = DataflowLattice unreached add_to
--
-- Extract the set of Continuation BlockIds, see Note [Continuation BlockIds].
callProcPoints :: CmmGraph -> ProcPointSet
-callProcPoints g = foldGraphBlocks add (setSingleton (g_entry g)) g
- where add :: CmmBlock -> LabelSet -> LabelSet
- add b set = case lastNode b of
+callProcPoints g = foldlGraphBlocks add (setSingleton (g_entry g)) g
+ where add :: LabelSet -> CmmBlock -> LabelSet
+ add set b = case lastNode b of
CmmCall {cml_cont = Just k} -> setInsert k set
CmmForeignCall {succ=k} -> setInsert k set
_ -> set
@@ -190,17 +190,17 @@ minimalProcPointSet :: Platform -> ProcPointSet -> CmmGraph
-- Given the set of successors of calls (which must be proc-points)
-- figure out the minimal set of necessary proc-points
minimalProcPointSet platform callProcPoints g
- = extendPPSet platform g (postorderDfs g) callProcPoints
+ = extendPPSet platform g (revPostorder g) callProcPoints
extendPPSet
:: Platform -> CmmGraph -> [CmmBlock] -> ProcPointSet -> UniqSM ProcPointSet
extendPPSet platform g blocks procPoints =
let env = procPointAnalysis procPoints g
- add block pps = let id = entryLabel block
+ add pps block = let id = entryLabel block
in case mapLookup id env of
Just ProcPoint -> setInsert id pps
_ -> pps
- procPoints' = foldGraphBlocks add setEmpty g
+ procPoints' = foldlGraphBlocks add setEmpty g
newPoints = mapMaybe ppSuccessor blocks
newPoint = listToMaybe newPoints
ppSuccessor b =
@@ -242,11 +242,11 @@ splitAtProcPoints dflags entry_label callPPs procPoints procMap
(CmmProc (TopInfo {info_tbls = info_tbls})
top_l _ g@(CmmGraph {g_entry=entry})) =
do -- Build a map from procpoints to the blocks they reach
- let addBlock
- :: CmmBlock
+ let add_block
+ :: LabelMap (LabelMap CmmBlock)
+ -> CmmBlock
-> LabelMap (LabelMap CmmBlock)
- -> LabelMap (LabelMap CmmBlock)
- addBlock b graphEnv =
+ add_block graphEnv b =
case mapLookup bid procMap of
Just ProcPoint -> add graphEnv bid bid b
Just (ReachedBy set) ->
@@ -265,7 +265,7 @@ splitAtProcPoints dflags entry_label callPPs procPoints procMap
regSetToList $
expectJust "ppLiveness" $ mapLookup pp liveness
- graphEnv <- return $ foldGraphBlocks addBlock mapEmpty g
+ graphEnv <- return $ foldlGraphBlocks add_block mapEmpty g
-- Build a map from proc point BlockId to pairs of:
-- * Labels for their new procedures
@@ -275,12 +275,13 @@ splitAtProcPoints dflags entry_label callPPs procPoints procMap
let add_label map pp = mapInsert pp lbls map
where lbls | pp == entry = (entry_label, fmap cit_lbl (mapLookup entry info_tbls))
| otherwise = (block_lbl, guard (setMember pp callPPs) >>
- Just (toInfoLbl block_lbl))
- where block_lbl = blockLbl pp
+ Just info_table_lbl)
+ where block_lbl = blockLbl pp
+ info_table_lbl = infoTblLbl pp
procLabels :: LabelMap (CLabel, Maybe CLabel)
- procLabels = foldl add_label mapEmpty
- (filter (flip mapMember (toBlockMap g)) (setElems procPoints))
+ procLabels = foldl' add_label mapEmpty
+ (filter (flip mapMember (toBlockMap g)) (setElems procPoints))
-- In each new graph, add blocks jumping off to the new procedures,
-- and replace branches to procpoints with branches to the jump-off blocks
@@ -301,7 +302,7 @@ splitAtProcPoints dflags entry_label callPPs procPoints procMap
-> UniqSM (LabelMap CmmGraph)
add_jumps newGraphEnv (ppId, blockEnv) =
do let needed_jumps = -- find which procpoints we currently branch to
- mapFold add_if_branch_to_pp [] blockEnv
+ mapFoldr add_if_branch_to_pp [] blockEnv
add_if_branch_to_pp :: CmmBlock -> [(BlockId, CLabel)] -> [(BlockId, CLabel)]
add_if_branch_to_pp block rst =
case lastNode block of
@@ -329,7 +330,7 @@ splitAtProcPoints dflags entry_label callPPs procPoints procMap
-- replace branches to procpoints with branches to jumps
blockEnv'' = toBlockMap $ replaceBranches jumpEnv $ ofBlockMap ppId blockEnv'
-- add the jump blocks to the graph
- blockEnv''' = foldl (flip insertBlock) blockEnv'' jumpBlocks
+ blockEnv''' = foldl' (flip addBlock) blockEnv'' jumpBlocks
let g' = ofBlockMap ppId blockEnv'''
-- pprTrace "g' pre jumps" (ppr g') $ do
return (mapInsert ppId g' newGraphEnv)
@@ -372,9 +373,10 @@ splitAtProcPoints dflags entry_label callPPs procPoints procMap
-- call sites. Here, we sort them in reverse order -- it gets
-- reversed later.
let (_, block_order) =
- foldl add_block_num (0::Int, mapEmpty :: LabelMap Int)
- (postorderDfs g)
- add_block_num (i, map) block = (i+1, mapInsert (entryLabel block) i map)
+ foldl' add_block_num (0::Int, mapEmpty :: LabelMap Int)
+ (revPostorder g)
+ add_block_num (i, map) block =
+ (i + 1, mapInsert (entryLabel block) i map)
sort_fn (bid, _) (bid', _) =
compare (expectJust "block_order" $ mapLookup bid block_order)
(expectJust "block_order" $ mapLookup bid' block_order)
diff --git a/compiler/cmm/CmmSink.hs b/compiler/cmm/CmmSink.hs
index 517605b9ff..6317cfe929 100644
--- a/compiler/cmm/CmmSink.hs
+++ b/compiler/cmm/CmmSink.hs
@@ -3,6 +3,8 @@ module CmmSink (
cmmSink
) where
+import GhcPrelude
+
import Cmm
import CmmOpt
import CmmLive
@@ -15,13 +17,31 @@ import CodeGen.Platform
import Platform (isARM, platformArch)
import DynFlags
+import Unique
import UniqFM
import PprCmm ()
+import qualified Data.IntSet as IntSet
import Data.List (partition)
import qualified Data.Set as Set
import Data.Maybe
+-- Compact sets for membership tests of local variables.
+
+type LRegSet = IntSet.IntSet
+
+emptyLRegSet :: LRegSet
+emptyLRegSet = IntSet.empty
+
+nullLRegSet :: LRegSet -> Bool
+nullLRegSet = IntSet.null
+
+insertLRegSet :: LocalReg -> LRegSet -> LRegSet
+insertLRegSet l = IntSet.insert (getKey (getUnique l))
+
+elemLRegSet :: LocalReg -> LRegSet -> Bool
+elemLRegSet l = IntSet.member (getKey (getUnique l))
+
-- -----------------------------------------------------------------------------
-- Sinking and inlining
@@ -152,7 +172,7 @@ cmmSink dflags graph = ofBlockList (g_entry graph) $ sink mapEmpty $ blocks
liveness = cmmLocalLiveness dflags graph
getLive l = mapFindWithDefault Set.empty l liveness
- blocks = postorderDfs graph
+ blocks = revPostorder graph
join_pts = findJoinPoints blocks
@@ -213,7 +233,7 @@ cmmSink dflags graph = ofBlockList (g_entry graph) $ sink mapEmpty $ blocks
live_rhs = foldRegsUsed dflags extendRegSet emptyRegSet rhs
- final_middle = foldl blockSnoc middle' dropped_last
+ final_middle = foldl' blockSnoc middle' dropped_last
sunk' = mapUnion sunk $
mapFromList [ (l, filterAssignments dflags (getLive l) assigs'')
@@ -323,7 +343,7 @@ walk dflags nodes assigs = go nodes emptyBlock assigs
(dropped, as') = dropAssignmentsSimple dflags
(\a -> conflicts dflags a node2) as1
- block' = foldl blockSnoc block dropped `blockSnoc` node2
+ block' = foldl' blockSnoc block dropped `blockSnoc` node2
--
@@ -397,7 +417,7 @@ tryToInline
, Assignments -- Remaining assignments
)
-tryToInline dflags live node assigs = go usages node [] assigs
+tryToInline dflags live node assigs = go usages node emptyLRegSet assigs
where
usages :: UniqFM Int -- Maps each LocalReg to a count of how often it is used
usages = foldLocalRegsUsed dflags addUsage emptyUFM node
@@ -420,7 +440,7 @@ tryToInline dflags live node assigs = go usages node [] assigs
inline_and_keep = keep inl_node -- inline the assignment, keep it
keep node' = (final_node, a : rest')
- where (final_node, rest') = go usages' node' (l:skipped) rest
+ where (final_node, rest') = go usages' node' (insertLRegSet l skipped) rest
usages' = foldLocalRegsUsed dflags (\m r -> addToUFM m r 2)
usages rhs
-- we must not inline anything that is mentioned in the RHS
@@ -428,7 +448,7 @@ tryToInline dflags live node assigs = go usages node [] assigs
-- usages of the regs on the RHS to 2.
cannot_inline = skipped `regsUsedIn` rhs -- Note [dependent assignments]
- || l `elem` skipped
+ || l `elemLRegSet` skipped
|| not (okToInline dflags rhs node)
l_usages = lookupUFM usages l
@@ -437,13 +457,7 @@ tryToInline dflags live node assigs = go usages node [] assigs
occurs_once = not l_live && l_usages == Just 1
occurs_none = not l_live && l_usages == Nothing
- inl_node = case mapExpDeep inl_exp node of
- -- See Note [Improving conditionals]
- CmmCondBranch (CmmMachOp (MO_Ne w) args)
- ti fi l
- -> CmmCondBranch (cmmMachOpFold dflags (MO_Eq w) args)
- fi ti l
- node' -> node'
+ inl_node = improveConditional (mapExpDeep inl_exp node)
inl_exp :: CmmExpr -> CmmExpr
-- inl_exp is where the inlining actually takes place!
@@ -454,22 +468,43 @@ tryToInline dflags live node assigs = go usages node [] assigs
inl_exp (CmmMachOp op args) = cmmMachOpFold dflags op args
inl_exp other = other
-{- Note [Improving conditionals]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Given
- CmmCondBranch ((a >## b) != 1) t f
-where a,b, are Floats, the constant folder /cannot/ turn it into
- CmmCondBranch (a <=## b) t f
-because comparison on floats are not invertible
-(see CmmMachOp.maybeInvertComparison).
-What we want instead is simply to reverse the true/false branches thus
+{- Note [improveConditional]
+
+cmmMachOpFold tries to simplify conditionals to turn things like
+ (a == b) != 1
+into
+ (a != b)
+but there's one case it can't handle: when the comparison is over
+floating-point values, we can't invert it, because floating-point
+comparisons aren't invertible (because of NaNs).
+
+But we *can* optimise this conditional by swapping the true and false
+branches. Given
CmmCondBranch ((a >## b) != 1) t f
--->
+we can turn it into
CmmCondBranch (a >## b) f t
-And we do that right here in tryToInline, just as we do cmmMachOpFold.
+So here we catch conditionals that weren't optimised by cmmMachOpFold,
+and apply above transformation to eliminate the comparison against 1.
+
+It's tempting to just turn every != into == and then let cmmMachOpFold
+do its thing, but that risks changing a nice fall-through conditional
+into one that requires two jumps. (see swapcond_last in
+CmmContFlowOpt), so instead we carefully look for just the cases where
+we can eliminate a comparison.
-}
+improveConditional :: CmmNode O x -> CmmNode O x
+improveConditional
+ (CmmCondBranch (CmmMachOp mop [x, CmmLit (CmmInt 1 _)]) t f l)
+ | neLike mop, isComparisonExpr x
+ = CmmCondBranch x f t (fmap not l)
+ where
+ neLike (MO_Ne _) = True
+ neLike (MO_U_Lt _) = True -- (x<y) < 1 behaves like (x<y) != 1
+ neLike (MO_S_Lt _) = True -- (x<y) < 1 behaves like (x<y) != 1
+ neLike _ = False
+improveConditional other = other
-- Note [dependent assignments]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -519,11 +554,11 @@ And we do that right here in tryToInline, just as we do cmmMachOpFold.
addUsage :: UniqFM Int -> LocalReg -> UniqFM Int
addUsage m r = addToUFM_C (+) m r 1
-regsUsedIn :: [LocalReg] -> CmmExpr -> Bool
-regsUsedIn [] _ = False
+regsUsedIn :: LRegSet -> CmmExpr -> Bool
+regsUsedIn ls _ | nullLRegSet ls = False
regsUsedIn ls e = wrapRecExpf f e False
- where f (CmmReg (CmmLocal l)) _ | l `elem` ls = True
- f (CmmRegOff (CmmLocal l) _) _ | l `elem` ls = True
+ where f (CmmReg (CmmLocal l)) _ | l `elemLRegSet` ls = True
+ f (CmmRegOff (CmmLocal l) _) _ | l `elemLRegSet` ls = True
f _ z = z
-- we don't inline into CmmUnsafeForeignCall if the expression refers
@@ -721,7 +756,7 @@ loadAddr dflags e w =
case e of
CmmReg r -> regAddr dflags r 0 w
CmmRegOff r i -> regAddr dflags r i w
- _other | regUsedIn dflags (CmmGlobal Sp) e -> StackMem
+ _other | regUsedIn dflags spReg e -> StackMem
| otherwise -> AnyMem
regAddr :: DynFlags -> CmmReg -> Int -> Width -> AbsMem
diff --git a/compiler/cmm/CmmSwitch.hs b/compiler/cmm/CmmSwitch.hs
index b0ca4be762..ce779465e3 100644
--- a/compiler/cmm/CmmSwitch.hs
+++ b/compiler/cmm/CmmSwitch.hs
@@ -11,6 +11,8 @@ module CmmSwitch (
createSwitchPlan,
) where
+import GhcPrelude
+
import Outputable
import DynFlags
import Hoopl.Label (Label)
@@ -107,7 +109,7 @@ data SwitchTargets =
(M.Map Integer Label) -- The branches
deriving (Show, Eq)
--- | The smart constructr mkSwitchTargets normalises the map a bit:
+-- | The smart constructor mkSwitchTargets normalises the map a bit:
-- * No entries outside the range
-- * No entries equal to the default
-- * No default if all elements have explicit values
@@ -249,6 +251,68 @@ data SwitchPlan
-- findSingleValues
-- 5. The thus collected pieces are assembled to a balanced binary tree.
+{-
+ Note [Two alts + default]
+ ~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Discussion and a bit more info at #14644
+
+When dealing with a switch of the form:
+switch(e) {
+ case 1: goto l1;
+ case 3000: goto l2;
+ default: goto ldef;
+}
+
+If we treat it as a sparse jump table we would generate:
+
+if (e > 3000) //Check if value is outside of the jump table.
+ goto ldef;
+else {
+ if (e < 3000) { //Compare to upper value
+ if(e != 1) //Compare to remaining value
+ goto ldef;
+ else
+ goto l2;
+ }
+ else
+ goto l1;
+}
+
+Instead we special case this to :
+
+if (e==1) goto l1;
+else if (e==3000) goto l2;
+else goto l3;
+
+This means we have:
+* Less comparisons for: 1,<3000
+* Unchanged for 3000
+* One more for >3000
+
+This improves code in a few ways:
+* One comparison less means smaller code which helps with cache.
+* It exchanges a taken jump for two jumps no taken in the >range case.
+ Jumps not taken are cheaper (See Agner guides) making this about as fast.
+* For all other cases the first range check is removed making it faster.
+
+The end result is that the change is not measurably slower for the case
+>3000 and faster for the other cases.
+
+This makes running this kind of match in an inner loop cheaper by 10-20%
+depending on the data.
+In nofib this improves wheel-sieve1 by 4-9% depending on problem
+size.
+
+We could also add a second conditional jump after the comparison to
+keep the range check like this:
+ cmp 3000, rArgument
+ jg <default>
+ je <branch 2>
+While this is fairly cheap it made no big difference for the >3000 case
+and slowed down all other cases making it not worthwhile.
+-}
+
-- | Does the target support switch out of the box? Then leave this to the
-- target!
@@ -264,13 +328,16 @@ createSwitchPlan :: SwitchTargets -> SwitchPlan
createSwitchPlan (SwitchTargets _signed _range (Just defLabel) m)
| [(x, l)] <- M.toList m
= IfEqual x l (Unconditionally defLabel)
--- And another common case, matching booleans
+-- And another common case, matching "booleans"
createSwitchPlan (SwitchTargets _signed (lo,hi) Nothing m)
- | [(x1, l1), (x2,l2)] <- M.toAscList m
- , x1 == lo
- , x2 == hi
- , x1 + 1 == x2
+ | [(x1, l1), (_x2,l2)] <- M.toAscList m
+ --Checking If |range| = 2 is enough if we have two unique literals
+ , hi - lo == 1
= IfEqual x1 l1 (Unconditionally l2)
+-- See Note [Two alts + default]
+createSwitchPlan (SwitchTargets _signed _range (Just defLabel) m)
+ | [(x1, l1), (x2,l2)] <- M.toAscList m
+ = IfEqual x1 l1 (IfEqual x2 l2 (Unconditionally defLabel))
createSwitchPlan (SwitchTargets signed range mbdef m) =
-- pprTrace "createSwitchPlan" (text (show ids) $$ text (show (range,m)) $$ text (show pieces) $$ text (show flatPlan) $$ text (show plan)) $
plan
diff --git a/compiler/cmm/CmmType.hs b/compiler/cmm/CmmType.hs
index 4abbeaf0c1..97b181a243 100644
--- a/compiler/cmm/CmmType.hs
+++ b/compiler/cmm/CmmType.hs
@@ -1,5 +1,3 @@
-{-# LANGUAGE CPP #-}
-
module CmmType
( CmmType -- Abstract
, b8, b16, b32, b64, b128, b256, b512, f32, f64, bWord, bHalfWord, gcWord
@@ -29,7 +27,8 @@ module CmmType
)
where
-#include "HsVersions.h"
+
+import GhcPrelude
import DynFlags
import FastString
@@ -71,7 +70,7 @@ instance Outputable CmmCat where
-- Why is CmmType stratified? For native code generation,
-- most of the time you just want to know what sort of register
-- to put the thing in, and for this you need to know how
--- many bits thing has and whether it goes in a floating-point
+-- many bits thing has, and whether it goes in a floating-point
-- register. By contrast, the distinction between GcPtr and
-- GcNonPtr is of interest to only a few parts of the code generator.
diff --git a/compiler/cmm/CmmUtils.hs b/compiler/cmm/CmmUtils.hs
index 74524c997f..42d64842e2 100644
--- a/compiler/cmm/CmmUtils.hs
+++ b/compiler/cmm/CmmUtils.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE CPP, GADTs, RankNTypes #-}
+{-# LANGUAGE GADTs, RankNTypes #-}
-----------------------------------------------------------------------------
--
@@ -35,7 +35,10 @@ module CmmUtils(
cmmSubWord, cmmAddWord, cmmMulWord, cmmQuotWord,
cmmToWord,
- isTrivialCmmExpr, hasNoGlobalRegs,
+ isTrivialCmmExpr, hasNoGlobalRegs, isLit, isComparisonExpr,
+
+ baseExpr, spExpr, hpExpr, spLimExpr, hpLimExpr,
+ currentTSOExpr, currentNurseryExpr, cccsExpr,
-- Statics
blankWord,
@@ -53,16 +56,16 @@ module CmmUtils(
-- * Operations that probably don't belong here
modifyGraph,
- ofBlockMap, toBlockMap, insertBlock,
+ ofBlockMap, toBlockMap,
ofBlockList, toBlockList, bodyToBlockList,
toBlockListEntryFirst, toBlockListEntryFirstFalseFallthrough,
- foldGraphBlocks, mapGraphNodes, postorderDfs, mapGraphNodes1,
+ foldlGraphBlocks, mapGraphNodes, revPostorder, mapGraphNodes1,
-- * Ticks
blockTicks
) where
-#include "HsVersions.h"
+import GhcPrelude
import TyCon ( PrimRep(..), PrimElemRep(..) )
import RepType ( UnaryType, SlotTy (..), typePrimRep1 )
@@ -73,11 +76,9 @@ import BlockId
import CLabel
import Outputable
import DynFlags
-import Util
import CodeGen.Platform
import Data.Word
-import Data.Maybe
import Data.Bits
import Hoopl.Graph
import Hoopl.Label
@@ -252,8 +253,8 @@ cmmRegOff reg byte_off = CmmRegOff reg byte_off
cmmOffsetLit :: CmmLit -> Int -> CmmLit
cmmOffsetLit (CmmLabel l) byte_off = cmmLabelOff l byte_off
cmmOffsetLit (CmmLabelOff l m) byte_off = cmmLabelOff l (m+byte_off)
-cmmOffsetLit (CmmLabelDiffOff l1 l2 m) byte_off
- = CmmLabelDiffOff l1 l2 (m+byte_off)
+cmmOffsetLit (CmmLabelDiffOff l1 l2 m w) byte_off
+ = CmmLabelDiffOff l1 l2 (m+byte_off) w
cmmOffsetLit (CmmInt m rep) byte_off = CmmInt (m + fromIntegral byte_off) rep
cmmOffsetLit _ byte_off = pprPanic "cmmOffsetLit" (ppr byte_off)
@@ -340,7 +341,6 @@ cmmEqWord dflags e1 e2 = CmmMachOp (mo_wordEq dflags) [e1, e2]
cmmULtWord dflags e1 e2 = CmmMachOp (mo_wordULt dflags) [e1, e2]
cmmUGeWord dflags e1 e2 = CmmMachOp (mo_wordUGe dflags) [e1, e2]
cmmUGtWord dflags e1 e2 = CmmMachOp (mo_wordUGt dflags) [e1, e2]
---cmmShlWord dflags e1 e2 = CmmMachOp (mo_wordShl dflags) [e1, e2]
cmmSLtWord dflags e1 e2 = CmmMachOp (mo_wordSLt dflags) [e1, e2]
cmmUShrWord dflags e1 e2 = CmmMachOp (mo_wordUShr dflags) [e1, e2]
cmmAddWord dflags e1 e2 = CmmMachOp (mo_wordAdd dflags) [e1, e2]
@@ -385,6 +385,14 @@ hasNoGlobalRegs (CmmReg (CmmLocal _)) = True
hasNoGlobalRegs (CmmRegOff (CmmLocal _) _) = True
hasNoGlobalRegs _ = False
+isLit :: CmmExpr -> Bool
+isLit (CmmLit _) = True
+isLit _ = False
+
+isComparisonExpr :: CmmExpr -> Bool
+isComparisonExpr (CmmMachOp op _) = isComparisonMachOp op
+isComparisonExpr _ = False
+
---------------------------------------------------
--
-- Tagging
@@ -392,23 +400,20 @@ hasNoGlobalRegs _ = False
---------------------------------------------------
-- Tag bits mask
---cmmTagBits = CmmLit (mkIntCLit tAG_BITS)
cmmTagMask, cmmPointerMask :: DynFlags -> CmmExpr
cmmTagMask dflags = mkIntExpr dflags (tAG_MASK dflags)
cmmPointerMask dflags = mkIntExpr dflags (complement (tAG_MASK dflags))
-- Used to untag a possibly tagged pointer
-- A static label need not be untagged
-cmmUntag :: DynFlags -> CmmExpr -> CmmExpr
+cmmUntag, cmmIsTagged, cmmConstrTag1 :: DynFlags -> CmmExpr -> CmmExpr
cmmUntag _ e@(CmmLit (CmmLabel _)) = e
-- Default case
cmmUntag dflags e = cmmAndWord dflags e (cmmPointerMask dflags)
-- Test if a closure pointer is untagged
-cmmIsTagged :: DynFlags -> CmmExpr -> CmmExpr
cmmIsTagged dflags e = cmmNeWord dflags (cmmAndWord dflags e (cmmTagMask dflags)) (zeroExpr dflags)
-cmmConstrTag1 :: DynFlags -> CmmExpr -> CmmExpr
-- Get constructor tag, but one based.
cmmConstrTag1 dflags e = cmmAndWord dflags e (cmmTagMask dflags)
@@ -451,20 +456,17 @@ regUsedIn dflags = regUsedIn_ where
--
---------------------------------------------
-mkLiveness :: DynFlags -> [Maybe LocalReg] -> Liveness
+mkLiveness :: DynFlags -> [LocalReg] -> Liveness
mkLiveness _ [] = []
mkLiveness dflags (reg:regs)
- = take sizeW bits ++ mkLiveness dflags regs
+ = bits ++ mkLiveness dflags regs
where
- sizeW = case reg of
- Nothing -> 1
- Just r -> (widthInBytes (typeWidth (localRegType r)) + wORD_SIZE dflags - 1)
- `quot` wORD_SIZE dflags
- -- number of words, rounded up
- bits = repeat $ is_non_ptr reg -- True <=> Non Ptr
+ sizeW = (widthInBytes (typeWidth (localRegType reg)) + wORD_SIZE dflags - 1)
+ `quot` wORD_SIZE dflags
+ -- number of words, rounded up
+ bits = replicate sizeW is_non_ptr -- True <=> Non Ptr
- is_non_ptr Nothing = True
- is_non_ptr (Just reg) = not $ isGcPtrType (localRegType reg)
+ is_non_ptr = not $ isGcPtrType (localRegType reg)
-- ============================================== -
@@ -486,12 +488,6 @@ toBlockMap (CmmGraph {g_graph=GMany NothingO body NothingO}) = body
ofBlockMap :: BlockId -> LabelMap CmmBlock -> CmmGraph
ofBlockMap entry bodyMap = CmmGraph {g_entry=entry, g_graph=GMany NothingO bodyMap NothingO}
-insertBlock :: CmmBlock -> LabelMap CmmBlock -> LabelMap CmmBlock
-insertBlock block map =
- ASSERT(isNothing $ mapLookup id map)
- mapInsert id block map
- where id = entryLabel block
-
toBlockList :: CmmGraph -> [CmmBlock]
toBlockList g = mapElems $ toBlockMap g
@@ -554,11 +550,12 @@ mapGraphNodes1 :: (forall e x. CmmNode e x -> CmmNode e x) -> CmmGraph -> CmmGra
mapGraphNodes1 f = modifyGraph (mapGraph f)
-foldGraphBlocks :: (CmmBlock -> a -> a) -> a -> CmmGraph -> a
-foldGraphBlocks k z g = mapFold k z $ toBlockMap g
+foldlGraphBlocks :: (a -> CmmBlock -> a) -> a -> CmmGraph -> a
+foldlGraphBlocks k z g = mapFoldl k z $ toBlockMap g
-postorderDfs :: CmmGraph -> [CmmBlock]
-postorderDfs g = {-# SCC "postorderDfs" #-} postorder_dfs_from (toBlockMap g) (g_entry g)
+revPostorder :: CmmGraph -> [CmmBlock]
+revPostorder g = {-# SCC "revPostorder" #-}
+ revPostorderFrom (toBlockMap g) (g_entry g)
-------------------------------------------------
-- Tick utilities
@@ -569,3 +566,18 @@ blockTicks b = reverse $ foldBlockNodesF goStmt b []
where goStmt :: CmmNode e x -> [CmmTickish] -> [CmmTickish]
goStmt (CmmTick t) ts = t:ts
goStmt _other ts = ts
+
+
+-- -----------------------------------------------------------------------------
+-- Access to common global registers
+
+baseExpr, spExpr, hpExpr, currentTSOExpr, currentNurseryExpr,
+ spLimExpr, hpLimExpr, cccsExpr :: CmmExpr
+baseExpr = CmmReg baseReg
+spExpr = CmmReg spReg
+spLimExpr = CmmReg spLimReg
+hpExpr = CmmReg hpReg
+hpLimExpr = CmmReg hpLimReg
+currentTSOExpr = CmmReg currentTSOReg
+currentNurseryExpr = CmmReg currentNurseryReg
+cccsExpr = CmmReg cccsReg
diff --git a/compiler/cmm/Debug.hs b/compiler/cmm/Debug.hs
index 33595d8987..da37495530 100644
--- a/compiler/cmm/Debug.hs
+++ b/compiler/cmm/Debug.hs
@@ -22,6 +22,8 @@ module Debug (
UnwindExpr(..), toUnwindExpr
) where
+import GhcPrelude
+
import BlockId
import CLabel
import Cmm
@@ -33,7 +35,7 @@ import Outputable
import PprCore ()
import PprCmmExpr ( pprExpr )
import SrcLoc
-import Util
+import Util ( seqList )
import Hoopl.Block
import Hoopl.Collections
@@ -44,6 +46,7 @@ import Data.Maybe
import Data.List ( minimumBy, nubBy )
import Data.Ord ( comparing )
import qualified Data.Map as Map
+import Data.Either ( partitionEithers )
-- | Debug information about a block of code. Ticks scope over nested
-- blocks.
@@ -98,7 +101,7 @@ cmmDebugGen modLoc decls = map (blocksForScope Nothing) topScopes
-- Analyse tick scope structure: Each one is either a top-level
-- tick scope, or the child of another.
(topScopes, childScopes)
- = splitEithers $ map (\a -> findP a a) $ Map.keys blockCtxs
+ = partitionEithers $ map (\a -> findP a a) $ Map.keys blockCtxs
findP tsc GlobalScope = Left tsc -- top scope
findP tsc scp | scp' `Map.member` blockCtxs = Right (scp', tsc)
| otherwise = findP tsc scp'
@@ -328,7 +331,7 @@ code,
v :: P64 = R2;
if ((Sp + 8) - 32 < SpLim) (likely: False) goto c2ff; else goto c2fg;
-After c2fe we we may pass to either c2ff or c2fg; let's first consider the
+After c2fe we may pass to either c2ff or c2fg; let's first consider the
former. In this case there is nothing in particular that we need to do other
than reiterate what we already know about Sp,
@@ -346,8 +349,8 @@ in addition to the usual beginning-of-block statement,
unwind Sp = Just Sp + 0;
I64[Sp - 8] = c2dD;
R1 = v :: P64;
- unwind Sp = Just Sp + 8;
Sp = Sp - 8;
+ unwind Sp = Just Sp + 8;
if (R1 & 7 != 0) goto c2dD; else goto c2dE;
The remaining blocks are simple,
@@ -389,10 +392,95 @@ The flow of unwinding information through the compiler is a bit convoluted:
* This unwind information is converted to DebugBlocks by Debug.cmmDebugGen
- * These DebugBlcosk are then converted to, e.g., DWARF unwinding tables
+ * These DebugBlocks are then converted to, e.g., DWARF unwinding tables
(by the Dwarf module) and emitted in the final object.
-See also: Note [Unwinding information in the NCG] in AsmCodeGen.
+See also:
+ Note [Unwinding information in the NCG] in AsmCodeGen,
+ Note [Unwind pseudo-instruction in Cmm],
+ Note [Debugging DWARF unwinding info].
+
+
+Note [Debugging DWARF unwinding info]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+For debugging generated unwinding info I've found it most useful to dump the
+disassembled binary with objdump -D and dump the debug info with
+readelf --debug-dump=frames-interp.
+
+You should get something like this:
+
+ 0000000000000010 <stg_catch_frame_info>:
+ 10: 48 83 c5 18 add $0x18,%rbp
+ 14: ff 65 00 jmpq *0x0(%rbp)
+
+and:
+
+ Contents of the .debug_frame section:
+
+ 00000000 0000000000000014 ffffffff CIE "" cf=1 df=-8 ra=16
+ LOC CFA rbp rsp ra
+ 0000000000000000 rbp+0 v+0 s c+0
+
+ 00000018 0000000000000024 00000000 FDE cie=00000000 pc=000000000000000f..0000000000000017
+ LOC CFA rbp rsp ra
+ 000000000000000f rbp+0 v+0 s c+0
+ 000000000000000f rbp+24 v+0 s c+0
+
+To read it http://www.dwarfstd.org/doc/dwarf-2.0.0.pdf has a nice example in
+Appendix 5 (page 101 of the pdf) and more details in the relevant section.
+
+The key thing to keep in mind is that the value at LOC is the value from
+*before* the instruction at LOC executes. In other words it answers the
+question: if my $rip is at LOC, how do I get the relevant values given the
+values obtained through unwinding so far.
+
+If the readelf --debug-dump=frames-interp output looks wrong, it may also be
+useful to look at readelf --debug-dump=frames, which is closer to the
+information that GHC generated.
+
+It's also useful to dump the relevant Cmm with -ddump-cmm -ddump-opt-cmm
+-ddump-cmm-proc -ddump-cmm-verbose. Note [Unwind pseudo-instruction in Cmm]
+explains how to interpret it.
+
+Inside gdb there are a couple useful commands for inspecting frames.
+For example:
+
+ gdb> info frame <num>
+
+It shows the values of registers obtained through unwinding.
+
+Another useful thing to try when debugging the DWARF unwinding is to enable
+extra debugging output in GDB:
+
+ gdb> set debug frame 1
+
+This makes GDB produce a trace of its internal workings. Having gone this far,
+it's just a tiny step to run GDB in GDB. Make sure you install debugging
+symbols for gdb if you obtain it through a package manager.
+
+Keep in mind that the current release of GDB has an instruction pointer handling
+heuristic that works well for C-like languages, but doesn't always work for
+Haskell. See Note [Info Offset] in Dwarf.Types for more details.
+
+Note [Unwind pseudo-instruction in Cmm]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+One of the possible CmmNodes is a CmmUnwind pseudo-instruction. It doesn't
+generate any assembly, but controls what DWARF unwinding information gets
+generated.
+
+It's important to understand what ranges of code the unwind pseudo-instruction
+refers to.
+For a sequence of CmmNodes like:
+
+ A // starts at addr X and ends at addr Y-1
+ unwind Sp = Just Sp + 16;
+ B // starts at addr Y and ends at addr Z
+
+the unwind statement reflects the state after A has executed, but before B
+has executed. If you consult the Note [Debugging DWARF unwinding info], the
+LOC this information will end up in is Y.
-}
-- | A label associated with an 'UnwindTable'
diff --git a/compiler/cmm/Hoopl/Block.hs b/compiler/cmm/Hoopl/Block.hs
index 3623fcd242..c4ff1794e8 100644
--- a/compiler/cmm/Hoopl/Block.hs
+++ b/compiler/cmm/Hoopl/Block.hs
@@ -33,6 +33,7 @@ module Hoopl.Block
, replaceLastNode
) where
+import GhcPrelude
-- -----------------------------------------------------------------------------
-- Shapes: Open and Closed
diff --git a/compiler/cmm/Hoopl/Collections.hs b/compiler/cmm/Hoopl/Collections.hs
index 679057626b..f8bdfda3d1 100644
--- a/compiler/cmm/Hoopl/Collections.hs
+++ b/compiler/cmm/Hoopl/Collections.hs
@@ -1,11 +1,22 @@
+{-# LANGUAGE DeriveFunctor #-}
+{-# LANGUAGE DeriveTraversable #-}
+{-# LANGUAGE DeriveFoldable #-}
{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+
module Hoopl.Collections
( IsSet(..)
, setInsertList, setDeleteList, setUnions
, IsMap(..)
, mapInsertList, mapDeleteList, mapUnions
+ , UniqueMap, UniqueSet
) where
+import GhcPrelude
+
+import qualified Data.IntMap.Strict as M
+import qualified Data.IntSet as S
+
import Data.List (foldl', foldl1')
class IsSet set where
@@ -25,7 +36,8 @@ class IsSet set where
setIntersection :: set -> set -> set
setIsSubsetOf :: set -> set -> Bool
- setFold :: (ElemOf set -> b -> b) -> b -> set -> b
+ setFoldl :: (b -> ElemOf set -> b) -> b -> set -> b
+ setFoldr :: (ElemOf set -> b -> b) -> b -> set -> b
setElems :: set -> [ElemOf set]
setFromList :: [ElemOf set] -> set
@@ -56,6 +68,7 @@ class IsMap map where
mapInsert :: KeyOf map -> a -> map a -> map a
mapInsertWith :: (a -> a -> a) -> KeyOf map -> a -> map a -> map a
mapDelete :: KeyOf map -> map a -> map a
+ mapAlter :: (Maybe a -> Maybe a) -> KeyOf map -> map a -> map a
mapUnion :: map a -> map a -> map a
mapUnionWithKey :: (KeyOf map -> a -> a -> a) -> map a -> map a -> map a
@@ -65,8 +78,9 @@ class IsMap map where
mapMap :: (a -> b) -> map a -> map b
mapMapWithKey :: (KeyOf map -> a -> b) -> map a -> map b
- mapFold :: (a -> b -> b) -> b -> map a -> b
- mapFoldWithKey :: (KeyOf map -> a -> b -> b) -> b -> map a -> b
+ mapFoldl :: (b -> a -> b) -> b -> map a -> b
+ mapFoldr :: (a -> b -> b) -> b -> map a -> b
+ mapFoldlWithKey :: (b -> KeyOf map -> a -> b) -> b -> map a -> b
mapFilter :: (a -> Bool) -> map a -> map a
mapElems :: map a -> [a]
@@ -85,3 +99,70 @@ mapDeleteList keys map = foldl' (flip mapDelete) map keys
mapUnions :: IsMap map => [map a] -> map a
mapUnions [] = mapEmpty
mapUnions maps = foldl1' mapUnion maps
+
+-----------------------------------------------------------------------------
+-- Basic instances
+-----------------------------------------------------------------------------
+
+newtype UniqueSet = US S.IntSet deriving (Eq, Ord, Show)
+
+instance IsSet UniqueSet where
+ type ElemOf UniqueSet = Int
+
+ setNull (US s) = S.null s
+ setSize (US s) = S.size s
+ setMember k (US s) = S.member k s
+
+ setEmpty = US S.empty
+ setSingleton k = US (S.singleton k)
+ setInsert k (US s) = US (S.insert k s)
+ setDelete k (US s) = US (S.delete k s)
+
+ setUnion (US x) (US y) = US (S.union x y)
+ setDifference (US x) (US y) = US (S.difference x y)
+ setIntersection (US x) (US y) = US (S.intersection x y)
+ setIsSubsetOf (US x) (US y) = S.isSubsetOf x y
+
+ setFoldl k z (US s) = S.foldl' k z s
+ setFoldr k z (US s) = S.foldr k z s
+
+ setElems (US s) = S.elems s
+ setFromList ks = US (S.fromList ks)
+
+newtype UniqueMap v = UM (M.IntMap v)
+ deriving (Eq, Ord, Show, Functor, Foldable, Traversable)
+
+instance IsMap UniqueMap where
+ type KeyOf UniqueMap = Int
+
+ mapNull (UM m) = M.null m
+ mapSize (UM m) = M.size m
+ mapMember k (UM m) = M.member k m
+ mapLookup k (UM m) = M.lookup k m
+ mapFindWithDefault def k (UM m) = M.findWithDefault def k m
+
+ mapEmpty = UM M.empty
+ mapSingleton k v = UM (M.singleton k v)
+ mapInsert k v (UM m) = UM (M.insert k v m)
+ mapInsertWith f k v (UM m) = UM (M.insertWith f k v m)
+ mapDelete k (UM m) = UM (M.delete k m)
+ mapAlter f k (UM m) = UM (M.alter f k m)
+
+ mapUnion (UM x) (UM y) = UM (M.union x y)
+ mapUnionWithKey f (UM x) (UM y) = UM (M.unionWithKey f x y)
+ mapDifference (UM x) (UM y) = UM (M.difference x y)
+ mapIntersection (UM x) (UM y) = UM (M.intersection x y)
+ mapIsSubmapOf (UM x) (UM y) = M.isSubmapOf x y
+
+ mapMap f (UM m) = UM (M.map f m)
+ mapMapWithKey f (UM m) = UM (M.mapWithKey f m)
+ mapFoldl k z (UM m) = M.foldl' k z m
+ mapFoldr k z (UM m) = M.foldr k z m
+ mapFoldlWithKey k z (UM m) = M.foldlWithKey' k z m
+ mapFilter f (UM m) = UM (M.filter f m)
+
+ mapElems (UM m) = M.elems m
+ mapKeys (UM m) = M.keys m
+ mapToList (UM m) = M.toList m
+ mapFromList assocs = UM (M.fromList assocs)
+ mapFromListWith f assocs = UM (M.fromListWith f assocs)
diff --git a/compiler/cmm/Hoopl/Dataflow.hs b/compiler/cmm/Hoopl/Dataflow.hs
index c2ace502b3..bf12b3f6a1 100644
--- a/compiler/cmm/Hoopl/Dataflow.hs
+++ b/compiler/cmm/Hoopl/Dataflow.hs
@@ -30,14 +30,16 @@ module Hoopl.Dataflow
, rewriteCmmBwd
, changedIf
, joinOutFacts
+ , joinFacts
)
where
+import GhcPrelude
+
import Cmm
import UniqSupply
import Data.Array
-import Data.List
import Data.Maybe
import Data.IntSet (IntSet)
import qualified Data.IntSet as IntSet
@@ -109,8 +111,7 @@ analyzeCmm dir lattice transfer cmmGraph initFact =
blockMap =
case hooplGraph of
GMany NothingO bm NothingO -> bm
- entries = if mapNull initFact then [entry] else mapKeys initFact
- in fixpointAnalysis dir lattice transfer entries blockMap initFact
+ in fixpointAnalysis dir lattice transfer entry blockMap initFact
-- Fixpoint algorithm.
fixpointAnalysis
@@ -118,19 +119,20 @@ fixpointAnalysis
Direction
-> DataflowLattice f
-> TransferFun f
- -> [Label]
+ -> Label
-> LabelMap CmmBlock
-> FactBase f
-> FactBase f
-fixpointAnalysis direction lattice do_block entries blockmap = loop start
+fixpointAnalysis direction lattice do_block entry blockmap = loop start
where
-- Sorting the blocks helps to minimize the number of times we need to
-- process blocks. For instance, for forward analysis we want to look at
-- blocks in reverse postorder. Also, see comments for sortBlocks.
- blocks = sortBlocks direction entries blockmap
+ blocks = sortBlocks direction entry blockmap
num_blocks = length blocks
block_arr = {-# SCC "block_arr" #-} listArray (0, num_blocks - 1) blocks
- start = {-# SCC "start" #-} [0 .. num_blocks - 1]
+ start = {-# SCC "start" #-} IntSet.fromDistinctAscList
+ [0 .. num_blocks - 1]
dep_blocks = {-# SCC "dep_blocks" #-} mkDepBlocks direction blocks
join = fact_join lattice
@@ -138,17 +140,17 @@ fixpointAnalysis direction lattice do_block entries blockmap = loop start
:: IntHeap -- ^ Worklist, i.e., blocks to process
-> FactBase f -- ^ Current result (increases monotonically)
-> FactBase f
- loop [] !fbase1 = fbase1
- loop (index : todo1) !fbase1 =
+ loop todo !fbase1 | Just (index, todo1) <- IntSet.minView todo =
let block = block_arr ! index
out_facts = {-# SCC "do_block" #-} do_block block fbase1
-- For each of the outgoing edges, we join it with the current
-- information in fbase1 and (if something changed) we update it
-- and add the affected blocks to the worklist.
(todo2, fbase2) = {-# SCC "mapFoldWithKey" #-}
- mapFoldWithKey
+ mapFoldlWithKey
(updateFact join dep_blocks) (todo1, fbase1) out_facts
in loop todo2 fbase2
+ loop _ !fbase1 = fbase1
rewriteCmmBwd
:: DataflowLattice f
@@ -171,9 +173,8 @@ rewriteCmm dir lattice rwFun cmmGraph initFact = do
blockMap1 =
case hooplGraph of
GMany NothingO bm NothingO -> bm
- entries = if mapNull initFact then [entry] else mapKeys initFact
(blockMap2, facts) <-
- fixpointRewrite dir lattice rwFun entries blockMap1 initFact
+ fixpointRewrite dir lattice rwFun entry blockMap1 initFact
return (cmmGraph {g_graph = GMany NothingO blockMap2 NothingO}, facts)
fixpointRewrite
@@ -181,20 +182,21 @@ fixpointRewrite
Direction
-> DataflowLattice f
-> RewriteFun f
- -> [Label]
+ -> Label
-> LabelMap CmmBlock
-> FactBase f
-> UniqSM (LabelMap CmmBlock, FactBase f)
-fixpointRewrite dir lattice do_block entries blockmap = loop start blockmap
+fixpointRewrite dir lattice do_block entry blockmap = loop start blockmap
where
-- Sorting the blocks helps to minimize the number of times we need to
-- process blocks. For instance, for forward analysis we want to look at
-- blocks in reverse postorder. Also, see comments for sortBlocks.
- blocks = sortBlocks dir entries blockmap
+ blocks = sortBlocks dir entry blockmap
num_blocks = length blocks
block_arr = {-# SCC "block_arr_rewrite" #-}
listArray (0, num_blocks - 1) blocks
- start = {-# SCC "start_rewrite" #-} [0 .. num_blocks - 1]
+ start = {-# SCC "start_rewrite" #-}
+ IntSet.fromDistinctAscList [0 .. num_blocks - 1]
dep_blocks = {-# SCC "dep_blocks_rewrite" #-} mkDepBlocks dir blocks
join = fact_join lattice
@@ -203,8 +205,8 @@ fixpointRewrite dir lattice do_block entries blockmap = loop start blockmap
-> LabelMap CmmBlock -- ^ Rewritten blocks.
-> FactBase f -- ^ Current facts.
-> UniqSM (LabelMap CmmBlock, FactBase f)
- loop [] !blocks1 !fbase1 = return (blocks1, fbase1)
- loop (index : todo1) !blocks1 !fbase1 = do
+ loop todo !blocks1 !fbase1
+ | Just (index, todo1) <- IntSet.minView todo = do
-- Note that we use the *original* block here. This is important.
-- We're optimistically rewriting blocks even before reaching the fixed
-- point, which means that the rewrite might be incorrect. So if the
@@ -215,9 +217,10 @@ fixpointRewrite dir lattice do_block entries blockmap = loop start blockmap
do_block block fbase1
let blocks2 = mapInsert (entryLabel new_block) new_block blocks1
(todo2, fbase2) = {-# SCC "mapFoldWithKey_rewrite" #-}
- mapFoldWithKey
+ mapFoldlWithKey
(updateFact join dep_blocks) (todo1, fbase1) out_facts
loop todo2 blocks2 fbase2
+ loop _ !blocks1 !fbase1 = return (blocks1, fbase1)
{-
@@ -263,20 +266,15 @@ we'll propagate (x=4) to L4, and nuke the otherwise-good rewriting of L4.
-- | Sort the blocks into the right order for analysis. This means reverse
-- postorder for a forward analysis. For the backward one, we simply reverse
-- that (see Note [Backward vs forward analysis]).
---
--- Note: We're using Hoopl's confusingly named `postorder_dfs_from` but AFAICS
--- it returns the *reverse* postorder of the blocks (it visits blocks in the
--- postorder and uses (:) to collect them, which gives the reverse of the
--- visitation order).
sortBlocks
:: NonLocal n
- => Direction -> [Label] -> LabelMap (Block n C C) -> [Block n C C]
-sortBlocks direction entries blockmap =
+ => Direction -> Label -> LabelMap (Block n C C) -> [Block n C C]
+sortBlocks direction entry blockmap =
case direction of
Fwd -> fwd
Bwd -> reverse fwd
where
- fwd = postorder_dfs_from blockmap entries
+ fwd = revPostorderFrom blockmap entry
-- Note [Backward vs forward analysis]
--
@@ -328,11 +326,11 @@ mkDepBlocks Bwd blocks = go blocks 0 mapEmpty
updateFact
:: JoinFun f
-> LabelMap IntSet
+ -> (IntHeap, FactBase f)
-> Label
-> f -- out fact
-> (IntHeap, FactBase f)
- -> (IntHeap, FactBase f)
-updateFact fact_join dep_blocks lbl new_fact (todo, fbase)
+updateFact fact_join dep_blocks (todo, fbase) lbl new_fact
= case lookupFact lbl fbase of
Nothing ->
-- Note [No old fact]
@@ -342,7 +340,7 @@ updateFact fact_join dep_blocks lbl new_fact (todo, fbase)
(NotChanged _) -> (todo, fbase)
(Changed f) -> let !z = mapInsert lbl f fbase in (changed, z)
where
- changed = IntSet.foldr insertIntHeap todo $
+ changed = todo `IntSet.union`
mapFindWithDefault IntSet.empty lbl dep_blocks
{-
@@ -376,6 +374,11 @@ joinOutFacts lattice nonLocal fact_base = foldl' join (fact_bot lattice) facts
, isJust fact
]
+joinFacts :: DataflowLattice f -> [f] -> f
+joinFacts lattice facts = foldl' join (fact_bot lattice) facts
+ where
+ join new old = getJoined $ fact_join lattice (OldFact old) (NewFact new)
+
-- | Returns the joined facts for each label.
mkFactBase :: DataflowLattice f -> [(Label, f)] -> FactBase f
mkFactBase lattice = foldl' add mapEmpty
@@ -434,19 +437,4 @@ joinBlocksOO (BMiddle n) b = blockCons n b
joinBlocksOO b (BMiddle n) = blockSnoc b n
joinBlocksOO b1 b2 = BCat b1 b2
--- -----------------------------------------------------------------------------
--- a Heap of Int
-
--- We should really use a proper Heap here, but my attempts to make
--- one have not succeeded in beating the simple ordered list. Another
--- alternative is IntSet (using deleteFindMin), but that was also
--- slower than the ordered list in my experiments --SDM 25/1/2012
-
-type IntHeap = [Int] -- ordered
-
-insertIntHeap :: Int -> [Int] -> [Int]
-insertIntHeap x [] = [x]
-insertIntHeap x (y:ys)
- | x < y = x : y : ys
- | x == y = x : ys
- | otherwise = y : insertIntHeap x ys
+type IntHeap = IntSet
diff --git a/compiler/cmm/Hoopl/Graph.hs b/compiler/cmm/Hoopl/Graph.hs
index 87da072458..0142f70c76 100644
--- a/compiler/cmm/Hoopl/Graph.hs
+++ b/compiler/cmm/Hoopl/Graph.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE RankNTypes #-}
@@ -14,10 +15,13 @@ module Hoopl.Graph
, labelsDefined
, mapGraph
, mapGraphBlocks
- , postorder_dfs_from
+ , revPostorderFrom
) where
+import GhcPrelude
+import Util
+
import Hoopl.Label
import Hoopl.Block
import Hoopl.Collections
@@ -49,13 +53,14 @@ emptyBody = mapEmpty
bodyList :: Body' block n -> [(Label,block n C C)]
bodyList body = mapToList body
-addBlock :: NonLocal thing
- => thing C C -> LabelMap (thing C C)
- -> LabelMap (thing C C)
-addBlock b body
- | mapMember lbl body = error $ "duplicate label " ++ show lbl ++ " in graph"
- | otherwise = mapInsert lbl b body
- where lbl = entryLabel b
+addBlock
+ :: (NonLocal block, HasDebugCallStack)
+ => block C C -> LabelMap (block C C) -> LabelMap (block C C)
+addBlock block body = mapAlter add lbl body
+ where
+ lbl = entryLabel block
+ add Nothing = Just block
+ add _ = error $ "duplicate label " ++ show lbl ++ " in graph"
-- ---------------------------------------------------------------------------
@@ -107,9 +112,9 @@ labelsDefined :: forall block n e x . NonLocal (block n) => Graph' block n e x
-> LabelSet
labelsDefined GNil = setEmpty
labelsDefined (GUnit{}) = setEmpty
-labelsDefined (GMany _ body x) = mapFoldWithKey addEntry (exitLabel x) body
- where addEntry :: forall a. ElemOf LabelSet -> a -> LabelSet -> LabelSet
- addEntry label _ labels = setInsert label labels
+labelsDefined (GMany _ body x) = mapFoldlWithKey addEntry (exitLabel x) body
+ where addEntry :: forall a. LabelSet -> ElemOf LabelSet -> a -> LabelSet
+ addEntry labels label _ = setInsert label labels
exitLabel :: MaybeO x (block n C O) -> LabelSet
exitLabel NothingO = setEmpty
exitLabel (JustO b) = setSingleton (entryLabel b)
@@ -117,22 +122,10 @@ labelsDefined (GMany _ body x) = mapFoldWithKey addEntry (exitLabel x) body
----------------------------------------------------------------
-class LabelsPtr l where
- targetLabels :: l -> [Label]
-
-instance NonLocal n => LabelsPtr (n e C) where
- targetLabels n = successors n
-
-instance LabelsPtr Label where
- targetLabels l = [l]
-
-instance LabelsPtr LabelSet where
- targetLabels = setElems
-
-instance LabelsPtr l => LabelsPtr [l] where
- targetLabels = concatMap targetLabels
-
--- | This is the most important traversal over this data structure. It drops
+-- | Returns a list of blocks reachable from the provided Labels in the reverse
+-- postorder.
+--
+-- This is the most important traversal over this data structure. It drops
-- unreachable code and puts blocks in an order that is good for solving forward
-- dataflow problems quickly. The reverse order is good for solving backward
-- dataflow problems quickly. The forward order is also reasonably good for
@@ -141,59 +134,52 @@ instance LabelsPtr l => LabelsPtr [l] where
-- that you would need a more serious analysis, probably based on dominators, to
-- identify loop headers.
--
--- The ubiquity of 'postorder_dfs' is one reason for the ubiquity of the 'LGraph'
--- representation, when for most purposes the plain 'Graph' representation is
--- more mathematically elegant (but results in more complicated code).
---
--- Here's an easy way to go wrong! Consider
+-- For forward analyses we want reverse postorder visitation, consider:
-- @
-- A -> [B,C]
-- B -> D
-- C -> D
-- @
--- Then ordinary dfs would give [A,B,D,C] which has a back ref from C to D.
--- Better to get [A,B,C,D]
-
-
--- | Traversal: 'postorder_dfs' returns a list of blocks reachable
--- from the entry of enterable graph. The entry and exit are *not* included.
--- The list has the following property:
---
--- Say a "back reference" exists if one of a block's
--- control-flow successors precedes it in the output list
---
--- Then there are as few back references as possible
---
--- The output is suitable for use in
--- a forward dataflow problem. For a backward problem, simply reverse
--- the list. ('postorder_dfs' is sufficiently tricky to implement that
--- one doesn't want to try and maintain both forward and backward
--- versions.)
-
-postorder_dfs_from_except :: forall block e . (NonLocal block, LabelsPtr e)
- => LabelMap (block C C) -> e -> LabelSet -> [block C C]
-postorder_dfs_from_except blocks b visited =
- vchildren (get_children b) (\acc _visited -> acc) [] visited
- where
- vnode :: block C C -> ([block C C] -> LabelSet -> a) -> [block C C] -> LabelSet -> a
- vnode block cont acc visited =
- if setMember id visited then
- cont acc visited
- else
- let cont' acc visited = cont (block:acc) visited in
- vchildren (get_children block) cont' acc (setInsert id visited)
- where id = entryLabel block
- vchildren :: forall a. [block C C] -> ([block C C] -> LabelSet -> a) -> [block C C] -> LabelSet -> a
- vchildren bs cont acc visited = next bs acc visited
- where next children acc visited =
- case children of [] -> cont acc visited
- (b:bs) -> vnode b (next bs) acc visited
- get_children :: forall l. LabelsPtr l => l -> [block C C]
- get_children block = foldr add_id [] $ targetLabels block
- add_id id rst = case lookupFact id blocks of
- Just b -> b : rst
- Nothing -> rst
-
-postorder_dfs_from
- :: (NonLocal block, LabelsPtr b) => LabelMap (block C C) -> b -> [block C C]
-postorder_dfs_from blocks b = postorder_dfs_from_except blocks b setEmpty
+-- Postorder: [D, C, B, A] (or [D, B, C, A])
+-- Reverse postorder: [A, B, C, D] (or [A, C, B, D])
+-- This matters for, e.g., forward analysis, because we want to analyze *both*
+-- B and C before we analyze D.
+revPostorderFrom
+ :: forall block. (NonLocal block)
+ => LabelMap (block C C) -> Label -> [block C C]
+revPostorderFrom graph start = go start_worklist setEmpty []
+ where
+ start_worklist = lookup_for_descend start Nil
+
+ -- To compute the postorder we need to "visit" a block (mark as done)
+ -- *after* visiting all its successors. So we need to know whether we
+ -- already processed all successors of each block (and @NonLocal@ allows
+ -- arbitrary many successors). So we use an explicit stack with an extra bit
+ -- of information:
+ -- * @ConsTodo@ means to explore the block if it wasn't visited before
+ -- * @ConsMark@ means that all successors were already done and we can add
+ -- the block to the result.
+ --
+ -- NOTE: We add blocks to the result list in postorder, but we *prepend*
+ -- them (i.e., we use @(:)@), which means that the final list is in reverse
+ -- postorder.
+ go :: DfsStack (block C C) -> LabelSet -> [block C C] -> [block C C]
+ go Nil !_ !result = result
+ go (ConsMark block rest) !wip_or_done !result =
+ go rest wip_or_done (block : result)
+ go (ConsTodo block rest) !wip_or_done !result
+ | entryLabel block `setMember` wip_or_done = go rest wip_or_done result
+ | otherwise =
+ let new_worklist =
+ foldr lookup_for_descend
+ (ConsMark block rest)
+ (successors block)
+ in go new_worklist (setInsert (entryLabel block) wip_or_done) result
+
+ lookup_for_descend :: Label -> DfsStack (block C C) -> DfsStack (block C C)
+ lookup_for_descend label wl
+ | Just b <- mapLookup label graph = ConsTodo b wl
+ | otherwise =
+ error $ "Label that doesn't have a block?! " ++ show label
+
+data DfsStack a = ConsTodo a (DfsStack a) | ConsMark a (DfsStack a) | Nil
diff --git a/compiler/cmm/Hoopl/Label.hs b/compiler/cmm/Hoopl/Label.hs
index 5ee4f72fc3..7fddbf4c3f 100644
--- a/compiler/cmm/Hoopl/Label.hs
+++ b/compiler/cmm/Hoopl/Label.hs
@@ -2,32 +2,37 @@
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE DeriveTraversable #-}
{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+
module Hoopl.Label
( Label
, LabelMap
, LabelSet
, FactBase
, lookupFact
- , uniqueToLbl
+ , mkHooplLabel
) where
+import GhcPrelude
+
import Outputable
-import Hoopl.Collections
-- TODO: This should really just use GHC's Unique and Uniq{Set,FM}
-import Hoopl.Unique
+import Hoopl.Collections
import Unique (Uniquable(..))
+import TrieMap
+
-----------------------------------------------------------------------------
-- Label
-----------------------------------------------------------------------------
-newtype Label = Label { lblToUnique :: Unique }
+newtype Label = Label { lblToUnique :: Int }
deriving (Eq, Ord)
-uniqueToLbl :: Unique -> Label
-uniqueToLbl = Label
+mkHooplLabel :: Int -> Label
+mkHooplLabel = Label
instance Show Label where
show (Label n) = "L" ++ show n
@@ -60,9 +65,10 @@ instance IsSet LabelSet where
setIntersection (LS x) (LS y) = LS (setIntersection x y)
setIsSubsetOf (LS x) (LS y) = setIsSubsetOf x y
- setFold k z (LS s) = setFold (k . uniqueToLbl) z s
+ setFoldl k z (LS s) = setFoldl (\a v -> k a (mkHooplLabel v)) z s
+ setFoldr k z (LS s) = setFoldr (\v a -> k (mkHooplLabel v) a) z s
- setElems (LS s) = map uniqueToLbl (setElems s)
+ setElems (LS s) = map mkHooplLabel (setElems s)
setFromList ks = LS (setFromList (map lblToUnique ks))
-----------------------------------------------------------------------------
@@ -85,22 +91,25 @@ instance IsMap LabelMap where
mapInsert (Label k) v (LM m) = LM (mapInsert k v m)
mapInsertWith f (Label k) v (LM m) = LM (mapInsertWith f k v m)
mapDelete (Label k) (LM m) = LM (mapDelete k m)
+ mapAlter f (Label k) (LM m) = LM (mapAlter f k m)
mapUnion (LM x) (LM y) = LM (mapUnion x y)
- mapUnionWithKey f (LM x) (LM y) = LM (mapUnionWithKey (f . uniqueToLbl) x y)
+ mapUnionWithKey f (LM x) (LM y) = LM (mapUnionWithKey (f . mkHooplLabel) x y)
mapDifference (LM x) (LM y) = LM (mapDifference x y)
mapIntersection (LM x) (LM y) = LM (mapIntersection x y)
mapIsSubmapOf (LM x) (LM y) = mapIsSubmapOf x y
mapMap f (LM m) = LM (mapMap f m)
- mapMapWithKey f (LM m) = LM (mapMapWithKey (f . uniqueToLbl) m)
- mapFold k z (LM m) = mapFold k z m
- mapFoldWithKey k z (LM m) = mapFoldWithKey (k . uniqueToLbl) z m
+ mapMapWithKey f (LM m) = LM (mapMapWithKey (f . mkHooplLabel) m)
+ mapFoldl k z (LM m) = mapFoldl k z m
+ mapFoldr k z (LM m) = mapFoldr k z m
+ mapFoldlWithKey k z (LM m) =
+ mapFoldlWithKey (\a v -> k a (mkHooplLabel v)) z m
mapFilter f (LM m) = LM (mapFilter f m)
mapElems (LM m) = mapElems m
- mapKeys (LM m) = map uniqueToLbl (mapKeys m)
- mapToList (LM m) = [(uniqueToLbl k, v) | (k, v) <- mapToList m]
+ mapKeys (LM m) = map mkHooplLabel (mapKeys m)
+ mapToList (LM m) = [(mkHooplLabel k, v) | (k, v) <- mapToList m]
mapFromList assocs = LM (mapFromList [(lblToUnique k, v) | (k, v) <- assocs])
mapFromListWith f assocs = LM (mapFromListWith f [(lblToUnique k, v) | (k, v) <- assocs])
@@ -113,6 +122,14 @@ instance Outputable LabelSet where
instance Outputable a => Outputable (LabelMap a) where
ppr = ppr . mapToList
+instance TrieMap LabelMap where
+ type Key LabelMap = Label
+ emptyTM = mapEmpty
+ lookupTM k m = mapLookup k m
+ alterTM k f m = mapAlter f k m
+ foldTM k m z = mapFoldr k z m
+ mapTM f m = mapMap f m
+
-----------------------------------------------------------------------------
-- FactBase
diff --git a/compiler/cmm/Hoopl/Unique.hs b/compiler/cmm/Hoopl/Unique.hs
deleted file mode 100644
index f27961bb28..0000000000
--- a/compiler/cmm/Hoopl/Unique.hs
+++ /dev/null
@@ -1,91 +0,0 @@
-{-# LANGUAGE DeriveFunctor #-}
-{-# LANGUAGE DeriveTraversable #-}
-{-# LANGUAGE DeriveFoldable #-}
-{-# LANGUAGE TypeFamilies #-}
-module Hoopl.Unique
- ( Unique
- , UniqueMap
- , UniqueSet
- , intToUnique
- ) where
-
-import qualified Data.IntMap as M
-import qualified Data.IntSet as S
-
-import Hoopl.Collections
-
-
------------------------------------------------------------------------------
--- Unique
------------------------------------------------------------------------------
-
-type Unique = Int
-
-intToUnique :: Int -> Unique
-intToUnique = id
-
------------------------------------------------------------------------------
--- UniqueSet
-
-newtype UniqueSet = US S.IntSet deriving (Eq, Ord, Show)
-
-instance IsSet UniqueSet where
- type ElemOf UniqueSet = Unique
-
- setNull (US s) = S.null s
- setSize (US s) = S.size s
- setMember k (US s) = S.member k s
-
- setEmpty = US S.empty
- setSingleton k = US (S.singleton k)
- setInsert k (US s) = US (S.insert k s)
- setDelete k (US s) = US (S.delete k s)
-
- setUnion (US x) (US y) = US (S.union x y)
- setDifference (US x) (US y) = US (S.difference x y)
- setIntersection (US x) (US y) = US (S.intersection x y)
- setIsSubsetOf (US x) (US y) = S.isSubsetOf x y
-
- setFold k z (US s) = S.foldr k z s
-
- setElems (US s) = S.elems s
- setFromList ks = US (S.fromList ks)
-
------------------------------------------------------------------------------
--- UniqueMap
-
-newtype UniqueMap v = UM (M.IntMap v)
- deriving (Eq, Ord, Show, Functor, Foldable, Traversable)
-
-instance IsMap UniqueMap where
- type KeyOf UniqueMap = Unique
-
- mapNull (UM m) = M.null m
- mapSize (UM m) = M.size m
- mapMember k (UM m) = M.member k m
- mapLookup k (UM m) = M.lookup k m
- mapFindWithDefault def k (UM m) = M.findWithDefault def k m
-
- mapEmpty = UM M.empty
- mapSingleton k v = UM (M.singleton k v)
- mapInsert k v (UM m) = UM (M.insert k v m)
- mapInsertWith f k v (UM m) = UM (M.insertWith f k v m)
- mapDelete k (UM m) = UM (M.delete k m)
-
- mapUnion (UM x) (UM y) = UM (M.union x y)
- mapUnionWithKey f (UM x) (UM y) = UM (M.unionWithKey (f . intToUnique) x y)
- mapDifference (UM x) (UM y) = UM (M.difference x y)
- mapIntersection (UM x) (UM y) = UM (M.intersection x y)
- mapIsSubmapOf (UM x) (UM y) = M.isSubmapOf x y
-
- mapMap f (UM m) = UM (M.map f m)
- mapMapWithKey f (UM m) = UM (M.mapWithKey (f . intToUnique) m)
- mapFold k z (UM m) = M.foldr k z m
- mapFoldWithKey k z (UM m) = M.foldrWithKey (k . intToUnique) z m
- mapFilter f (UM m) = UM (M.filter f m)
-
- mapElems (UM m) = M.elems m
- mapKeys (UM m) = M.keys m
- mapToList (UM m) = M.toList m
- mapFromList assocs = UM (M.fromList assocs)
- mapFromListWith f assocs = UM (M.fromListWith f assocs)
diff --git a/compiler/cmm/MkGraph.hs b/compiler/cmm/MkGraph.hs
index 62dfd34da3..70229d067d 100644
--- a/compiler/cmm/MkGraph.hs
+++ b/compiler/cmm/MkGraph.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE BangPatterns, CPP, GADTs #-}
+{-# LANGUAGE BangPatterns, GADTs #-}
module MkGraph
( CmmAGraph, CmmAGraphScoped, CgStmt(..)
@@ -21,6 +21,8 @@ module MkGraph
)
where
+import GhcPrelude hiding ( (<*>) ) -- avoid importing (<*>)
+
import BlockId
import Cmm
import CmmCallConv
@@ -35,13 +37,7 @@ import ForeignCall
import OrdList
import SMRep (ByteOff)
import UniqSupply
-
-import Control.Monad
-import Data.List
-import Data.Maybe
-import Prelude (($),Int,Bool,Eq(..)) -- avoid importing (<*>)
-
-#include "HsVersions.h"
+import Util
-----------------------------------------------------------------------------
@@ -185,12 +181,10 @@ mkNop :: CmmAGraph
mkNop = nilOL
mkComment :: FastString -> CmmAGraph
-#if defined(DEBUG)
--- SDM: generating all those comments takes time, this saved about 4% for me
-mkComment fs = mkMiddle $ CmmComment fs
-#else
-mkComment _ = nilOL
-#endif
+mkComment fs
+ -- SDM: generating all those comments takes time, this saved about 4% for me
+ | debugIsOn = mkMiddle $ CmmComment fs
+ | otherwise = nilOL
---------- Assignment and store
mkAssign :: CmmReg -> CmmExpr -> CmmAGraph
diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs
index 7d36c120b0..a979d49501 100644
--- a/compiler/cmm/PprC.hs
+++ b/compiler/cmm/PprC.hs
@@ -26,6 +26,8 @@ module PprC (
#include "HsVersions.h"
-- Cmm stuff
+import GhcPrelude
+
import BlockId
import CLabel
import ForeignCall
@@ -377,14 +379,10 @@ pprExpr e = case e of
CmmReg reg -> pprCastReg reg
CmmRegOff reg 0 -> pprCastReg reg
- CmmRegOff reg i
- | i < 0 && negate_ok -> pprRegOff (char '-') (-i)
- | otherwise -> pprRegOff (char '+') i
- where
- pprRegOff op i' = pprCastReg reg <> op <> int i'
- negate_ok = negate (fromIntegral i :: Integer) <
- fromIntegral (maxBound::Int)
- -- overflow is undefined; see #7620
+ -- CmmRegOff is an alias of MO_Add
+ CmmRegOff reg i -> sdocWithDynFlags $ \dflags ->
+ pprCastReg reg <> char '+' <>
+ pprHexVal (fromIntegral i) (wordWidth dflags)
CmmMachOp mop args -> pprMachOpApp mop args
@@ -493,7 +491,7 @@ pprLit lit = case lit of
CmmHighStackMark -> panic "PprC printing high stack mark"
CmmLabel clbl -> mkW_ <> pprCLabelAddr clbl
CmmLabelOff clbl i -> mkW_ <> pprCLabelAddr clbl <> char '+' <> int i
- CmmLabelDiffOff clbl1 _ i
+ CmmLabelDiffOff clbl1 _ i _ -- non-word widths not supported via C
-- WARNING:
-- * the lit must occur in the info table clbl2
-- * clbl1 must be an SRT, a slow entry point or a large bitmap
@@ -504,7 +502,7 @@ pprLit lit = case lit of
pprLit1 :: CmmLit -> SDoc
pprLit1 lit@(CmmLabelOff _ _) = parens (pprLit lit)
-pprLit1 lit@(CmmLabelDiffOff _ _ _) = parens (pprLit lit)
+pprLit1 lit@(CmmLabelDiffOff _ _ _ _) = parens (pprLit lit)
pprLit1 lit@(CmmFloat _ _) = parens (pprLit lit)
pprLit1 other = pprLit other
@@ -536,13 +534,29 @@ pprStatics dflags (CmmStaticLit (CmmInt i W64) : rest)
CmmStaticLit (CmmInt q W32) : rest)
where r = i .&. 0xffffffff
q = i `shiftR` 32
+pprStatics dflags (CmmStaticLit (CmmInt a W32) :
+ CmmStaticLit (CmmInt b W32) : rest)
+ | wordWidth dflags == W64
+ = if wORDS_BIGENDIAN dflags
+ then pprStatics dflags (CmmStaticLit (CmmInt ((shiftL a 32) .|. b) W64) :
+ rest)
+ else pprStatics dflags (CmmStaticLit (CmmInt ((shiftL b 32) .|. a) W64) :
+ rest)
+pprStatics dflags (CmmStaticLit (CmmInt a W16) :
+ CmmStaticLit (CmmInt b W16) : rest)
+ | wordWidth dflags == W32
+ = if wORDS_BIGENDIAN dflags
+ then pprStatics dflags (CmmStaticLit (CmmInt ((shiftL a 16) .|. b) W32) :
+ rest)
+ else pprStatics dflags (CmmStaticLit (CmmInt ((shiftL b 16) .|. a) W32) :
+ rest)
pprStatics dflags (CmmStaticLit (CmmInt _ w) : _)
| w /= wordWidth dflags
- = panic "pprStatics: cannot emit a non-word-sized static literal"
+ = pprPanic "pprStatics: cannot emit a non-word-sized static literal" (ppr w)
pprStatics dflags (CmmStaticLit lit : rest)
= pprLit1 lit : pprStatics dflags rest
pprStatics _ (other : _)
- = pprPanic "pprWord" (pprStatic other)
+ = pprPanic "pprStatics: other" (pprStatic other)
pprStatic :: CmmStatic -> SDoc
pprStatic s = case s of
@@ -721,6 +735,8 @@ pprMachOp_for_C mop = case mop of
(panic $ "PprC.pprMachOp_for_C: MO_VF_Quot"
++ " should have been handled earlier!")
+ MO_AlignmentCheck {} -> panic "-falignment-santisation not supported by unregisterised backend"
+
signedOp :: MachOp -> Bool -- Argument type(s) are signed ints
signedOp (MO_S_Quot _) = True
signedOp (MO_S_Rem _) = True
@@ -759,6 +775,9 @@ pprCallishMachOp_for_C mop
MO_F64_Tanh -> text "tanh"
MO_F64_Asin -> text "asin"
MO_F64_Acos -> text "acos"
+ MO_F64_Atanh -> text "atanh"
+ MO_F64_Asinh -> text "asinh"
+ MO_F64_Acosh -> text "acosh"
MO_F64_Atan -> text "atan"
MO_F64_Log -> text "log"
MO_F64_Exp -> text "exp"
@@ -774,6 +793,9 @@ pprCallishMachOp_for_C mop
MO_F32_Asin -> text "asinf"
MO_F32_Acos -> text "acosf"
MO_F32_Atan -> text "atanf"
+ MO_F32_Asinh -> text "asinhf"
+ MO_F32_Acosh -> text "acoshf"
+ MO_F32_Atanh -> text "atanhf"
MO_F32_Log -> text "logf"
MO_F32_Exp -> text "expf"
MO_F32_Sqrt -> text "sqrtf"
@@ -782,8 +804,11 @@ pprCallishMachOp_for_C mop
MO_Memcpy _ -> text "memcpy"
MO_Memset _ -> text "memset"
MO_Memmove _ -> text "memmove"
+ MO_Memcmp _ -> text "memcmp"
(MO_BSwap w) -> ptext (sLit $ bSwapLabel w)
(MO_PopCnt w) -> ptext (sLit $ popCntLabel w)
+ (MO_Pext w) -> ptext (sLit $ pextLabel w)
+ (MO_Pdep w) -> ptext (sLit $ pdepLabel w)
(MO_Clz w) -> ptext (sLit $ clzLabel w)
(MO_Ctz w) -> ptext (sLit $ ctzLabel w)
(MO_AtomicRMW w amop) -> ptext (sLit $ atomicRMWLabel w amop)
@@ -796,6 +821,7 @@ pprCallishMachOp_for_C mop
MO_U_QuotRem {} -> unsupported
MO_U_QuotRem2 {} -> unsupported
MO_Add2 {} -> unsupported
+ MO_AddWordC {} -> unsupported
MO_SubWordC {} -> unsupported
MO_AddIntC {} -> unsupported
MO_SubIntC {} -> unsupported
@@ -1075,7 +1101,7 @@ te_BB block = mapM_ te_Stmt (blockToList mid) >> te_Stmt last
te_Lit :: CmmLit -> TE ()
te_Lit (CmmLabel l) = te_lbl l
te_Lit (CmmLabelOff l _) = te_lbl l
-te_Lit (CmmLabelDiffOff l1 _ _) = te_lbl l1
+te_Lit (CmmLabelDiffOff l1 _ _ _) = te_lbl l1
te_Lit _ = return ()
te_Stmt :: CmmNode e x -> TE ()
diff --git a/compiler/cmm/PprCmm.hs b/compiler/cmm/PprCmm.hs
index dbd4619416..90f26e4247 100644
--- a/compiler/cmm/PprCmm.hs
+++ b/compiler/cmm/PprCmm.hs
@@ -15,8 +15,8 @@
--
-- As such, this should be a well-defined syntax: we want it to look nice.
-- Thus, we try wherever possible to use syntax defined in [1],
--- "The C-- Reference Manual", http://www.cminusminus.org/. We differ
--- slightly, in some cases. For one, we use I8 .. I64 for types, rather
+-- "The C-- Reference Manual", http://www.cs.tufts.edu/~nr/c--/index.html. We
+-- differ slightly, in some cases. For one, we use I8 .. I64 for types, rather
-- than C--'s bits8 .. bits64.
--
-- We try to ensure that all information available in the abstract
@@ -39,6 +39,8 @@ module PprCmm
)
where
+import GhcPrelude hiding (succ)
+
import BlockId ()
import CLabel
import Cmm
@@ -55,8 +57,6 @@ import PprCore ()
import BasicTypes
import Hoopl.Block
import Hoopl.Graph
-import Data.List
-import Prelude hiding (succ)
-------------------------------------------------
-- Outputable instances
@@ -108,7 +108,7 @@ pprStackInfo (StackInfo {arg_space=arg_space, updfr_space=updfr_space}) =
pprTopInfo :: CmmTopInfo -> SDoc
pprTopInfo (TopInfo {info_tbls=info_tbl, stack_info=stack_info}) =
- vcat [text "info_tbl: " <> ppr info_tbl,
+ vcat [text "info_tbls: " <> ppr info_tbl,
text "stack_info: " <> ppr stack_info]
----------------------------------------------------------
@@ -141,8 +141,8 @@ pprCmmGraph g
= text "{" <> text "offset"
$$ nest 2 (vcat $ map ppr blocks)
$$ text "}"
- where blocks = postorderDfs g
- -- postorderDfs has the side-effect of discarding unreachable code,
+ where blocks = revPostorder g
+ -- revPostorder has the side-effect of discarding unreachable code,
-- so pretty-printed Cmm will omit any unreachable blocks. This can
-- sometimes be confusing.
@@ -185,9 +185,13 @@ pprNode node = pp_node <+> pp_debug
pp_node :: SDoc
pp_node = sdocWithDynFlags $ \dflags -> case node of
-- label:
- CmmEntry id tscope -> ppr id <> colon <+>
+ CmmEntry id tscope -> lbl <> colon <+>
(sdocWithDynFlags $ \dflags ->
ppUnless (gopt Opt_SuppressTicks dflags) (text "//" <+> ppr tscope))
+ where
+ lbl = if gopt Opt_SuppressUniques dflags
+ then text "_lbl_"
+ else ppr id
-- // text
CmmComment s -> text "//" <+> ftext s
@@ -252,8 +256,8 @@ pprNode node = pp_node <+> pp_debug
, ppr l <> semi
]
def | Just l <- mbdef = hsep
- [ text "default: goto"
- , ppr l <> semi
+ [ text "default:"
+ , braces (text "goto" <+> ppr l <> semi)
]
| otherwise = empty
diff --git a/compiler/cmm/PprCmmDecl.hs b/compiler/cmm/PprCmmDecl.hs
index ce8fb0dc5d..c4ee6fd068 100644
--- a/compiler/cmm/PprCmmDecl.hs
+++ b/compiler/cmm/PprCmmDecl.hs
@@ -1,5 +1,3 @@
-{-# LANGUAGE CPP #-}
-
----------------------------------------------------------------------------
--
-- Pretty-printing of common Cmm types
@@ -15,8 +13,8 @@
--
-- As such, this should be a well-defined syntax: we want it to look nice.
-- Thus, we try wherever possible to use syntax defined in [1],
--- "The C-- Reference Manual", http://www.cminusminus.org/. We differ
--- slightly, in some cases. For one, we use I8 .. I64 for types, rather
+-- "The C-- Reference Manual", http://www.cs.tufts.edu/~nr/c--/index.html. We
+-- differ slightly, in some cases. For one, we use I8 .. I64 for types, rather
-- than C--'s bits8 .. bits64.
--
-- We try to ensure that all information available in the abstract
@@ -40,6 +38,8 @@ module PprCmmDecl
)
where
+import GhcPrelude
+
import PprCmmExpr
import Cmm
@@ -52,7 +52,6 @@ import System.IO
-- Temp Jan08
import SMRep
-#include "../includes/rts/storage/FunTypes.h"
pprCmms :: (Outputable info, Outputable g)
@@ -116,18 +115,15 @@ pprTop (CmmData section ds) =
pprInfoTable :: CmmInfoTable -> SDoc
pprInfoTable (CmmInfoTable { cit_lbl = lbl, cit_rep = rep
, cit_prof = prof_info
- , cit_srt = _srt })
- = vcat [ text "label:" <+> ppr lbl
- , text "rep:" <> ppr rep
+ , cit_srt = srt })
+ = vcat [ text "label: " <> ppr lbl
+ , text "rep: " <> ppr rep
, case prof_info of
NoProfilingInfo -> empty
- ProfilingInfo ct cd -> vcat [ text "type:" <+> pprWord8String ct
- , text "desc: " <> pprWord8String cd ] ]
-
-instance Outputable C_SRT where
- ppr NoC_SRT = text "_no_srt_"
- ppr (C_SRT label off bitmap)
- = parens (ppr label <> comma <> ppr off <> comma <> ppr bitmap)
+ ProfilingInfo ct cd ->
+ vcat [ text "type: " <> pprWord8String ct
+ , text "desc: " <> pprWord8String cd ]
+ , text "srt: " <> ppr srt ]
instance Outputable ForeignHint where
ppr NoHint = empty
diff --git a/compiler/cmm/PprCmmExpr.hs b/compiler/cmm/PprCmmExpr.hs
index 77c92407bc..7bf73f1ca6 100644
--- a/compiler/cmm/PprCmmExpr.hs
+++ b/compiler/cmm/PprCmmExpr.hs
@@ -13,8 +13,8 @@
--
-- As such, this should be a well-defined syntax: we want it to look nice.
-- Thus, we try wherever possible to use syntax defined in [1],
--- "The C-- Reference Manual", http://www.cminusminus.org/. We differ
--- slightly, in some cases. For one, we use I8 .. I64 for types, rather
+-- "The C-- Reference Manual", http://www.cs.tufts.edu/~nr/c--/index.html. We
+-- differ slightly, in some cases. For one, we use I8 .. I64 for types, rather
-- than C--'s bits8 .. bits64.
--
-- We try to ensure that all information available in the abstract
@@ -38,9 +38,12 @@ module PprCmmExpr
)
where
+import GhcPrelude
+
import CmmExpr
import Outputable
+import DynFlags
import Data.Maybe
import Numeric ( fromRat )
@@ -196,7 +199,7 @@ pprLit lit = sdocWithDynFlags $ \dflags ->
CmmVec lits -> char '<' <> commafy (map pprLit lits) <> char '>'
CmmLabel clbl -> ppr clbl
CmmLabelOff clbl i -> ppr clbl <> ppr_offset i
- CmmLabelDiffOff clbl1 clbl2 i -> ppr clbl1 <> char '-'
+ CmmLabelDiffOff clbl1 clbl2 i _ -> ppr clbl1 <> char '-'
<> ppr clbl2 <> ppr_offset i
CmmBlock id -> ppr id
CmmHighStackMark -> text "<highSp>"
@@ -224,14 +227,18 @@ pprReg r
-- We only print the type of the local reg if it isn't wordRep
--
pprLocalReg :: LocalReg -> SDoc
-pprLocalReg (LocalReg uniq rep)
+pprLocalReg (LocalReg uniq rep) = sdocWithDynFlags $ \dflags ->
-- = ppr rep <> char '_' <> ppr uniq
-- Temp Jan08
- = char '_' <> ppr uniq <>
+ char '_' <> pprUnique dflags uniq <>
(if isWord32 rep -- && not (isGcPtrType rep) -- Temp Jan08 -- sigh
then dcolon <> ptr <> ppr rep
else dcolon <> ptr <> ppr rep)
where
+ pprUnique dflags unique =
+ if gopt Opt_SuppressUniques dflags
+ then text "_locVar_"
+ else ppr unique
ptr = empty
--if isGcPtrType rep
-- then doubleQuotes (text "ptr")
diff --git a/compiler/cmm/SMRep.hs b/compiler/cmm/SMRep.hs
index d40af4ff1c..743631527e 100644
--- a/compiler/cmm/SMRep.hs
+++ b/compiler/cmm/SMRep.hs
@@ -9,7 +9,7 @@ module SMRep (
-- * Words and bytes
WordOff, ByteOff,
wordsToBytes, bytesToWordsRoundUp,
- roundUpToWords,
+ roundUpToWords, roundUpTo,
StgWord, fromStgWord, toStgWord,
StgHalfWord, fromStgHalfWord, toStgHalfWord,
@@ -47,8 +47,7 @@ module SMRep (
pprWord8String, stringToWord8s
) where
-#include "../HsVersions.h"
-#include "../includes/MachDeps.h"
+import GhcPrelude
import BasicTypes( ConTagZ )
import DynFlags
@@ -77,8 +76,11 @@ type ByteOff = Int
-- | Round up the given byte count to the next byte count that's a
-- multiple of the machine's word size.
roundUpToWords :: DynFlags -> ByteOff -> ByteOff
-roundUpToWords dflags n =
- (n + (wORD_SIZE dflags - 1)) .&. (complement (wORD_SIZE dflags - 1))
+roundUpToWords dflags n = roundUpTo n (wORD_SIZE dflags)
+
+-- | Round up @base@ to a multiple of @size@.
+roundUpTo :: ByteOff -> ByteOff -> ByteOff
+roundUpTo base size = (base + (size - 1)) .&. (complement (size - 1))
-- | Convert the given number of words to a number of bytes.
--
@@ -277,10 +279,10 @@ isConRep (HeapRep _ _ _ Constr{}) = True
isConRep _ = False
isThunkRep :: SMRep -> Bool
-isThunkRep (HeapRep _ _ _ Thunk{}) = True
+isThunkRep (HeapRep _ _ _ Thunk) = True
isThunkRep (HeapRep _ _ _ ThunkSelector{}) = True
-isThunkRep (HeapRep _ _ _ BlackHole{}) = True
-isThunkRep (HeapRep _ _ _ IndStatic{}) = True
+isThunkRep (HeapRep _ _ _ BlackHole) = True
+isThunkRep (HeapRep _ _ _ IndStatic) = True
isThunkRep _ = False
isFunRep :: SMRep -> Bool
@@ -384,10 +386,10 @@ heapClosureSizeW _ _ = panic "SMRep.heapClosureSize"
closureTypeHdrSize :: DynFlags -> ClosureTypeInfo -> WordOff
closureTypeHdrSize dflags ty = case ty of
- Thunk{} -> thunkHdrSize dflags
+ Thunk -> thunkHdrSize dflags
ThunkSelector{} -> thunkHdrSize dflags
- BlackHole{} -> thunkHdrSize dflags
- IndStatic{} -> thunkHdrSize dflags
+ BlackHole -> thunkHdrSize dflags
+ IndStatic -> thunkHdrSize dflags
_ -> fixedHdrSizeW dflags
-- All thunks use thunkHdrSize, even if they are non-updatable.
-- this is because we don't have separate closure types for
@@ -446,21 +448,19 @@ rtsClosureType rep
HeapRep False 0 2 Fun{} -> FUN_0_2
HeapRep False _ _ Fun{} -> FUN
- HeapRep False 1 0 Thunk{} -> THUNK_1_0
- HeapRep False 0 1 Thunk{} -> THUNK_0_1
- HeapRep False 2 0 Thunk{} -> THUNK_2_0
- HeapRep False 1 1 Thunk{} -> THUNK_1_1
- HeapRep False 0 2 Thunk{} -> THUNK_0_2
- HeapRep False _ _ Thunk{} -> THUNK
+ HeapRep False 1 0 Thunk -> THUNK_1_0
+ HeapRep False 0 1 Thunk -> THUNK_0_1
+ HeapRep False 2 0 Thunk -> THUNK_2_0
+ HeapRep False 1 1 Thunk -> THUNK_1_1
+ HeapRep False 0 2 Thunk -> THUNK_0_2
+ HeapRep False _ _ Thunk -> THUNK
HeapRep False _ _ ThunkSelector{} -> THUNK_SELECTOR
- HeapRep True _ _ Fun{} -> FUN_STATIC
- HeapRep True _ _ Thunk{} -> THUNK_STATIC
-
- HeapRep False _ _ BlackHole{} -> BLACKHOLE
-
- HeapRep False _ _ IndStatic{} -> IND_STATIC
+ HeapRep True _ _ Fun{} -> FUN_STATIC
+ HeapRep True _ _ Thunk -> THUNK_STATIC
+ HeapRep False _ _ BlackHole -> BLACKHOLE
+ HeapRep False _ _ IndStatic -> IND_STATIC
_ -> panic "rtsClosureType"