summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen Gamari <ben@smart-cactus.org>2020-04-29 12:24:23 -0400
committerBen Gamari <ben@smart-cactus.org>2021-10-07 12:33:46 -0400
commit56fcd8a33536ed9ad0d5f907ba264bc3918d5bfc (patch)
tree6d61638b8a5df4e30ba1a3cc2698ed3b6b929e60
parentd36ac3ec6d96d0e5b5ed5a8f4b2c8c9f7a762143 (diff)
downloadhaskell-wip/T17609.tar.gz
nativeGen: Deduplicate DWARF stringswip/T17609
As noted in #17609, we previously made no attempt to deduplicate strings. This resulted in unnecessarily long compile times and large object files. Fix this. Fixes #17609.
-rw-r--r--compiler/GHC/CmmToAsm/Dwarf.hs23
-rw-r--r--compiler/GHC/CmmToAsm/Dwarf/Constants.hs12
-rw-r--r--compiler/GHC/CmmToAsm/Dwarf/Types.hs111
-rw-r--r--compiler/GHC/Types/Unique.hs1
4 files changed, 108 insertions, 39 deletions
diff --git a/compiler/GHC/CmmToAsm/Dwarf.hs b/compiler/GHC/CmmToAsm/Dwarf.hs
index fcff4be74e..a1907e8089 100644
--- a/compiler/GHC/CmmToAsm/Dwarf.hs
+++ b/compiler/GHC/CmmToAsm/Dwarf.hs
@@ -12,6 +12,7 @@ import GHC.Cmm.DebugBlock
import GHC.Unit.Module
import GHC.Utils.Outputable
import GHC.Platform
+import GHC.Types.SrcLoc
import GHC.Types.Unique
import GHC.Types.Unique.Supply
@@ -46,11 +47,12 @@ dwarfGen config modLoc us blocks = do
compPath <- getCurrentDirectory
let lowLabel = dblCLabel $ head procs
highLabel = mkAsmTempProcEndLabel $ dblCLabel $ last procs
+ producer = dwarfStringFromString $ cProjectName ++ " " ++ cProjectVersion
dwarfUnit = DwarfCompileUnit
{ dwChildren = map (procToDwarf config) (map stripBlocks procs)
- , dwName = fromMaybe "" (ml_hs_file modLoc)
- , dwCompDir = addTrailingPathSeparator compPath
- , dwProducer = cProjectName ++ " " ++ cProjectVersion
+ , dwName = dwarfStringFromString $ fromMaybe "" (ml_hs_file modLoc)
+ , dwCompDir = dwarfStringFromString $ addTrailingPathSeparator compPath
+ , dwProducer = producer
, dwLowLabel = pdoc platform lowLabel
, dwHighLabel = pdoc platform highLabel
, dwLineLabel = dwarfLineLabel
@@ -76,6 +78,9 @@ dwarfGen config modLoc us blocks = do
, compileUnitFooter platform unitU
]
+ -- .debug_str section: Strings
+ let stringsSct = dwarfStringsSection platform (dwarfInfoStrings dwarfUnit)
+
-- .debug_line section: Generated mainly by the assembler, but we
-- need to label it
let lineSct = dwarfLineSection platform $$
@@ -92,7 +97,7 @@ dwarfGen config modLoc us blocks = do
| otherwise = [DwarfARange lowLabel highLabel]
let aranges = dwarfARangesSection platform $$ pprDwarfARanges platform aranges' unitU
- return (infoSct $$ abbrevSct $$ lineSct $$ frameSct $$ aranges, us'')
+ return (infoSct $$ stringsSct $$ abbrevSct $$ lineSct $$ frameSct $$ aranges, us'')
-- | Build an address range entry for one proc.
-- With split sections, each proc needs its own entry, since they may get
@@ -177,7 +182,7 @@ parent, B.
procToDwarf :: NCGConfig -> DebugBlock -> DwarfInfo
procToDwarf config prc
= DwarfSubprogram { dwChildren = map (blockToDwarf config) (dblBlocks prc)
- , dwName = case dblSourceTick prc of
+ , dwName = dwarfStringFromString $ case dblSourceTick prc of
Just s@SourceNote{} -> sourceName s
_otherwise -> show (dblLabel prc)
, dwLabel = dblCLabel prc
@@ -211,7 +216,13 @@ blockToDwarf config blk
| otherwise = Nothing -- block was optimized out
tickToDwarf :: CmmTickish -> [DwarfInfo]
-tickToDwarf (SourceNote ss _) = [DwarfSrcNote ss]
+tickToDwarf (SourceNote ss _) =
+ [DwarfSrcNote { dwSpanFile = dwarfStringFromFastString (srcSpanFile ss)
+ , dwSpanStartLine = srcSpanStartLine ss
+ , dwSpanStartCol = srcSpanStartCol ss
+ , dwSpanEndLine = srcSpanEndLine ss
+ , dwSpanEndCol = srcSpanEndCol ss
+ }]
tickToDwarf _ = []
-- | Generates the data for the debug frame section, which encodes the
diff --git a/compiler/GHC/CmmToAsm/Dwarf/Constants.hs b/compiler/GHC/CmmToAsm/Dwarf/Constants.hs
index b8fb5706cb..d0cc770893 100644
--- a/compiler/GHC/CmmToAsm/Dwarf/Constants.hs
+++ b/compiler/GHC/CmmToAsm/Dwarf/Constants.hs
@@ -85,12 +85,14 @@ dW_CHILDREN_no, dW_CHILDREN_yes :: Word8
dW_CHILDREN_no = 0
dW_CHILDREN_yes = 1
-dW_FORM_addr, dW_FORM_data2, dW_FORM_data4, dW_FORM_string, dW_FORM_flag,
+dW_FORM_addr, dW_FORM_data2, dW_FORM_data4,
+ dW_FORM_strp,dW_FORM_string, dW_FORM_flag,
dW_FORM_block1, dW_FORM_ref4, dW_FORM_ref_addr, dW_FORM_flag_present :: Word
dW_FORM_addr = 0x01
dW_FORM_data2 = 0x05
dW_FORM_data4 = 0x06
dW_FORM_string = 0x08
+dW_FORM_strp = 0x0e
dW_FORM_flag = 0x0c
dW_FORM_block1 = 0x0a
dW_FORM_ref_addr = 0x10
@@ -144,11 +146,13 @@ dW_OP_call_frame_cfa = 0x9c
-- * Dwarf section declarations
dwarfInfoSection, dwarfAbbrevSection, dwarfLineSection,
- dwarfFrameSection, dwarfGhcSection, dwarfARangesSection :: Platform -> SDoc
+ dwarfFrameSection, dwarfStringSection,
+ dwarfGhcSection, dwarfARangesSection :: Platform -> SDoc
dwarfInfoSection platform = dwarfSection platform "info"
dwarfAbbrevSection platform = dwarfSection platform "abbrev"
dwarfLineSection platform = dwarfSection platform "line"
dwarfFrameSection platform = dwarfSection platform "frame"
+dwarfStringSection platform = dwarfSection platform "str"
dwarfGhcSection platform = dwarfSection platform "ghc"
dwarfARangesSection platform = dwarfSection platform "aranges"
@@ -164,11 +168,13 @@ dwarfSection platform name =
-> text "\t.section .debug_" <> text name <> text ",\"dr\""
-- * Dwarf section labels
-dwarfInfoLabel, dwarfAbbrevLabel, dwarfLineLabel, dwarfFrameLabel :: SDoc
+dwarfInfoLabel, dwarfAbbrevLabel, dwarfLineLabel, dwarfFrameLabel,
+ dwarfStringLabel :: SDoc
dwarfInfoLabel = text ".Lsection_info"
dwarfAbbrevLabel = text ".Lsection_abbrev"
dwarfLineLabel = text ".Lsection_line"
dwarfFrameLabel = text ".Lsection_frame"
+dwarfStringLabel = text ".Lsection_str"
-- | Mapping of registers to DWARF register numbers
dwarfRegNo :: Platform -> Reg -> Word8
diff --git a/compiler/GHC/CmmToAsm/Dwarf/Types.hs b/compiler/GHC/CmmToAsm/Dwarf/Types.hs
index f8f0ae5c44..d90aa1030b 100644
--- a/compiler/GHC/CmmToAsm/Dwarf/Types.hs
+++ b/compiler/GHC/CmmToAsm/Dwarf/Types.hs
@@ -2,12 +2,19 @@
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE UndecidableInstances #-}
+{-# LANGUAGE RecordWildCards #-}
module GHC.CmmToAsm.Dwarf.Types
( -- * Dwarf information
DwarfInfo(..)
, pprDwarfInfo
, pprAbbrevDecls
+ , dwarfInfoStrings
+ -- * Dwarf Strings section
+ , DwarfString
+ , dwarfStringsSection
+ , dwarfStringFromString
+ , dwarfStringFromFastString
-- * Dwarf address range table
, DwarfARange(..)
, pprDwarfARanges
@@ -32,18 +39,15 @@ import GHC.Prelude
import GHC.Cmm.DebugBlock
import GHC.Cmm.CLabel
import GHC.Cmm.Expr ( GlobalReg(..) )
-import GHC.Utils.Encoding
import GHC.Data.FastString
import GHC.Utils.Outputable
import GHC.Platform
import GHC.Types.Unique
+import GHC.Types.Unique.Set
import GHC.Platform.Reg
-import GHC.Types.SrcLoc
-import GHC.Utils.Misc
import GHC.CmmToAsm.Dwarf.Constants
-import qualified Data.ByteString as BS
import qualified GHC.Utils.Monad.State.Strict as S
import Control.Monad (zipWithM, join)
import qualified Data.Map as Map
@@ -52,18 +56,55 @@ import Data.Char
import GHC.Platform.Regs
+-- | A string in the DWARF @.debug_str@ section.
+newtype DwarfString = DwarfString FastString
+
+instance Uniquable DwarfString where
+ getUnique (DwarfString fs) = getUnique fs
+
+dwarfStringFromString :: String -> DwarfString
+dwarfStringFromString = dwarfStringFromFastString . fsLit
+
+dwarfStringFromFastString :: FastString -> DwarfString
+dwarfStringFromFastString = DwarfString
+
+dwarfStringSymbol :: DwarfString -> CLabel
+dwarfStringSymbol (DwarfString fs) =
+ mkAsmTempDerivedLabel (mkAsmTempLabel u) (fsLit "_fstr")
+ where
+ -- N.B. FastStrings have a tag character of '\x00', which would produce
+ -- an invalid symbol name. Instead of handling this rare case in
+ -- pprUniqueAlways, incurring significant overhead in hot paths, we rather
+ -- override the unique tag here.
+ u = newTagUnique (getUnique fs) 'S'
+
+pprDwarfString :: Platform -> DwarfString -> SDoc
+pprDwarfString plat s =
+ sectionOffset plat (pdoc plat $ dwarfStringSymbol s) dwarfStringLabel
+
+dwarfStringsSection :: Platform -> UniqSet DwarfString -> SDoc
+dwarfStringsSection platform xs = vcat
+ [ dwarfStringLabel <> colon
+ , dwarfStringSection platform
+ , vcat (map string $ nonDetEltsUniqSet xs)
+ ]
+ where
+ string :: DwarfString -> SDoc
+ string dstr@(DwarfString fstr) =
+ pdoc platform (dwarfStringSymbol dstr) <> colon $$ pprFastString fstr
+
-- | Individual dwarf records. Each one will be encoded as an entry in
-- the @.debug_info@ section.
data DwarfInfo
= DwarfCompileUnit { dwChildren :: [DwarfInfo]
- , dwName :: String
- , dwProducer :: String
- , dwCompDir :: String
+ , dwName :: DwarfString
+ , dwProducer :: DwarfString
+ , dwCompDir :: DwarfString
, dwLowLabel :: SDoc
, dwHighLabel :: SDoc
, dwLineLabel :: SDoc }
| DwarfSubprogram { dwChildren :: [DwarfInfo]
- , dwName :: String
+ , dwName :: DwarfString
, dwLabel :: CLabel
, dwParent :: Maybe CLabel
-- ^ label of DIE belonging to the parent tick
@@ -72,9 +113,23 @@ data DwarfInfo
, dwLabel :: CLabel
, dwMarker :: Maybe CLabel
}
- | DwarfSrcNote { dwSrcSpan :: RealSrcSpan
+ | DwarfSrcNote { dwSpanFile :: !DwarfString
+ , dwSpanStartLine :: !Int
+ , dwSpanStartCol :: !Int
+ , dwSpanEndLine :: !Int
+ , dwSpanEndCol :: !Int
}
+-- | 'DwarfStrings' mentioned by the given 'DwarfInfo'.
+dwarfInfoStrings :: DwarfInfo -> UniqSet DwarfString
+dwarfInfoStrings dwinfo =
+ case dwinfo of
+ DwarfCompileUnit {..} -> mkUniqSet [dwName, dwProducer, dwCompDir] `unionUniqSets` foldMap dwarfInfoStrings dwChildren
+ DwarfSubprogram {..} -> unitUniqSet dwName `unionUniqSets` foldMap dwarfInfoStrings dwChildren
+ DwarfBlock {..} -> foldMap dwarfInfoStrings dwChildren
+ DwarfSrcNote {..} -> unitUniqSet dwSpanFile
+
+
-- | Abbreviation codes used for encoding above records in the
-- @.debug_info@ section.
data DwarfAbbrev
@@ -103,7 +158,7 @@ pprAbbrevDecls platform haveDebugLine =
-- These are shared between DwAbbrSubprogram and
-- DwAbbrSubprogramWithParent
subprogramAttrs =
- [ (dW_AT_name, dW_FORM_string)
+ [ (dW_AT_name, dW_FORM_strp)
, (dW_AT_linkage_name, dW_FORM_string)
, (dW_AT_external, dW_FORM_flag)
, (dW_AT_low_pc, dW_FORM_addr)
@@ -113,10 +168,10 @@ pprAbbrevDecls platform haveDebugLine =
in dwarfAbbrevSection platform $$
dwarfAbbrevLabel <> colon $$
mkAbbrev DwAbbrCompileUnit dW_TAG_compile_unit dW_CHILDREN_yes
- ([(dW_AT_name, dW_FORM_string)
- , (dW_AT_producer, dW_FORM_string)
+ ([(dW_AT_name, dW_FORM_strp)
+ , (dW_AT_producer, dW_FORM_strp)
, (dW_AT_language, dW_FORM_data4)
- , (dW_AT_comp_dir, dW_FORM_string)
+ , (dW_AT_comp_dir, dW_FORM_strp)
, (dW_AT_use_UTF8, dW_FORM_flag_present) -- not represented in body
, (dW_AT_low_pc, dW_FORM_addr)
, (dW_AT_high_pc, dW_FORM_addr)
@@ -137,7 +192,7 @@ pprAbbrevDecls platform haveDebugLine =
, (dW_AT_high_pc, dW_FORM_addr)
] $$
mkAbbrev DwAbbrGhcSrcNote dW_TAG_ghc_src_note dW_CHILDREN_no
- [ (dW_AT_ghc_span_file, dW_FORM_string)
+ [ (dW_AT_ghc_span_file, dW_FORM_strp)
, (dW_AT_ghc_span_start_line, dW_FORM_data4)
, (dW_AT_ghc_span_start_col, dW_FORM_data2)
, (dW_AT_ghc_span_end_line, dW_FORM_data4)
@@ -173,10 +228,10 @@ pprDwarfInfoOpen :: Platform -> Bool -> DwarfInfo -> SDoc
pprDwarfInfoOpen platform haveSrc (DwarfCompileUnit _ name producer compDir lowLabel
highLabel lineLbl) =
pprAbbrev DwAbbrCompileUnit
- $$ pprString name
- $$ pprString producer
+ $$ pprDwarfString platform name
+ $$ pprDwarfString platform producer
$$ pprData4 dW_LANG_Haskell
- $$ pprString compDir
+ $$ pprDwarfString platform compDir
-- Offset due to Note [Info Offset]
$$ pprWord platform (lowLabel <> text "-1")
$$ pprWord platform highLabel
@@ -186,7 +241,7 @@ pprDwarfInfoOpen platform haveSrc (DwarfCompileUnit _ name producer compDir lowL
pprDwarfInfoOpen platform _ (DwarfSubprogram _ name label parent) =
pdoc platform (mkAsmTempDieLabel label) <> colon
$$ pprAbbrev abbrev
- $$ pprString name
+ $$ pprDwarfString platform name
$$ pprLabelString platform label
$$ pprFlag (externallyVisibleCLabel label)
-- Offset due to Note [Info Offset]
@@ -210,13 +265,13 @@ pprDwarfInfoOpen platform _ (DwarfBlock _ label (Just marker)) =
$$ pprLabelString platform label
$$ pprWord platform (pdoc platform marker)
$$ pprWord platform (pdoc platform $ mkAsmTempEndLabel marker)
-pprDwarfInfoOpen _ _ (DwarfSrcNote ss) =
+pprDwarfInfoOpen platform _ (DwarfSrcNote {..}) =
pprAbbrev DwAbbrGhcSrcNote
- $$ pprString' (ftext $ srcSpanFile ss)
- $$ pprData4 (fromIntegral $ srcSpanStartLine ss)
- $$ pprHalf (fromIntegral $ srcSpanStartCol ss)
- $$ pprData4 (fromIntegral $ srcSpanEndLine ss)
- $$ pprHalf (fromIntegral $ srcSpanEndCol ss)
+ $$ pprDwarfString platform dwSpanFile
+ $$ pprData4 (fromIntegral dwSpanStartLine)
+ $$ pprHalf (fromIntegral dwSpanStartCol)
+ $$ pprData4 (fromIntegral dwSpanEndLine)
+ $$ pprHalf (fromIntegral dwSpanEndCol)
-- | Close a DWARF info record with children
pprDwarfInfoClose :: SDoc
@@ -595,12 +650,8 @@ pprString' :: SDoc -> SDoc
pprString' str = text "\t.asciz \"" <> str <> char '"'
-- | Generate a string constant. We take care to escape the string.
-pprString :: String -> SDoc
-pprString str
- = pprString' $ hcat $ map escapeChar $
- if str `lengthIs` utf8EncodedLength str
- then str
- else map (chr . fromIntegral) $ BS.unpack $ utf8EncodeString str
+pprFastString :: FastString -> SDoc
+pprFastString = pprString' . hcat . map escapeChar . unpackFS
-- | Escape a single non-unicode character
escapeChar :: Char -> SDoc
diff --git a/compiler/GHC/Types/Unique.hs b/compiler/GHC/Types/Unique.hs
index 85e7d5f958..196a06e314 100644
--- a/compiler/GHC/Types/Unique.hs
+++ b/compiler/GHC/Types/Unique.hs
@@ -51,6 +51,7 @@ module GHC.Types.Unique (
import GHC.Prelude
import GHC.Data.FastString
+import GHC.Utils.Constants (debugIsOn)
import GHC.Utils.Outputable
import GHC.Utils.Panic.Plain