summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMoritz Angermann <moritz.angermann@gmail.com>2017-05-01 11:13:36 -0400
committerBen Gamari <ben@smart-cactus.org>2017-05-01 11:14:59 -0400
commit667abf17dced8b4a4cd2dc6a291a6f244ffa031f (patch)
tree95dd026ca573f955b4a5201016e83baf8f92f9a1
parentc04bd55a8daaf254436cef02934215d0b4ccfa2f (diff)
downloadhaskell-667abf17dced8b4a4cd2dc6a291a6f244ffa031f.tar.gz
Make LLVM output robust to -dead_strip on mach-o platforms
This reverses commit 1686f30951292e94bf3076ce8b3eafb0bcbba91d (Mangle .subsections_via_symbols away., D3287), and implements proper support for `-dead_strip` via the injection of `.alt_entry` symbols for the function definition pointing to the beginning of the prefix data. This is the result of a lengthy discussion with rwbarton, and the following llvm-dev mailing list thread: http://lists.llvm.org/pipermail/llvm-dev/2017-March/110733.html The essential problem is that there is no reference from a function to its info table. This combined with `.subsections_via_symbols`, which llvm emits unconditionally, leads the linker to believe that the prefix data is unnecessary and stripping it away if presented with the `-dead_strip` flag. The NCG has for this purpose special $dsp (dead strip preventer) symbols and adds a relocation to the end of each function body pointing to that function's $dsp symbol. We cannot easily do the same thing via LLVM. Instead we use the `.alt_entry` directive on the function symbol, which causes the linker to treat it as a continuation of the previous symbol, namely the $dsp symbol. As a result the function body will not be separated from its info table. Reviewers: erikd, austin, rwbarton, bgamari Reviewed By: bgamari Subscribers: michalt, thomie Differential Revision: https://phabricator.haskell.org/D3290
-rw-r--r--compiler/llvmGen/LlvmCodeGen/Ppr.hs72
-rw-r--r--compiler/llvmGen/LlvmMangler.hs11
2 files changed, 71 insertions, 12 deletions
diff --git a/compiler/llvmGen/LlvmCodeGen/Ppr.hs b/compiler/llvmGen/LlvmCodeGen/Ppr.hs
index 37d1391519..8b6340d6f8 100644
--- a/compiler/llvmGen/LlvmCodeGen/Ppr.hs
+++ b/compiler/llvmGen/LlvmCodeGen/Ppr.hs
@@ -21,6 +21,7 @@ import FastString
import Outputable
import Unique
+import DynFlags (targetPlatform)
-- ----------------------------------------------------------------------------
-- * Top level
@@ -150,8 +151,75 @@ pprLlvmCmmDecl (CmmProc mb_info entry_lbl live (ListGraph blks))
alias = LMGlobal funVar
(Just $ LMBitc (LMStaticPointer defVar)
(LMPointer $ LMInt 8))
-
- return (ppLlvmGlobal alias $+$ ppLlvmFunction fun', [])
+ -- our beloved dead_strip preventer.
+ -- the idea here is to inject
+ --
+ -- module asm "_symbol$dsp = _symbol-24" -- assuming prefix
+ -- of <{i64, i64, i64}>
+ -- module asm ".no_dead_strip _symbol$dsp"
+ --
+ -- and thereby generating a second symbol
+ -- at the start of the info table, which is dead strip prevented.
+ --
+ -- ideally, llvm should generate these for us, but as
+ -- things stand, this is the least hacky solution to
+ -- prevent dead_stripping of the prefix data, while
+ -- retaining dead stripping in general.
+ --
+ -- The general layout of the above code results in the following:
+ --
+ -- .------------. <- @<name>$def$dsp
+ -- | Info Table |
+ -- |------------| <- @<name>, @<name>$def
+ -- | Fn Body |
+ -- '------------'
+ --
+ -- Why this @<name> and @<name>$def? As the textual llvm ir
+ -- generator is only handed typeless labes, it often does not
+ -- know the type of the label (e.g. function to call), until
+ -- the actual call happens. However, llvm requires symbol
+ -- lookups to be typed. Therfore we create the actual function
+ -- as @<name>$def, and alias a bitcast to i8* as @<name>.
+ -- Any subsequent lookup can lookup @<name> as i8* and
+ -- bitcast it to the required type once we want to call it.
+ --
+ -- Why .no_dead_strip? Doesn't this prevent the linker from
+ -- -dead_strip'ing anything? Yes, it does. And we'll have to
+ -- live with this wart until a better solution is found that
+ -- ensures that all symbols that are used directly or
+ -- indirectly are marked used.
+ --
+ -- This is all rather annoying. ghc 8.2 uses the infamous
+ -- Mangler to drop the .subsections_via_symbols directive
+ -- from the assembly. LLVM ingeniously emits said directive
+ -- unconditionally for mach-o files. To lift the need for
+ -- extra mangler step, we explicitly mark every symbol
+ -- .no_dead_strip.
+ --
+ -- We are making a few assumptions here:
+ -- - the symbols end up being name _<symbol> in the final
+ -- assembly file.
+ --
+ dsp <- case mb_info of
+ Nothing -> pure empty
+ Just (Statics _ statics)
+ | platformHasSubsectionsViaSymbols (targetPlatform dflags) -> do
+ infoStatics <- mapM genData statics
+ -- remember, the prefix_size is in bits!
+ let prefix_size = sum (map (llvmWidthInBits dflags . getStatType)
+ infoStatics)
+ dspName = defName `appendFS` fsLit "$dsp"
+ defSymbol = text "_" <> ftext defName
+ dspSymbol = text "_" <> ftext dspName
+ moduleAsm s = text "module asm" <+> doubleQuotes s
+ return $ text "; insert dead_strip preventer"
+ $+$ moduleAsm (dspSymbol <+> text "=" <+> defSymbol
+ <> text "-" <> int (prefix_size `div` 8))
+ $+$ moduleAsm (text ".no_dead_strip" <+> dspSymbol)
+ $+$ text "; end dead_strip preventer"
+ | otherwise -> pure empty
+
+ return (ppLlvmGlobal alias $+$ ppLlvmFunction fun' $+$ dsp, [])
-- | The section we are putting info tables and their entry code into, should
diff --git a/compiler/llvmGen/LlvmMangler.hs b/compiler/llvmGen/LlvmMangler.hs
index eed13ba203..acf344fe2d 100644
--- a/compiler/llvmGen/LlvmMangler.hs
+++ b/compiler/llvmGen/LlvmMangler.hs
@@ -47,20 +47,11 @@ type Rewrite = DynFlags -> B.ByteString -> Maybe B.ByteString
-- | Rewrite a line of assembly source with the given rewrites,
-- taking the first rewrite that applies.
rewriteLine :: DynFlags -> [Rewrite] -> B.ByteString -> B.ByteString
-rewriteLine dflags rewrites l
- -- We disable .subsections_via_symbols on darwin and ios, as the llvm code
- -- gen uses prefix data for the info table. This however does not prevent
- -- llvm from generating .subsections_via_symbols, which in turn with
- -- -dead_strip, strips the info tables, and therefore breaks ghc.
- | isSubsectionsViaSymbols l =
- (B.pack "## no .subsection_via_symbols for ghc. We need our info tables!")
- | otherwise =
+rewriteLine dflags rewrites l =
case firstJust $ map (\rewrite -> rewrite dflags rest) rewrites of
Nothing -> l
Just rewritten -> B.concat $ [symbol, B.pack "\t", rewritten]
where
- isSubsectionsViaSymbols = B.isPrefixOf (B.pack ".subsections_via_symbols")
-
(symbol, rest) = splitLine l
firstJust :: [Maybe a] -> Maybe a