summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordoyougnu <jeffrey.young@iohk.io>2022-03-19 19:59:20 -0400
committerdoyougnu <jeffrey.young@iohk.io>2022-06-13 13:42:38 -0400
commit5fa834b3db4619346d9205ad38f9c5837945a08c (patch)
tree0a75dcbe090552e1e837e4e804703d33b8800db1
parent14fb7ba21d47c53c52f2d66c072dd18b228216d5 (diff)
downloadhaskell-5fa834b3db4619346d9205ad38f9c5837945a08c.tar.gz
JS.Backend: Add JS specific Linker
JS: initialize Linker, DynamicLinking JS.Printer: adapted to GHC Head JS.Printer: some cleanup and init Printer StgToJS.Printer: Compiles JS.Linker: Add types, expose JS keywords JS.Syntax: add Binary instance on Ident's JS.Linker: Migrate more Types to Data.Binary JS.Linker.Types: compiles and adapted to GHC Head JS.Linker.Types: compiles JS.Linker.Types: add UseBase type JS.Linker: Comments and Cleanup JS.Linker.Types: add TH types, Env type, DepsLoc JS.Linker: more FIXMEs numerous Linker fixes JS.Linker: removed Text references JS.UnitUtils: add package related helper functions JS.Linker: more DynFlags removal JS.Linker: Time for semantic errors JS.Linker: DynFlags finally removed JS.Linker: 107 compile errors to go JS.Linker.Utils: initialized, adapted to GHC Head JS.Linker.Utils: initialize Utils module JS.Linker.Utils: more utils JS.Rts: move rtsText to Rts JS.Linker: linkerStats implemented JS.Compactor: compiles, adapted to GHC Head JS.Compactor: have to retrofit compact for linker JS.Linker.Compactor: unwinding lenses JS.Linker.Compactor: comments over addItem JS.Linker.Compactor: Lenses removed JS.Linker.Compactor: SHA256 removed JS.Linker.Compactor: only missing instances left JS.Linker.Compactor: compiles JS.Linker: compiles, adapted to ghc Head JS.Linker: More progress JS.Linker: link in memory compiles JS.Linker: just shims left JS.Linker.DynamicLinking compiles: adapted to head JS.Linker.DynamicLinking: initialization JS.Linker.DynamicLinking: compiles up to Variants JS.Variants: initialize JS.Linker: numerous and various fixes JS.Linker.DynamicLinking: only small errors left JS.Linker.Archive: compiles, adapted to GHC Head JS.Linker: initialize Archive compat module JS.Linker.Archive: minor fixes JS.Linker.DynamicLinking: compiles JS.Linker: cleanup, remove Variants, add comments fixup: more cleanup JS.Linker: more cleanup and comments
-rw-r--r--compiler/GHC/JS/Ppr.hs11
-rw-r--r--compiler/GHC/JS/Syntax.hs36
-rw-r--r--compiler/GHC/StgToJS/Arg.hs62
-rw-r--r--compiler/GHC/StgToJS/DataCon.hs12
-rw-r--r--compiler/GHC/StgToJS/Linker/Archive.hs194
-rw-r--r--compiler/GHC/StgToJS/Linker/Compactor.hs1437
-rw-r--r--compiler/GHC/StgToJS/Linker/Dynamic.hs564
-rw-r--r--compiler/GHC/StgToJS/Linker/Linker.hs887
-rw-r--r--compiler/GHC/StgToJS/Linker/Types.hs581
-rw-r--r--compiler/GHC/StgToJS/Linker/Utils.hs101
-rw-r--r--compiler/GHC/StgToJS/Object.hs119
-rw-r--r--compiler/GHC/StgToJS/Printer.hs165
-rw-r--r--compiler/GHC/StgToJS/Rts/Rts.hs7
-rw-r--r--compiler/GHC/StgToJS/Types.hs69
-rw-r--r--compiler/GHC/StgToJS/UnitUtils.hs85
-rw-r--r--compiler/ghc.cabal.in7
16 files changed, 4256 insertions, 81 deletions
diff --git a/compiler/GHC/JS/Ppr.hs b/compiler/GHC/JS/Ppr.hs
index e46c8534ef..223d4d3e7a 100644
--- a/compiler/GHC/JS/Ppr.hs
+++ b/compiler/GHC/JS/Ppr.hs
@@ -6,7 +6,6 @@
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE BlockArguments #-}
-{-# LANGUAGE PatternSynonyms #-}
-- | Pretty-printing JavaScript
module GHC.JS.Ppr
@@ -19,6 +18,10 @@ module GHC.JS.Ppr
, RenderJs(..)
, jsToDoc
, pprStringLit
+ , flattenBlocks
+ , braceNest
+ , braceNest'
+ , braceNest''
)
where
@@ -79,6 +82,12 @@ braceNest x = char '{' <+> nest 2 x $$ char '}'
braceNest' :: Doc -> Doc
braceNest' x = nest 2 (char '{' $+$ x) $$ char '}'
+-- FIXME: Jeff (2022,03): better naming of braceNest'' functions. Stop the
+-- madness!
+-- somewhat more compact (egyptian style) braces
+braceNest'' :: Doc -> Doc
+braceNest'' x = nest 2 (char '{' $$ x) $$ char '}'
+
class JsToDoc a where jsToDocR :: RenderJs -> a -> Doc
instance JsToDoc JStat where jsToDocR r = renderJsS r r
instance JsToDoc JExpr where jsToDocR r = renderJsE r r
diff --git a/compiler/GHC/JS/Syntax.hs b/compiler/GHC/JS/Syntax.hs
index 37104d8d9f..d70e06d801 100644
--- a/compiler/GHC/JS/Syntax.hs
+++ b/compiler/GHC/JS/Syntax.hs
@@ -48,6 +48,8 @@ module GHC.JS.Syntax
, pseudoSaturate
-- * Utility
, SaneDouble(..)
+ -- * Keywords
+ , isJsKeyword
) where
import GHC.Prelude
@@ -56,16 +58,18 @@ import Control.DeepSeq
import Data.Function
import qualified Data.Map as M
+import qualified Data.Set as Set
import Data.Data
import Data.Word
import qualified Data.Semigroup as Semigroup
import GHC.Generics
+import Data.Binary
import GHC.Utils.Outputable (Outputable (..))
import qualified GHC.Utils.Outputable as O
import qualified GHC.Data.ShortText as ST
-import GHC.Data.ShortText (ShortText)
+import GHC.Data.ShortText (ShortText())
import GHC.Utils.Monad.State.Strict
-- FIXME: Jeff (2022,03): This state monad is strict, but uses a lazy list as
@@ -319,9 +323,35 @@ instance Show SaneDouble where
--------------------------------------------------------------------------------
-- Identifiers
--------------------------------------------------------------------------------
--- We use ShortText for identifier in JS backend
+-- We use ShortText for identifiers in JS backend
-- | Identifiers
newtype Ident = TxtI { itxt:: ShortText}
- deriving (Show, Typeable, Ord, Eq, Generic, NFData)
+ deriving stock (Show, Typeable, Ord, Eq, Generic)
+ deriving newtype (Binary, NFData) -- FIXME: Jeff (2022,03): ShortText uses Data.Binary
+ -- rather than GHC.Utils.Binary. What is the
+ -- difference? See related FIXME in StgToJS.Object
+
+--------------------------------------------------------------------------------
+-- JS Keywords
+--------------------------------------------------------------------------------
+-- | The set of Javascript keywords
+jsKeywords :: Set.Set Ident
+jsKeywords = Set.fromList $ TxtI <$>
+ [ "break", "case", "catch", "continue", "debugger"
+ , "default", "delete", "do", "else", "finally", "for"
+ , "function", "if", "in", "instanceof", "new", "return"
+ , "switch", "this", "throw", "try", "typeof", "var", "void"
+ , "while", "with"
+ , "class", "enum", "export", "extends", "import", "super"
+ , "const"
+ , "implements", "interface", "let", "package", "private"
+ , "protected"
+ , "public", "static", "yield"
+ , "null", "true", "false"
+ ]
+
+-- | Check if provided Ident is a JS keyword
+isJsKeyword :: Ident -> Bool
+isJsKeyword = flip Set.member jsKeywords
diff --git a/compiler/GHC/StgToJS/Arg.hs b/compiler/GHC/StgToJS/Arg.hs
index 4404aa17c2..7d005e41a9 100644
--- a/compiler/GHC/StgToJS/Arg.hs
+++ b/compiler/GHC/StgToJS/Arg.hs
@@ -1,5 +1,6 @@
{-# LANGUAGE LambdaCase #-}
+{-# OPTIONS_GHC -fno-warn-orphans #-} -- For ToJExpr StaticArg, see FIXME
module GHC.StgToJS.Arg
( genArg
, genStaticArg
@@ -78,30 +79,30 @@ genArg a = case a of
case lookupUFM unFloat i of
Nothing -> reg
Just expr -> unfloated expr
- where
- -- if our argument is a joinid, it can be an unboxed tuple
- r :: HasDebugCallStack => VarType
- r = uTypeVt . stgArgType $ a
- reg
- | isVoid r = return []
- | i == trueDataConId = return [true_]
- | i == falseDataConId = return [false_]
- | isMultiVar r = mapM (jsIdN i) [1..varSize r]
- | otherwise = (:[]) <$> jsId i
-
- unfloated :: HasDebugCallStack => CgStgExpr -> G [JExpr]
- unfloated = \case
- StgLit l -> genLit l
- StgConApp dc _n args _
- | isBoolDataCon dc || isUnboxableCon dc
- -> (:[]) . allocUnboxedCon dc . concat <$> mapM genArg args
- | null args -> (:[]) <$> jsId (dataConWorkId dc)
- | otherwise -> do
- as <- concat <$> mapM genArg args
- e <- enterDataCon dc
- cs <- getSettings
- return [allocDynamicE cs e as Nothing] -- FIXME: ccs
- x -> pprPanic "genArg: unexpected unfloated expression" (pprStgExpr panicStgPprOpts x)
+ where
+ -- if our argument is a joinid, it can be an unboxed tuple
+ r :: HasDebugCallStack => VarType
+ r = uTypeVt . stgArgType $ a
+ reg
+ | isVoid r = return []
+ | i == trueDataConId = return [true_]
+ | i == falseDataConId = return [false_]
+ | isMultiVar r = mapM (jsIdN i) [1..varSize r]
+ | otherwise = (:[]) <$> jsId i
+
+ unfloated :: HasDebugCallStack => CgStgExpr -> G [JExpr]
+ unfloated = \case
+ StgLit l -> genLit l
+ StgConApp dc _n args _
+ | isBoolDataCon dc || isUnboxableCon dc
+ -> (:[]) . allocUnboxedCon dc . concat <$> mapM genArg args
+ | null args -> (:[]) <$> jsId (dataConWorkId dc)
+ | otherwise -> do
+ as <- concat <$> mapM genArg args
+ e <- enterDataCon dc
+ inl_alloc <- csInlineAlloc <$> getSettings
+ return [allocDynamicE inl_alloc e as Nothing] -- FIXME: ccs
+ x -> pprPanic "genArg: unexpected unfloated expression" (pprStgExpr panicStgPprOpts x)
genIdArg :: HasDebugCallStack => Id -> G [JExpr]
genIdArg i = genArg (StgVarArg i)
@@ -192,3 +193,16 @@ allocateStaticList xs a@(StgVarArg i)
pprPanic "allocateStaticList: invalid argument (tail)" (ppr (xs, r))
allocateStaticList _ _ = panic "allocateStaticList: unexpected literal in list"
+-- FIXME: Jeff (2022,03): Fix this orphan instance. It is consumed by
+-- Linker.Linker but requires allocDynamicE, hence its presence in this file. If
+-- we put it in StgToJS.Types (where StaticArg is defined) then we'll end up in
+-- an obvious module cycle. We could put it in DataCon but then we lose cohesion
+-- in that module (i.e., why should the DataCon module be exporting this
+-- instance?). It seems to be that this module should be the one that defines
+-- StaticArg, but I leave that for a refactor later.
+instance ToJExpr StaticArg where
+ toJExpr (StaticLitArg l) = toJExpr l
+ toJExpr (StaticObjArg t) = ValExpr (JVar (TxtI t))
+ toJExpr (StaticConArg c args) =
+ -- FIXME: cost-centre stack
+ allocDynamicE False (ValExpr . JVar . TxtI $ c) (map toJExpr args) Nothing
diff --git a/compiler/GHC/StgToJS/DataCon.hs b/compiler/GHC/StgToJS/DataCon.hs
index f8dd175ac0..eb5b152783 100644
--- a/compiler/GHC/StgToJS/DataCon.hs
+++ b/compiler/GHC/StgToJS/DataCon.hs
@@ -70,9 +70,13 @@ allocUnboxedCon con = \case
| isUnboxableCon con -> x
xs -> pprPanic "allocUnboxedCon: not an unboxed constructor" (ppr (con,xs))
-allocDynamicE :: StgToJSConfig -> JExpr -> [JExpr] -> Maybe JExpr -> JExpr
-allocDynamicE s entry free cc
- | csInlineAlloc s || length free > 24 = newClosure $ Closure
+allocDynamicE :: Bool -- ^ csInlineAlloc from StgToJSConfig
+ -> JExpr
+ -> [JExpr]
+ -> Maybe JExpr
+ -> JExpr
+allocDynamicE inline_alloc entry free cc
+ | inline_alloc || length free > 24 = newClosure $ Closure
{ clEntry = entry
, clField1 = fillObj1
, clField2 = fillObj2
@@ -92,7 +96,7 @@ allocDynamicE s entry free cc
allocDynamic :: StgToJSConfig -> Bool -> Ident -> JExpr -> [JExpr] -> Maybe JExpr -> JStat
allocDynamic s haveDecl to entry free cc =
- dec to `mappend` (toJExpr to |= allocDynamicE s entry free cc)
+ dec to `mappend` (toJExpr to |= allocDynamicE (csInlineAlloc s) entry free cc)
where
dec i | haveDecl = DeclStat i
| otherwise = mempty
diff --git a/compiler/GHC/StgToJS/Linker/Archive.hs b/compiler/GHC/StgToJS/Linker/Archive.hs
new file mode 100644
index 0000000000..d0d911e8d1
--- /dev/null
+++ b/compiler/GHC/StgToJS/Linker/Archive.hs
@@ -0,0 +1,194 @@
+{-# LANGUAGE DeriveDataTypeable #-}
+{-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE TupleSections #-}
+
+-----------------------------------------------------------------------------
+-- |
+-- Module : GHC.StgToJS.Archive
+-- Copyright : (c) The University of Glasgow 2001
+-- License : BSD-style (see the file LICENSE)
+--
+-- Maintainer : Sylvain Henry <sylvain.henry@iohk.io>
+-- Jeffrey Young <jeffrey.young@iohk.io>
+-- Luite Stegeman <luite.stegeman@iohk.io>
+-- Josh Meredith <josh.meredith@iohk.io>
+-- Stability : experimental
+--
+-- FIXME: Jeff(2022,04): Remove this module completely, its only consumer is
+-- GHC.StgToJS.Linker.Dynamic and is likely no longer necessary with the new
+-- GHC Api. I simply decided adapting this module was faster/easier than
+-- removing it and figuring out GHC.StgToJS.Linker.Dynamic with the new API
+-----------------------------------------------------------------------------
+module GHC.StgToJS.Linker.Archive
+ ( Entry(..), Index, IndexEntry(..), Meta(..)
+ , buildArchive
+ , readMeta, readIndex
+ , readSource, readAllSources
+ , readObject, withObject, withAllObjects
+ ) where
+
+import Control.Monad
+
+import Data.Binary
+import Data.Binary.Get
+import Data.Binary.Put
+import Data.ByteString.Lazy (ByteString)
+import qualified Data.ByteString.Lazy as B
+import Data.Data
+import Data.Int
+import GHC.Data.ShortText (ShortText)
+import qualified GHC.Data.ShortText as T
+
+import GHC.Generics hiding (Meta)
+
+import System.IO
+import Prelude
+
+import GHC.Unit.Module
+
+import GHC.StgToJS.Object ( versionTag, versionTagLength )
+
+
+-- entry, offset in data section, length
+type Index = [IndexEntry]
+
+data IndexEntry = IndexEntry { ieEntry :: Entry
+ , ieOffset :: Int64
+ , ieLength :: Int64
+ } deriving (Show, Typeable, Generic)
+
+instance Binary IndexEntry
+
+data Entry = Object ShortText -- module name
+ | JsSource FilePath
+ deriving (Show, Typeable, Generic)
+
+instance Binary Entry
+
+data Meta = Meta { metaCppOptions :: [String]
+ } deriving (Show, Typeable, Generic)
+
+instance Binary Meta
+
+-- sizes of the sections in bytes
+data Sections = Sections { sectionIndex :: !Word64
+ , sectionMeta :: !Word64
+ , sectionData :: !Word64
+ } deriving (Eq, Ord, Generic)
+
+instance Binary Sections where
+ put (Sections i m d) = putWord64le i >> putWord64le m >> putWord64le d
+ get = Sections <$> getWord64le <*> getWord64le <*> getWord64le
+
+sectionsLength :: Int
+sectionsLength = 24
+
+buildArchive :: Meta -> [(Entry, ByteString)] -> ByteString
+buildArchive meta entries =
+ versionTag <> sections <> index <> meta' <> entries'
+ where
+ bl = fromIntegral . B.length
+ sections = runPut . put $ Sections (bl index) (bl meta') (bl entries')
+ meta' = runPut (put meta)
+ index = runPut . put $ scanl1 (\(IndexEntry _ o l) (IndexEntry e _ l') -> IndexEntry e (o+l) l') $
+ map (\(e,b) -> IndexEntry e 0 (B.length b)) entries
+ entries' = mconcat (map snd entries)
+
+readMeta :: FilePath -> IO Meta
+readMeta file = withBinaryFile file ReadMode $ \h -> do
+ sections <- hReadHeader ("readMeta " ++ file) h
+ hSeek h RelativeSeek (toInteger $ sectionIndex sections)
+ m <- B.hGet h (fromIntegral $ sectionMeta sections)
+ return $! runGet get m
+
+readIndex :: FilePath -> IO Index
+readIndex file =
+ withArchive "readIndex" file $ \_sections index _h -> return index
+
+readSource :: FilePath -> FilePath -> IO ByteString
+readSource source file = withArchive "readSource" file $
+ withEntry ("readSource " ++ file)
+ ("source file " ++ source)
+ selectSrc
+ (\h l -> B.hGet h $ fromIntegral l)
+ where
+ selectSrc (JsSource src) = src == source
+ selectSrc _ = False
+
+readAllSources :: FilePath -> IO [(FilePath, ByteString)]
+readAllSources file = withArchive "readAllSources" file $ \sections index h ->
+ forM [ (o, l, src) | IndexEntry (JsSource src) o l <- index ] $ \(o, l, src) -> do
+ hSeek h AbsoluteSeek (fromIntegral $ dataSectionStart sections + fromIntegral o)
+ (src,) <$> B.hGet h (fromIntegral l)
+
+readObject :: ModuleName -> FilePath -> IO ByteString
+readObject m file = withArchive "readObject" file $
+ withModuleObject ("readObject " ++ file) m (\h l -> B.hGet h $ fromIntegral l)
+
+-- | seeks to the starting position of the object in the file
+withObject :: ModuleName -> FilePath -> (Handle -> Int64 -> IO a) -> IO a
+withObject m file f = withArchive "withObject" file $
+ withModuleObject ("withObject " ++ file) m f
+
+
+withAllObjects :: FilePath -> (ModuleName -> Handle -> Int64 -> IO a) -> IO [a]
+withAllObjects file f = withArchive "withAllObjects" file $ \sections index h ->
+ forM [ (o, l, mn) | IndexEntry (Object mn) o l <- index ] $ \(o, l, mn) -> do
+ hSeek h AbsoluteSeek (fromIntegral $ dataSectionStart sections + fromIntegral o)
+ f (mkModuleName (T.unpack mn)) h l
+
+---------------------------------------------------------------------------------
+
+withArchive :: String -> FilePath -> (Sections -> Index -> Handle -> IO a) -> IO a
+withArchive name file f = withBinaryFile file ReadMode $ \h -> do
+ let name' = name ++ " " ++ file
+ putStrLn ("reading archive: " ++ name ++ " -> " ++ file)
+ sections <- hReadHeader name' h
+ index <- hReadIndex name' sections h
+ f sections index h
+
+-- | seeks to start of entry data in file, then runs the action
+-- exactly one matching entry is expected
+withEntry :: String -> String
+ -> (Entry -> Bool) -> (Handle -> Int64 -> IO a)
+ -> Sections -> Index -> Handle
+ -> IO a
+withEntry name entryName p f sections index h =
+ case filter (p . ieEntry) index of
+ [] -> error (name ++ ": cannot find " ++ entryName)
+ [IndexEntry _ o l] -> do
+ hSeek h AbsoluteSeek (dataSectionStart sections + toInteger o)
+ f h (fromIntegral l)
+ _ -> error (name ++ ": multiple matches for " ++ entryName)
+
+withModuleObject :: String -> ModuleName -> (Handle -> Int64 -> IO a)
+ -> Sections -> Index -> Handle
+ -> IO a
+withModuleObject name m f =
+ withEntry name ("object for module " ++ ms) selectEntry f
+ where
+ ms = moduleNameString m
+ mt = T.pack ms
+ selectEntry (Object m') = mt == m'
+ selectEntry _ = False
+
+-- | expects Handle to be positioned at the start of the header
+-- Handle is positioned at start of index after return
+hReadHeader :: String -> Handle -> IO Sections
+hReadHeader name h = do
+ ts <- B.hGet h (versionTagLength + sectionsLength)
+ when (B.take (fromIntegral versionTagLength) ts /= versionTag)
+ (error $ name ++ ": version tag mismatch")
+ return $! runGet get (B.drop (fromIntegral versionTagLength) ts)
+
+-- | expects Handle to be positioned at the start of the index
+-- Handle is positioned at start of metadata section after return
+hReadIndex :: String -> Sections -> Handle -> IO Index
+hReadIndex _name s h = do
+ i <- B.hGet h (fromIntegral $ sectionIndex s)
+ return $! runGet get i
+
+-- start of data section in file
+dataSectionStart :: Sections -> Integer
+dataSectionStart s = toInteger (versionTagLength + sectionsLength)
+ + toInteger (sectionIndex s + sectionMeta s)
diff --git a/compiler/GHC/StgToJS/Linker/Compactor.hs b/compiler/GHC/StgToJS/Linker/Compactor.hs
new file mode 100644
index 0000000000..3348dc01da
--- /dev/null
+++ b/compiler/GHC/StgToJS/Linker/Compactor.hs
@@ -0,0 +1,1437 @@
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE OverloadedStrings #-}
+
+-----------------------------------------------------------------------------
+-- |
+-- Module : GHC.StgToJS.Linker
+-- Copyright : (c) The University of Glasgow 2001
+-- License : BSD-style (see the file LICENSE)
+--
+-- Maintainer : Luite Stegeman <luite.stegeman@iohk.io>
+-- Sylvain Henry <sylvain.henry@iohk.io>
+-- Jeffrey Young <jeffrey.young@iohk.io>
+-- Josh Meredith <josh.meredith@iohk.io>
+-- Stability : experimental
+--
+-- The compactor does link-time optimization. It is much simpler than the
+-- Optimizer, no fancy dataflow analysis here.
+--
+-- Optimizations:
+-- - rewrite all variables starting with h$$ to shorter names, these are internal names
+-- - write all function metadata compactly
+--
+-- TODO: Jeff (2022,03): I've adapted this to ghcHEAD but have not actually
+-- implemented the compactor. The key work function is @packString@ which
+-- currently explodes if called. The todo is to fix this, and actually implement
+-- the compactor once we have a linker that actually works.
+-----------------------------------------------------------------------------
+
+module GHC.StgToJS.Linker.Compactor
+ ( compact
+ ) where
+
+
+import GHC.Utils.Panic
+import GHC.Utils.Misc
+
+
+import Control.Applicative
+import GHC.Utils.Monad.State.Strict
+import Data.Function
+
+import qualified Data.Binary.Get as DB
+import qualified Data.Binary.Put as DB
+import qualified Data.Bits as Bits
+import Data.Bits (shiftL, shiftR)
+import Data.Bifunctor (second)
+import qualified Data.ByteString.Lazy as BL
+import qualified Data.ByteString as BS
+import qualified Data.ByteString.Char8 as BSC
+import qualified Data.ByteString.Builder as BB
+import Data.Char (chr)
+import qualified Data.Graph as G
+import qualified Data.Map.Strict as M
+import Data.Map (Map)
+import Data.Int
+import Data.List
+import Data.Maybe
+import qualified Data.Set as S
+import Data.Set (Set)
+import GHC.Data.ShortText (ShortText)
+import qualified GHC.Data.ShortText as T
+
+
+import GHC.JS.Syntax
+import GHC.JS.Make
+import GHC.JS.Transform
+import GHC.StgToJS.Printer (pretty)
+import GHC.StgToJS.Types
+import GHC.StgToJS.Linker.Types
+import GHC.StgToJS.CoreUtils
+import GHC.StgToJS.Closure
+import GHC.StgToJS.Arg()
+
+import Prelude
+import GHC.Utils.Encoding
+
+
+-- | collect global objects (data / CAFs). rename them and add them to the table
+collectGlobals :: [StaticInfo]
+ -> State CompactorState ()
+collectGlobals = mapM_ (\(StaticInfo i _ _) -> renameObj i)
+
+debugShowStat :: (JStat, [ClosureInfo], [StaticInfo]) -> String
+debugShowStat (_s, cis, sis) =
+ "closures:\n" ++
+ unlines (map show cis) ++
+ "\nstatics:" ++
+ unlines (map show sis) ++
+ "\n\n"
+
+{- create a single string initializer for all StaticUnboxedString references
+ in the code, and rewrite all references to point to it
+
+ if incremental linking is used, each increment gets its own packed string
+ blob. if a string value already exists in an earlier blob it is not added
+ again
+ -}
+packStrings :: HasDebugCallStack
+ => JSLinkConfig
+ -> CompactorState
+ -> [LinkedUnit]
+ -> (CompactorState, [LinkedUnit])
+packStrings _settings _cstate _code = panic "Compactor.packstrings not yet implemented!"
+ -- let allStatics :: [StaticInfo]
+ -- allStatics = concatMap (\(_,_,x) -> x) code
+
+ -- origStringTable :: StringTable
+ -- origStringTable = cstate ^. stringTable
+
+ -- allStrings :: Set ByteString
+ -- allStrings = S.fromList $
+ -- filter (not . isExisting)
+ -- (mapMaybe (staticString . siVal) allStatics)
+ -- where
+ -- isExisting bs = isJust (M.lookup bs $ stOffsets origStringTable)
+
+ -- staticString :: StaticVal -> Maybe ByteString
+ -- staticString (StaticUnboxed (StaticUnboxedString bs)) = Just bs
+ -- staticString (StaticUnboxed (StaticUnboxedStringOffset bs)) = Just bs
+ -- staticString _ = Nothing
+
+ -- allStringsList :: [ByteString]
+ -- allStringsList = S.toList allStrings
+
+ -- -- we may see two kinds of null characters
+ -- -- - string separator, packed as \0
+ -- -- - within a string, packed as \cz\0
+ -- -- we transform the strings to
+ -- transformPackedLiteral :: ShortText -> ShortText
+ -- transformPackedLiteral = mconcat. fmap f
+ -- where
+ -- f :: Char -> ShortText
+ -- f '\0' = "\^Z\0"
+ -- f '\^Z' = "\^Z\^Z"
+ -- f x = x
+
+ -- allStringsPacked :: ShortText
+ -- allStringsPacked = T.intercalate "\0" $
+ -- map (\str -> maybe (packBase64 str)
+ -- transformPackedLiteral
+ -- (U.decodeModifiedUTF8 str))
+ -- allStringsList
+
+ -- packBase64 :: ByteString -> ShortText
+ -- packBase64 bs
+ -- | BS.null bs = mempty
+ -- | otherwise =
+ -- let (h,t) = BS.splitAt 128 bs
+ -- esc = T.singleton '\^Z' <>
+ -- T.singleton (chr . fromIntegral $ BS.length h + 0x1f)
+ -- b64 = esc <> fromJust (U.decodeModifiedUTF8 (B64.encode h))
+ -- in maybe b64 transformPackedLiteral (U.decodeModifiedUTF8 h) <>
+ -- packBase64 t
+
+ -- allStringsWithOffset :: [(ByteString, Int)]
+ -- allStringsWithOffset = snd $
+ -- mapAccumL (\o b -> let o' = o + fromIntegral (BS.length b) + 1
+ -- in o' `seq` (o', (b, o)))
+ -- 0
+ -- allStringsList
+
+ -- -- the offset of each of the strings in the big blob
+ -- offsetIndex :: HashMap ByteString Int
+ -- offsetIndex = M.fromList allStringsWithOffset
+
+ -- stringSymbol :: Ident
+ -- stringSymbol = head $ cstate ^. identSupply
+
+ -- stringSymbolT :: ShortText
+ -- stringSymbolT = let (TxtI t) = stringSymbol in t
+
+ -- stringSymbolIdx :: Int
+ -- stringSymbolIdx = snd (bounds $ stTableIdents origStringTable) + 1
+
+ -- -- append the new string symbol
+ -- newTableIdents :: Array Int ShortText
+ -- newTableIdents =
+ -- listArray (0, stringSymbolIdx)
+ -- (elems (stTableIdents origStringTable) ++ [stringSymbolT])
+
+ -- newOffsetsMap :: Map ByteString (Int, Int)
+ -- newOffsetsMap = M.union (stOffsets origStringTable)
+ -- (fmap (stringSymbolIdx,) offsetIndex)
+
+ -- newIdentsMap :: HashMap ShortText (Either Int Int)
+ -- newIdentsMap =
+ -- let f (StaticInfo s (StaticUnboxed (StaticUnboxedString bs)) _)
+ -- = Just (s, Left . fst $ newOffsetsMap M.! bs)
+ -- f (StaticInfo s (StaticUnboxed (StaticUnboxedStringOffset bs)) _)
+ -- = Just (s, Right . snd $ newOffsetsMap M.! bs)
+ -- f _ = Nothing
+ -- in M.union (stIdents origStringTable)
+ -- (M.fromList $ mapMaybe f allStatics)
+
+ -- newStringTable :: StringTable
+ -- newStringTable = StringTable newTableIdents newOffsetsMap newIdentsMap
+
+ -- newOffsetsInverted :: HashMap (Int, Int) ByteString
+ -- newOffsetsInverted = M.fromList .
+ -- map (\(x,y) -> (y,x)) .
+ -- M.toList $
+ -- newOffsetsMap
+
+ -- replaceSymbol :: ShortText -> Maybe JVal
+ -- replaceSymbol t =
+ -- let f (Left i) = JVar (TxtI $ newTableIdents ! i)
+ -- f (Right o) = JInt (fromIntegral o)
+ -- in fmap f (M.lookup t newIdentsMap)
+
+ -- cstate0 :: CompactorState
+ -- cstate0 = cstate & identSupply %~ tail
+ -- & stringTable .~ newStringTable
+
+ -- initStr :: JStat
+ -- initStr =
+ -- DeclStat stringSymbol <>
+ -- AssignStat (ValExpr $ JVar stringSymbol)
+ -- (ApplExpr (ApplExpr (ValExpr $ JVar (TxtI "h$pstr"))
+ -- [ValExpr (JStr allStringsPacked)])
+ -- [])
+
+ -- rewriteValsE :: JExpr -> JExpr
+ -- rewriteValsE (ApplExpr e xs)
+ -- | Just t <- appMatchStringLit e xs = ValExpr (JStr t)
+ -- rewriteValsE (ValExpr v) = ValExpr (rewriteVals v)
+ -- rewriteValsE e = e & exprsE %~ rewriteValsE
+
+ -- rewriteVals :: JVal -> JVal
+ -- rewriteVals (JVar (TxtI t))
+ -- | Just v <- replaceSymbol t = v
+ -- rewriteVals (JList es) = JList (map rewriteValsE es)
+ -- rewriteVals (JHash m) = JHash (fmap rewriteValsE m)
+ -- rewriteVals (JFunc args body) = JFunc args (body & exprsS %~ rewriteValsE)
+ -- rewriteVals v = v
+
+ -- rewriteStat :: JStat -> JStat
+ -- rewriteStat st = st & exprsS %~ rewriteValsE
+
+ -- appMatchStringLit :: JExpr -> [JExpr] -> Maybe ShortText
+ -- appMatchStringLit (ValExpr (JVar (TxtI "h$decodeUtf8z")))
+ -- [ValExpr (JVar (TxtI x)), ValExpr (JVar (TxtI y))]
+ -- | Just (Left i) <- M.lookup x newIdentsMap
+ -- , Just (Right j) <- M.lookup y newIdentsMap
+ -- , Just bs <- M.lookup (i,j) newOffsetsInverted =
+ -- U.decodeModifiedUTF8 bs
+ -- appMatchStringLit _ _ = Nothing
+
+ -- rewriteStatic :: StaticInfo -> Maybe StaticInfo
+ -- rewriteStatic (StaticInfo _i
+ -- (StaticUnboxed StaticUnboxedString{})
+ -- _cc) =
+ -- Nothing
+ -- rewriteStatic (StaticInfo _i
+ -- (StaticUnboxed StaticUnboxedStringOffset {})
+ -- _cc) =
+ -- Nothing
+ -- rewriteStatic si = Just (si & staticInfoArgs %~ rewriteStaticArg)
+
+ -- rewriteStaticArg :: StaticArg -> StaticArg
+ -- rewriteStaticArg a@(StaticObjArg t) =
+ -- case M.lookup t newIdentsMap of
+ -- Just (Right v) -> StaticLitArg (IntLit $ fromIntegral v)
+ -- Just (Left idx) -> StaticObjArg (newTableIdents ! idx)
+ -- _ -> a
+ -- rewriteStaticArg (StaticConArg v es)
+ -- = StaticConArg v (map rewriteStaticArg es)
+ -- rewriteStaticArg x = x
+
+ -- initStatic :: LinkedUnit
+ -- initStatic =
+ -- let (TxtI ss) = stringSymbol
+ -- in (initStr, [], [StaticInfo ss (StaticThunk Nothing) Nothing])
+
+ -- rewriteBlock :: LinkedUnit -> LinkedUnit
+ -- rewriteBlock (stat, ci, si)
+ -- = (rewriteStat stat, ci, mapMaybe rewriteStatic si)
+
+ -- in (cstate0, initStatic : map rewriteBlock code)
+
+renameInternals :: HasDebugCallStack
+ => JSLinkConfig
+ -> StgToJSConfig
+ -> CompactorState
+ -> [ShortText]
+ -> [LinkedUnit]
+ -> (CompactorState, [JStat], JStat)
+renameInternals ln_cfg cfg cs0 rtsDeps stats0a = (cs, stats, meta)
+ where
+ (stbs, stats0) = (if lcDedupe ln_cfg
+ then dedupeBodies rtsDeps . dedupe rtsDeps
+ else (mempty,)) stats0a
+ ((stats, meta), cs) = runState renamed cs0
+
+ renamed :: State CompactorState ([JStat], JStat)
+ renamed
+ | csDebugAlloc cfg || csProf cfg = do -- FIXME: Jeff (2022,03): Move these Way flags into JSLinkConfig
+ cs <- get
+ let renamedStats = map (\(s,_,_) -> identsS' (lookupRenamed cs) s) stats0
+ statics = map (renameStaticInfo cs) $
+ concatMap (\(_,_,x) -> x) stats0
+ infos = map (renameClosureInfo cs) $
+ concatMap (\(_,x,_) -> x) stats0
+ -- render metadata as individual statements
+ meta = mconcat (map staticDeclStat statics) <>
+ identsS' (lookupRenamed cs) stbs <>
+ mconcat (map (staticInitStat $ csProf cfg) statics) <>
+ mconcat (map (closureInfoStat True) infos)
+ return (renamedStats, meta)
+ | otherwise = do
+ -- collect all global objects and entries, add them to the renaming table
+ mapM_ (\(_, cis, sis) -> do
+ mapM_ (renameEntry . TxtI . ciVar) cis
+ mapM_ (renameObj . siVar) sis
+ mapM_ collectLabels sis) stats0
+
+ -- sort our entries, store the results
+ -- propagate all renamings throughtout the code
+ cs <- get
+ -- FIXME: Jeff (2022,03): Is this workaround still needed?
+ -- Safari on iOS 10 (64 bit only?) crashes on very long arrays
+ -- safariCrashWorkaround :: [Ident] -> JExpr
+ -- safariCrashWorkaround xs =
+ -- case chunksOf 10000 xs of
+ -- (y:ys) | not (null ys)
+ -- -> ApplExpr (SelExpr (toJExpr y) (TxtI "concat"))
+ -- (map toJExpr ys)
+ -- _ -> toJExpr xs
+ let renamedStats = map (\(s,_,_) -> identsS' (lookupRenamed cs) s)
+ stats0
+ sortedInfo = concatMap (\(_,xs,_) -> map (renameClosureInfo cs)
+ xs)
+ stats0
+ -- entryArr = safariCrashWorkaround $
+ entryArr = toJExpr
+ . map (TxtI . fst)
+ . sortBy (compare `on` snd)
+ . M.toList
+ $ csEntries cs
+ lblArr = map (TxtI . fst)
+ . sortBy (compare `on` snd)
+ . M.toList
+ $ csLabels cs
+ ss = concatMap (\(_,_,xs) -> map (renameStaticInfo cs) xs)
+ stats0
+ infoBlock = encodeStr (concatMap (encodeInfo cs) sortedInfo)
+ staticBlock = encodeStr (concatMap (encodeStatic cs) ss)
+ stbs' = identsS' (lookupRenamed cs) stbs
+ staticDecls = mconcat (map staticDeclStat ss) <> stbs'
+ meta = staticDecls `mappend`
+ appS "h$scheduleInit" [ entryArr
+ , var "h$staticDelayed"
+ , toJExpr lblArr
+ , toJExpr infoBlock
+ , toJExpr staticBlock
+ ]
+ -- error "scheduleInit"
+ {-
+ [j| h$scheduleInit( `entryArr`
+ , h$staticDelayed
+ , `lblArr`
+ , `infoBlock`
+ , `staticBlock`);
+ h$staticDelayed = [];
+ |] -}
+ return (renamedStats, meta)
+
+-- | initialize a global object. all global objects have to be declared (staticInfoDecl) first
+-- (this is only used with -debug, normal init would go through the static data table)
+staticInitStat :: Bool -- ^ profiling enabled
+ -> StaticInfo
+ -> JStat
+staticInitStat _prof (StaticInfo i sv cc) =
+ case sv of
+ StaticData con args -> appS "h$sti" ([var i, var con, toJExpr args] ++ ccArg)
+ StaticFun f args -> appS "h$sti" ([var i, var f, toJExpr args] ++ ccArg)
+ StaticList args mt ->
+ appS "h$stl" ([var i, toJExpr args, toJExpr $ maybe null_ (toJExpr . TxtI) mt] ++ ccArg)
+ StaticThunk (Just (f,args)) ->
+ appS "h$stc" ([var i, var f, toJExpr args] ++ ccArg)
+ _ -> mempty
+ where
+ ccArg = maybeToList (fmap toJExpr cc)
+
+-- | declare and do first-pass init of a global object (create JS object for heap objects)
+staticDeclStat :: StaticInfo -> JStat
+staticDeclStat (StaticInfo si sv _) =
+ let si' = TxtI si
+ ssv (StaticUnboxed u) = Just (ssu u)
+ ssv (StaticThunk Nothing) = Nothing
+ ssv _ = Just (app "h$d" []) -- error "StaticUnboxed" -- Just [je| h$d() |]
+ ssu (StaticUnboxedBool b) = app "h$p" [toJExpr b] -- error "StaticUnboxedBool" -- [je| h$p(`b`) |]
+ ssu (StaticUnboxedInt i) = app "h$p" [toJExpr i] -- error "StaticUnboxedInt" -- [je| h$p(`i`) |]
+ ssu (StaticUnboxedDouble d) = app "h$p" [toJExpr (unSaneDouble d)] -- error "StaticUnboxedDouble" -- [je| h$p(`unSaneDouble d`) |]
+ ssu (StaticUnboxedString str) = ApplExpr (initStr str) []
+ ssu StaticUnboxedStringOffset {} = 0
+ -- FIXME, we shouldn't do h$di, we need to record the statement to init the thunks
+ in maybe (appS "h$di" [toJExpr si']) (\v -> DeclStat si' `mappend` (toJExpr si' |= v)) (ssv sv)
+ -- error "staticDeclStat" -- maybe [j| h$di(`si'`); |] (\v -> DeclStat si' <> error "staticDeclStat" {- [j| `si'` = `v`; |]-}) (ssv sv)
+
+initStr :: BS.ByteString -> JExpr
+initStr str = app "h$str" [ValExpr (JStr . T.pack . BSC.unpack $! str)]
+ --TODO: Jeff (2022,03): This function used to call @decodeModifiedUTF8 in
+ --Gen2.Utils. I've removed the call site and opted to keep the Just case.
+ --We'll need to double check to see if we indeed do need to decoded the
+ --UTF8 strings and implement a replace function on bytestrings once the
+ --Linker is up.
+ -- Nothing -> app "h$rstr" [toJExpr $ map toInteger (BS.unpack str)]
+ -- error "initStr"
+ -- [je| h$rstr(`map toInteger (B.unpack str)`) |]
+
+-- | rename a heap object, which means adding it to the
+-- static init table in addition to the renamer
+renameObj :: ShortText
+ -> State CompactorState ShortText
+renameObj xs = do
+ (TxtI xs') <- renameVar (TxtI xs) -- added to the renamer
+ modify (addStaticEntry xs') -- and now the table
+ return xs'
+
+renameEntry :: Ident
+ -> State CompactorState Ident
+renameEntry i = do
+ i'@(TxtI i'') <- renameVar i
+ modify (addEntry i'')
+ return i'
+
+collectLabels :: StaticInfo -> State CompactorState ()
+collectLabels si = mapM_ go (labelsV . siVal $ si)
+ where
+ go :: ShortText -> State CompactorState ()
+ go = modify . addLabel
+ labelsV (StaticData _ args) = concatMap labelsA args
+ labelsV (StaticList args _) = concatMap labelsA args
+ labelsV _ = []
+ labelsA (StaticLitArg l) = labelsL l
+ labelsA _ = []
+ labelsL (LabelLit _ lbl) = [lbl]
+ labelsL _ = []
+
+lookupRenamed :: CompactorState -> Ident -> Ident
+lookupRenamed cs i@(TxtI t) =
+ fromMaybe i (M.lookup t (csNameMap cs))
+
+renameVar :: Ident -- ^ text identifier to rename
+ -> State CompactorState Ident -- ^ the updated renamer state and the new ident
+renameVar i@(TxtI t)
+ | "h$$" `isPrefixOf` T.unpack t = do
+ m <- gets csNameMap
+ case M.lookup t m of
+ Just r -> return r
+ Nothing -> do
+ y <- newIdent
+ let add_var cs' = cs' {csNameMap = M.insert t y (csNameMap cs')}
+ modify add_var
+ return y
+ | otherwise = return i
+
+newIdent :: State CompactorState Ident
+newIdent = do
+ yys <- gets csIdentSupply
+ case yys of
+ (y:ys) -> do
+ modify (\cs -> cs {csIdentSupply = ys})
+ return y
+ _ -> error "newIdent: empty list"
+
+-- | rename a compactor info entry according to the compactor state (no new renamings are added)
+renameClosureInfo :: CompactorState
+ -> ClosureInfo
+ -> ClosureInfo
+renameClosureInfo cs (ClosureInfo v rs n l t s) =
+ ClosureInfo (renameV v) rs n l t (f s)
+ where
+ renameV t = maybe t (\(TxtI t') -> t') (M.lookup t m)
+ m = csNameMap cs
+ f (CIStaticRefs rs) = CIStaticRefs (map renameV rs)
+
+-- | rename a static info entry according to the compactor state (no new renamings are added)
+renameStaticInfo :: CompactorState
+ -> StaticInfo
+ -> StaticInfo
+renameStaticInfo cs = staticIdents renameIdent
+ where
+ renameIdent t = maybe t (\(TxtI t') -> t') (M.lookup t $ csNameMap cs)
+
+staticIdents :: (ShortText -> ShortText)
+ -> StaticInfo
+ -> StaticInfo
+staticIdents f (StaticInfo i v cc) = StaticInfo (f i) (staticIdentsV f v) cc
+
+staticIdentsV ::(ShortText -> ShortText) -> StaticVal -> StaticVal
+staticIdentsV f (StaticFun i args) = StaticFun (f i) (staticIdentsA f <$> args)
+staticIdentsV f (StaticThunk (Just (i, args))) = StaticThunk . Just $
+ (f i, staticIdentsA f <$> args)
+staticIdentsV f (StaticData con args) = StaticData (f con) (staticIdentsA f <$> args)
+staticIdentsV f (StaticList xs t) = StaticList (staticIdentsA f <$> xs) (f <$> t)
+staticIdentsV _ x = x
+
+-- staticIdentsA :: Traversal' StaticArg ShortText
+staticIdentsA :: (ShortText -> ShortText) -> StaticArg -> StaticArg
+staticIdentsA f (StaticObjArg t) = StaticObjArg $! f t
+staticIdentsA _ x = x
+
+
+{-
+ simple encoding of naturals using only printable low char points,
+ rely on gzip to compress repeating sequences,
+ most significant bits first
+ 1 byte: ascii code 32-123 (0-89), \ and " unused
+ 2 byte: 124 a b (90-8189)
+ 3 byte: 125 a b c (8190-737189)
+-}
+encodeStr :: HasDebugCallStack => [Int] -> String
+encodeStr = concatMap encodeChr
+ where
+ c :: HasDebugCallStack => Int -> Char
+ c i | i > 90 || i < 0 = error ("encodeStr: c " ++ show i)
+ | i >= 59 = chr (34+i)
+ | i >= 2 = chr (33+i)
+ | otherwise = chr (32+i)
+ encodeChr :: HasDebugCallStack => Int -> String
+ encodeChr i
+ | i < 0 = panic "encodeStr: negative"
+ | i <= 89 = [c i]
+ | i <= 8189 = let (c1, c2) = (i - 90) `divMod` 90 in [chr 124, c c1, c c2]
+ | i <= 737189 = let (c2a, c3) = (i - 8190) `divMod` 90
+ (c1, c2) = c2a `divMod` 90
+ in [chr 125, c c1, c c2, c c3]
+ | otherwise = panic "encodeStr: overflow"
+
+entryIdx :: HasDebugCallStack
+ => String
+ -> CompactorState
+ -> ShortText
+ -> Int
+entryIdx msg cs i = fromMaybe lookupParent (M.lookup i' (csEntries cs))
+ where
+ (TxtI i') = lookupRenamed cs (TxtI i)
+ lookupParent = maybe err
+ (+ csNumEntries cs)
+ (M.lookup i' (csParentEntries cs))
+ err = panic (msg ++ ": invalid entry: " ++ T.unpack i')
+
+objectIdx :: HasDebugCallStack
+ => String
+ -> CompactorState
+ -> ShortText
+ -> Int
+objectIdx msg cs i = fromMaybe lookupParent (M.lookup i' (csStatics cs))
+ where
+ (TxtI i') = lookupRenamed cs (TxtI i)
+ lookupParent = maybe err
+ (+ csNumStatics cs)
+ (M.lookup i' (csParentStatics cs))
+ err = panic (msg ++ ": invalid static: " ++ T.unpack i')
+
+labelIdx :: HasDebugCallStack
+ => String
+ -> CompactorState
+ -> ShortText
+ -> Int
+labelIdx msg cs l = fromMaybe lookupParent (M.lookup l (csLabels cs))
+ where
+ lookupParent = maybe err
+ (+ csNumLabels cs)
+ (M.lookup l (csParentLabels cs))
+ err = panic (msg ++ ": invalid label: " ++ T.unpack l)
+
+encodeInfo :: HasDebugCallStack
+ => CompactorState
+ -> ClosureInfo -- ^ information to encode
+ -> [Int]
+encodeInfo cs (ClosureInfo _var regs name layout typ static)
+ | CIThunk <- typ = 0 : ls
+ | (CIFun _arity regs0) <- typ, regs0 /= argSize regs
+ = panic ("encodeInfo: inconsistent register metadata for " ++ T.unpack name)
+ | (CIFun arity _regs0) <- typ = [1, arity, encodeRegs regs] ++ ls
+ | (CICon tag) <- typ = [2, tag] ++ ls
+ | CIStackFrame <- typ = [3, encodeRegs regs] ++ ls
+-- (CIPap ar) <- typ = [4, ar] ++ ls -- these should only appear during runtime
+ | otherwise = panic $
+ "encodeInfo, unexpected closure type: " ++ show typ
+ where
+ ls = encodeLayout layout ++ encodeSrt static
+ encodeLayout CILayoutVariable = [0]
+ encodeLayout (CILayoutUnknown s) = [s+1]
+ encodeLayout (CILayoutFixed s _vs) = [s+1]
+ encodeSrt (CIStaticRefs rs) = length rs : map (objectIdx "encodeInfo" cs) rs
+ encodeRegs CIRegsUnknown = 0
+ encodeRegs (CIRegs skip regTypes) = let nregs = sum (map varSize regTypes)
+ in encodeRegsTag skip nregs
+ encodeRegsTag skip nregs
+ | skip < 0 || skip > 1 = panic "encodeRegsTag: unexpected skip"
+ | otherwise = 1 + nregs `shiftL` 1 + skip
+ argSize (CIRegs skip regTypes) = sum (map varSize regTypes) - 1 + skip
+ argSize _ = 0
+
+encodeStatic :: HasDebugCallStack
+ => CompactorState
+ -> StaticInfo
+ -> [Int]
+encodeStatic cs si =
+ -- U.trace' ("encodeStatic: " ++ show si)
+ encodeStatic0 cs si
+
+encodeStatic0 :: HasDebugCallStack
+ => CompactorState
+ -> StaticInfo
+ -> [Int]
+encodeStatic0 cs (StaticInfo _to sv _)
+ | StaticFun f args <- sv =
+ [1, entry f, length args] ++ concatMap encodeArg args
+ | StaticThunk (Just (t, args)) <- sv =
+ [2, entry t, length args] ++ concatMap encodeArg args
+ | StaticThunk Nothing <- sv =
+ [0]
+ | StaticUnboxed (StaticUnboxedBool b) <- sv =
+ [3 + fromEnum b]
+ | StaticUnboxed (StaticUnboxedInt _i) <- sv =
+ [5] -- ++ encodeInt i
+ | StaticUnboxed (StaticUnboxedDouble _d) <- sv =
+ [6] -- ++ encodeDouble d
+ | (StaticUnboxed _) <- sv = [] -- unboxed strings have their own table
+-- | StaticString t <- sv = [7, T.length t] ++ map encodeChar (T.unpack t)
+-- | StaticBin bs <- sv = [8, BS.length bs] ++ map fromIntegral (BS.unpack bs)
+ | StaticList [] Nothing <- sv =
+ [8]
+ | StaticList args t <- sv =
+ [9, length args] ++
+ maybe [0] (\t' -> [1, obj t']) t ++
+ concatMap encodeArg (reverse args)
+ | StaticData con args <- sv =
+ (if length args <= 6
+ then [11+length args]
+ else [10,length args]) ++
+ [entry con] ++
+ concatMap encodeArg args
+ where
+ obj = objectIdx "encodeStatic" cs
+ entry = entryIdx "encodeStatic" cs
+ lbl = labelIdx "encodeStatic" cs
+ -- | an argument is either a reference to a heap object or a primitive value
+ encodeArg (StaticLitArg (BoolLit b)) =
+ [0 + fromEnum b]
+ encodeArg (StaticLitArg (IntLit 0)) =
+ [2]
+ encodeArg (StaticLitArg (IntLit 1)) =
+ [3]
+ encodeArg (StaticLitArg (IntLit i)) =
+ 4 : encodeInt i
+ encodeArg (StaticLitArg NullLit) =
+ [5]
+ encodeArg (StaticLitArg (DoubleLit d)) =
+ 6 : encodeDouble d
+ encodeArg (StaticLitArg (StringLit s)) =
+ 7 : encodeString s
+ encodeArg (StaticLitArg (BinLit b)) =
+ 8 : encodeBinary b
+ encodeArg (StaticLitArg (LabelLit b l)) =
+ [9, fromEnum b, lbl l]
+ encodeArg (StaticConArg con args) =
+ [10, entry con, length args] ++ concatMap encodeArg args
+ encodeArg (StaticObjArg t) =
+ [11 + obj t]
+ -- encodeArg x = panic ("encodeArg: unexpected: " ++ show x)
+ -- encodeChar = ord -- fixme make characters more readable
+
+-- FIXME: Jeff (2022,03): Use FastString or ShortByteString and remove this
+-- serialization/deserialization
+encodeString :: ShortText -> [Int]
+encodeString = encodeBinary . BSC.pack . T.unpack
+
+-- ByteString is prefixed with length, then blocks of 4 numbers encoding 3 bytes
+encodeBinary :: BS.ByteString -> [Int]
+encodeBinary bs = BS.length bs : go bs
+ where
+ go b | BS.null b = []
+ | l == 1 = let b0 = b `BS.index` 0
+ in map fromIntegral [ b0 `shiftR` 2, (b0 Bits..&. 3) `shiftL` 4 ]
+ | l == 2 = let b0 = b `BS.index` 0
+ b1 = b `BS.index` 1
+ in map fromIntegral [ b0 `shiftR` 2
+ , ((b0 Bits..&. 3) `shiftL` 4) Bits..|. (b1 `shiftR` 4)
+ , (b1 Bits..&. 15) `shiftL` 2
+ ]
+ | otherwise = let b0 = b `BS.index` 0
+ b1 = b `BS.index` 1
+ b2 = b `BS.index` 2
+ in map fromIntegral [ b0 `shiftR` 2
+ , ((b0 Bits..&. 3) `shiftL` 4) Bits..|. (b1 `shiftR` 4)
+ , ((b1 Bits..&. 15) `shiftL` 2) Bits..|. (b2 `shiftR` 6)
+ , b2 Bits..&. 63
+ ] ++ go (BS.drop 3 b)
+ where l = BS.length b
+
+encodeInt :: Integer -> [Int]
+encodeInt i
+ | i >= -10 && i < encodeMax - 11 = [fromIntegral i + 12]
+ | i > 2^(31::Int)-1 || i < -2^(31::Int)
+ = panic "encodeInt: integer outside 32 bit range"
+ | otherwise = let i' :: Int32 = fromIntegral i
+ in [ 0
+ , fromIntegral ((i' `shiftR` 16) Bits..&. 0xffff)
+ , fromIntegral (i' Bits..&. 0xffff)
+ ]
+
+-- encode a possibly 53 bit int
+encodeSignificand :: Integer -> [Int]
+encodeSignificand i
+ | i >= -10 && i < encodeMax - 11 = [fromIntegral i + 12]
+ | i > 2^(53::Int) || i < -2^(53::Int)
+ = panic ("encodeInt: integer outside 53 bit range: " ++ show i)
+ | otherwise = let i' = abs i
+ in (if i < 0 then 0 else 1) :
+ map (\r -> fromIntegral ((i' `shiftR` r) Bits..&. 0xffff))
+ [48,32,16,0]
+
+encodeDouble :: SaneDouble -> [Int]
+encodeDouble (SaneDouble d)
+ | isNegativeZero d = [0]
+ | d == 0 = [1]
+ | isInfinite d && d > 0 = [2]
+ | isInfinite d = [3]
+ | isNaN d = [4]
+ | abs exponent <= 30
+ = (6 + fromIntegral exponent + 30) : encodeSignificand significand
+ | otherwise
+ = [5] ++ encodeInt (fromIntegral exponent) ++ encodeSignificand significand
+ where
+ (significand, exponent) = decodeFloat d
+
+encodeMax :: Integer
+encodeMax = 737189
+
+{- |
+ The Base data structure contains the information we need
+ to do incremental linking against a base bundle.
+
+ base file format:
+ GHCJSBASE
+ [renamer state]
+ [linkedPackages]
+ [packages]
+ [modules]
+ [symbols]
+ -}
+
+renderBase :: Base -- ^ base metadata
+ -> BL.ByteString -- ^ rendered result
+renderBase = DB.runPut . putBase
+
+loadBase :: FilePath -> IO Base
+loadBase file = DB.runGet (getBase file) <$> BL.readFile file
+
+staticInfoArgs :: Applicative f => (StaticArg -> f StaticArg) -> StaticInfo -> f StaticInfo
+staticInfoArgs f (StaticInfo si sv sa) = StaticInfo si <$> staticValArgs f sv <*> pure sa
+
+staticValArgs :: Applicative f => (StaticArg -> f StaticArg) -> StaticVal -> f StaticVal
+staticValArgs f (StaticFun fn as) = StaticFun fn <$> traverse f as
+staticValArgs f (StaticThunk (Just (t, as))) = StaticThunk . Just . (t,) <$> traverse f as
+staticValArgs f (StaticData c as) = StaticData c <$> traverse f as
+staticValArgs f (StaticList as mt) = StaticList <$> traverse f as <*> pure mt
+staticValArgs _ x = pure x
+
+compact :: JSLinkConfig
+ -> StgToJSConfig
+ -> CompactorState
+ -> [ShortText]
+ -> [LinkedUnit]
+ -> (CompactorState, [JStat], JStat)
+compact ln_cfg cfg cs0 rtsDeps0 input0
+-- | dumpHashes' input
+ =
+ let rtsDeps1 = rtsDeps0 ++
+ map (<> "_e") rtsDeps0 ++
+ map (<> "_con_e") rtsDeps0
+ -- FIXME: Jeff (2022,03): I've removed the real worker @packStrings@ to
+ -- get the Linker compiling. This linker will be very slow, when the time
+ -- comes, we need to uncomment packStrings and actually implement it to do
+ -- the link time compiling
+ -- (cs1, input1) = packStrings settings dflags cs0 input0
+ in renameInternals ln_cfg cfg cs0 rtsDeps1 input0
+
+
+-- hash compactification
+
+dedupeBodies :: [ShortText]
+ -> [(JStat, [ClosureInfo], [StaticInfo])]
+ -> (JStat, [(JStat, [ClosureInfo], [StaticInfo])])
+dedupeBodies rtsDeps input = (renderBuildFunctions bfN bfCB, input')
+ where
+ (bfN, bfCB, input') = rewriteBodies globals hdefsR hdefs input
+ hdefs = M.fromListWith (\(s,ks1) (_,ks2) -> (s, ks1++ks2))
+ (map (\(k, s, bs) -> (bs, (s, [k]))) hdefs0)
+ hdefsR = M.fromList $ map (\(k, _, bs) -> (k, bs)) hdefs0
+ hdefs0 :: [(ShortText, Int, BS.ByteString)]
+ hdefs0 = concatMap (\(b,_,_) ->
+ (map (\(k,h) ->
+ let (s,fh, _deps) = finalizeHash' h
+ in (k, s, fh))
+ . hashDefinitions globals) b
+ )
+ input
+ globals = foldl' (flip S.delete) (findAllGlobals input) rtsDeps
+
+renderBuildFunctions :: [BuildFunction] -> [BuildFunction] -> JStat
+renderBuildFunctions normalBfs cycleBreakerBfs =
+ cycleBr1 <> mconcat (map renderBuildFunction normalBfs) <> cycleBr2
+ where
+ renderCbr f = mconcat (zipWith f cycleBreakerBfs [1..])
+ cbName :: Int -> ShortText
+ cbName = T.pack . ("h$$$cb"++) . show
+ cycleBr1 = renderCbr $ \bf n ->
+ let args = map (TxtI . T.pack . ('a':) . show) [1..bfArgs bf]
+ body = ReturnStat $ ApplExpr (ValExpr (JVar (TxtI $ cbName n)))
+ (map (ValExpr . JVar) args)
+ in DeclStat (TxtI (bfName bf)) <>
+ AssignStat (ValExpr (JVar (TxtI (bfName bf))))
+ (ValExpr (JFunc args body))
+ cycleBr2 = renderCbr $ \bf n -> renderBuildFunction (bf { bfName = cbName n })
+
+data BuildFunction = BuildFunction
+ { bfName :: !ShortText
+ , bfBuilder :: !Ident
+ , bfDeps :: [ShortText]
+ , bfArgs :: !Int
+ } deriving (Eq, Ord, Show)
+
+{-
+ Stack frame initialization order is important when code is reused:
+ all dependencies have to be ready when the closure is built.
+
+ This function sorts the initializers and returns an additional list
+ of cycle breakers, which are built in a two-step fashion
+ -}
+sortBuildFunctions :: [BuildFunction] -> ([BuildFunction], [BuildFunction])
+sortBuildFunctions bfs = (map snd normBFs, map snd cbBFs)
+ where
+ (normBFs, cbBFs) = partition (not.fst) . concatMap fromSCC $ sccs bfs
+ bfm :: Map ShortText BuildFunction
+ bfm = M.fromList (map (\x -> (bfName x, x)) bfs)
+ fromSCC :: G.SCC ShortText -> [(Bool, BuildFunction)]
+ fromSCC (G.AcyclicSCC x) = [(False, bfm M.! x)]
+ fromSCC (G.CyclicSCC xs) = breakCycles xs
+ sccs :: [BuildFunction] -> [G.SCC ShortText]
+ sccs b = G.stronglyConnComp $
+ map (\bf -> let n = bfName bf in (n, n, bfDeps bf)) b
+ {-
+ finding the maximum acyclic subgraph is the Minimum Feedback Arc Set problem,
+ which is NP-complete. We use an approximation here.
+ -}
+ breakCycles :: [ShortText] -> [(Bool, BuildFunction)]
+ breakCycles nodes =
+ (True, bfm M.! selected)
+ : concatMap fromSCC (sccs (map (bfm M.!) $ filter (/=selected) nodes))
+ where
+ outDeg, inDeg :: Map ShortText Int
+ outDeg = M.fromList $ map (\n -> (n, length (bfDeps (bfm M.! n)))) nodes
+ inDeg = M.fromListWith (+) (map (,1) . concatMap (bfDeps . (bfm M.!)) $ nodes)
+ -- ELS heuristic (Eades et. al.)
+ selected :: ShortText
+ selected = maximumBy (compare `on` (\x -> outDeg M.! x - inDeg M.! x)) nodes
+
+rewriteBodies :: Set ShortText
+ -> Map ShortText BS.ByteString
+ -> Map BS.ByteString (Int, [ShortText])
+ -> [LinkedUnit]
+ -> ([BuildFunction], [BuildFunction], [LinkedUnit])
+rewriteBodies globals idx1 idx2 input = (bfsNormal, bfsCycleBreaker, input')
+ where
+ (bfs1, input') = unzip (map rewriteBlock input)
+ (bfsNormal, bfsCycleBreaker) = sortBuildFunctions (concat bfs1)
+
+ -- this index only contains the entries we actually want to dedupe
+ idx2' :: Map BS.ByteString (Int, [ShortText])
+ idx2' = M.filter (\(s, xs) -> dedupeBody (length xs) s) idx2
+
+ rewriteBlock :: (JStat, [ClosureInfo], [StaticInfo])
+ -> ([BuildFunction], LinkedUnit)
+ rewriteBlock (st, cis, sis) =
+ let (bfs, st') = rewriteFunctions st
+ -- remove the declarations for things that we just deduped
+ st'' = removeDecls (S.fromList $ map bfName bfs) st'
+ in (bfs, (st'', cis, sis))
+
+ removeDecls :: Set ShortText -> JStat -> JStat
+ removeDecls t (BlockStat ss) = BlockStat (map (removeDecls t) ss)
+ removeDecls t (DeclStat (TxtI i))
+ | i `S.member` t = mempty
+ removeDecls _ s = s
+
+ rewriteFunctions :: JStat -> ([BuildFunction], JStat)
+ rewriteFunctions (BlockStat ss) =
+ let (bfs, ss') = unzip (map rewriteFunctions ss)
+ in (concat bfs, BlockStat ss')
+ rewriteFunctions (AssignStat (ValExpr (JVar (TxtI i)))
+ (ValExpr (JFunc args st)))
+ | Just h <- M.lookup i idx1
+ , Just (_s, his) <- M.lookup h idx2' =
+ let (bf, st') = rewriteFunction i h his args st in ([bf], st')
+ rewriteFunctions x = ([], x)
+
+ rewriteFunction :: ShortText
+ -> BS.ByteString
+ -> [ShortText]
+ -> [Ident]
+ -> JStat
+ -> (BuildFunction, JStat)
+ rewriteFunction i h his args body
+ | i == iFirst = (bf, createFunction i idx g args body)
+ | otherwise = (bf, mempty)
+ where
+ bf :: BuildFunction
+ bf = BuildFunction i (buildFunId idx) g (length args)
+ g :: [ShortText]
+ g = findGlobals globals body
+ iFirst = head his
+ Just idx = M.lookupIndex h idx2'
+
+ createFunction :: ShortText
+ -> Int
+ -> [ShortText]
+ -> [Ident]
+ -> JStat
+ -> JStat
+ createFunction _i idx g args body =
+ DeclStat bi <>
+ AssignStat (ValExpr (JVar bi))
+ (ValExpr (JFunc bargs bbody))
+ where
+ ng = length g
+ bi = buildFunId idx
+ bargs :: [Ident]
+ bargs = map (TxtI . T.pack . ("h$$$g"++) . show) [1..ng]
+ bgm :: Map ShortText Ident
+ bgm = M.fromList (zip g bargs)
+ bbody :: JStat
+ bbody = ReturnStat (ValExpr $ JFunc args ibody)
+ ibody :: JStat
+ ibody = identsS' (\ti@(TxtI i) -> fromMaybe ti (M.lookup i bgm)) body
+
+renderBuildFunction :: BuildFunction -> JStat
+renderBuildFunction (BuildFunction i bfid deps _nargs) =
+ DeclStat (TxtI i) <>
+ AssignStat (ValExpr (JVar (TxtI i)))
+ (ApplExpr (ValExpr (JVar bfid)) (map (ValExpr . JVar . TxtI) deps))
+
+dedupeBody :: Int -> Int -> Bool
+dedupeBody n size
+ | n < 2 = False
+ | size * n > 200 = True
+ | n > 6 = True
+ | otherwise = False
+
+buildFunId :: Int -> Ident
+buildFunId i = TxtI (T.pack $ "h$$$f" ++ show i)
+
+-- result is ordered, does not contain duplicates
+findGlobals :: Set ShortText -> JStat -> [ShortText]
+findGlobals globals stat = nub'
+ (filter isGlobal . map (\(TxtI i) -> i) $ identsS stat )
+ where
+ locals = S.fromList (findLocals stat)
+ isGlobal i = i `S.member` globals && i `S.notMember` locals
+
+findLocals :: JStat -> [ShortText]
+findLocals (BlockStat ss) = concatMap findLocals ss
+findLocals (DeclStat (TxtI i)) = [i]
+findLocals _ = []
+
+nub' :: Ord a => [a] -> [a]
+nub' = go S.empty
+ where
+ go _ [] = []
+ go s (x:xs)
+ | x `S.member` s = go s xs
+ | otherwise = x : go (S.insert x s) xs
+
+data HashIdx = HashIdx (Map ShortText Hash) (Map Hash ShortText)
+
+dedupe :: [ShortText]
+ -> [(JStat, [ClosureInfo], [StaticInfo])]
+ -> [(JStat, [ClosureInfo], [StaticInfo])]
+dedupe rtsDeps input
+-- | dumpHashIdx idx
+ =
+ map (\(st,cis,sis) -> dedupeBlock idx st cis sis) input
+ where
+ idx = HashIdx hashes hr
+ hashes0 = buildHashes rtsDeps input
+ hashes = foldl' (flip M.delete) hashes0 rtsDeps
+ hr = fmap pickShortest
+ (M.fromListWith (++) $
+ map (\(i, h) -> (h, [i])) (M.toList hashes))
+ pickShortest :: [ShortText] -> ShortText
+ pickShortest = minimumBy (compare `on` T.codepointLength)
+
+dedupeBlock :: HashIdx
+ -> JStat
+ -> [ClosureInfo]
+ -> [StaticInfo]
+ -> LinkedUnit
+dedupeBlock hi st ci si =
+ ( dedupeStat hi st
+ , mapMaybe (dedupeClosureInfo hi) ci
+ , mapMaybe (dedupeStaticInfo hi) si
+ )
+
+dedupeStat :: HashIdx -> JStat -> JStat
+dedupeStat hi = go
+ where
+ go (BlockStat ss) = BlockStat (map go ss)
+ go s@(DeclStat (TxtI i))
+ | not (isCanon hi i) = mempty
+ | otherwise = s
+ go (AssignStat v@(ValExpr (JVar (TxtI i))) e)
+ | not (isCanon hi i) = mempty
+ | otherwise = AssignStat v (identsE' (toCanonI hi) e)
+ -- rewrite identifiers in e
+ go s = identsS' (toCanonI hi) s
+
+dedupeClosureInfo :: HashIdx -> ClosureInfo -> Maybe ClosureInfo
+dedupeClosureInfo hi (ClosureInfo i rs n l ty st)
+ | isCanon hi i = Just (ClosureInfo i rs n l ty (dedupeCIStatic hi st))
+dedupeClosureInfo _ _ = Nothing
+
+dedupeStaticInfo :: HashIdx -> StaticInfo -> Maybe StaticInfo
+dedupeStaticInfo hi (StaticInfo i val ccs)
+ | isCanon hi i = Just (StaticInfo i (dedupeStaticVal hi val) ccs)
+dedupeStaticInfo _ _ = Nothing
+
+dedupeCIStatic :: HashIdx -> CIStatic -> CIStatic
+dedupeCIStatic hi (CIStaticRefs refs) = CIStaticRefs (nub $ map (toCanon hi) refs)
+
+dedupeStaticVal :: HashIdx -> StaticVal -> StaticVal
+dedupeStaticVal hi (StaticFun t args) =
+ StaticFun (toCanon hi t) (map (dedupeStaticArg hi) args)
+dedupeStaticVal hi (StaticThunk (Just (o, args))) =
+ StaticThunk (Just (toCanon hi o, map (dedupeStaticArg hi) args))
+dedupeStaticVal hi (StaticData dcon args) =
+ StaticData (toCanon hi dcon) (map (dedupeStaticArg hi) args)
+dedupeStaticVal hi (StaticList args lt) =
+ StaticList (map (dedupeStaticArg hi) args) (fmap (toCanon hi) lt)
+dedupeStaticVal _ v = v -- unboxed value or thunk with alt init, no rewrite needed
+
+dedupeStaticArg :: HashIdx -> StaticArg -> StaticArg
+dedupeStaticArg hi (StaticObjArg o)
+ = StaticObjArg (toCanon hi o)
+dedupeStaticArg hi (StaticConArg c args)
+ = StaticConArg (toCanon hi c)
+ (map (dedupeStaticArg hi) args)
+dedupeStaticArg _hi a@StaticLitArg{} = a
+
+isCanon :: HashIdx -> ShortText -> Bool
+isCanon (HashIdx a b) t
+ | Nothing <- la = True
+ | Just h <- la
+ , Just t' <- M.lookup h b = t == t'
+ | otherwise = False
+ where la = M.lookup t a
+
+toCanon :: HashIdx -> ShortText -> ShortText
+toCanon (HashIdx a b) t
+ | Just h <- M.lookup t a
+ , Just t' <- M.lookup h b = t'
+ | otherwise = t
+
+toCanonI :: HashIdx -> Ident -> Ident
+toCanonI hi (TxtI x) = TxtI (toCanon hi x)
+
+type Hash = (BS.ByteString, [ShortText])
+
+data HashBuilder = HashBuilder !BB.Builder ![ShortText]
+
+instance Monoid HashBuilder where
+ mempty = HashBuilder mempty mempty
+
+instance Semigroup HashBuilder where
+ (<>) (HashBuilder b1 l1) (HashBuilder b2 l2) =
+ HashBuilder (b1 <> b2) (l1 <> l2)
+
+{-
+dumpHashIdx :: HashIdx -> Bool
+dumpHashIdx hi@(HashIdx ma mb) =
+ let ks = M.keys ma
+ difCanon i = let i' = toCanon hi i
+ in if i == i' then Nothing else Just i'
+ writeHashIdx = do
+ putStrLn "writing hash idx"
+ T.writeFile "hashidx.txt"
+ (T.unlines . sort $ mapMaybe (\i -> fmap ((i <> " -> ") <>) (difCanon i)) ks)
+ putStrLn "writing full hash idx"
+ T.writeFile "hashIdxFull.txt"
+ (T.unlines . sort $ M.keys ma)
+ in unsafePerformIO writeHashIdx `seq` True
+-}
+-- debug thing
+{-
+dumpHashes' :: [(JStat, [ClosureInfo], [StaticInfo])] -> Bool
+dumpHashes' input =
+ let hashes = buildHashes input
+ writeHashes = do
+ putStrLn "writing hashes"
+ BL.writeFile "hashes.json" (Aeson.encode $ dumpHashes hashes)
+ in unsafePerformIO writeHashes `seq` True
+-}
+buildHashes :: [ShortText] -> [LinkedUnit] -> Map ShortText Hash
+buildHashes rtsDeps xss
+ -- - | dumpHashes0 hashes0
+ = fixHashes (fmap finalizeHash hashes0)
+ where
+ globals = foldl' (flip S.delete) (findAllGlobals xss) rtsDeps
+ hashes0 = M.unions (map buildHashesBlock xss)
+ buildHashesBlock (st, cis, sis) =
+ let hdefs = hashDefinitions globals st
+ hcis = map hashClosureInfo cis
+ hsis = map hashStaticInfo (filter (not . ignoreStatic) sis)
+ in M.fromList (combineHashes hdefs hcis ++ hsis)
+
+findAllGlobals :: [LinkedUnit] -> Set ShortText
+findAllGlobals xss = S.fromList $ concatMap f xss
+ where
+ f (_, cis, sis) =
+ map (\(ClosureInfo i _ _ _ _ _) -> i) cis ++
+ map (\(StaticInfo i _ _) -> i) sis
+
+fixHashes :: Map ShortText Hash -> Map ShortText Hash
+fixHashes hashes = fmap (second (map replaceHash)) hashes
+ where
+ replaceHash :: ShortText -> ShortText
+ replaceHash h = maybe h T.pack (M.lookup h finalHashes)
+ hashText bs = "h$$$" <> utf8DecodeByteString bs
+ sccs :: [[ShortText]]
+ sccs = map fromSCC $
+ G.stronglyConnComp (map (\(k, (_bs, deps)) -> (k, k, deps)) (M.toList hashes))
+ ks = M.keys hashes
+ invDeps = M.fromListWith (++) (concatMap mkInvDeps $ M.toList hashes)
+ mkInvDeps (k, (_, ds)) = map (,[k]) ds
+ finalHashes = fmap hashText (fixHashesIter 500 invDeps ks ks sccs hashes mempty)
+
+fromSCC :: G.SCC a -> [a]
+fromSCC (G.AcyclicSCC x) = [x]
+fromSCC (G.CyclicSCC xs) = xs
+
+fixHashesIter :: Int
+ -> Map ShortText [ShortText]
+ -> [ShortText]
+ -> [ShortText]
+ -> [[ShortText]]
+ -> Map ShortText Hash
+ -> Map ShortText BS.ByteString
+ -> Map ShortText BS.ByteString
+fixHashesIter n invDeps allKeys checkKeys sccs hashes finalHashes
+ -- - | unsafePerformIO (putStrLn ("fixHashesIter: " ++ show n)) `seq` False = undefined
+ | n < 0 = finalHashes
+ | not (null newHashes) = fixHashesIter (n-1) invDeps allKeys checkKeys' sccs hashes
+ (M.union finalHashes $ M.fromList newHashes)
+ -- - | unsafePerformIO (putStrLn ("fixHashesIter killing cycles:\n" ++ show rootSCCs)) `seq` False = undefined
+ | not (null rootSCCs) = fixHashesIter n {- -1 -} invDeps allKeys allKeys sccs hashes
+ (M.union finalHashes (M.fromList $ concatMap hashRootSCC rootSCCs))
+ | otherwise = finalHashes
+ where
+ checkKeys' | length newHashes > M.size hashes `div` 10 = allKeys
+ | otherwise = S.toList . S.fromList $ concatMap newHashDeps newHashes
+ newHashDeps (k, _) = fromMaybe [] (M.lookup k invDeps)
+ mkNewHash k | M.notMember k finalHashes
+ , Just (hb, htxt) <- M.lookup k hashes
+ , Just bs <- mapM (`M.lookup` finalHashes) htxt =
+ Just (k, makeFinalHash hb bs)
+ | otherwise = Nothing
+ newHashes :: [(ShortText, BS.ByteString)]
+ newHashes = mapMaybe mkNewHash checkKeys
+ rootSCCs :: [[ShortText]]
+ rootSCCs = filter isRootSCC sccs
+ isRootSCC :: [ShortText] -> Bool
+ isRootSCC scc = not (all (`M.member` finalHashes) scc) && all check scc
+ where
+ check n = let Just (_bs, out) = M.lookup n hashes
+ in all checkEdge out
+ checkEdge e = e `S.member` s || e `M.member` finalHashes
+ s = S.fromList scc
+ hashRootSCC :: [ShortText] -> [(ShortText,BS.ByteString)]
+ hashRootSCC scc
+ | any (`M.member` finalHashes) scc = panic "Gen2.Compactor.hashRootSCC: has finalized nodes"
+ | otherwise = map makeHash toHash
+ where
+ makeHash k = let Just (bs,deps) = M.lookup k hashes
+ luds = map lookupDep deps
+ in (k, makeFinalHash bs luds)
+ lookupDep :: ShortText -> BS.ByteString
+ lookupDep d
+ | Just b <- M.lookup d finalHashes = b
+ | Just i <- M.lookup d toHashIdx
+ = grpHash <> (utf8EncodeString . show $ i)
+ | otherwise
+ = panic $ "Gen2.Compactor.hashRootSCC: unknown key: " ++
+ T.unpack d
+ toHashIdx :: M.Map ShortText Integer
+ toHashIdx = M.fromList $ zip toHash [1..]
+ grpHash :: BS.ByteString
+ grpHash = BL.toStrict
+ . BB.toLazyByteString
+ $ mconcat (map (mkGrpHash . (hashes M.!)) toHash)
+ mkGrpHash (h, deps) =
+ let deps' = mapMaybe (`M.lookup` finalHashes) deps
+ in BB.byteString h <>
+ BB.int64LE (fromIntegral $ length deps') <>
+ mconcat (map BB.byteString deps')
+ toHash :: [ShortText]
+ toHash = sortBy (compare `on` fst . (hashes M.!)) scc
+
+makeFinalHash :: BS.ByteString -> [BS.ByteString] -> BS.ByteString
+makeFinalHash b bs = mconcat (b:bs)
+-- FIXME: Jeff (2022,03): I've removed the SHA256.hash function which would be
+-- producing this final bytestring. Do we need it? If so how to replace it?
+
+-- do not deduplicate thunks
+ignoreStatic :: StaticInfo -> Bool
+ignoreStatic (StaticInfo _ StaticThunk {} _) = True
+ignoreStatic _ = False
+
+-- combine hashes from x and y, leaving only those which have an entry in both
+combineHashes :: [(ShortText, HashBuilder)]
+ -> [(ShortText, HashBuilder)]
+ -> [(ShortText, HashBuilder)]
+combineHashes x y = M.toList $ M.intersectionWith (<>)
+ (M.fromList x)
+ (M.fromList y)
+
+{-
+dumpHashes0 :: Map ShortText HashBuilder -> Bool
+dumpHashes0 hashes = unsafePerformIO writeHashes `seq` True
+ where
+ hashLine (n, HashBuilder bb txt) =
+ n <> " ->\n " <>
+ escapeBS (BB.toLazyByteString bb) <> "\n [" <> T.intercalate " " txt <> "]\n"
+ escapeBS :: BL.ByteString -> T.Text
+ escapeBS = T.pack . concatMap escapeCH . BL.unpack
+ escapeCH c | c < 32 || c > 127 = '\\' : show c
+ | c == 92 = "\\\\"
+ | otherwise = [chr (fromIntegral c)]
+
+ writeHashes = do
+ putStrLn "writing hashes0"
+ T.writeFile "hashes0.dump" (T.unlines $ map hashLine (M.toList hashes))
+
+dumpHashes :: Map ShortText Hash -> Value
+dumpHashes idx = toJSON iidx
+ where
+ iidx :: Map ShortText [(Text, [ShortText])]
+ iidx = M.fromListWith (++) $
+ map (\(t, (b, deps)) -> (TE.decodeUtf8 (B16.encode b), [(t,deps)])) (M.toList idx)
+-}
+
+ht :: Int8 -> HashBuilder
+ht x = HashBuilder (BB.int8 x) []
+
+hi :: Int -> HashBuilder
+hi x = HashBuilder (BB.int64LE $ fromIntegral x) []
+
+hi' :: (Show a, Integral a) => a -> HashBuilder
+hi' x | x' > toInteger (maxBound :: Int64) || x' < toInteger (minBound :: Int64) =
+ panic $ "Gen2.Compactor.hi': integer out of range: " ++ show x
+ | otherwise = HashBuilder (BB.int64LE $ fromInteger x') []
+ where
+ x' = toInteger x
+
+hd :: Double -> HashBuilder
+hd d = HashBuilder (BB.doubleLE d) []
+
+htxt :: ShortText -> HashBuilder
+htxt x = HashBuilder (BB.int64LE (fromIntegral $ BS.length bs) <> BB.byteString bs) []
+ where
+ bs = utf8EncodeString $ T.unpack x
+
+hobj :: ShortText -> HashBuilder
+hobj x = HashBuilder (BB.int8 127) [x]
+
+hb :: BS.ByteString -> HashBuilder
+hb x = HashBuilder (BB.int64LE (fromIntegral $ BS.length x) <> BB.byteString x) []
+
+hashDefinitions :: Set ShortText -> JStat -> [(ShortText, HashBuilder)]
+hashDefinitions globals st =
+ let defs = findDefinitions st
+ in map (uncurry (hashSingleDefinition globals)) defs
+
+findDefinitions :: JStat -> [(Ident, JExpr)]
+findDefinitions (BlockStat ss) = concatMap findDefinitions ss
+findDefinitions (AssignStat (ValExpr (JVar i)) e) = [(i,e)]
+findDefinitions _ = []
+
+hashSingleDefinition :: Set ShortText -> Ident -> JExpr -> (ShortText, HashBuilder)
+hashSingleDefinition globals (TxtI i) expr = (i, ht 0 <> render st <> mconcat (map hobj globalRefs))
+ where
+ globalRefs = nub $ filter (`S.member` globals) (map (\(TxtI i) -> i) (identsE expr))
+ globalMap = M.fromList $ zip globalRefs (map (T.pack . ("h$$$global_"++) . show) [(1::Int)..])
+ expr' = identsE' (\i@(TxtI t) -> maybe i TxtI (M.lookup t globalMap)) expr
+ st = AssignStat (ValExpr (JVar (TxtI "dummy"))) expr'
+ render = htxt . T.pack. show . pretty
+
+
+-- FIXME: Jeff (2022,03): reduce the redundancy between these idents functions
+-- and the idents functions in GHC.JS.Transform These helper functions also
+-- exist in non-ticked for, e.g., @identsE@ in GHC.JS.Transform. These are
+-- essential Functor instances over the JS syntax tree. We rewrite them here for
+-- consumers like hashSingleDefinition. Had we used the Transform version we'll
+-- end up with a compiler error in @expr'@ since AssignStat takes an Expr, but
+-- Transform.IdentsE returns [Ident]
+identsE' :: (Ident -> Ident) -> JExpr -> JExpr
+identsE' f (ValExpr v) = ValExpr $! identsV' f v
+identsE' f (SelExpr e i) = SelExpr (identsE' f e) i -- do not rename properties
+identsE' f (IdxExpr e1 e2) = IdxExpr (identsE' f e1) (identsE' f e2)
+identsE' f (InfixExpr s e1 e2) = InfixExpr s (identsE' f e1) (identsE' f e2)
+identsE' f (UOpExpr o e) = UOpExpr o $! identsE' f e
+identsE' f (IfExpr e1 e2 e3) = IfExpr (identsE' f e1) (identsE' f e2) (identsE' f e3)
+identsE' f (ApplExpr e es) = ApplExpr (identsE' f e) (identsE' f <$> es)
+identsE' _ UnsatExpr{} = error "identsE': UnsatExpr"
+
+identsV' :: (Ident -> Ident) -> JVal -> JVal
+identsV' f (JVar i) = JVar $! f i
+identsV' f (JList xs) = JList $! (fmap . identsE') f xs
+identsV' _ d@JDouble{} = d
+identsV' _ i@JInt{} = i
+identsV' _ s@JStr{} = s
+identsV' _ r@JRegEx{} = r
+identsV' f (JHash m) = JHash $! (fmap . identsE') f m
+identsV' f (JFunc args s) = JFunc (fmap f args) (identsS' f s)
+identsV' _ UnsatVal{} = error "identsV': UnsatVal"
+
+identsS' :: (Ident -> Ident) -> JStat -> JStat
+identsS' f (DeclStat i) = DeclStat $! f i
+identsS' f (ReturnStat e) = ReturnStat $! identsE' f e
+identsS' f (IfStat e s1 s2) = IfStat (identsE' f e) (identsS' f s1) (identsS' f s2)
+identsS' f (WhileStat b e s) = WhileStat b (identsE' f e) (identsS' f s)
+identsS' f (ForInStat b i e s) = ForInStat b (f i) (identsE' f e) (identsS' f s)
+identsS' f (SwitchStat e xs s) = SwitchStat (identsE' f e) (fmap (traverseCase f) xs) (identsS' f s)
+ where traverseCase g (e,s) = (identsE' g e, identsS' g s)
+identsS' f (TryStat s1 i s2 s3) = TryStat (identsS' f s1) (f i) (identsS' f s2) (identsS' f s3)
+identsS' f (BlockStat xs) = BlockStat $! identsS' f <$> xs
+identsS' f (ApplStat e es) = ApplStat (identsE' f e) (identsE' f <$> es)
+identsS' f (UOpStat op e) = UOpStat op $! identsE' f e
+identsS' f (AssignStat e1 e2) = AssignStat (identsE' f e1) (identsE' f e2)
+identsS' _ UnsatBlock{} = error "identsS': UnsatBlock"
+identsS' f (LabelStat l s) = LabelStat l $! identsS' f s
+identsS' _ b@BreakStat{} = b
+identsS' _ c@ContinueStat{} = c
+
+hashClosureInfo :: ClosureInfo -> (ShortText, HashBuilder)
+hashClosureInfo (ClosureInfo civ cir _cin cil cit cis) =
+ (civ, ht 1 <> hashCIRegs cir <> hashCILayout cil <> hashCIType cit <> hashCIStatic cis)
+
+hashStaticInfo :: StaticInfo -> (ShortText, HashBuilder)
+hashStaticInfo (StaticInfo sivr sivl _sicc) =
+ (sivr, ht 2 <> hashStaticVal sivl)
+
+hashCIType :: CIType -> HashBuilder
+hashCIType (CIFun a r) = ht 1 <> hi a <> hi r
+hashCIType CIThunk = ht 2
+hashCIType (CICon c) = ht 3 <> hi c
+hashCIType CIPap = ht 4
+hashCIType CIBlackhole = ht 5
+hashCIType CIStackFrame = ht 6
+
+
+hashCIRegs :: CIRegs -> HashBuilder
+hashCIRegs CIRegsUnknown = ht 1
+hashCIRegs (CIRegs sk tys) = ht 2 <> hi sk <> hashList hashVT tys
+
+hashCILayout :: CILayout -> HashBuilder
+hashCILayout CILayoutVariable = ht 1
+hashCILayout (CILayoutUnknown size) = ht 2 <> hi size
+hashCILayout (CILayoutFixed n l) = ht 3 <> hi n <> hashList hashVT l
+
+hashCIStatic :: CIStatic -> HashBuilder
+hashCIStatic CIStaticRefs{} = mempty -- hashList hobj xs -- we get these from the code
+
+hashList :: (a -> HashBuilder) -> [a] -> HashBuilder
+hashList f xs = hi (length xs) <> mconcat (map f xs)
+
+hashVT :: VarType -> HashBuilder
+hashVT = hi . fromEnum
+
+hashStaticVal :: StaticVal -> HashBuilder
+hashStaticVal (StaticFun t args) = ht 1 <> hobj t <> hashList hashStaticArg args
+hashStaticVal (StaticThunk mtn) = ht 2 <> hashMaybe htobj mtn
+ where
+ htobj (o, args) = hobj o <> hashList hashStaticArg args
+hashStaticVal (StaticUnboxed su) = ht 3 <> hashStaticUnboxed su
+hashStaticVal (StaticData dcon args) = ht 4 <> hobj dcon <> hashList hashStaticArg args
+hashStaticVal (StaticList args lt) = ht 5 <> hashList hashStaticArg args <> hashMaybe hobj lt
+
+hashMaybe :: (a -> HashBuilder) -> Maybe a -> HashBuilder
+hashMaybe _ Nothing = ht 1
+hashMaybe f (Just x) = ht 2 <> f x
+
+hashStaticUnboxed :: StaticUnboxed -> HashBuilder
+hashStaticUnboxed (StaticUnboxedBool b) = ht 1 <> hi (fromEnum b)
+hashStaticUnboxed (StaticUnboxedInt iv) = ht 2 <> hi' iv
+hashStaticUnboxed (StaticUnboxedDouble sd) = ht 3 <> hashSaneDouble sd
+hashStaticUnboxed (StaticUnboxedString str) = ht 4 <> hb str
+hashStaticUnboxed (StaticUnboxedStringOffset str) = ht 5 <> hb str
+
+
+hashStaticArg :: StaticArg -> HashBuilder
+hashStaticArg (StaticObjArg t) = ht 1 <> hobj t
+hashStaticArg (StaticLitArg sl) = ht 2 <> hashStaticLit sl
+hashStaticArg (StaticConArg cn args) = ht 3 <> hobj cn <> hashList hashStaticArg args
+
+hashStaticLit :: StaticLit -> HashBuilder
+hashStaticLit (BoolLit b) = ht 1 <> hi (fromEnum b)
+hashStaticLit (IntLit iv) = ht 2 <> hi (fromIntegral iv)
+hashStaticLit NullLit = ht 3
+hashStaticLit (DoubleLit d) = ht 4 <> hashSaneDouble d
+hashStaticLit (StringLit tt) = ht 5 <> htxt tt
+hashStaticLit (BinLit bs) = ht 6 <> hb bs
+hashStaticLit (LabelLit bb ln) = ht 7 <> hi (fromEnum bb) <> htxt ln
+
+hashSaneDouble :: SaneDouble -> HashBuilder
+hashSaneDouble (SaneDouble sd) = hd sd
+
+finalizeHash :: HashBuilder -> Hash
+finalizeHash (HashBuilder hb tt) =
+-- FIXME: Jeff (2022,03): I've removed the SHA256.hash function which would be
+-- producing h. Do we need it? If so how to replace it?
+ let h = (BL.toStrict $ BB.toLazyByteString hb)
+ in h `seq` (h, tt)
+
+finalizeHash' :: HashBuilder -> (Int, BS.ByteString, [ShortText])
+finalizeHash' (HashBuilder hb tt) =
+ let b = BL.toStrict (BB.toLazyByteString hb)
+ bl = BS.length b
+-- FIXME: Jeff (2022,03): I've removed the SHA256.hash function which would be
+-- producing h. So it is purposeful that `h = b` looks unnecessary. Do we need
+-- it? If so how to replace it?
+ h = b
+ in h `seq` bl `seq` (bl, h, tt)
diff --git a/compiler/GHC/StgToJS/Linker/Dynamic.hs b/compiler/GHC/StgToJS/Linker/Dynamic.hs
new file mode 100644
index 0000000000..d9dfb8380d
--- /dev/null
+++ b/compiler/GHC/StgToJS/Linker/Dynamic.hs
@@ -0,0 +1,564 @@
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE LambdaCase #-}
+
+-----------------------------------------------------------------------------
+-- |
+-- Module : GHC.StgToJS.Linker.Dynamic
+-- Copyright : (c) The University of Glasgow 2001
+-- License : BSD-style (see the file LICENSE)
+--
+-- Maintainer : Jeffrey Young <jeffrey.young@iohk.io>
+-- Luite Stegeman <luite.stegeman@iohk.io>
+-- Sylvain Henry <sylvain.henry@iohk.io>
+-- Josh Meredith <josh.meredith@iohk.io>
+-- Stability : experimental
+--
+-- Various utilities for building and loading dynamic libraries, to make
+-- Template Haskell work in GHCJS
+--
+----------------------------- FIXMEs -------------------------------------------
+-- FIXME: Jeff (2022,04): This module may be completely redundant and consist of
+-- duplicate code. Before we can remove it we must understand how it alters the
+-- link code in the GHC.Linker directory. Thus for the time being we live with
+-- it. In particular cases where we have duplicated functions in
+-- GHC.Driver.Pipeline and GHC.Linker.Static, I've prefixed these with "js"
+-- except for @link@ and @link'@, for example GHC.Linker.Static.linkStaticLib
+-- becomes GHC.StgToJS.Linker.Dynamic.jsLinkStaticLib.
+--
+-- FIXME: Jeff (2022,04): In jsLinkBinary I've commented out a line that
+-- dispatches to different systools based on a boolean flag. This line seems to
+-- be a relic of the old ghc api but I left it in since it will require
+-- attention to be verified correct. I suspect that entire function is made
+-- redundant by the corresponding GHC.Linker.Static.linkBinary anyhow. Please
+-- see the fixme comment in jsLinkBinary
+--
+-- FIXME: Jeff (2022,04): You'll notice that the APIs for the linking functions,
+-- @link@, @link'@ etc are quite hairy with lots of inputs, and over half of
+-- those inputs are environments of some sort including DynFlags. Of course this
+-- is insanity. The API is forced due a let expression in
+-- @GHC.StgToJS.Linker.Dynamic.link'@ which requires all linking functions to
+-- have the same interface as GHC.Linker.Static.linkBinary. To Fix this we
+-- should begin removing these environments by refining JSLinkConfig. For
+-- example:
+-- 1. Move any required flags from StgToJSConfig to JSLinkConfig
+-- 2. Remove DynFlags by removing any opts needed for linking and add them to
+-- JSLinkConfig
+-- 3. Similar for HscEnv, we might need to decouple GHCs Linker from DynFlags in
+-- order to have a proper api
+-----------------------------------------------------------------------------
+
+module GHC.StgToJS.Linker.Dynamic
+ ( ghcjsLink
+ , ghcjsDoLink
+ ) where
+
+import GHC.StgToJS.Linker.Archive
+import GHC.StgToJS.Linker.Types
+import GHC.StgToJS.Linker.Utils
+import qualified GHC.StgToJS.Linker.Linker as JSLink
+
+import GHC.Linker.Dynamic
+import GHC.Linker.ExtraObj
+import GHC.Linker.MacOS
+import GHC.Linker.Static
+import GHC.Linker.Static.Utils
+import GHC.Linker.Types
+import GHC.Linker.Unit
+import GHC.Linker.Windows
+
+import GHC.Utils.Error
+import GHC.Utils.Misc
+import GHC.Unit.Module
+import GHC.Unit.Module.ModIface
+import GHC.Unit.Module.Deps
+import GHC.Utils.Outputable hiding ((<>))
+import GHC.Driver.Phases
+import GHC.Driver.Pipeline
+import GHC.Driver.Session
+
+import GHC.Types.Unique.DFM
+import GHC.Types.Basic
+import qualified GHC.SysTools as SysTools
+
+import GHC.Unit.Home.ModInfo
+import GHC.Unit.Info
+import GHC.Unit.Env
+import GHC.Unit.State
+import GHC.Iface.Recomp
+
+import GHC.Platform
+import Prelude
+
+import Control.Monad
+
+import qualified Data.ByteString.Lazy as B
+import qualified Data.ByteString as BS
+import Data.Functor ((<&>))
+import Data.List ( nub )
+import qualified GHC.Data.ShortText as T
+
+import System.Directory
+import System.FilePath
+import qualified GHC.Data.Maybe as Maybe
+import GHC.Platform.Ways
+import GHC.Utils.Logger
+import GHC.Utils.Panic
+import GHC.Driver.Env.Types
+import GHC.StgToJS.Types
+import GHC.Utils.TmpFs (TmpFs)
+import qualified Data.Set as Set
+
+---------------------------------------------------------------------------------
+-- Link libraries
+
+ghcjsLink :: GhcjsEnv
+ -> JSLinkConfig
+ -> StgToJSConfig
+ -> HscEnv
+ -> [FilePath] -- ^ extra JS files
+ -> Bool -- ^ build JavaScript?
+ -> GhcLink -- ^ what to link
+ -> Logger
+ -> TmpFs
+ -> UnitEnv
+ -> Bool
+ -> HomePackageTable
+ -> IO SuccessFlag
+ghcjsLink env lc_cfg cfg hsc_env extraJs buildJs ghcLink logger tmp_fs unit_env batch_attempt_linking pt
+ | ghcLink == LinkInMemory || ghcLink == NoLink =
+ return Succeeded
+ | ghcLink == LinkStaticLib || ghcLink == LinkDynLib =
+ if buildJs && Maybe.isJust (lcLinkJsLib lc_cfg)
+ then ghcjsLinkJsLib lc_cfg extraJs (hsc_dflags hsc_env) logger pt
+ else return Succeeded
+ | otherwise = do
+ when (buildJs && Maybe.isJust (lcLinkJsLib lc_cfg))
+ (void $ ghcjsLinkJsLib lc_cfg extraJs (hsc_dflags hsc_env) logger pt) -- FIXME Jeff: (2022,04): use return value and remove void
+ link' env lc_cfg cfg hsc_env extraJs buildJs logger tmp_fs unit_env batch_attempt_linking pt
+
+ghcjsLinkJsLib :: JSLinkConfig
+ -> [FilePath] -- ^ extra JS files
+ -> DynFlags
+ -> Logger
+ -> HomePackageTable
+ -> IO SuccessFlag
+ghcjsLinkJsLib settings jsFiles dflags _logger hpt
+ | Just jsLib <- lcLinkJsLib settings = do
+ let profSuff | WayProf `elem` ways dflags = "_p"
+ | otherwise = ""
+ libFileName = ("lib" ++ jsLib ++ profSuff) <.> "js_a"
+ inOutputDir file =
+ maybe file
+ (</>file)
+ (lcJsLibOutputDir settings `mplus` objectDir dflags)
+ outputFile = inOutputDir libFileName
+ jsFiles' = nub (lcJsLibSrcs settings ++ jsFiles)
+ meta = Meta (opt_P dflags)
+ jsEntries <- forM jsFiles' $ \file ->
+ (JsSource file,) . B.fromStrict <$> BS.readFile file
+ objEntries <- forM (eltsUDFM hpt) $ \hmi -> do
+ let mt = T.pack . moduleNameString . moduleName . mi_module . hm_iface $ hmi
+ files = maybe [] (\l -> [ o | DotO o <- linkableUnlinked l]) (hm_linkable hmi)
+ -- fixme archive does not handle multiple files for a module yet
+ forM files (fmap ((Object mt,) . B.fromStrict) . BS.readFile)
+ B.writeFile outputFile (buildArchive meta (concat objEntries ++ jsEntries))
+ -- we don't use shared js_so libraries ourselves, but Cabal expects that we
+ -- generate one when building with --dynamic-too. Just write an empty file
+ when (gopt Opt_BuildDynamicToo dflags || WayDyn `elem` ways dflags) $ do
+ let sharedLibFileName =
+ "lib" ++ jsLib ++ "-ghcjs" ++ getCompilerVersion ++ profSuff <.> "js_so"
+ sharedOutputFile = inOutputDir sharedLibFileName
+ -- keep strip happy
+ BS.writeFile sharedOutputFile =<< BS.readFile (topDir dflags </> "empty.o")
+ return Succeeded
+ | otherwise =
+ return Succeeded
+
+ghcjsLinkJsBinary :: GhcjsEnv
+ -> JSLinkConfig
+ -> StgToJSConfig
+ -> [FilePath]
+ -> Logger
+ -> TmpFs
+ -> HscEnv
+ -> UnitEnv
+ -> [FilePath]
+ -> [UnitId]
+ -> IO ()
+ghcjsLinkJsBinary env lc_cfg cfg jsFiles _logger _tmpfs hsc_env _unit_env objs dep_pkgs =
+ void $ JSLink.link hsc_env env lc_cfg cfg exe mempty dep_pkgs objs' jsFiles isRoot mempty
+ where
+ objs' = map ObjFile objs
+ isRoot _ = True
+ exe = jsExeFileName (hsc_dflags hsc_env)
+
+
+link' :: GhcjsEnv
+ -> JSLinkConfig
+ -> StgToJSConfig
+ -> HscEnv
+ -> [FilePath] -- extra js files
+ -> Bool -- building JavaScript
+ -> Logger -- Logger
+ -> TmpFs -- tmp file system
+ -> UnitEnv -- Unit Environment
+ -> Bool -- attempt linking in batch mode?
+ -> HomePackageTable -- what to link
+ -> IO SuccessFlag
+
+link' env lc_cfg cfg hsc_env extraJs buildJs logger tmpfs unit_env batch_attempt_linking hpt
+ | batch_attempt_linking
+ = do
+ let
+ dflags = hsc_dflags hsc_env
+ staticLink = case ghcLink dflags of
+ LinkStaticLib -> True
+ _ -> False
+
+ home_mod_infos = eltsUDFM hpt
+
+ -- the packages we depend on
+ pkg_deps = Set.toList . Set.unions
+ $ dep_direct_pkgs . mi_deps . hm_iface
+ <$> home_mod_infos
+
+ -- the linkables to link
+ linkables = map (Maybe.expectJust "link".hm_linkable) home_mod_infos
+ debugTraceMsg logger 3 (text "link: hmi ..." $$ vcat (map (ppr . mi_module . hm_iface) home_mod_infos))
+ debugTraceMsg logger 3 (text "link: pkgdeps ..." $$ vcat (map ppr pkg_deps))
+ debugTraceMsg logger 3 (text "link: linkables are ..." $$ vcat (map ppr linkables))
+
+ -- check for the -no-link flag
+ if isNoLink (ghcLink dflags)
+ then
+ do debugTraceMsg logger 3 (text "link(batch): linking omitted (-c flag given).")
+ return Succeeded
+ else
+ do
+ let getOfiles (LM _ _ us) = map nameOfObject (filter isObject us)
+ obj_files = concatMap getOfiles linkables
+ exe_file = exeFileName (targetPlatform dflags) staticLink (outputFile_ dflags)
+
+ linking_needed <- linkingNeeded logger dflags unit_env staticLink linkables pkg_deps
+ <&> (\case
+ NeedsRecompile _reason -> True
+ _ -> False)
+
+
+ if not (gopt Opt_ForceRecomp dflags) && not linking_needed
+ then
+ do
+ debugTraceMsg logger 2 (text exe_file <+> text "is up to date, linking not required.")
+ return Succeeded
+ else
+ do
+ unless buildJs $ compilationProgressMsg logger (text $ "ghcjs Linking " ++ exe_file ++ " ...")
+ -- Don't showPass in Batch mode; doLink will do that for us.
+
+ -- FIXME: Jeff (2022,03): this let expression relies on all
+ -- these functions implementing the same interface. Which leads
+ -- to a lot of unused parameters. This is bad! We should be
+ -- employing the principle of least priviledge with these
+ -- functions. Untangle this later!
+ let link = case ghcLink dflags of
+ LinkBinary -> if buildJs
+ then ghcjsLinkJsBinary env lc_cfg cfg extraJs logger tmpfs hsc_env unit_env
+ else linkBinary logger tmpfs (hsc_dflags hsc_env) unit_env
+ LinkStaticLib -> jsLinkStaticLib logger tmpfs hsc_env unit_env
+ LinkDynLib -> linkDynLibCheck logger tmpfs hsc_env unit_env
+ other -> panicBadLink other
+
+ _ <- link obj_files pkg_deps
+ debugTraceMsg logger 3 (text "link: done")
+
+ -- linkBinary only returns if it succeeds
+ return Succeeded
+
+ | otherwise
+ = do debugTraceMsg logger 3 (text "link(batch): upsweep (partially) failed OR" $$
+ text " Main.main not exported; not linking.")
+ return Succeeded
+
+
+panicBadLink :: GhcLink -> a
+panicBadLink other = panic ("link: GHC not built to link this way: " ++
+ show other)
+
+linkDynLibCheck :: Logger -> TmpFs -> HscEnv -> UnitEnv -> [FilePath] -> [UnitId] -> IO ()
+linkDynLibCheck logger tmpfs hsc_env unit_env o_files dep_packages
+ = do
+ let dflags = hsc_dflags hsc_env
+ when (haveRtsOptsFlags dflags) $
+ logOutput logger (text "Warning: -rtsopts and -with-rtsopts have no effect with -shared."
+ $$ text " Call hs_init_ghc() from your main() function to set these options.")
+
+ linkDynLib logger tmpfs (hsc_dflags hsc_env) unit_env o_files dep_packages
+
+-- FIXME: Jeff: (2022,04): This function is possibly redundant. Compare to
+-- GHC.Linker.Static.linkStaticLib and decide and remove
+jsLinkStaticLib ::Logger -> TmpFs -> HscEnv -> UnitEnv -> [FilePath] -> [UnitId] -> IO ()
+jsLinkStaticLib logger tmpfs hsc_env _unit_env o_files dep_packages
+ = -- XXX looks like this needs to be updated
+{-
+ = do
+ when (platformOS (targetPlatform dflags) `notElem` [OSiOS, OSDarwin]) $
+ throwGhcExceptionIO (ProgramError "Static archive creation only supported on Darwin/OS X/iOS")
+-}
+ jsLinkBinary' True logger tmpfs (hsc_dflags hsc_env) (hsc_unit_env hsc_env) o_files dep_packages
+
+-- FIXME: Jeff: (2022,04): This function may be a duplicate functions from
+-- GHC.Linker.Static.linkBinary. Decide if that is the case and remove
+jsLinkBinary' :: Bool -> Logger -> TmpFs -> DynFlags -> UnitEnv -> [FilePath] -> [UnitId] -> IO ()
+jsLinkBinary' staticLink logger tmpfs dflags unit_env o_files dep_packages = do
+ let platform = targetPlatform dflags
+ mySettings = settings dflags
+ verbFlags = getVerbFlags dflags
+ output_file = outputFile_ dflags
+ output_fn = exeFileName platform staticLink output_file
+
+ -- get the full list of packages to link with, by combining the
+ -- explicit packages with the auto packages and all of their
+ -- dependencies, and eliminating duplicates.
+
+ full_output_fn <- if isAbsolute output_fn
+ then return output_fn
+ else do d <- getCurrentDirectory
+ return $ normalise (d </> output_fn)
+ pkg_lib_paths <- collectLibraryDirs (ways dflags)
+ <$> mayThrowUnitErr (preloadUnitsInfo' unit_env dep_packages)
+
+ let pkg_lib_path_opts = concatMap get_pkg_lib_path_opts pkg_lib_paths
+ get_pkg_lib_path_opts l
+ | osElfTarget (platformOS platform) &&
+ dynLibLoader dflags == SystemDependent &&
+ WayDyn `elem` ways dflags
+ = let libpath = if gopt Opt_RelativeDynlibPaths dflags
+ then "$ORIGIN" </>
+ (l `makeRelativeTo` full_output_fn)
+ else l
+ -- See Note [-Xlinker -rpath vs -Wl,-rpath]
+ rpath = if gopt Opt_RPath dflags
+ then ["-Xlinker", "-rpath", "-Xlinker", libpath]
+ else []
+ -- Solaris 11's linker does not support -rpath-link option. It silently
+ -- ignores it and then complains about next option which is -l<some
+ -- dir> as being a directory and not expected object file, E.g
+ -- ld: elf error: file
+ -- /tmp/ghc-src/libraries/base/dist-install/build:
+ -- elf_begin: I/O error: region read: Is a directory
+ rpathlink = if platformOS platform == OSSolaris2
+ then []
+ else ["-Xlinker", "-rpath-link", "-Xlinker", l]
+ in ["-L" ++ l] ++ rpathlink ++ rpath
+ | osMachOTarget (platformOS platform) &&
+ dynLibLoader dflags == SystemDependent &&
+ WayDyn `elem` ways dflags &&
+ gopt Opt_RPath dflags
+ = let libpath = if gopt Opt_RelativeDynlibPaths dflags
+ then "@loader_path" </>
+ (l `makeRelativeTo` full_output_fn)
+ else l
+ in ("-L" ++ l) : ["-Xlinker", "-rpath", "-Xlinker", libpath]
+ | otherwise = ["-L" ++ l]
+
+ let
+ dead_strip
+ | gopt Opt_WholeArchiveHsLibs dflags = []
+ | otherwise = ["-Wl,-dead_strip" | osSubsectionsViaSymbols (platformOS platform)]
+ let lib_paths = libraryPaths dflags
+ let lib_path_opts = map ("-L"++) lib_paths
+
+ extraLinkObj <- Maybe.fromMaybe mempty
+ <$> mkExtraObjToLinkIntoBinary logger tmpfs dflags (ue_units unit_env)
+ noteLinkObjs <- mkNoteObjsToLinkIntoBinary logger tmpfs dflags unit_env dep_packages
+
+ let
+ (pre_hs_libs, post_hs_libs)
+ | gopt Opt_WholeArchiveHsLibs dflags
+ = if platformOS platform == OSDarwin
+ then (["-Wl,-all_load"], [])
+ -- OS X does not have a flag to turn off -all_load
+ else (["-Wl,--whole-archive"], ["-Wl,--no-whole-archive"])
+ | otherwise
+ = ([],[])
+
+ pkg_link_opts <- do
+ (package_hs_libs, extra_libs, other_flags) <- getUnitLinkOpts dflags unit_env dep_packages
+ return $ if staticLink
+ then package_hs_libs -- If building an executable really means making a static
+ -- library (e.g. iOS), then we only keep the -l options for
+ -- HS packages, because libtool doesn't accept other options.
+ -- In the case of iOS these need to be added by hand to the
+ -- final link in Xcode.
+ else other_flags ++ dead_strip
+ ++ pre_hs_libs ++ package_hs_libs ++ post_hs_libs
+ ++ extra_libs
+ -- -Wl,-u,<sym> contained in other_flags
+ -- needs to be put before -l<package>,
+ -- otherwise Solaris linker fails linking
+ -- a binary with unresolved symbols in RTS
+ -- which are defined in base package
+ -- the reason for this is a note in ld(1) about
+ -- '-u' option: "The placement of this option
+ -- on the command line is significant.
+ -- This option must be placed before the library
+ -- that defines the symbol."
+
+ -- frameworks
+ pkg_framework_opts <- getUnitFrameworkOpts unit_env dep_packages
+ let framework_opts = getFrameworkOpts dflags platform
+
+ -- probably _stub.o files
+ let extra_ld_inputs = ldInputs dflags
+
+ -- Here are some libs that need to be linked at the *end* of
+ -- the command line, because they contain symbols that are referred to
+ -- by the RTS. We can't therefore use the ordinary way opts for these.
+ let
+ debug_opts | WayDebug `elem` ways dflags = [
+
+
+
+ ]
+ | otherwise = []
+
+ thread_opts | WayThreaded `elem` ways dflags = [
+
+
+
+ ]
+ | otherwise = []
+
+ rc_objs <- maybeCreateManifest logger tmpfs dflags output_fn
+
+ -- FIXME: Jeff (2022,04): jsLinkBinary' is only ever called with staticLink ==
+ -- True. However, if it were False I'm unsure how to create the TmpFS
+ -- parameter required by SysTools.runLink. Is this necessary for the
+ -- js-backend? Fix this when we know more.
+
+ -- let link = if staticLink
+ -- then SysTools.runLibtool
+ -- else SysTools.runLink
+ let link = SysTools.runLibtool -- <--- fix this line in particular
+
+ link logger dflags (
+ map SysTools.Option verbFlags
+ ++ [ SysTools.Option "-o"
+ , SysTools.FileOption "" output_fn
+ ]
+ ++ map SysTools.Option (
+ []
+
+ -- See Note [No PIE when linking]
+ ++ picCCOpts dflags
+
+ -- Permit the linker to auto link _symbol to _imp_symbol.
+ -- This lets us link against DLLs without needing an "import library".
+ ++ (["-Wl,--enable-auto-import" | platformOS platform == OSMinGW32])
+
+ -- '-no_compact_unwind'
+ -- C++/Objective-C exceptions cannot use optimised
+ -- stack unwinding code. The optimised form is the
+ -- default in Xcode 4 on at least x86_64, and
+ -- without this flag we're also seeing warnings
+ -- like
+ -- ld: warning: could not create compact unwind for .LFB3: non-standard register 5 being saved in prolog
+ -- on x86.
+ ++ ([ "-Wl,-no_compact_unwind"
+ | sLdSupportsCompactUnwind mySettings
+ && not staticLink
+ && (platformOS platform == OSDarwin)
+ && case platformArch platform of
+ ArchX86 -> True
+ ArchX86_64 -> True
+ ArchARM {} -> True
+ ArchAArch64 -> True
+ _ -> False])
+
+ -- '-Wl,-read_only_relocs,suppress'
+ -- ld gives loads of warnings like:
+ -- ld: warning: text reloc in _base_GHCziArr_unsafeArray_info to _base_GHCziArr_unsafeArray_closure
+ -- when linking any program. We're not sure
+ -- whether this is something we ought to fix, but
+ -- for now this flags silences them.
+ ++ (["-Wl,-read_only_relocs,suppress"
+ | platformOS platform == OSDarwin
+ && platformArch platform == ArchX86
+ && not staticLink
+ ])
+
+ ++ (["-Wl,--gc-sections"
+ | sLdIsGnuLd mySettings
+ && not (gopt Opt_WholeArchiveHsLibs dflags)
+ ])
+
+ ++ o_files
+ ++ lib_path_opts)
+ ++ extra_ld_inputs
+ ++ map SysTools.Option (
+ rc_objs
+ ++ framework_opts
+ ++ pkg_lib_path_opts
+ ++ extraLinkObj:noteLinkObjs
+ ++ pkg_link_opts
+ ++ pkg_framework_opts
+ ++ debug_opts
+ ++ thread_opts
+ ))
+
+
+ghcjsDoLink :: GhcjsEnv -> JSLinkConfig -> StgToJSConfig -> HscEnv -> Phase -> [FilePath] -> IO ()
+ghcjsDoLink env lc_cfg cfg hsc_env stop_phase o_files
+ | not (isStopLn stop_phase)
+ = return () -- We stopped before the linking phase
+ {-| native
+ = case ghcLink dflags of
+ NoLink -> return ()
+ LinkBinary -> linkBinary dflags o_files []
+ LinkDynLib -> linkDynLibCheck dflags o_files []
+ other -> panicBadLink other -}
+ -- | isJust (gsLinkJsLib settings)
+ -- = void $ ghcjsLinkJsLib settings o_files dflags emptyHomePackageTable
+ | otherwise = do
+ -- void $ ghcjsLink env settings o_files True (ghcLink dflags) dflags True emptyHomePackageTable
+ let dflags = hsc_dflags hsc_env
+ case ghcLink (hsc_dflags hsc_env) of
+ NoLink -> return ()
+ LinkBinary -> do
+ putStrLn $ "ghcjsDoLink: " ++ show (ghcLink dflags) ++ " " ++ show o_files
+ let output_fn = exeFileName (targetPlatform dflags) False (outputFile_ dflags)
+ let extra_ld_inputs = [ f | FileOption _ f <- ldInputs dflags ]
+ putStrLn $ "ghcjsDoLink: " ++ show extra_ld_inputs
+
+ full_output_fn <- if isAbsolute output_fn
+ then return output_fn
+ else do d <- getCurrentDirectory
+ return $ normalise (d </> output_fn)
+ let dep_packages = case preloadUnitsInfo $ hsc_unit_env hsc_env of
+ Maybe.Succeeded us -> us
+ Maybe.Failed err -> pprPanic "Panic in ghcjsDoLink: " (ppr err)
+
+
+ let isRoot _ = True
+ objs' = map ObjFile o_files
+ dep_package_ids = map unitId dep_packages
+ jsFiles = [] -- XXX check whether we need to sort the obj_files for js-files
+
+ putStrLn $ "ghcjsDoLink: " ++ show (map unitIdString dep_package_ids)
+
+ void $
+ JSLink.link
+ hsc_env
+ env
+ lc_cfg
+ cfg
+ (full_output_fn <.> "jsexe") -- output file or dir
+ mempty -- include path for home package
+ dep_package_ids -- packages to link
+ objs' -- object we're currently linking
+ jsFiles -- extra js files to include
+ isRoot -- functions from the objects to use as roots
+ mempty -- extra symbols to link in
+
+ _other -> panicBadLink _other
diff --git a/compiler/GHC/StgToJS/Linker/Linker.hs b/compiler/GHC/StgToJS/Linker/Linker.hs
new file mode 100644
index 0000000000..e8facd322a
--- /dev/null
+++ b/compiler/GHC/StgToJS/Linker/Linker.hs
@@ -0,0 +1,887 @@
+{-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE TupleSections #-}
+
+-----------------------------------------------------------------------------
+-- |
+-- Module : GHC.StgToJS.Linker.Linker
+-- Copyright : (c) The University of Glasgow 2001
+-- License : BSD-style (see the file LICENSE)
+--
+-- Maintainer : Jeffrey Young <jeffrey.young@iohk.io>
+-- Luite Stegeman <luite.stegeman@iohk.io>
+-- Sylvain Henry <sylvain.henry@iohk.io>
+-- Josh Meredith <josh.meredith@iohk.io>
+-- Stability : experimental
+--
+-- GHCJS linker, collects dependencies from the object files (.js_o, js_p_o),
+-- which contain linkable units with dependency information
+--
+----------------------------- FIXMEs -------------------------------------------
+-- FIXME: Jeff (2022,03): Finish module description. Specifically:
+-- 1. What are the important modules this module uses
+-- 2. Who is the consumer for this module (hint: DynamicLinking)
+-- 3. What features are missing due to the implementation in this module? For
+-- example, Are we blocked from linking foreign imports due to some code in this
+-- module?
+--
+-- - add ForeignRefs imports in @link@
+-- - factor out helper functions in @link'@
+-- - remove @head@ function in @link'@
+-- - remove @ue_unsafeHomeUnit@ function in @link'@
+-- - use newtypes instead of strings for output directories in @writeRunner@
+-- - add support for windows in @writeRunner@
+-- - resolve strange unpack call in @writeExterns@ the right thing to do here
+-- might be to just remove it
+-- - fix: @collectDeps@ inputs a [UnitId], but [] is unordered yet comments in
+-- @collectDeps@ indicate a specific ordering is needed. This ordering
+-- should be enforced in some data structure other than [] which is
+-- obviously ordered but in an undefined and ad-hoc way
+-- - fix: For most of the Linker I pass around UnitIds, I (Jeff) am unsure if
+-- these should really be modules. Or to say this another way is UnitId the
+-- right abstraction level? Or is Module? Or some other unit thing?
+-- - fix: Gen2.GHCJS used NFData instances over a lot of types. Replicating
+-- these instances would mean adding a Generic and NFData instance to some
+-- internal GHC types. I (Jeff) do not think we want to do that. Instead, we
+-- should use strict data structures as a default and then implement lazy
+-- ones where it makes sense and only if it makes sense. IMHO Gen2.GHCJS was
+-- overly lazy and we should avoid repeating that here. Let profiling be our
+-- guide during our performance refactoring.
+-- - Employ the type system more effectively for @readSystemDeps'@, in
+-- particular get rid of the string literals
+-- - fix foldl' memory leak in @staticDeps@
+-- - move @mkSymb@
+-----------------------------------------------------------------------------
+
+module GHC.StgToJS.Linker.Linker where
+
+import GHC.StgToJS.Linker.Types
+import GHC.StgToJS.Linker.Utils
+import GHC.StgToJS.Linker.Compactor
+
+import GHC.StgToJS.Rts.Rts
+
+import GHC.JS.Syntax
+
+import GHC.StgToJS.Object
+import GHC.StgToJS.Types hiding (LinkableUnit)
+import GHC.StgToJS.UnitUtils
+import GHC.StgToJS.Printer
+
+import qualified GHC.SysTools.Ar as Ar
+import GHC.Utils.Encoding
+import GHC.Utils.Outputable (ppr, text)
+import GHC.Utils.Panic
+import GHC.Unit.State
+import GHC.Unit.Env
+import GHC.Unit.Home
+import GHC.Unit.Types
+import GHC.Utils.Error
+import GHC.Platform.Ways
+import GHC.Driver.Env.Types
+import GHC.Data.ShortText (ShortText)
+import qualified GHC.Data.ShortText as T
+
+import Control.Concurrent.MVar
+import Control.Monad
+
+import Data.Array
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Char8 as BC
+import qualified Data.ByteString.Lazy.Char8 as BLC
+import qualified Data.ByteString.Lazy as BL
+import Data.Function (on)
+import Data.Int
+import Data.IntSet (IntSet)
+import qualified Data.IntSet as IS
+import Data.IORef
+import Data.List ( partition, nub, foldl', intercalate, group, sort
+ , groupBy, isSuffixOf, find, intersperse
+ )
+import Data.Map.Strict (Map)
+import qualified Data.Map.Strict as M
+import Data.Maybe
+import Data.Set (Set)
+import qualified Data.Set as S
+
+import GHC.Generics (Generic)
+
+import System.FilePath (splitPath, (<.>), (</>), dropExtension)
+import System.IO
+import System.Directory ( createDirectoryIfMissing
+ , doesFileExist
+ , getCurrentDirectory
+ , Permissions(..)
+ , setPermissions
+ , getPermissions
+ )
+
+import Prelude
+import GHC.Driver.Session (targetWays_, settings)
+import GHC.Settings (sTopDir)
+import GHC.Unit.Module.Name
+import GHC.Unit.Module (moduleStableString)
+
+-- number of bytes linked per module
+type LinkerStats = Map Module Int64
+
+-- | result of a link pass
+data LinkResult = LinkResult
+ { linkOut :: BL.ByteString -- ^ compiled Haskell code
+ , linkOutStats :: LinkerStats -- ^ statistics about generated code
+ , linkOutMetaSize :: Int64 -- ^ size of packed metadata in generated code
+ , linkForeignRefs :: [ForeignJSRef] -- ^ foreign code references in compiled haskell code
+ , linkLibRTS :: [FilePath] -- ^ library code to load with the RTS
+ , linkLibA :: [FilePath] -- ^ library code to load after RTS
+ , linkLibAArch :: [FilePath] -- ^ library code to load from archives after RTS
+ , linkBase :: Base -- ^ base metadata to use if we want to link incrementally against this result
+ } deriving (Generic)
+
+newtype ArchiveState = ArchiveState { loadedArchives :: IORef (Map FilePath Ar.Archive) }
+
+emptyArchiveState :: IO ArchiveState
+emptyArchiveState = ArchiveState <$> newIORef M.empty
+
+-- | link and write result to disk (jsexe directory)
+link :: HscEnv
+ -> GhcjsEnv
+ -> JSLinkConfig
+ -> StgToJSConfig
+ -> FilePath -- ^ output file/directory
+ -> [FilePath] -- ^ include path for home package
+ -> [UnitId] -- ^ packages to link
+ -> [LinkedObj] -- ^ the object files we're linking
+ -> [FilePath] -- ^ extra js files to include
+ -> (ExportedFun -> Bool) -- ^ functions from the objects to use as roots (include all their deps)
+ -> Set ExportedFun -- ^ extra symbols to link in
+ -> IO ()
+link hsc_env env lc_cfg cfg out include pkgs objFiles jsFiles isRootFun extraStaticDeps
+ | lcNoJSExecutables lc_cfg = return ()
+ | otherwise = do
+ LinkResult lo lstats lmetasize _lfrefs llW lla llarch lbase <-
+ link' hsc_env env lc_cfg cfg out include pkgs objFiles jsFiles
+ isRootFun extraStaticDeps
+ let genBase = isJust (lcGenBase lc_cfg)
+ jsExt | genBase = "base.js"
+ | otherwise = "js"
+ createDirectoryIfMissing False out
+ BL.writeFile (out </> "out" <.> jsExt) lo
+ unless (lcOnlyOut lc_cfg) $ do
+ let frefsFile = if genBase then "out.base.frefs" else "out.frefs"
+ -- FIXME: Jeff (2022,03): GHCJS used Aeson to encode Foreign
+ -- references as StaticDeps to a Bytestring and then write these out
+ -- to a tmp file for linking. We do not have access to Aeson so
+ -- we'll need to find an alternative coding strategy to write these
+ -- out. See the commented instance for FromJSON StaticDeps below.
+ -- - this line called out to the FromJSon Instance
+ -- jsonFrefs = Aeson.encode lfrefs
+ jsonFrefs = mempty
+ dflags = hsc_dflags hsc_env
+
+ BL.writeFile (out </> frefsFile <.> "json") jsonFrefs
+ BL.writeFile (out </> frefsFile <.> "js")
+ ("h$checkForeignRefs(" <> jsonFrefs <> ");")
+ unless (lcNoStats lc_cfg) $ do
+ let statsFile = if genBase then "out.base.stats" else "out.stats"
+ writeFile (out </> statsFile) (linkerStats lmetasize lstats)
+ unless (lcNoRts lc_cfg) $ do
+ withRts <- mapM (tryReadShimFile dflags) llW
+ BL.writeFile (out </> "rts.js") (BLC.pack (T.unpack rtsDeclsText)
+ <> BL.fromChunks withRts
+ <> BLC.pack (T.unpack $ rtsText cfg))
+ lla' <- mapM (tryReadShimFile dflags) lla
+ llarch' <- mapM (readShimsArchive dflags) llarch
+ BL.writeFile (out </> "lib" <.> jsExt)
+ (BL.fromChunks $ llarch' ++ lla')
+ if genBase
+ then generateBase out lbase
+ else when ( not (lcOnlyOut lc_cfg)
+ && not (lcNoRts lc_cfg)
+ && not (usingBase lc_cfg)
+ )
+ $ do
+ let top = sTopDir . settings . hsc_dflags $ hsc_env
+ _ <- combineFiles lc_cfg top out
+ writeHtml top out
+ writeRunMain top out
+ writeRunner lc_cfg out
+ writeWebAppManifest top out
+ writeExterns out
+
+-- | link in memory
+link' :: HscEnv
+ -> GhcjsEnv
+ -> JSLinkConfig
+ -> StgToJSConfig
+ -> String -- ^ target (for progress message)
+ -> [FilePath] -- ^ include path for home package
+ -> [UnitId] -- ^ packages to link
+ -> [LinkedObj] -- ^ the object files we're linking
+ -> [FilePath] -- ^ extra js files to include
+ -> (ExportedFun -> Bool) -- ^ functions from the objects to use as roots (include all their deps)
+ -> Set ExportedFun -- ^ extra symbols to link in
+ -> IO LinkResult
+link' hsc_env env lc_cfg cfg target _include pkgs objFiles jsFiles isRootFun extraStaticDeps
+ = do
+ -- FIXME: Jeff (2022,04): This function has several helpers that should be
+ -- factored out. In its current condition it is hard to read exactly whats
+ -- going on and why.
+ (objDepsMap, objRequiredUnits) <- loadObjDeps objFiles
+
+ let rootSelector | Just baseMod <- lcGenBase lc_cfg =
+ \(ExportedFun m _s) -> m == baseMod
+ | otherwise = isRootFun
+ roots = S.fromList . filter rootSelector $ concatMap (M.keys . depsHaskellExported . fst) (M.elems objDepsMap)
+ -- FIXME: Jeff (2022,03): Remove head. opt for NonEmptyList. Every
+ -- head is a time bomb waiting to explode
+ rootMods = map (moduleNameString . moduleName . head) . group . sort . map funModule . S.toList $ roots
+ objPkgs = map moduleUnitId $ nub (M.keys objDepsMap)
+
+ _ <- compilationProgressMsg (hsc_logger hsc_env) . text $
+ case lcGenBase lc_cfg of
+ Just baseMod -> "Linking base bundle " ++ target ++ " (" ++ moduleNameString (moduleName baseMod) ++ ")"
+ _ -> "Linking " ++ target ++ " (" ++ intercalate "," rootMods ++ ")"
+ base <- case lcUseBase lc_cfg of
+ NoBase -> return emptyBase
+ BaseFile file -> loadBase file
+ BaseState b -> return b
+ (rdPkgs, rds) <- rtsDeps hsc_env pkgs
+ -- c <- newMVar M.empty
+ let rtsPkgs = map stringToUnitId ["@rts", "@rts_" ++ waysTag (targetWays_ $ hsc_dflags hsc_env)]
+ pkgs' :: [UnitId]
+ pkgs' = nub (rtsPkgs ++ rdPkgs ++ reverse objPkgs ++ reverse pkgs)
+ pkgs'' = filter (not . isAlreadyLinked base) pkgs'
+ ue_state = ue_units $ hsc_unit_env hsc_env
+ -- pkgLibPaths = mkPkgLibPaths pkgs'
+ -- getPkgLibPaths :: UnitId -> ([FilePath],[String])
+ -- getPkgLibPaths k = fromMaybe ([],[]) (lookup k pkgLibPaths)
+ (archsDepsMap, archsRequiredUnits) <- loadArchiveDeps env =<< getPackageArchives cfg (map snd $ mkPkgLibPaths ue_state pkgs')
+ pkgArchs <- getPackageArchives cfg (map snd $ mkPkgLibPaths ue_state pkgs'')
+ (allDeps, code) <-
+ collectDeps (objDepsMap `M.union` archsDepsMap)
+ (pkgs' ++ [homeUnitId (ue_unsafeHomeUnit $ hsc_unit_env hsc_env)]) -- FIXME: dont use unsafe
+ (baseUnits base)
+ (roots `S.union` rds `S.union` extraStaticDeps)
+ (archsRequiredUnits ++ objRequiredUnits)
+ let (outJs, metaSize, compactorState, stats) =
+ renderLinker lc_cfg cfg (baseCompactorState base) rds code
+ base' = Base compactorState (nub $ basePkgs base ++ pkgs'')
+ (allDeps `S.union` baseUnits base)
+ (alreadyLinkedBefore, alreadyLinkedAfter) <- getShims [] (filter (isAlreadyLinked base) pkgs')
+ (shimsBefore, shimsAfter) <- getShims jsFiles pkgs''
+ return $ LinkResult outJs stats metaSize
+ (concatMap (\(_,_,_,_,_,r) -> r) code)
+ (filter (`notElem` alreadyLinkedBefore) shimsBefore)
+ (filter (`notElem` alreadyLinkedAfter) shimsAfter)
+ pkgArchs base'
+ where
+ isAlreadyLinked :: Base -> UnitId -> Bool
+ isAlreadyLinked b uid = uid `elem` basePkgs b
+
+ mkPkgLibPaths :: UnitState -> [UnitId] -> [(UnitId, ([FilePath],[String]))]
+ mkPkgLibPaths u_st
+ = map (\k -> ( k
+ , (getInstalledPackageLibDirs u_st k
+ , getInstalledPackageHsLibs u_st k)
+ ))
+
+renderLinker :: JSLinkConfig
+ -> StgToJSConfig
+ -> CompactorState
+ -> Set ExportedFun
+ -> [(Module, JStat, ShortText, [ClosureInfo], [StaticInfo], [ForeignJSRef])] -- ^ linked code per module
+ -> (BL.ByteString, Int64, CompactorState, LinkerStats)
+renderLinker settings cfg renamerState rtsDeps code =
+ let
+ (_renamerState', compacted, meta) = compact settings cfg renamerState (map funSymbol $ S.toList rtsDeps) (map (\(_,s,_,ci,si,_) -> (s,ci,si)) code)
+ pe = (<>"\n") . show . pretty
+ rendered = fmap pe compacted
+ renderedMeta = pe meta
+ renderedExports = concatMap T.unpack . filter (not . T.null) $ map (\(_,_,rs,_,_,_) -> rs) code
+ mkStat (m,_,_,_,_,_) b = (m, BL.length . BLC.pack $ b)
+ in ( BL.fromStrict $ BC.pack $ mconcat [mconcat rendered, renderedMeta, renderedExports]
+ , BL.length $ BL.fromStrict $ BC.pack renderedMeta
+ , renamerState
+ , M.fromList $ zipWith mkStat code rendered
+ )
+
+linkerStats :: Int64 -- ^ code size of packed metadata
+ -> LinkerStats -- ^ code size per module
+ -> String
+linkerStats meta s =
+ intercalate "\n\n" [packageStats, moduleStats, metaStats] <> "\n\n"
+ where
+ ps = M.fromListWith (+) . map (\(m,s) -> (moduleName m,s)) . M.toList $ s
+ pad :: Int -> String -> String
+ pad n t = let l = length t
+ in if l < n then t <> replicate (n-l) ' ' else t
+
+ pkgMods :: [[(Module,Int64)]]
+ pkgMods = groupBy ((==) `on` fst) (M.toList s)
+
+ showMod :: (Module, Int64) -> String
+ showMod (m,s) = pad 40 (" " <> moduleStableString m <> ":") <> show s
+
+ packageStats :: String
+ packageStats = "code size summary per package:\n\n"
+ <> concatMap (\(p,s) -> pad 25 (show p <> ":") <> show s) (M.toList ps)
+
+ moduleStats :: String
+ moduleStats = "code size per module:\n\n" <> unlines (map (concatMap showMod) pkgMods)
+
+ metaStats :: String
+ metaStats = "packed metadata: " <> show meta
+
+splitPath' :: FilePath -> [FilePath]
+splitPath' = map (filter (`notElem` ("/\\"::String))) . splitPath
+
+getPackageArchives :: StgToJSConfig -> [([FilePath],[String])] -> IO [FilePath]
+getPackageArchives cfg pkgs =
+ filterM doesFileExist [ p </> "lib" ++ l ++ profSuff <.> "a"
+ | (paths, libs) <- pkgs, p <- paths, l <- libs ]
+ where
+ -- XXX the profiling library name is probably wrong now
+ profSuff | csProf cfg = "_p"
+ | otherwise = ""
+
+-- fixme the wired-in package id's we get from GHC we have no version
+getShims :: [FilePath] -> [UnitId] -> IO ([FilePath], [FilePath])
+getShims = panic "Panic from getShims: Shims not implemented! no to shims!"
+-- getShims dflags extraFiles pkgDeps = do
+-- (w,a) <- collectShims (getLibDir dflags </> "shims")
+-- (map (convertPkg dflags) pkgDeps)
+-- extraFiles' <- mapM canonicalizePath extraFiles
+-- return (w, a++extraFiles')
+
+{- | convenience: combine rts.js, lib.js, out.js to all.js that can be run
+ directly with node.js or SpiderMonkey jsshell
+ -}
+combineFiles :: JSLinkConfig
+ -> FilePath -- ^ top level dir
+ -> FilePath
+ -> IO ()
+combineFiles cfg top fp = do
+ files <- mapM (B.readFile.(fp</>)) ["rts.js", "lib.js", "out.js"]
+ runMain <- if lcNoHsMain cfg
+ then pure mempty
+ else B.readFile (top </> "runmain.js")
+ writeBinaryFile (fp</>"all.js") (mconcat (files ++ [runMain]))
+
+-- | write the index.html file that loads the program if it does not exit
+writeHtml :: FilePath -- ^ top level library directory
+ -> FilePath -- ^ output directory
+ -> IO ()
+writeHtml top out = do
+ e <- doesFileExist htmlFile
+ unless e $
+ B.readFile (top </>"template.html") >>= B.writeFile htmlFile
+ where
+ htmlFile = out </> "index.html"
+
+-- | write the runmain.js file that will be run with defer so that it runs after
+-- index.html is loaded
+writeRunMain :: FilePath -- ^ top level library directory
+ -> FilePath -- ^ output directory
+ -> IO ()
+writeRunMain top out = do
+ e <- doesFileExist runMainFile
+ unless e $
+ B.readFile (top </> "runmain.js") >>= B.writeFile runMainFile
+ where
+ runMainFile = out </> "runmain.js"
+
+-- FIXME: Jeff (2022,03): Use Newtypes instead of Strings for these directories
+writeRunner :: JSLinkConfig -- ^ Settings
+ -> FilePath -- ^ Output directory
+ -> IO ()
+writeRunner _settings out =
+ -- FIXME: Jeff (2022,03): why was the buildRunner check removed? If we don't
+ -- need to check then does the flag need to exist?
+ {-when (lcBuildRunner _settings) $ -} do
+ cd <- getCurrentDirectory
+ let runner = cd </> addExeExtension (dropExtension out)
+ srcFile = out </> "all" <.> "js"
+ -- nodeSettings <- readNodeSettings dflags
+ nodePgm :: B.ByteString
+ nodePgm = "node" -- XXX we don't read nodeSettings.json anymore, we should somehow know how to find node?
+
+ ---------------------------------------------
+ -- FIXME: Jeff (2022,03): Add support for windows. Detect it and act on it here:
+ -- if Platform.isWindows
+ -- then do
+ -- copyFile (topDir dflags </> "bin" </> "wrapper" <.> "exe")
+ -- runner
+ -- writeFile (runner <.> "options") $ unlines
+ -- [ T.pack nodePgm -- T.pack (nodeProgram nodeSettings)
+ -- , T.pack ("{{EXEPATH}}" </> out </> "all" <.> "js")
+ -- ]
+ -- else do
+ ---------------------------------------------
+ src <- B.readFile (cd </> srcFile)
+ B.writeFile runner ("#!/usr/bin/env " <> nodePgm <> "\n" <> src)
+ perms <- getPermissions runner
+ setPermissions runner (perms {executable = True})
+
+-- | write the manifest.webapp file that for firefox os
+writeWebAppManifest :: FilePath -- ^ top directory
+ -> FilePath -- ^ output directory
+ -> IO ()
+writeWebAppManifest top out = do
+ e <- doesFileExist manifestFile
+ unless e $ B.readFile (top </> "manifest.webapp") >>= B.writeFile manifestFile
+ where
+ manifestFile = out </> "manifest.webapp"
+
+rtsExterns :: ShortText
+rtsExterns =
+ "// GHCJS RTS externs for closure compiler ADVANCED_OPTIMIZATIONS\n\n" <>
+ mconcat (map (\x -> "/** @type {*} */\nObject.d" <> T.pack (show x) <> ";\n")
+ [(7::Int)..16384])
+
+writeExterns :: FilePath -> IO ()
+writeExterns out = writeFile (out </> "all.js.externs")
+ $ T.unpack rtsExterns -- FIXME: Jeff (2022,03): Why write rtsExterns as
+ -- ShortText just to unpack?
+
+-- | get all functions in a module
+modFuns :: Deps -> [ExportedFun]
+modFuns (Deps _m _r e _b) = M.keys e
+
+-- | get all dependencies for a given set of roots
+getDeps :: Map Module Deps -- ^ loaded deps
+ -> Set LinkableUnit -- ^ don't link these blocks
+ -> Set ExportedFun -- ^ start here
+ -> [LinkableUnit] -- ^ and also link these
+ -> IO (Set LinkableUnit)
+getDeps loaded_deps base fun startlu = go' S.empty (S.fromList startlu) (S.toList fun)
+ where
+ go :: Set LinkableUnit
+ -> Set LinkableUnit
+ -> IO (Set LinkableUnit)
+ go result open = case S.minView open of
+ Nothing -> return result
+ Just (lu@(lmod,n), open') ->
+ case M.lookup lmod loaded_deps of
+ Nothing -> pprPanic "getDeps.go: object file not loaded for: " (pprModule lmod)
+ Just (Deps _ _ _ b) ->
+ let block = b!n
+ result' = S.insert lu result
+ in go' result'
+ (addOpen result' open' $
+ map (lmod,) (blockBlockDeps block)) (blockFunDeps block)
+
+ go' :: Set LinkableUnit
+ -> Set LinkableUnit
+ -> [ExportedFun]
+ -> IO (Set LinkableUnit)
+ go' result open [] = go result open
+ go' result open (f:fs) =
+ let key = funModule f
+ in case M.lookup key loaded_deps of
+ Nothing -> pprPanic "getDeps.go': object file not loaded for: " $ pprModule key
+ Just (Deps _m _r e _b) ->
+ let lun :: Int
+ lun = fromMaybe (pprPanic "exported function not found: " $ pprModule key)
+ (M.lookup f e)
+ lu = (key, lun)
+ in go' result (addOpen result open [lu]) fs
+
+ addOpen :: Set LinkableUnit -> Set LinkableUnit -> [LinkableUnit]
+ -> Set LinkableUnit
+ addOpen result open newUnits =
+ let alreadyLinked s = S.member s result ||
+ S.member s open ||
+ S.member s base
+ in open `S.union` S.fromList (filter (not . alreadyLinked) newUnits)
+
+-- FIXME: Jeff: (2022,03): if the order of the [UnitId] list matters after
+-- ghc-prim then we should be using an Ordered Set or something
+-- similar since the implementation of this function uses a lot of
+-- expensive operations on this list and a lot of
+-- serialization/deserialization
+-- FIXME: Jeff (2022,03): Should [UnitId] be [Module]?
+-- | collect dependencies for a set of roots
+collectDeps :: Map Module (Deps, DepsLocation) -- ^ Dependency map
+ -> [UnitId] -- ^ packages, code linked in this order
+ -> Set LinkableUnit -- ^ do not include these
+ -> Set ExportedFun -- ^ roots
+ -> [LinkableUnit] -- ^ more roots
+ -> IO ( Set LinkableUnit
+ , [(Module, JStat, ShortText, [ClosureInfo], [StaticInfo], [ForeignJSRef])]
+ )
+collectDeps mod_deps packages base roots units = do
+ allDeps <- getDeps (fmap fst mod_deps) base roots units
+ -- read ghc-prim first, since we depend on that for static initialization
+ let packages' = uncurry (++) $ partition (== primUnitId) (nub packages)
+
+ units_by_module :: Map Module IntSet
+ units_by_module = M.fromListWith IS.union $
+ map (\(m,n) -> (m, IS.singleton n)) (S.toList allDeps)
+
+ mod_deps_bypkg :: Map UnitId [(Deps, DepsLocation)]
+ mod_deps_bypkg = M.mapKeys moduleUnitId
+ $ M.fromListWith (++)
+ (map (\(m,v) -> (m,[v])) (M.toList mod_deps))
+
+ ar_state <- emptyArchiveState
+ code <- fmap (catMaybes . concat) . forM packages' $ \pkg ->
+ mapM (uncurry $ extractDeps ar_state units_by_module)
+ (fromMaybe [] $ M.lookup pkg mod_deps_bypkg)
+ return (allDeps, code)
+
+extractDeps :: ArchiveState
+ -> Map Module IntSet
+ -> Deps
+ -> DepsLocation
+ -> IO (Maybe (Module, JStat, ShortText, [ClosureInfo], [StaticInfo], [ForeignJSRef]))
+extractDeps ar_state units deps loc =
+ case M.lookup mod units of
+ Nothing -> return Nothing
+ Just modUnits -> do
+ let selector n _ = n `IS.member` modUnits || isGlobalUnit n
+ x <- case loc of
+ ObjectFile o -> collectCode =<< readObjectFileKeys selector o
+ ArchiveFile a -> collectCode
+ . readObjectKeys (a ++ ':':moduleNameString (moduleName mod)) selector
+ =<< readArObject ar_state mod a
+ -- error ("Ar.readObject: " ++ a ++ ':' : T.unpack mod))
+ -- Ar.readObject (mkModuleName $ T.unpack mod) a)
+ InMemory n b -> collectCode $ readObjectKeys n selector b
+ -- evaluate (rnf x) -- See FIXME Re: NFData instance on Safety and
+ -- ForeignJSRefs below
+ return x
+ where
+ mod = depsModule deps
+ -- FIXME: Jeff (2022,03): remove this hacky reimplementation of unlines
+ newline = T.pack "\n"
+ unlines' = intersperse newline . map oiRaw
+ collectCode l = let x = ( mod
+ , mconcat (map oiStat l)
+ , mconcat (unlines' l)
+ , concatMap oiClInfo l
+ , concatMap oiStatic l
+ , concatMap oiFImports l)
+ -- FIXME: (2022,04): this evaluate and rnf require an NFData
+ -- instance on ForeignJSRef which in turn requries a NFData
+ -- instance on Safety. Does this even make sense? We'll skip
+ -- this for now.
+
+ -- in evaluate (rnf x) >> return (Just x)
+
+ in return (Just x)
+
+readArObject :: ArchiveState -> Module -> FilePath -> IO BL.ByteString
+readArObject ar_state mod ar_file = do
+ loaded_ars <- readIORef (loadedArchives ar_state)
+ (Ar.Archive entries) <- case M.lookup ar_file loaded_ars of
+ Just a -> pure a
+ Nothing -> do
+ a <- Ar.loadAr ar_file
+ modifyIORef (loadedArchives ar_state) (M.insert ar_file a)
+ pure a
+ let tag = moduleNameTag $ moduleName mod
+ matchTag entry
+ | Right hdr <- getHeader (BL.fromStrict $ Ar.filedata entry)
+ = hdrModuleName hdr == tag
+ | otherwise
+ = False
+
+ -- XXX this shouldn't be an exception probably
+ pure $ maybe (error $ "could not find object for module "
+ ++ moduleNameString (moduleName mod)
+ ++ " in "
+ ++ ar_file)
+ (BL.fromStrict . Ar.filedata) (find matchTag entries)
+ -- mapM_ (\e -> putStrLn ("found file: " ++ Ar.filename e)) entries
+
+{- | Static dependencies are symbols that need to be linked regardless
+ of whether the linked program refers to them. For example
+ dependencies that the RTS uses or symbols that the user program
+ refers to directly
+ -}
+newtype StaticDeps =
+ StaticDeps { unStaticDeps :: [(ShortText, ShortText)] -- module/symbol
+ }
+
+noStaticDeps :: StaticDeps
+noStaticDeps = StaticDeps []
+
+{- | The input file format for static deps is a yaml document with a
+ package/module/symbol tree where symbols can be either a list or
+ just a single string, for example:
+
+ base:
+ GHC.Conc.Sync: reportError
+ Control.Exception.Base: nonTermination
+ ghcjs-prim:
+ GHCJS.Prim:
+ - JSVal
+ - JSException
+ -}
+-- instance FromJSON StaticDeps where
+-- parseJSON (Object v) = StaticDeps . concat <$> mapM (uncurry parseMod) (HM.toList v)
+-- where
+-- parseMod p (Object v) = concat <$> mapM (uncurry (parseSymb p)) (HM.toList v)
+-- parseMod _ _ = mempty
+-- parseSymb p m (String s) = pure [(p,m,s)]
+-- parseSymb p m (Array v) = mapM (parseSingleSymb p m) (V.toList v)
+-- parseSymb _ _ _ = mempty
+-- parseSingleSymb p m (String s) = pure (p,m,s)
+-- parseSingleSymb _ _ _ = mempty
+-- parseJSON _ = mempty
+
+-- | dependencies for the RTS, these need to be always linked
+rtsDeps :: HscEnv -> [UnitId] -> IO ([UnitId], Set ExportedFun)
+rtsDeps hsc_env pkgs = readSystemDeps hsc_env pkgs "rtsdeps.yaml"
+
+-- | dependencies for the Template Haskell, these need to be linked when running
+-- Template Haskell (in addition to the RTS deps)
+thDeps :: HscEnv -> [UnitId] -> IO ([UnitId], Set ExportedFun)
+thDeps hsc_env pkgs = readSystemDeps hsc_env pkgs "thdeps.yaml"
+
+-- FIXME: Jeff (2022,03): fill in the ?
+-- | A helper function to read system dependencies that are hardcoded via a file
+-- path.
+readSystemDeps :: HscEnv -- ^ HS Env
+ -> [UnitId] -- ^ Packages to ??
+ -> FilePath -- ^ File to read
+ -> IO ([UnitId], Set ExportedFun)
+readSystemDeps hsc_env pkgs file = do
+ (deps_pkgs, deps_funs) <- readSystemDeps' hsc_env file
+ pure ( filter (`S.member` linked_pkgs) deps_pkgs
+ , S.filter (\fun ->
+ moduleUnitId (funModule fun) `S.member` linked_pkgs) deps_funs
+ )
+
+ where
+ -- FIXME: Jeff (2022,03): Each time we _do not_ use a list like a stack we
+ -- gain evidence that we should be using a different data structure. @pkgs@
+ -- is the list in question here
+ linked_pkgs = S.fromList pkgs
+
+
+readSystemDeps' :: HscEnv
+ -> FilePath
+ -> IO ([UnitId], Set ExportedFun)
+readSystemDeps' hsc_env file
+ -- hardcode contents to get rid of yaml dep
+ -- XXX move runTHServer to some suitable wired-in package
+ -- FIXME: Jeff (2022,03): Use types not string matches, These should be
+ -- wired-in just like in GHC and thus we should make them top level
+ -- definitions
+ | file == "thdeps.yaml" = pure ( [stringToUnitId "base"]
+ , S.fromList $ d "base" "GHCJS.Prim.TH.Eval" ["runTHServer"])
+ | file == "rtsdeps.yaml" = pure ( [stringToUnitId "base"
+ , stringToUnitId "ghc-prim"
+ , stringToUnitId "integer-wired-in"
+ ]
+ , S.fromList $ concat
+ [ d "base" "GHC.Conc.Sync" ["reportError"]
+ , d "base" "Control.Exception.Base" ["nonTermination"]
+ , d "base" "GHC.Exception.Type" ["SomeException"]
+ , d "base" "GHC.TopHandler" ["runMainIO", "topHandler"]
+ , d "base" "GHC.Base" ["$fMonadIO"]
+ , d "base" "GHC.Maybe" ["Nothing", "Just"]
+ , d "base" "GHC.Ptr" ["Ptr"]
+ , d "ghc-prim" "GHC.Types" [":", "[]"]
+ , d "ghc-prim" "GHC.Tuple" ["(,)", "(,,)", "(,,,)", "(,,,,)", "(,,,,,)","(,,,,,,)", "(,,,,,,,)", "(,,,,,,,,)", "(,,,,,,,,,)"]
+ , d "integer-wired-in" "GHC.Integer.Type" ["S#", "Jp#", "Jn#"]
+ , d "ghc-prim" "GHC.Types" [ "JSVal" ]
+ , d "base" "GHCJS.Prim" ["JSException", "$fShowJSException", "$fExceptionJSException", "resolve", "resolveIO", "toIO"]
+ , d "base" "GHCJS.Prim.Internal" ["wouldBlock", "blockedIndefinitelyOnMVar", "blockedIndefinitelyOnSTM", "ignoreException", "setCurrentThreadResultException", "setCurrentThreadResultValue"]
+ ]
+ )
+ | otherwise = pure (mempty, mempty)
+ where
+
+ d :: String -> String -> [String] -> [ExportedFun]
+ d pkg mod symbols = map (let pkg_module = mkJsModule pkg
+ in ExportedFun pkg_module
+ . mkHaskellSym pkg_module (T.pack mod)
+ . T.pack)
+ symbols
+ zenc = T.pack . zEncodeString . T.unpack
+
+ mkHaskellSym :: Module -> ShortText -> ShortText -> ShortText
+ mkHaskellSym mod m s = "h$" <> zenc (T.pack (encodeModule hsc_env mod)
+ <> ":"
+ <> m
+ <> "."
+ <> s)
+ mkJsModule :: String -> GenModule Unit
+ mkJsModule pkg = mkModule (RealUnit (Definite (stringToUnitId pkg))) (mkModuleName pkg)
+
+{-
+ b <- readBinaryFile (getLibDir dflags </> file)
+ wi <- readSystemWiredIn dflags
+ case Yaml.decodeEither b of
+ Left err -> panic $ "could not read " ++ depsName ++
+ " dependencies from " ++ file ++ ":\n" ++ err
+ Right sdeps ->
+ let (StaticDeps unresolved, pkgs, funs) = staticDeps dflags wi sdeps
+ in case unresolved of
+ ((p,_,_):_) ->
+ panic $ "Package `" ++ T.unpack p ++ "' is required for " ++
+ requiredFor ++ ", but was not found"
+ _ ->
+ -- putStrLn "system dependencies:"
+ -- print (map installedUnitIdString pkgs, funs)
+ return (pkgs, funs)
+
+-}
+
+readSystemWiredIn :: HscEnv -> IO [(ShortText, UnitId)]
+readSystemWiredIn _ = pure [] -- XXX
+{-
+readSystemWiredIn dflags = do
+ b <- B.readFile filename
+ case Yaml.decodeEither b of
+ Left _err -> error $ "could not read wired-in package keys from " ++ filename
+ Right m -> return . M.toList
+ . M.union ghcWiredIn -- GHC wired-in package keys override those in the file
+ . fmap stringToUnitId $ m
+ where
+ filename = getLibDir dflags </> "wiredinkeys" <.> "yaml"
+ ghcWiredIn :: Map Text UnitId
+ ghcWiredIn = M.fromList $ map (\k -> (T.pack (installedUnitIdString k), k))
+ (map toUnitId wiredInUnitIds)
+ -}
+{- | read a static dependencies specification and give the roots
+
+ if dependencies come from a versioned (non-hardwired) package
+ that is linked multiple times, then the returned dependencies
+ will all come from the same version, but it's undefined which one.
+ -}
+
+type SDep = (ShortText, ShortText) -- ^ module/symbol
+
+staticDeps :: HscEnv
+ -> [(ShortText, Module)] -- ^ wired-in package names / keys
+ -> StaticDeps -- ^ deps from yaml file
+ -> (StaticDeps, Set UnitId, Set ExportedFun)
+ -- ^ the StaticDeps contains the symbols
+ -- for which no package could be found
+staticDeps hsc_env wiredin sdeps = mkDeps sdeps
+ where
+ zenc = T.pack . zEncodeString . T.unpack
+ u_st = ue_units $ hsc_unit_env hsc_env
+ mkDeps (StaticDeps ds) =
+ -- FIXME: Jeff (2022,03): this foldl' will leak memory due to the tuple
+ -- and in the list in the fst position because the list is neither spine
+ -- nor value strict. So the WHNF computed by foldl' will by a 3-tuple with
+ -- 3 thunks and the WHNF for the list will be a cons cell
+ let (u, p, r) = foldl' resolveDep ([], S.empty, S.empty) ds
+ in (StaticDeps u, closePackageDeps u_st p, r)
+ resolveDep :: ([SDep], Set UnitId, Set ExportedFun)
+ -> SDep
+ -> ([SDep], Set UnitId, Set ExportedFun)
+ resolveDep (unresolved, pkgs, resolved) dep@(mod_name, s) =
+ -- lookup our module in wiredin names
+ case lookup mod_name wiredin of
+ -- we didn't find the module in wiredin so add to unresolved
+ Nothing -> ( dep : unresolved, pkgs, resolved)
+ -- this is a wired in module
+ Just mod ->
+ let mod_uid = moduleUnitId mod
+ in case lookupUnitId u_st mod_uid of
+ -- couldn't find the uid for this wired in package so explode
+ Nothing -> pprPanic ("Package key for wired-in dependency could not be found.`"
+ ++ "I looked for: "
+ ++ T.unpack mod_name
+ ++ " receieved " ++ moduleNameString (moduleName mod)
+ ++ " but could not find: " ++ unitString mod_uid
+ ++ " in the UnitState."
+ ++ " Here is too much info for you: ")
+ $ pprWithUnitState u_st (ppr mod)
+ -- we are all good, add the uid to the package set, construct
+ -- its symbols on the fly and add the module to exported symbol
+ -- set
+ Just _ -> ( unresolved
+ , S.insert mod_uid pkgs
+ , S.insert (ExportedFun mod
+ $ mkSymb mod mod_name s) resolved
+ )
+ -- confusingly with the new ghc api we now use Module where we formerly had
+ -- Package, so this becomes Module -> Module -> Symbol where the first
+ -- Module is GHC's module type and the second is the SDep Moudle read as a
+ -- ShortText
+ -- FIXME: Jeff (2022,03): should mkSymb be in the UnitUtils?
+ mkSymb :: Module -> ShortText -> ShortText -> ShortText
+ mkSymb p m s =
+ "h$" <> zenc (T.pack (encodeModule hsc_env p) <> ":" <> m <> "." <> s)
+
+closePackageDeps :: UnitState -> Set UnitId -> Set UnitId
+closePackageDeps u_st pkgs
+ | S.size pkgs == S.size pkgs' = pkgs
+ | otherwise = closePackageDeps u_st pkgs'
+ where
+ pkgs' = pkgs `S.union` S.fromList (concatMap deps $ S.toList pkgs)
+ notFound = error "closePackageDeps: package not found"
+ deps :: UnitId -> [UnitId]
+ deps = unitDepends
+ . fromMaybe notFound
+ . lookupUnitId u_st
+
+-- read all dependency data from the to-be-linked files
+loadObjDeps :: [LinkedObj] -- ^ object files to link
+ -> IO (Map Module (Deps, DepsLocation), [LinkableUnit])
+loadObjDeps objs = prepareLoadedDeps <$> mapM readDepsFile' objs
+
+loadArchiveDeps :: GhcjsEnv
+ -> [FilePath]
+ -> IO ( Map Module (Deps, DepsLocation)
+ , [LinkableUnit]
+ )
+loadArchiveDeps env archives = modifyMVar (linkerArchiveDeps env) $ \m ->
+ case M.lookup archives' m of
+ Just r -> return (m, r)
+ Nothing -> loadArchiveDeps' archives >>= \r -> return (M.insert archives' r m, r)
+ where
+ archives' = S.fromList archives
+
+loadArchiveDeps' :: [FilePath]
+ -> IO ( Map Module (Deps, DepsLocation)
+ , [LinkableUnit]
+ )
+loadArchiveDeps' archives = do
+ archDeps <- forM archives $ \file -> do
+ (Ar.Archive entries) <- Ar.loadAr file
+ pure (mapMaybe (readEntry file) entries)
+ return (prepareLoadedDeps $ concat archDeps)
+ where
+ readEntry :: FilePath -> Ar.ArchiveEntry -> Maybe (Deps, DepsLocation)
+ readEntry ar_file ar_entry
+ | isObjFile (Ar.filename ar_entry) =
+ fmap (,ArchiveFile ar_file)
+ (readDepsMaybe (ar_file ++ ':':Ar.filename ar_entry) (BL.fromStrict $ Ar.filedata ar_entry))
+ | otherwise = Nothing
+
+
+isObjFile :: FilePath -> Bool
+isObjFile file = ".o" `isSuffixOf` file || -- vanilla
+ "_o" `isSuffixOf` file -- some "Way", like .p_o
+
+prepareLoadedDeps :: [(Deps, DepsLocation)]
+ -> ( Map Module (Deps, DepsLocation)
+ , [LinkableUnit]
+ )
+prepareLoadedDeps deps =
+ let req = concatMap (requiredUnits . fst) deps
+ depsMap = M.fromList $ map (\d -> (depsModule (fst d), d)) deps
+ in (depsMap, req)
+
+requiredUnits :: Deps -> [LinkableUnit]
+requiredUnits d = map (depsModule d,) (IS.toList $ depsRequired d)
+
+-- read dependencies from an object that might have already been into memory
+-- pulls in all Deps from an archive
+readDepsFile' :: LinkedObj -> IO (Deps, DepsLocation)
+readDepsFile' (ObjLoaded name bs) = pure . (,InMemory name bs) $
+ readDeps name bs
+readDepsFile' (ObjFile file) =
+ (,ObjectFile file) <$> readDepsFile file
+
+generateBase :: FilePath -> Base -> IO ()
+generateBase outDir b =
+ BL.writeFile (outDir </> "out.base.symbs") (renderBase b)
+
diff --git a/compiler/GHC/StgToJS/Linker/Types.hs b/compiler/GHC/StgToJS/Linker/Types.hs
new file mode 100644
index 0000000000..742c0926c1
--- /dev/null
+++ b/compiler/GHC/StgToJS/Linker/Types.hs
@@ -0,0 +1,581 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE PartialTypeSignatures #-}
+
+-----------------------------------------------------------------------------
+-- |
+-- Module : GHC.StgToJS.Linker.Types
+-- Copyright : (c) The University of Glasgow 2001
+-- License : BSD-style (see the file LICENSE)
+--
+-- Maintainer : Jeffrey Young <jeffrey.young@iohk.io>
+-- Luite Stegeman <luite.stegeman@iohk.io>
+-- Sylvain Henry <sylvain.henry@iohk.io>
+-- Josh Meredith <josh.meredith@iohk.io>
+-- Stability : experimental
+--
+-- A base bundle is used for incremental linking. it contains information about
+-- the symbols that have already been linked. These symbols are not included
+-- again in the incrementally linked program.
+--
+-- The Base data structure contains the information we need to do incremental
+-- linking against a base bundle.
+--
+-- base file format:
+-- - GHCJSBASE
+-- - [renamer state]
+-- - [linkedPackages]
+-- - [packages]
+-- - [modules]
+-- - [symbols]
+--
+-- The base contains a CompactorState for consistent renaming of private names
+-- and packed initialization of info tables and static closures.
+
+----------------------------- FIXMEs -------------------------------------------
+-- - Find a better data structure for linkerArchiveDeps
+-- - Specialize Functor instances for helpers
+-- - Better name for Base
+-- - Remove unsafeShowSDoc
+-- - Better implementation for Monoid JSLinkConfig
+-- - Should we use (Messages String) or parameterize over (Messages e) in ThRunner?
+-- - Fix name collision between LinkableUnit type in this module and the LinkableUnit type in StgToJS.Types
+-----------------------------------------------------------------------------
+
+module GHC.StgToJS.Linker.Types where
+
+import GHC.JS.Syntax
+import GHC.StgToJS.Object
+import GHC.StgToJS.Types (ClosureInfo, StaticInfo)
+
+import GHC.Unit.Types
+import GHC.Utils.Panic
+import GHC.Utils.Outputable hiding ((<>))
+import GHC.Data.ShortText (ShortText)
+import qualified GHC.Data.ShortText as T
+import GHC.Driver.Env.Types (HscEnv)
+import GHC.Types.Error (Messages)
+
+import Control.Monad
+
+import Data.Array
+import qualified Data.Binary as DB
+import qualified Data.Binary.Get as DB
+import qualified Data.Binary.Put as DB
+import Data.ByteString (ByteString)
+import qualified Data.ByteString.Lazy as BL
+import Data.Map.Strict (Map)
+import qualified Data.Map.Strict as M
+import Data.Set (Set)
+import qualified Data.Set as S
+import qualified Data.IntMap as I
+
+import Control.Concurrent.MVar
+import qualified Control.Exception as E
+
+import System.IO
+import System.Process
+
+import Prelude
+
+newLocals :: [Ident]
+newLocals = filter (not . isJsKeyword) $
+ map (TxtI . T.pack) $
+ map (:[]) chars0 ++ concatMap mkIdents [1..]
+ where
+ mkIdents n = [c0:cs | c0 <- chars0, cs <- replicateM n chars]
+ chars0 = ['a'..'z']++['A'..'Z']
+ chars = chars0++['0'..'9']
+
+renamedVars :: [Ident]
+renamedVars = map (\(TxtI xs) -> TxtI ("h$$"<>xs)) newLocals
+
+--------------------------------------------------------------------------------
+-- CompactorState
+--------------------------------------------------------------------------------
+
+-- FIXME: Jeff (2022,03): These maps should be newtyped so we cannot confuse
+-- them and thus accidently construct hard to understand bugs. When we newtype
+-- we should use deriving via to avoid boilerplate
+data CompactorState = CompactorState
+ { csIdentSupply :: [Ident] -- ^ ident supply for new names
+ , csNameMap :: !(M.Map ShortText Ident) -- ^ renaming mapping for internal names
+ , csEntries :: !(M.Map ShortText Int) -- ^ entry functions (these get listed in the metadata init
+ -- array)
+ , csNumEntries :: !Int
+ , csStatics :: !(M.Map ShortText Int) -- ^ mapping of global closure -> index in current block,
+ -- for static initialisation
+ , csNumStatics :: !Int -- ^ number of static entries
+ , csLabels :: !(M.Map ShortText Int) -- ^ non-Haskell JS labels
+ , csNumLabels :: !Int -- ^ number of labels
+ , csParentEntries :: !(M.Map ShortText Int) -- ^ entry functions we're not linking, offset where parent
+ -- gets [0..n], grandparent [n+1..k] etc
+ , csParentStatics :: !(M.Map ShortText Int) -- ^ objects we're not linking in base bundle
+ , csParentLabels :: !(M.Map ShortText Int) -- ^ non-Haskell JS labels in parent
+ , csStringTable :: !StringTable
+ } deriving (Show)
+
+data StringTable = StringTable
+ { stTableIdents :: !(Array Int ShortText)
+ , stOffsets :: !(M.Map ByteString (Int, Int)) -- ^ content of the table
+ , stIdents :: !(M.Map ShortText (Either Int Int)) -- ^ identifiers in the table
+ } deriving (Show)
+
+instance DB.Binary StringTable where
+ put (StringTable tids offs idents) = do
+ DB.put tids
+ DB.put (M.toList offs)
+ DB.put (M.toList idents)
+ get = StringTable <$> DB.get
+ <*> fmap M.fromList DB.get
+ <*> fmap M.fromList DB.get
+
+emptyStringTable :: StringTable
+emptyStringTable = StringTable (listArray (0,-1) []) M.empty M.empty
+
+-- FIXME: Jeff: (2022,03): Each of these helper functions carry a Functor f
+-- constraint. We should specialize these once we know how they are used
+entries :: Functor f
+ => (M.Map ShortText Int -> f (M.Map ShortText Int))
+ -> CompactorState
+ -> f CompactorState
+entries f cs = fmap (\x -> cs { csEntries = x }) (f $ csEntries cs)
+{-# INLINE entries #-}
+
+identSupply :: Functor f
+ => ([Ident] -> f [Ident])
+ -> CompactorState
+ -> f CompactorState
+identSupply f cs = fmap (\x -> cs { csIdentSupply = x }) (f $ csIdentSupply cs)
+{-# INLINE identSupply #-}
+
+labels :: Functor f
+ => (M.Map ShortText Int -> f (M.Map ShortText Int))
+ -> CompactorState
+ -> f CompactorState
+labels f cs = fmap (\x -> cs { csLabels = x }) (f $ csLabels cs)
+{-# INLINE labels #-}
+
+nameMap :: Functor f
+ => (M.Map ShortText Ident -> f (M.Map ShortText Ident))
+ -> CompactorState
+ -> f CompactorState
+nameMap f cs = fmap (\x -> cs { csNameMap = x }) (f $ csNameMap cs)
+{-# INLINE nameMap #-}
+
+numEntries :: Functor f
+ => (Int -> f Int)
+ -> CompactorState
+ -> f CompactorState
+numEntries f cs = fmap (\x -> cs { csNumEntries = x }) (f $ csNumEntries cs)
+{-# INLINE numEntries #-}
+
+numLabels :: Functor f
+ => (Int -> f Int)
+ -> CompactorState
+ -> f CompactorState
+numLabels f cs = fmap (\x -> cs { csNumLabels = x }) (f $ csNumLabels cs)
+{-# INLINE numLabels #-}
+
+numStatics :: Functor f
+ => (Int -> f Int)
+ -> CompactorState
+ -> f CompactorState
+numStatics f cs = fmap (\x -> cs { csNumStatics = x }) (f $ csNumStatics cs)
+{-# INLINE numStatics #-}
+
+parentEntries :: Functor f
+ => (M.Map ShortText Int -> f (M.Map ShortText Int))
+ -> CompactorState
+ -> f CompactorState
+parentEntries f cs = fmap (\x -> cs { csParentEntries = x }) (f $ csParentEntries cs)
+{-# INLINE parentEntries #-}
+
+parentLabels :: Functor f
+ => (M.Map ShortText Int -> f (M.Map ShortText Int))
+ -> CompactorState
+ -> f CompactorState
+parentLabels f cs = fmap (\x -> cs { csParentLabels = x }) (f $ csParentLabels cs)
+{-# INLINE parentLabels #-}
+
+parentStatics :: Functor f
+ => (M.Map ShortText Int -> f (M.Map ShortText Int))
+ -> CompactorState
+ -> f CompactorState
+parentStatics f cs = fmap (\x -> cs { csParentStatics = x }) (f $ csParentStatics cs)
+{-# INLINE parentStatics #-}
+
+statics :: Functor f
+ => (M.Map ShortText Int -> f (M.Map ShortText Int))
+ -> CompactorState
+ -> f CompactorState
+statics f cs = fmap (\x -> cs { csStatics = x }) (f $ csStatics cs)
+{-# INLINE statics #-}
+
+stringTable :: Functor f
+ => (StringTable -> f StringTable)
+ -> CompactorState
+ -> f CompactorState
+stringTable f cs = fmap (\x -> cs { csStringTable = x }) (f $ csStringTable cs)
+{-# INLINE stringTable #-}
+
+emptyCompactorState :: CompactorState
+emptyCompactorState = CompactorState renamedVars
+ mempty
+ mempty
+ 0
+ mempty
+ 0
+ mempty
+ 0
+ mempty
+ mempty
+ mempty
+ emptyStringTable
+
+-- | make a base state from a CompactorState: empty the current symbols sets,
+-- move everything to the parent
+makeCompactorParent :: CompactorState -> CompactorState
+makeCompactorParent (CompactorState is nm es nes ss nss ls nls pes pss pls sts)
+ = CompactorState is
+ nm
+ M.empty 0
+ M.empty 0
+ M.empty 0
+ (M.union (fmap (+nes) pes) es)
+ (M.union (fmap (+nss) pss) ss)
+ (M.union (fmap (+nls) pls) ls)
+ sts
+
+-- Helper functions used in Linker.Compactor. We live with some redundant code
+-- to avoid the lens mayhem in Gen2 GHCJS. TODO: refactor to avoid redundant
+-- code
+addStaticEntry :: ShortText -- ^ The static entry to add
+ -> CompactorState -- ^ the old state
+ -> CompactorState -- ^ the new state
+addStaticEntry new cs =
+ -- check if we have seen new before
+ let cur_statics = csStatics cs
+ go = M.lookup new cur_statics >> M.lookup new (csParentStatics cs)
+ in case go of
+ Just _ -> cs -- we have so return
+ Nothing -> let cnt = csNumStatics cs -- we haven't so do the business
+ newStatics = M.insert new cnt cur_statics
+ newCnt = cnt + 1
+ in cs {csStatics = newStatics, csNumStatics = newCnt}
+
+addEntry :: ShortText -- ^ The entry function to add
+ -> CompactorState -- ^ the old state
+ -> CompactorState -- ^ the new state
+addEntry new cs =
+ let cur_entries = csEntries cs
+ go = M.lookup new cur_entries >> M.lookup new (csParentEntries cs)
+ in case go of
+ Just _ -> cs
+ Nothing -> let cnt = csNumEntries cs
+ newEntries = M.insert new cnt cur_entries
+ newCnt = cnt + 1
+ in cs {csEntries = newEntries, csNumEntries = newCnt}
+
+addLabel :: ShortText -- ^ The label to add
+ -> CompactorState -- ^ the old state
+ -> CompactorState -- ^ the new state
+addLabel new cs =
+ let cur_lbls = csLabels cs
+ go = M.lookup new cur_lbls >> M.lookup new (csParentLabels cs)
+ in case go of
+ Just _ -> cs
+ Nothing -> let cnt = csNumLabels cs
+ newLabels = M.insert new cnt cur_lbls
+ newCnt = cnt + 1
+ in cs {csEntries = newLabels, csNumLabels = newCnt}
+--------------------------------------------------------------------------------
+-- Base
+--------------------------------------------------------------------------------
+
+-- FIXME: Jeff (2022,03): Pick a better name than Base, and should baseUnits be
+-- Set UnitId and basePkgs be [PackageId]? I'm unsure if this should hold
+-- UnitIds or UnitInfos or PackageIds or PackageNames
+-- | The Base bundle. Used for incremental linking it maintains the compactor
+-- state the base packages and units.
+data Base = Base { baseCompactorState :: CompactorState
+ , basePkgs :: [UnitId]
+ , baseUnits :: Set (Module, Int)
+ }
+
+instance DB.Binary Base where
+ get = getBase "<unknown file>"
+ put = putBase
+
+showBase :: Base -> String
+showBase b = unlines
+ [ "Base:"
+ , " packages: " ++ showSDocUnsafe (ppr (basePkgs b)) -- FIXME: Jeff (2022,03): Either use the sdoc context in the StgToJS
+ -- config or find a better way than showSDocUnsafe
+ , " number of units: " ++ show (S.size $ baseUnits b)
+ , " renaming table size: " ++
+ show (M.size . csNameMap . baseCompactorState $ b)
+ ]
+
+emptyBase :: Base
+emptyBase = Base emptyCompactorState [] S.empty
+
+putBase :: Base -> DB.Put
+putBase (Base cs packages funs) = do
+ DB.putByteString "GHCJSBASE"
+ DB.putLazyByteString versionTag
+ putCs cs
+ putList DB.put packages
+ -- putList putPkg pkgs
+ putList DB.put mods
+ putList putFun (S.toList funs)
+ where
+ pi :: Int -> DB.Put
+ pi = DB.putWord32le . fromIntegral
+ uniq :: Ord a => [a] -> [a]
+ uniq = S.toList . S.fromList -- FIXME: Ick! Just use the Set in the first place!
+ -- pkgs = uniq (map fst $ S.toList funs)
+ -- pkgsM = M.fromList (zip pkgs [(0::Int)..])
+ mods = uniq (map fst $ S.toList funs)
+ modsM = M.fromList (zip mods [(0::Int)..])
+ putList f xs = pi (length xs) >> mapM_ f xs
+ -- serialise the compactor state
+ putCs (CompactorState [] _ _ _ _ _ _ _ _ _ _ _) =
+ panic "putBase: putCs exhausted renamer symbol names"
+ putCs (CompactorState (ns:_) nm es _ ss _ ls _ pes pss pls sts) = do
+ DB.put ns
+ DB.put (M.toList nm)
+ DB.put (M.toList es)
+ DB.put (M.toList ss)
+ DB.put (M.toList ls)
+ DB.put (M.toList pes)
+ DB.put (M.toList pss)
+ DB.put (M.toList pls)
+ DB.put sts
+ -- putPkg mod = DB.put mod
+ -- fixme group things first
+ putFun (m,s) = --pi (pkgsM M.! p) >>
+ pi (modsM M.! m) >> DB.put s
+
+getBase :: FilePath -> DB.Get Base
+getBase file = getBase'
+ where
+ gi :: DB.Get Int
+ gi = fromIntegral <$> DB.getWord32le
+ getList f = DB.getWord32le >>= \n -> replicateM (fromIntegral n) f
+ getFun ms = (,) <$>
+ -- ((ps!) <$> gi) <*>
+ ((ms!) <$> gi) <*> DB.get
+ la xs = listArray (0, length xs - 1) xs
+ -- getPkg = DB.get
+ getCs = do
+ n <- DB.get
+ nm <- M.fromList <$> DB.get
+ es <- M.fromList <$> DB.get
+ ss <- M.fromList <$> DB.get
+ ls <- M.fromList <$> DB.get
+ pes <- M.fromList <$> DB.get
+ pss <- M.fromList <$> DB.get
+ pls <- M.fromList <$> DB.get
+ CompactorState (dropWhile (/=n) renamedVars)
+ nm
+ es
+ (M.size es)
+ ss
+ (M.size ss)
+ ls
+ (M.size ls)
+ pes
+ pss
+ pls <$> DB.get
+ getBase' = do
+ hdr <- DB.getByteString 9
+ when (hdr /= "GHCJSBASE")
+ (panic $ "getBase: invalid base file: " <> file)
+ vt <- DB.getLazyByteString (fromIntegral versionTagLength)
+ when (vt /= versionTag)
+ (panic $ "getBase: incorrect version: " <> file)
+ cs <- makeCompactorParent <$> getCs
+ linkedPackages <- getList DB.get
+ -- pkgs <- la <$> getList getPkg
+ mods <- la <$> getList DB.get
+ funs <- getList (getFun mods)
+ return (Base cs linkedPackages $ S.fromList funs)
+
+-- | lazily render the base metadata into a bytestring
+renderBase :: Base -> BL.ByteString
+renderBase = DB.runPut . putBase
+
+-- | lazily load base metadata from a file, see @UseBase@.
+loadBase :: FilePath -> IO Base
+loadBase file = DB.runGet (getBase file) <$> BL.readFile file
+
+-- | There are 3 ways the linker can use @Base@. We can not use it, and thus not
+-- do any incremental linking. We can load it from a file, where we assume that
+-- the symbols from the bundle and their dependencies have already been loaded.
+-- In this case We must save the CompactorState so that we can do consistent
+-- renaming. Or we can use a Base that is already in memory.
+--
+-- Incremental linking greatly improves link time and can also be used in
+-- multi-page or repl-type applications to serve most of the code from a static
+-- location, reloading only the small parts that are actually different.
+data UseBase = NoBase -- ^ don't use incremental linking
+ | BaseFile FilePath -- ^ load base from file
+ | BaseState Base -- ^ use this base
+
+instance Show UseBase where
+ show NoBase = "NoBase"
+ show BaseFile {} = "BaseFile"
+ show BaseState {} = "BaseState"
+
+instance Monoid UseBase where
+ mempty = NoBase
+
+instance Semigroup UseBase where
+ x <> NoBase = x
+ _ <> x = x
+
+--------------------------------------------------------------------------------
+-- Linker Config
+-- TODO: Jeff: (2022,03): Move to separate module? Linker.Config? Or Merge with StgToJSConfig?
+--------------------------------------------------------------------------------
+
+data JSLinkConfig =
+ JSLinkConfig { lcNativeExecutables :: Bool
+ , lcNativeToo :: Bool
+ , lcBuildRunner :: Bool
+ , lcNoJSExecutables :: Bool
+ , lcNoHsMain :: Bool
+ , lcStripProgram :: Maybe FilePath
+ , lcLogCommandLine :: Maybe FilePath
+ , lcGhc :: Maybe FilePath
+ , lcOnlyOut :: Bool
+ , lcNoRts :: Bool
+ , lcNoStats :: Bool
+ , lcGenBase :: Maybe Module -- ^ base module
+ , lcUseBase :: UseBase
+ , lcLinkJsLib :: Maybe String
+ , lcJsLibOutputDir :: Maybe FilePath
+ , lcJsLibSrcs :: [FilePath]
+ , lcDedupe :: Bool
+ }
+
+usingBase :: JSLinkConfig -> Bool
+usingBase s | NoBase <- lcUseBase s = False
+ | otherwise = True
+
+-- | we generate a runnable all.js only if we link a complete application,
+-- no incremental linking and no skipped parts
+generateAllJs :: JSLinkConfig -> Bool
+generateAllJs s
+ | NoBase <- lcUseBase s = not (lcOnlyOut s) && not (lcNoRts s)
+ | otherwise = False
+
+{-
+ -- FIXME: Jeff (2022,03): This instance is supposed to capture overriding
+ -- settings, where one group comes from the environment (env vars, config
+ -- files) and the other from the command line. (env `mappend` cmdLine) should
+ -- give the combined settings, but it doesn't work very well. find something
+ -- better.
+ -}
+instance Monoid JSLinkConfig where
+ -- FIXME: Jeff (2022,03): Adding no hs main to config, should False be default
+ -- here?
+ mempty = JSLinkConfig False False False False False
+ Nothing Nothing Nothing False
+ False False Nothing NoBase
+ Nothing Nothing mempty False
+
+instance Semigroup JSLinkConfig where
+ (<>) (JSLinkConfig ne1 nn1 bc1 nj1 noHs1 sp1 lc1 gh1 oo1 nr1 ns1 gb1 ub1 ljsl1 jslo1 jslsrc1 dd1)
+ (JSLinkConfig ne2 nn2 bc2 nj2 noHs2 sp2 lc2 gh2 oo2 nr2 ns2 gb2 ub2 ljsl2 jslo2 jslsrc2 dd2) =
+ JSLinkConfig (ne1 || ne2)
+ (nn1 || nn2)
+ (bc1 || bc2)
+ (nj1 || nj2)
+ (noHs1 || noHs2)
+ (sp1 `mplus` sp2)
+ (lc1 `mplus` lc2)
+ (gh1 `mplus` gh2)
+ (oo1 || oo2)
+ (nr1 || nr2)
+ (ns1 || ns2)
+ (gb1 `mplus` gb2)
+ (ub1 <> ub2)
+ (ljsl1 <> ljsl2)
+ (jslo1 <> jslo2)
+ (jslsrc1 <> jslsrc2)
+ (dd1 || dd2)
+
+--------------------------------------------------------------------------------
+-- Linker Environment
+-- TODO: Jeff: (2022,03): Move to separate module, same as Config?
+--------------------------------------------------------------------------------
+-- | A LinkableUnit is a pair of a module and the index of the block in the
+-- object file
+-- FIXME: Jeff: (2022,03): Refactor to avoid name collision between
+-- StgToJS.Linker.Types.LinkableUnit and StgToJS.Types.LinkableUnit
+type LinkableUnit = (Module, Int)
+
+type LinkedUnit = (JStat, [ClosureInfo], [StaticInfo])
+
+-- TODO: Jeff: (2022,03): Where to move LinkedObj
+-- | An object file that's either already in memory (with name) or on disk
+data LinkedObj = ObjFile FilePath -- ^ load from this file
+ | ObjLoaded String BL.ByteString -- ^ already loaded: description and payload
+ deriving (Eq, Ord, Show)
+
+data GhcjsEnv = GhcjsEnv
+ { compiledModules :: MVar (Map Module ByteString) -- ^ keep track of already compiled modules so we don't compile twice for dynamic-too
+ , thRunners :: MVar THRunnerState -- (Map String ThRunner) -- ^ template haskell runners
+ , thSplice :: MVar Int
+ -- FIXME: Jeff a Map keyed on a Set is going to be quite costly. The Eq
+ -- instance over Sets _can_ be fast if the sets are different sizes, this
+ -- would be O(1), however if they are equal size then we incur a costly
+ -- converstion to an Ascending List O(n) and then perform the element wise
+ -- check hence O(mn) where m is the cost of the element check. Thus, we should
+ -- fix this data structure and use something more efficient, HashMap if
+ -- available, IntMap if possible. Nested maps, in particular, seem like a
+ -- design smell.
+ , linkerArchiveDeps :: MVar (Map (Set FilePath)
+ (Map Module (Deps, DepsLocation)
+ , [LinkableUnit]
+ )
+ )
+ , pluginState :: MVar (Maybe HscEnv)
+ }
+
+data THRunnerState = THRunnerState
+ { activeRunners :: Map String THRunner
+ , idleRunners :: [THRunner]
+ }
+
+data THRunner =
+ THRunner { thrProcess :: ProcessHandle
+ , thrHandleIn :: Handle
+ , thrHandleErr :: Handle
+ , thrBase :: MVar Base
+ -- FIXME: Jeff (2022,03): Is String the right type here? I chose it
+ -- because it was easy but I am unsure what the needs of its consumer
+ -- are.
+ , thrRecover :: MVar [Messages String]
+ , thrExceptions :: MVar (I.IntMap E.SomeException)
+ }
+
+consIdleRunner :: THRunner -> THRunnerState -> THRunnerState
+consIdleRunner r s = s { idleRunners = r : idleRunners s }
+
+unconsIdleRunner :: THRunnerState -> Maybe (THRunner, THRunnerState)
+unconsIdleRunner s
+ | (r:xs) <- idleRunners s = Just (r, s { idleRunners = xs })
+ | otherwise = Nothing
+
+deleteActiveRunner :: String -> THRunnerState -> THRunnerState
+deleteActiveRunner m s =
+ s { activeRunners = M.delete m (activeRunners s) }
+
+insertActiveRunner :: String -> THRunner -> THRunnerState -> THRunnerState
+insertActiveRunner m runner s =
+ s { activeRunners = M.insert m runner (activeRunners s) }
+
+emptyTHRunnerState :: THRunnerState
+emptyTHRunnerState = THRunnerState mempty mempty
diff --git a/compiler/GHC/StgToJS/Linker/Utils.hs b/compiler/GHC/StgToJS/Linker/Utils.hs
new file mode 100644
index 0000000000..81598f07a4
--- /dev/null
+++ b/compiler/GHC/StgToJS/Linker/Utils.hs
@@ -0,0 +1,101 @@
+-----------------------------------------------------------------------------
+-- |
+-- Module : GHC.StgToJS.Linker.Utils
+-- Copyright : (c) The University of Glasgow 2001
+-- License : BSD-style (see the file LICENSE)
+--
+-- Maintainer : Jeffrey Young <jeffrey.young@iohk.io>
+-- Luite Stegeman <luite.stegeman@iohk.io>
+-- Sylvain Henry <sylvain.henry@iohk.io>
+-- Josh Meredith <josh.meredith@iohk.io>
+-- Stability : experimental
+--
+-- Various utilies used in the JS Linker
+--
+----------------------------- FIXMEs -------------------------------------------
+-- - implement Windows check for @addExeExtension@
+-- - resolve macOS comment in @writeBinaryFile@
+-- - remove redundant function @jsExeFileName@
+-----------------------------------------------------------------------------
+
+module GHC.StgToJS.Linker.Utils where
+
+import System.FilePath
+import qualified Data.ByteString as B
+import Data.ByteString (ByteString)
+import System.IO (withBinaryFile, IOMode(WriteMode))
+
+import GHC.Driver.Session
+import GHC.Settings.Config (cProjectVersion)
+
+import GHC.Data.ShortText
+import GHC.Unit.State
+import GHC.Unit.Types
+import GHC.Utils.Panic
+
+import Prelude
+import GHC.Platform
+import Data.List (isPrefixOf)
+
+addExeExtension :: FilePath -> FilePath
+addExeExtension = id
+{- FIXME: Jeff (2022,03): with FIXME: after Windows FIXME in Linker, fix this too
+ | Platform.isWindows = (<.> "exe")
+ | otherwise = id
+-}
+
+{-
+ macOS has trouble writing more than 2GiB at once to a file
+ (tested with 10.14.6), and the base library doesn't work around this
+ problem yet (tested with GHC 8.6), so we work around it here.
+
+ in this workaround we write a binary file in chunks of 1 GiB
+ FIXME: Jeff (2022,03): Is this still true?
+ -}
+writeBinaryFile :: FilePath -> ByteString -> IO ()
+writeBinaryFile file bs =
+ withBinaryFile file WriteMode $ \h -> mapM_ (B.hPut h) (chunks bs)
+ where
+ -- split the ByteString into a nonempty list of chunks of at most 1GiB
+ chunks :: ByteString -> [ByteString]
+ chunks b =
+ let (b1, b2) = B.splitAt 1073741824 b
+ in b1 : if B.null b1 then [] else chunks b2
+
+getInstalledPackageLibDirs :: UnitState -> UnitId -> [FilePath]
+getInstalledPackageLibDirs us = fmap unpack . maybe mempty unitLibraryDirs . lookupUnitId us
+
+getInstalledPackageHsLibs :: UnitState -> UnitId -> [String]
+getInstalledPackageHsLibs us = fmap unpack . maybe mempty unitLibraries . lookupUnitId us
+
+tryReadShimFile :: DynFlags -> FilePath -> IO B.ByteString
+tryReadShimFile = panic "tryReadShimFile: Shims not yet implemented!"
+
+readShimsArchive :: DynFlags -> FilePath -> IO B.ByteString
+readShimsArchive = panic "readShimsArchive: Shims not yet implemented!"
+
+getCompilerVersion :: String
+getCompilerVersion = cProjectVersion
+
+jsexeExtension :: String
+jsexeExtension = "jsexe"
+
+-- FIXME: Jeff (2022,04): remove this function since it is a duplicate of
+-- GHC.Linker.Static.Utils.exeFileName
+jsExeFileName :: DynFlags -> FilePath
+jsExeFileName dflags
+ | Just s <- outputFile_ dflags =
+ -- unmunge the extension
+ let s' = dropPrefix "js_" (drop 1 $ takeExtension s)
+ -- FIXME: add this check when support for Windows check is added
+ in if Prelude.null s' -- || (Platform.isWindows && map toLower s' == "exe")
+ then dropExtension s <.> jsexeExtension
+ else dropExtension s <.> s'
+ | otherwise =
+ if platformOS (targetPlatform dflags) == OSMinGW32
+ then "main.jsexe"
+ else "a.jsexe"
+ where
+ dropPrefix prefix xs
+ | prefix `isPrefixOf` xs = drop (length prefix) xs
+ | otherwise = xs
diff --git a/compiler/GHC/StgToJS/Object.hs b/compiler/GHC/StgToJS/Object.hs
index bb986926b1..11e62e2eb1 100644
--- a/compiler/GHC/StgToJS/Object.hs
+++ b/compiler/GHC/StgToJS/Object.hs
@@ -1,32 +1,53 @@
-{-# LANGUAGE StandaloneDeriving #-}
-{-# LANGUAGE OverloadedStrings #-}
-{-# LANGUAGE LambdaCase #-}
-{-# LANGUAGE TypeSynonymInstances #-}
-{-# LANGUAGE FlexibleInstances #-}
-{-# LANGUAGE TupleSections #-}
-{-# LANGUAGE ScopedTypeVariables #-}
-{-# LANGUAGE DeriveGeneric #-}
-{-# LANGUAGE Rank2Types #-}
-{-# LANGUAGE GeneralizedNewtypeDeriving #-}
-
--- | Serialization/deserialization of binary .o files for the JavaScript backend
+{-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE Rank2Types #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE TupleSections #-}
+
+-- only for DB.Binary instances on Module see FIXME below
+{-# OPTIONS_GHC -fno-warn-orphans #-}
+
+-----------------------------------------------------------------------------
+-- |
+-- Module : GHC.StgToJS.Object
+-- Copyright : (c) The University of Glasgow 2001
+-- License : BSD-style (see the file LICENSE)
--
--- The .o files contain dependency information and generated code.
+-- Maintainer : Sylvain Henry <sylvain.henry@iohk.io>
+-- Jeffrey Young <jeffrey.young@iohk.io>
+-- Luite Stegeman <luite.stegeman@iohk.io>
+-- Josh Meredith <josh.meredith@iohk.io>
+-- Stability : experimental
--
--- All strings are mapped to a central string table, which helps reduce
--- file size and gives us efficient hash consing on read
+-- Serialization/deserialization of binary .o files for the JavaScript backend
+-- The .o files contain dependency information and generated code.
+-- All strings are mapped to a central string table, which helps reduce
+-- file size and gives us efficient hash consing on read
--
--- Binary intermediate JavaScript object files:
--- serialized [Text] -> ([ClosureInfo], JStat) blocks
---
--- file layout:
--- - header ["GHCJSOBJ", length of symbol table, length of dependencies, length of index]
--- - compiler version tag
--- - symbol table
--- - dependency info
--- - closureinfo index
--- - closureinfo data (offsets described by index)
+-- Binary intermediate JavaScript object files:
+-- serialized [Text] -> ([ClosureInfo], JStat) blocks
--
+-- file layout:
+-- - header ["GHCJSOBJ", length of symbol table, length of dependencies, length of index]
+-- - compiler version tag
+-- - symbol table
+-- - dependency info
+-- - closureinfo index
+-- - closureinfo data (offsets described by index)
+
+-- FIXME: Jeff (2022,03): There are orphan instances for DB.Binary Module and
+-- ModuleName. These are needed in StgToJS.Linker.Types for @Base@ serialization
+-- in @putBase@. We end up in this situation because Base now holds a @Module@
+-- type instead of GHCJS's previous @Package@ type. In addition to this GHC uses
+-- GHC.Utils.Binary for binary instances rather than Data.Binary (even though
+-- Data.Binary is a boot lib) so to fix the situation we must:
+-- - 1. Choose to use GHC.Utils.Binary or Data.Binary
+-- - 2. Remove Objectable since this is redundant
+-- - 3. Adapt the Linker types, like Base to the new Binary methods
+-----------------------------------------------------------------------------
+
module GHC.StgToJS.Object
( object
, object'
@@ -47,7 +68,11 @@ module GHC.StgToJS.Object
, Header(..), getHeader, moduleNameTag
, SymbolTable
, ObjUnit (..)
- , Deps (..), BlockDeps (..)
+ -- FIXME: Jeff (2022,03): These exports are just for Base use in Linker.Types
+ , Objectable(..)
+ , PutS
+ -- end exports for Linker.Types
+ , Deps (..), BlockDeps (..), DepsLocation (..)
, ExpFun (..), ExportedFun (..)
, versionTag, versionTagLength
)
@@ -112,9 +137,15 @@ data Deps = Deps
{ depsModule :: !Module -- ^ module
, depsRequired :: !IntSet -- ^ blocks that always need to be linked when this object is loaded (e.g. everything that contains initializer code or foreign exports)
, depsHaskellExported :: !(Map ExportedFun Int) -- ^ exported Haskell functions -> block
- , depsBlocks :: !(Array Int BlockDeps) -- ^ info about each block
+ , depsBlocks :: !(Array Int BlockDeps) -- ^ info about each block
} deriving (Generic)
+-- | Where are the dependencies
+data DepsLocation = ObjectFile FilePath -- ^ In an object file at path
+ | ArchiveFile FilePath -- ^ In a Ar file at path
+ | InMemory String ByteString -- ^ In memory
+ deriving (Eq, Show)
+
data BlockDeps = BlockDeps
{ blockBlockDeps :: [Int] -- ^ dependencies on blocks in this object
, blockFunDeps :: [ExportedFun] -- ^ dependencies on exported symbols in other objects
@@ -368,7 +399,7 @@ readDepsEither name bs =
-- | call with contents of the file
-readDeps :: String -> ByteString -> Deps
+readDeps :: String -> B.ByteString -> Deps
readDeps name bs =
case readDepsEither name bs of
Left err -> error ("readDeps: not a valid GHCJS object: " ++ name ++ "\n " ++ err)
@@ -736,6 +767,34 @@ instance Objectable Module where
put (Module unit mod_name) = put unit >> put mod_name
get = Module <$> get <*> get
+instance DB.Binary Module where
+ put (Module unit mod_name) = DB.put unit >> DB.put mod_name
+ get = Module <$> DB.get <*> DB.get
+
+instance DB.Binary ModuleName where
+ put (ModuleName fs) = DB.put fs
+ get = ModuleName <$> DB.get
+
+instance DB.Binary Unit where
+ put = \case
+ RealUnit (Definite uid) -> DB.put (0 :: Int) >> DB.put uid
+ VirtUnit uid -> DB.put (1 :: Int) >> DB.put uid
+ HoleUnit -> DB.put (2 :: Int)
+ get = DB.get >>= \case
+ (0 :: Int) -> RealUnit . Definite <$> DB.get
+ 1 -> VirtUnit <$> DB.get
+ _ -> pure HoleUnit
+
+instance DB.Binary UnitId where
+ put (UnitId fs) = DB.put fs
+ get = UnitId <$> DB.get
+
+instance DB.Binary InstantiatedUnit where
+ put indef = do
+ DB.put (instUnitInstanceOf indef)
+ DB.put (instUnitInsts indef)
+ get = mkInstantiatedUnitSorted <$> DB.get <*> DB.get
+
instance Objectable ModuleName where
put (ModuleName fs) = put fs
get = ModuleName <$> get
@@ -754,6 +813,10 @@ instance Objectable FastString where
put fs = put (unpackFS fs)
get = mkFastString <$> get
+instance DB.Binary FastString where
+ put fs = DB.put (unpackFS fs)
+ get = mkFastString <$> DB.get
+
instance Objectable UnitId where
put (UnitId fs) = put fs
get = UnitId <$> get
diff --git a/compiler/GHC/StgToJS/Printer.hs b/compiler/GHC/StgToJS/Printer.hs
new file mode 100644
index 0000000000..21e0dfa252
--- /dev/null
+++ b/compiler/GHC/StgToJS/Printer.hs
@@ -0,0 +1,165 @@
+{-# LANGUAGE OverloadedStrings #-}
+
+-----------------------------------------------------------------------------
+-- |
+-- Module : GHC.StgToJS.Printer
+-- Copyright : (c) The University of Glasgow 2001
+-- License : BSD-style (see the file LICENSE)
+--
+-- Maintainer : Jeffrey Young <jeffrey.young@iohk.io>
+-- Luite Stegeman <luite.stegeman@iohk.io>
+-- Sylvain Henry <sylvain.henry@iohk.io>
+-- Stability : experimental
+--
+-- Custom prettyprinter for JS AST uses the JS PPr module for most of
+-- the work
+--
+--
+-----------------------------------------------------------------------------
+module GHC.StgToJS.Printer
+ ( pretty
+ , ghcjsRenderJs
+ , prettyBlock
+ ) where
+
+
+import GHC.JS.Syntax
+import GHC.JS.Ppr
+
+import qualified GHC.Data.ShortText as T
+import GHC.Utils.Ppr as PP
+
+import qualified Data.Map as M
+
+import Data.Char (isAlpha,isDigit)
+
+import GHC.Prelude
+
+pretty :: JStat -> Doc
+pretty = jsToDocR ghcjsRenderJs
+
+ghcjsRenderJs :: RenderJs
+ghcjsRenderJs = defaultRenderJs { renderJsV = ghcjsRenderJsV
+ , renderJsS = ghcjsRenderJsS
+ }
+
+-- attempt to resugar some of the common constructs
+ghcjsRenderJsS :: RenderJs -> JStat -> Doc
+ghcjsRenderJsS r (BlockStat xs) = prettyBlock r (flattenBlocks xs)
+ghcjsRenderJsS r s = renderJsS defaultRenderJs r s
+
+-- don't quote keys in our object literals, so closure compiler works
+ghcjsRenderJsV :: RenderJs -> JVal -> Doc
+ghcjsRenderJsV r (JHash m)
+ | M.null m = text "{}"
+ | otherwise = braceNest . PP.fsep . punctuate comma .
+ map (\(x,y) -> quoteIfRequired x <> PP.colon <+> jsToDocR r y) $ M.toList m
+ where
+ quoteIfRequired :: T.ShortText -> Doc
+ quoteIfRequired x
+ | isUnquotedKey x' = text x'
+ | otherwise = PP.squotes (text x')
+ -- FIXME: Jeff (2022,03): remove the deserialization to String. We are only
+ -- converting from ShortText to String here to call @all@ and @tail@.
+ where x' = T.unpack x
+
+ isUnquotedKey :: String -> Bool
+ isUnquotedKey x | null x = False
+ | all isDigit x = True
+ | otherwise = validFirstIdent (head x)
+ && all validOtherIdent (tail x)
+
+
+ -- fixme, this will quote some idents that don't really need to be quoted
+ validFirstIdent c = c == '_' || c == '$' || isAlpha c
+ validOtherIdent c = isAlpha c || isDigit c
+ghcjsRenderJsV r v = renderJsV defaultRenderJs r v
+
+prettyBlock :: RenderJs -> [JStat] -> Doc
+prettyBlock r xs = vcat $ map addSemi (prettyBlock' r xs)
+
+-- recognize common patterns in a block and convert them to more idiomatic/concise javascript
+prettyBlock' :: RenderJs -> [JStat] -> [Doc]
+-- resugar for loops with/without var declaration
+prettyBlock' r ( (DeclStat i)
+ : (AssignStat (ValExpr (JVar i')) v0)
+ : (WhileStat False p (BlockStat bs))
+ : xs
+ )
+ | i == i' && not (null flat) && isForUpdStat (last flat)
+ = mkFor r True i v0 p (last flat) (init flat) : prettyBlock' r xs
+ where
+ flat = flattenBlocks bs
+prettyBlock' r ( (AssignStat (ValExpr (JVar i)) v0)
+ : (WhileStat False p (BlockStat bs))
+ : xs
+ )
+ | not (null flat) && isForUpdStat (last flat)
+ = mkFor r False i v0 p (last flat) (init flat) : prettyBlock' r xs
+ where
+ flat = flattenBlocks bs
+
+-- global function (does not preserve semantics but works for GHCJS)
+prettyBlock' r ( (DeclStat i)
+ : (AssignStat (ValExpr (JVar i')) (ValExpr (JFunc is b)))
+ : xs
+ )
+ | i == i' = (text "function" <+> jsToDocR r i
+ <> parens (fsep . punctuate comma . map (jsToDocR r) $ is)
+ $$ braceNest' (jsToDocR r b)
+ ) : prettyBlock' r xs
+-- declare/assign
+prettyBlock' r ( (DeclStat i)
+ : (AssignStat (ValExpr (JVar i')) v)
+ : xs
+ )
+ | i == i' = (text "var" <+> jsToDocR r i <+> char '=' <+> jsToDocR r v) : prettyBlock' r xs
+
+-- modify/assign operators (fixme this should be more general, but beware of side effects like PPostExpr)
+prettyBlock' r ( (AssignStat (ValExpr (JVar i)) (InfixExpr AddOp (ValExpr (JVar i')) (ValExpr (JInt 1))))
+ : xs
+ )
+ | i == i' = (text "++" <> jsToDocR r i) : prettyBlock' r xs
+prettyBlock' r ( (AssignStat (ValExpr (JVar i)) (InfixExpr SubOp (ValExpr (JVar i')) (ValExpr (JInt 1))))
+ : xs
+ )
+ | i == i' = (text "--" <> jsToDocR r i) : prettyBlock' r xs
+prettyBlock' r ( (AssignStat (ValExpr (JVar i)) (InfixExpr AddOp (ValExpr (JVar i')) e))
+ : xs
+ )
+ | i == i' = (jsToDocR r i <+> text "+=" <+> jsToDocR r e) : prettyBlock' r xs
+prettyBlock' r ( (AssignStat (ValExpr (JVar i)) (InfixExpr SubOp (ValExpr (JVar i')) e))
+ : xs
+ )
+ | i == i' = (jsToDocR r i <+> text "-=" <+> jsToDocR r e) : prettyBlock' r xs
+
+
+prettyBlock' r (x:xs) = jsToDocR r x : prettyBlock' r xs
+prettyBlock' _ [] = []
+
+-- build the for block
+mkFor :: RenderJs -> Bool -> Ident -> JExpr -> JExpr -> JStat -> [JStat] -> Doc
+mkFor r decl i v0 p s1 sb = text "for" <> forCond <+> braceNest'' (jsToDocR r $ BlockStat sb)
+ where
+ c0 | decl = text "var" <+> jsToDocR r i <+> char '=' <+> jsToDocR r v0
+ | otherwise = jsToDocR r i <+> char '=' <+> jsToDocR r v0
+ forCond = parens $ hcat $ interSemi
+ [ c0
+ , jsToDocR r p
+ , parens (jsToDocR r s1)
+ ]
+
+-- check if a statement is suitable to be converted to something in the for(;;x) position
+isForUpdStat :: JStat -> Bool
+isForUpdStat UOpStat {} = True
+isForUpdStat AssignStat {} = True
+isForUpdStat ApplStat {} = True
+isForUpdStat _ = False
+
+interSemi :: [Doc] -> [Doc]
+interSemi [] = [PP.empty]
+interSemi [s] = [s]
+interSemi (x:xs) = x <> text ";" : interSemi xs
+
+addSemi :: Doc -> Doc
+addSemi x = x <> text ";"
diff --git a/compiler/GHC/StgToJS/Rts/Rts.hs b/compiler/GHC/StgToJS/Rts/Rts.hs
index 91dd117e78..57f24ea86c 100644
--- a/compiler/GHC/StgToJS/Rts/Rts.hs
+++ b/compiler/GHC/StgToJS/Rts/Rts.hs
@@ -38,6 +38,7 @@ import GHC.StgToJS.Apply
import GHC.StgToJS.Closure
import GHC.StgToJS.Heap
import GHC.StgToJS.Monad
+import GHC.StgToJS.Printer
import GHC.StgToJS.Profiling
import GHC.StgToJS.Regs
import GHC.StgToJS.Types
@@ -380,6 +381,12 @@ rtsDecls = jsSaturate (Just "h$RTSD") $
, declRegs
, declRets]
+rtsText :: StgToJSConfig -> T.ShortText
+rtsText = T.pack . show . pretty . rts
+
+rtsDeclsText :: T.ShortText
+rtsDeclsText = T.pack . show . pretty $ rtsDecls
+
rts :: StgToJSConfig -> JStat
rts = jsSaturate (Just "h$RTS") . rts'
diff --git a/compiler/GHC/StgToJS/Types.hs b/compiler/GHC/StgToJS/Types.hs
index 02b09a0a7d..1c32e60555 100644
--- a/compiler/GHC/StgToJS/Types.hs
+++ b/compiler/GHC/StgToJS/Types.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
@@ -26,9 +27,12 @@ import GHC.Data.ShortText
import GHC.Unit.Module
import qualified Data.Map as M
-import Data.Set (Set)
+import Data.Set (Set)
import qualified Data.ByteString as BS
-import Data.Monoid
+import Data.Monoid
+import Data.Typeable (Typeable)
+import GHC.Generics (Generic)
+import Control.DeepSeq
type G = State GenState
@@ -79,14 +83,18 @@ data ClosureInfo = ClosureInfo
, ciType :: CIType -- ^ type of the object, with extra info where required
, ciStatic :: CIStatic -- ^ static references of this object
}
- deriving (Eq, Ord)
+ deriving stock (Eq, Ord, Show, Generic)
+
+instance NFData ClosureInfo
data CIRegs
= CIRegsUnknown
| CIRegs { ciRegsSkip :: Int -- ^ unused registers before actual args start
, ciRegsTypes :: [VarType] -- ^ args
}
- deriving (Eq, Ord)
+ deriving stock (Eq, Ord, Show, Generic)
+
+instance NFData CIRegs
data CILayout
= CILayoutVariable -- layout stored in object itself, first position from the start
@@ -97,7 +105,9 @@ data CILayout
{ layoutSize :: !Int -- closure size in array positions, including entry
, layout :: [VarType]
}
- deriving (Eq, Ord)
+ deriving stock (Eq, Ord, Show, Generic)
+
+instance NFData CILayout
data CIType
= CIFun { citArity :: !Int -- ^ function arity
@@ -108,15 +118,16 @@ data CIType
| CIPap
| CIBlackhole
| CIStackFrame
- deriving (Eq, Ord)
+ deriving stock (Eq, Ord, Show, Generic)
+
+instance NFData CIType
-- | Static references that must be kept alive
newtype CIStatic = CIStaticRefs { staticRefs :: [ShortText] }
- deriving stock (Eq, Ord)
- deriving newtype (Semigroup, Monoid)
+ deriving stock (Eq, Ord, Generic)
+ deriving newtype (Semigroup, Monoid, Show)
--- TODO: Jeff (2022,03): Make ToJExpr derivable? will need Default Signatures
--- and depends on the increase in compilation time
+instance NFData CIStatic
-- | static refs: array = references, null = nothing to report
-- note: only works after all top-level objects have been created
@@ -136,7 +147,9 @@ data VarType
| RtsObjV -- some RTS object from GHCJS (for example TVar#, MVar#, MutVar#, Weak#)
| ObjV -- some JS object, user supplied, be careful around these, can be anything
| ArrV -- boxed array
- deriving (Eq, Ord, Enum, Bounded)
+ deriving stock (Eq, Ord, Enum, Bounded, Show, Generic)
+
+instance NFData VarType
instance ToJExpr VarType where
toJExpr = toJExpr . fromEnum
@@ -172,7 +185,9 @@ data StaticInfo = StaticInfo
{ siVar :: !ShortText -- ^ global object
, siVal :: !StaticVal -- ^ static initialization
, siCC :: !(Maybe Ident) -- ^ optional CCS name
- }
+ } deriving stock (Eq, Ord, Show, Typeable, Generic)
+
+instance NFData StaticInfo
data StaticVal
= StaticFun !ShortText [StaticArg]
@@ -186,7 +201,9 @@ data StaticVal
-- ^ regular datacon app
| StaticList [StaticArg] (Maybe ShortText)
-- ^ list initializer (with optional tail)
- deriving (Eq, Ord)
+ deriving stock (Eq, Ord, Show, Generic)
+
+instance NFData StaticVal
data StaticUnboxed
= StaticUnboxedBool !Bool
@@ -194,13 +211,17 @@ data StaticUnboxed
| StaticUnboxedDouble !SaneDouble
| StaticUnboxedString !BS.ByteString
| StaticUnboxedStringOffset !BS.ByteString
- deriving (Eq, Ord)
+ deriving stock (Eq, Ord, Show, Generic)
+
+instance NFData StaticUnboxed
data StaticArg
= StaticObjArg !ShortText -- ^ reference to a heap object
| StaticLitArg !StaticLit -- ^ literal
| StaticConArg !ShortText [StaticArg] -- ^ unfloated constructor
- deriving (Eq, Ord, Show)
+ deriving stock (Eq, Ord, Show, Generic)
+
+instance NFData StaticArg
instance Outputable StaticArg where
ppr x = text (show x)
@@ -213,11 +234,23 @@ data StaticLit
| StringLit !ShortText
| BinLit !BS.ByteString
| LabelLit !Bool !ShortText -- ^ is function pointer, label (also used for string / binary init)
- deriving (Eq, Ord, Show)
+ deriving (Eq, Ord, Show, Generic)
+
+instance NFData StaticLit
instance Outputable StaticLit where
ppr x = text (show x)
+
+instance ToJExpr StaticLit where
+ toJExpr (BoolLit b) = toJExpr b
+ toJExpr (IntLit i) = toJExpr i
+ toJExpr NullLit = null_
+ toJExpr (DoubleLit d) = toJExpr (unSaneDouble d)
+ toJExpr (StringLit t) = app (pack "h$str") [toJExpr t]
+ toJExpr (BinLit b) = app (pack "h$rstr") [toJExpr (map toInteger (BS.unpack b))]
+ toJExpr (LabelLit _isFun lbl) = var lbl
+
data ForeignJSRef = ForeignJSRef
{ foreignRefSrcSpan :: !ShortText
, foreignRefPattern :: !ShortText
@@ -225,7 +258,7 @@ data ForeignJSRef = ForeignJSRef
, foreignRefCConv :: !CCallConv
, foreignRefArgs :: ![ShortText]
, foreignRefResult :: !ShortText
- }
+ } deriving stock (Generic)
-- | data used to generate one ObjUnit in our object file
data LinkableUnit = LinkableUnit
@@ -268,8 +301,6 @@ data ExprResult
newtype ExprValData = ExprValData [JExpr]
deriving newtype (Eq, Ord, Show)
-
-
-- closure types
data ClosureType = Thunk | Fun | Pap | Con | Blackhole | StackFrame
deriving (Show, Eq, Ord, Enum, Bounded)
diff --git a/compiler/GHC/StgToJS/UnitUtils.hs b/compiler/GHC/StgToJS/UnitUtils.hs
index 61886f43f0..75c3c616d5 100644
--- a/compiler/GHC/StgToJS/UnitUtils.hs
+++ b/compiler/GHC/StgToJS/UnitUtils.hs
@@ -4,13 +4,22 @@ module GHC.StgToJS.UnitUtils
( unitModuleString
, moduleGlobalSymbol
, moduleExportsSymbol
- )
-where
+ , getPackageName
+ , encodeModule
+ , ghcjsPrimUnit
+ , ghcjsThUnit
+ ) where
import GHC.Data.ShortText as ST
import GHC.Unit.Module
+import GHC.Unit.Info
+import GHC.Unit.State
+import GHC.Unit.Env
+import GHC.Unit.Home
import GHC.Utils.Encoding
+import GHC.Driver.Env.Types
+import GHC.Driver.Session
import GHC.Prelude
@@ -36,3 +45,75 @@ moduleExportsSymbol m = mconcat
, ST.pack (zEncodeString $ unitModuleString m)
, "_<exports>"
]
+
+-- FIXME: Use FastString
+encodeModule :: HscEnv -> Module -> String
+encodeModule env k
+ | isGhcjsPrimUnit env (moduleUnitId k) = "ghcjs-prim"
+ | isGhcjsThUnit env (moduleUnitId k) = "ghcjs-th"
+ | otherwise = unitModuleString k
+
+{-
+ some packages are wired into GHCJS, but not GHC
+ make sure we don't version them in the output
+ since the RTS uses thins from them
+-}
+
+-- FIXME: Jeff (2022,03): I've swapped DynFlags for HscEnv to gain access to the
+-- UnitState for these checks. Unsure if this is a great idea or even workable.
+-- In either case it will proliferate DynFlags throughout the Linker. So the fix
+-- should be to add flags to the Linker config so we do not need to carry HscEnv
+-- or DynFlags around.
+isGhcjsPrimUnit :: HscEnv -> UnitId -> Bool
+isGhcjsPrimUnit env pkgKey
+ = pn == "ghcjs-prim" || -- FIXME: Jeff (2022,03): use UnitID only instead of
+ -- a hacky String comparison, same for
+ -- @isGhcjsThUnit@
+ (GHC.Prelude.null pn && pkgKey == home_uid &&
+ elem "-DBOOTING_PACKAGE=ghcjs-prim" (opt_P $ hsc_dflags env))
+ where
+ pn = unitIdString . ue_current_unit $ hsc_unit_env env
+ -- FIXME: Jeff (2022,03): remove call to unsafe. Only using this because I
+ -- am unsure when exactly the home unit for the GhcJS prims gets
+ -- instantiated
+ home_uid = homeUnitId . ue_unsafeHomeUnit $ hsc_unit_env env
+
+isGhcjsThUnit :: HscEnv -> UnitId -> Bool
+isGhcjsThUnit env pkgKey
+ = pn == "ghcjs-th" ||
+ (GHC.Prelude.null pn && pkgKey == home_uid &&
+ elem "-DBOOTING_PACKAGE=ghcjs-th" (opt_P $ hsc_dflags env))
+ where
+ home_uid = homeUnitId . ue_unsafeHomeUnit $ hsc_unit_env env
+ pn = unitIdString . ue_current_unit $ hsc_unit_env env
+
+-- FIXME: Jeff (2022,03): These return a UnitId, but I think they should be
+-- @RealUnit (Definite UnitId). Per the description of @GenUnit@ in
+-- Ghc.Unit.Types: a RealUnit is a UnitId that is closed or fully instantiated.
+-- These should be fully instantiated, and Definite. See Note [Wired-in units]
+-- in GHC.Unit.Types for a similar scenario for the NCG
+ghcjsPrimUnit :: UnitState -> UnitId
+ghcjsPrimUnit env =
+ case prims of
+ ((_,k):_) -> k
+ _ -> error "Package `ghcjs-prim' is required to link executables"
+ where
+ prims = filter ((=="ghcjs-prim").fst)
+ (searchModule env (mkModuleName "GHCJS.Prim"))
+
+ghcjsThUnit :: UnitState -> UnitId
+ghcjsThUnit env =
+ case prims of
+ ((_,k):_) -> k
+ _ -> error "Package `ghcjs-th' is required to link executables"
+ where
+ prims = filter ((=="ghcjs-th").fst)
+ (searchModule env (mkModuleName "GHCJS.Prim.TH.Eval"))
+
+searchModule :: UnitState -> ModuleName -> [(String, UnitId)]
+searchModule env =
+ fmap ((\k -> (getPackageName env k, k)) . moduleUnitId . fst)
+ . lookupModuleInAllUnits env
+
+getPackageName :: UnitState -> UnitId -> String
+getPackageName u_st = maybe "" unitPackageNameString . lookupUnitId u_st
diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in
index a9f9570e1e..9be8f83da0 100644
--- a/compiler/ghc.cabal.in
+++ b/compiler/ghc.cabal.in
@@ -642,6 +642,7 @@ Library
GHC.StgToJS.Object
GHC.StgToJS.Prim
GHC.StgToJS.Profiling
+ GHC.StgToJS.Printer
GHC.StgToJS.Regs
GHC.StgToJS.Rts.Types
GHC.StgToJS.Rts.Rts
@@ -651,6 +652,12 @@ Library
GHC.StgToJS.Types
GHC.StgToJS.UnitUtils
GHC.StgToJS.Utils
+ GHC.StgToJS.Linker.Compactor
+ GHC.StgToJS.Linker.Dynamic
+ GHC.StgToJS.Linker.Linker
+ GHC.StgToJS.Linker.Types
+ GHC.StgToJS.Linker.Utils
+ GHC.StgToJS.Linker.Archive
GHC.Stg.Unarise
GHC.SysTools
GHC.SysTools.Ar