diff options
author | doyougnu <jeffrey.young@iohk.io> | 2022-03-19 19:59:20 -0400 |
---|---|---|
committer | doyougnu <jeffrey.young@iohk.io> | 2022-06-13 13:42:38 -0400 |
commit | 5fa834b3db4619346d9205ad38f9c5837945a08c (patch) | |
tree | 0a75dcbe090552e1e837e4e804703d33b8800db1 | |
parent | 14fb7ba21d47c53c52f2d66c072dd18b228216d5 (diff) | |
download | haskell-5fa834b3db4619346d9205ad38f9c5837945a08c.tar.gz |
JS.Backend: Add JS specific Linker
JS: initialize Linker, DynamicLinking
JS.Printer: adapted to GHC Head
JS.Printer: some cleanup and init Printer
StgToJS.Printer: Compiles
JS.Linker: Add types, expose JS keywords
JS.Syntax: add Binary instance on Ident's
JS.Linker: Migrate more Types to Data.Binary
JS.Linker.Types: compiles and adapted to GHC Head
JS.Linker.Types: compiles
JS.Linker.Types: add UseBase type
JS.Linker: Comments and Cleanup
JS.Linker.Types: add TH types, Env type, DepsLoc
JS.Linker: more FIXMEs numerous Linker fixes
JS.Linker: removed Text references
JS.UnitUtils: add package related helper functions
JS.Linker: more DynFlags removal
JS.Linker: Time for semantic errors
JS.Linker: DynFlags finally removed
JS.Linker: 107 compile errors to go
JS.Linker.Utils: initialized, adapted to GHC Head
JS.Linker.Utils: initialize Utils module
JS.Linker.Utils: more utils
JS.Rts: move rtsText to Rts
JS.Linker: linkerStats implemented
JS.Compactor: compiles, adapted to GHC Head
JS.Compactor: have to retrofit compact for linker
JS.Linker.Compactor: unwinding lenses
JS.Linker.Compactor: comments over addItem
JS.Linker.Compactor: Lenses removed
JS.Linker.Compactor: SHA256 removed
JS.Linker.Compactor: only missing instances left
JS.Linker.Compactor: compiles
JS.Linker: compiles, adapted to ghc Head
JS.Linker: More progress
JS.Linker: link in memory compiles
JS.Linker: just shims left
JS.Linker.DynamicLinking compiles: adapted to head
JS.Linker.DynamicLinking: initialization
JS.Linker.DynamicLinking: compiles up to Variants
JS.Variants: initialize
JS.Linker: numerous and various fixes
JS.Linker.DynamicLinking: only small errors left
JS.Linker.Archive: compiles, adapted to GHC Head
JS.Linker: initialize Archive compat module
JS.Linker.Archive: minor fixes
JS.Linker.DynamicLinking: compiles
JS.Linker: cleanup, remove Variants, add comments
fixup: more cleanup
JS.Linker: more cleanup and comments
-rw-r--r-- | compiler/GHC/JS/Ppr.hs | 11 | ||||
-rw-r--r-- | compiler/GHC/JS/Syntax.hs | 36 | ||||
-rw-r--r-- | compiler/GHC/StgToJS/Arg.hs | 62 | ||||
-rw-r--r-- | compiler/GHC/StgToJS/DataCon.hs | 12 | ||||
-rw-r--r-- | compiler/GHC/StgToJS/Linker/Archive.hs | 194 | ||||
-rw-r--r-- | compiler/GHC/StgToJS/Linker/Compactor.hs | 1437 | ||||
-rw-r--r-- | compiler/GHC/StgToJS/Linker/Dynamic.hs | 564 | ||||
-rw-r--r-- | compiler/GHC/StgToJS/Linker/Linker.hs | 887 | ||||
-rw-r--r-- | compiler/GHC/StgToJS/Linker/Types.hs | 581 | ||||
-rw-r--r-- | compiler/GHC/StgToJS/Linker/Utils.hs | 101 | ||||
-rw-r--r-- | compiler/GHC/StgToJS/Object.hs | 119 | ||||
-rw-r--r-- | compiler/GHC/StgToJS/Printer.hs | 165 | ||||
-rw-r--r-- | compiler/GHC/StgToJS/Rts/Rts.hs | 7 | ||||
-rw-r--r-- | compiler/GHC/StgToJS/Types.hs | 69 | ||||
-rw-r--r-- | compiler/GHC/StgToJS/UnitUtils.hs | 85 | ||||
-rw-r--r-- | compiler/ghc.cabal.in | 7 |
16 files changed, 4256 insertions, 81 deletions
diff --git a/compiler/GHC/JS/Ppr.hs b/compiler/GHC/JS/Ppr.hs index e46c8534ef..223d4d3e7a 100644 --- a/compiler/GHC/JS/Ppr.hs +++ b/compiler/GHC/JS/Ppr.hs @@ -6,7 +6,6 @@ {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE BlockArguments #-} -{-# LANGUAGE PatternSynonyms #-} -- | Pretty-printing JavaScript module GHC.JS.Ppr @@ -19,6 +18,10 @@ module GHC.JS.Ppr , RenderJs(..) , jsToDoc , pprStringLit + , flattenBlocks + , braceNest + , braceNest' + , braceNest'' ) where @@ -79,6 +82,12 @@ braceNest x = char '{' <+> nest 2 x $$ char '}' braceNest' :: Doc -> Doc braceNest' x = nest 2 (char '{' $+$ x) $$ char '}' +-- FIXME: Jeff (2022,03): better naming of braceNest'' functions. Stop the +-- madness! +-- somewhat more compact (egyptian style) braces +braceNest'' :: Doc -> Doc +braceNest'' x = nest 2 (char '{' $$ x) $$ char '}' + class JsToDoc a where jsToDocR :: RenderJs -> a -> Doc instance JsToDoc JStat where jsToDocR r = renderJsS r r instance JsToDoc JExpr where jsToDocR r = renderJsE r r diff --git a/compiler/GHC/JS/Syntax.hs b/compiler/GHC/JS/Syntax.hs index 37104d8d9f..d70e06d801 100644 --- a/compiler/GHC/JS/Syntax.hs +++ b/compiler/GHC/JS/Syntax.hs @@ -48,6 +48,8 @@ module GHC.JS.Syntax , pseudoSaturate -- * Utility , SaneDouble(..) + -- * Keywords + , isJsKeyword ) where import GHC.Prelude @@ -56,16 +58,18 @@ import Control.DeepSeq import Data.Function import qualified Data.Map as M +import qualified Data.Set as Set import Data.Data import Data.Word import qualified Data.Semigroup as Semigroup import GHC.Generics +import Data.Binary import GHC.Utils.Outputable (Outputable (..)) import qualified GHC.Utils.Outputable as O import qualified GHC.Data.ShortText as ST -import GHC.Data.ShortText (ShortText) +import GHC.Data.ShortText (ShortText()) import GHC.Utils.Monad.State.Strict -- FIXME: Jeff (2022,03): This state monad is strict, but uses a lazy list as @@ -319,9 +323,35 @@ instance Show SaneDouble where -------------------------------------------------------------------------------- -- Identifiers -------------------------------------------------------------------------------- --- We use ShortText for identifier in JS backend +-- We use ShortText for identifiers in JS backend -- | Identifiers newtype Ident = TxtI { itxt:: ShortText} - deriving (Show, Typeable, Ord, Eq, Generic, NFData) + deriving stock (Show, Typeable, Ord, Eq, Generic) + deriving newtype (Binary, NFData) -- FIXME: Jeff (2022,03): ShortText uses Data.Binary + -- rather than GHC.Utils.Binary. What is the + -- difference? See related FIXME in StgToJS.Object + +-------------------------------------------------------------------------------- +-- JS Keywords +-------------------------------------------------------------------------------- +-- | The set of Javascript keywords +jsKeywords :: Set.Set Ident +jsKeywords = Set.fromList $ TxtI <$> + [ "break", "case", "catch", "continue", "debugger" + , "default", "delete", "do", "else", "finally", "for" + , "function", "if", "in", "instanceof", "new", "return" + , "switch", "this", "throw", "try", "typeof", "var", "void" + , "while", "with" + , "class", "enum", "export", "extends", "import", "super" + , "const" + , "implements", "interface", "let", "package", "private" + , "protected" + , "public", "static", "yield" + , "null", "true", "false" + ] + +-- | Check if provided Ident is a JS keyword +isJsKeyword :: Ident -> Bool +isJsKeyword = flip Set.member jsKeywords diff --git a/compiler/GHC/StgToJS/Arg.hs b/compiler/GHC/StgToJS/Arg.hs index 4404aa17c2..7d005e41a9 100644 --- a/compiler/GHC/StgToJS/Arg.hs +++ b/compiler/GHC/StgToJS/Arg.hs @@ -1,5 +1,6 @@ {-# LANGUAGE LambdaCase #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} -- For ToJExpr StaticArg, see FIXME module GHC.StgToJS.Arg ( genArg , genStaticArg @@ -78,30 +79,30 @@ genArg a = case a of case lookupUFM unFloat i of Nothing -> reg Just expr -> unfloated expr - where - -- if our argument is a joinid, it can be an unboxed tuple - r :: HasDebugCallStack => VarType - r = uTypeVt . stgArgType $ a - reg - | isVoid r = return [] - | i == trueDataConId = return [true_] - | i == falseDataConId = return [false_] - | isMultiVar r = mapM (jsIdN i) [1..varSize r] - | otherwise = (:[]) <$> jsId i - - unfloated :: HasDebugCallStack => CgStgExpr -> G [JExpr] - unfloated = \case - StgLit l -> genLit l - StgConApp dc _n args _ - | isBoolDataCon dc || isUnboxableCon dc - -> (:[]) . allocUnboxedCon dc . concat <$> mapM genArg args - | null args -> (:[]) <$> jsId (dataConWorkId dc) - | otherwise -> do - as <- concat <$> mapM genArg args - e <- enterDataCon dc - cs <- getSettings - return [allocDynamicE cs e as Nothing] -- FIXME: ccs - x -> pprPanic "genArg: unexpected unfloated expression" (pprStgExpr panicStgPprOpts x) + where + -- if our argument is a joinid, it can be an unboxed tuple + r :: HasDebugCallStack => VarType + r = uTypeVt . stgArgType $ a + reg + | isVoid r = return [] + | i == trueDataConId = return [true_] + | i == falseDataConId = return [false_] + | isMultiVar r = mapM (jsIdN i) [1..varSize r] + | otherwise = (:[]) <$> jsId i + + unfloated :: HasDebugCallStack => CgStgExpr -> G [JExpr] + unfloated = \case + StgLit l -> genLit l + StgConApp dc _n args _ + | isBoolDataCon dc || isUnboxableCon dc + -> (:[]) . allocUnboxedCon dc . concat <$> mapM genArg args + | null args -> (:[]) <$> jsId (dataConWorkId dc) + | otherwise -> do + as <- concat <$> mapM genArg args + e <- enterDataCon dc + inl_alloc <- csInlineAlloc <$> getSettings + return [allocDynamicE inl_alloc e as Nothing] -- FIXME: ccs + x -> pprPanic "genArg: unexpected unfloated expression" (pprStgExpr panicStgPprOpts x) genIdArg :: HasDebugCallStack => Id -> G [JExpr] genIdArg i = genArg (StgVarArg i) @@ -192,3 +193,16 @@ allocateStaticList xs a@(StgVarArg i) pprPanic "allocateStaticList: invalid argument (tail)" (ppr (xs, r)) allocateStaticList _ _ = panic "allocateStaticList: unexpected literal in list" +-- FIXME: Jeff (2022,03): Fix this orphan instance. It is consumed by +-- Linker.Linker but requires allocDynamicE, hence its presence in this file. If +-- we put it in StgToJS.Types (where StaticArg is defined) then we'll end up in +-- an obvious module cycle. We could put it in DataCon but then we lose cohesion +-- in that module (i.e., why should the DataCon module be exporting this +-- instance?). It seems to be that this module should be the one that defines +-- StaticArg, but I leave that for a refactor later. +instance ToJExpr StaticArg where + toJExpr (StaticLitArg l) = toJExpr l + toJExpr (StaticObjArg t) = ValExpr (JVar (TxtI t)) + toJExpr (StaticConArg c args) = + -- FIXME: cost-centre stack + allocDynamicE False (ValExpr . JVar . TxtI $ c) (map toJExpr args) Nothing diff --git a/compiler/GHC/StgToJS/DataCon.hs b/compiler/GHC/StgToJS/DataCon.hs index f8dd175ac0..eb5b152783 100644 --- a/compiler/GHC/StgToJS/DataCon.hs +++ b/compiler/GHC/StgToJS/DataCon.hs @@ -70,9 +70,13 @@ allocUnboxedCon con = \case | isUnboxableCon con -> x xs -> pprPanic "allocUnboxedCon: not an unboxed constructor" (ppr (con,xs)) -allocDynamicE :: StgToJSConfig -> JExpr -> [JExpr] -> Maybe JExpr -> JExpr -allocDynamicE s entry free cc - | csInlineAlloc s || length free > 24 = newClosure $ Closure +allocDynamicE :: Bool -- ^ csInlineAlloc from StgToJSConfig + -> JExpr + -> [JExpr] + -> Maybe JExpr + -> JExpr +allocDynamicE inline_alloc entry free cc + | inline_alloc || length free > 24 = newClosure $ Closure { clEntry = entry , clField1 = fillObj1 , clField2 = fillObj2 @@ -92,7 +96,7 @@ allocDynamicE s entry free cc allocDynamic :: StgToJSConfig -> Bool -> Ident -> JExpr -> [JExpr] -> Maybe JExpr -> JStat allocDynamic s haveDecl to entry free cc = - dec to `mappend` (toJExpr to |= allocDynamicE s entry free cc) + dec to `mappend` (toJExpr to |= allocDynamicE (csInlineAlloc s) entry free cc) where dec i | haveDecl = DeclStat i | otherwise = mempty diff --git a/compiler/GHC/StgToJS/Linker/Archive.hs b/compiler/GHC/StgToJS/Linker/Archive.hs new file mode 100644 index 0000000000..d0d911e8d1 --- /dev/null +++ b/compiler/GHC/StgToJS/Linker/Archive.hs @@ -0,0 +1,194 @@ +{-# LANGUAGE DeriveDataTypeable #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE TupleSections #-} + +----------------------------------------------------------------------------- +-- | +-- Module : GHC.StgToJS.Archive +-- Copyright : (c) The University of Glasgow 2001 +-- License : BSD-style (see the file LICENSE) +-- +-- Maintainer : Sylvain Henry <sylvain.henry@iohk.io> +-- Jeffrey Young <jeffrey.young@iohk.io> +-- Luite Stegeman <luite.stegeman@iohk.io> +-- Josh Meredith <josh.meredith@iohk.io> +-- Stability : experimental +-- +-- FIXME: Jeff(2022,04): Remove this module completely, its only consumer is +-- GHC.StgToJS.Linker.Dynamic and is likely no longer necessary with the new +-- GHC Api. I simply decided adapting this module was faster/easier than +-- removing it and figuring out GHC.StgToJS.Linker.Dynamic with the new API +----------------------------------------------------------------------------- +module GHC.StgToJS.Linker.Archive + ( Entry(..), Index, IndexEntry(..), Meta(..) + , buildArchive + , readMeta, readIndex + , readSource, readAllSources + , readObject, withObject, withAllObjects + ) where + +import Control.Monad + +import Data.Binary +import Data.Binary.Get +import Data.Binary.Put +import Data.ByteString.Lazy (ByteString) +import qualified Data.ByteString.Lazy as B +import Data.Data +import Data.Int +import GHC.Data.ShortText (ShortText) +import qualified GHC.Data.ShortText as T + +import GHC.Generics hiding (Meta) + +import System.IO +import Prelude + +import GHC.Unit.Module + +import GHC.StgToJS.Object ( versionTag, versionTagLength ) + + +-- entry, offset in data section, length +type Index = [IndexEntry] + +data IndexEntry = IndexEntry { ieEntry :: Entry + , ieOffset :: Int64 + , ieLength :: Int64 + } deriving (Show, Typeable, Generic) + +instance Binary IndexEntry + +data Entry = Object ShortText -- module name + | JsSource FilePath + deriving (Show, Typeable, Generic) + +instance Binary Entry + +data Meta = Meta { metaCppOptions :: [String] + } deriving (Show, Typeable, Generic) + +instance Binary Meta + +-- sizes of the sections in bytes +data Sections = Sections { sectionIndex :: !Word64 + , sectionMeta :: !Word64 + , sectionData :: !Word64 + } deriving (Eq, Ord, Generic) + +instance Binary Sections where + put (Sections i m d) = putWord64le i >> putWord64le m >> putWord64le d + get = Sections <$> getWord64le <*> getWord64le <*> getWord64le + +sectionsLength :: Int +sectionsLength = 24 + +buildArchive :: Meta -> [(Entry, ByteString)] -> ByteString +buildArchive meta entries = + versionTag <> sections <> index <> meta' <> entries' + where + bl = fromIntegral . B.length + sections = runPut . put $ Sections (bl index) (bl meta') (bl entries') + meta' = runPut (put meta) + index = runPut . put $ scanl1 (\(IndexEntry _ o l) (IndexEntry e _ l') -> IndexEntry e (o+l) l') $ + map (\(e,b) -> IndexEntry e 0 (B.length b)) entries + entries' = mconcat (map snd entries) + +readMeta :: FilePath -> IO Meta +readMeta file = withBinaryFile file ReadMode $ \h -> do + sections <- hReadHeader ("readMeta " ++ file) h + hSeek h RelativeSeek (toInteger $ sectionIndex sections) + m <- B.hGet h (fromIntegral $ sectionMeta sections) + return $! runGet get m + +readIndex :: FilePath -> IO Index +readIndex file = + withArchive "readIndex" file $ \_sections index _h -> return index + +readSource :: FilePath -> FilePath -> IO ByteString +readSource source file = withArchive "readSource" file $ + withEntry ("readSource " ++ file) + ("source file " ++ source) + selectSrc + (\h l -> B.hGet h $ fromIntegral l) + where + selectSrc (JsSource src) = src == source + selectSrc _ = False + +readAllSources :: FilePath -> IO [(FilePath, ByteString)] +readAllSources file = withArchive "readAllSources" file $ \sections index h -> + forM [ (o, l, src) | IndexEntry (JsSource src) o l <- index ] $ \(o, l, src) -> do + hSeek h AbsoluteSeek (fromIntegral $ dataSectionStart sections + fromIntegral o) + (src,) <$> B.hGet h (fromIntegral l) + +readObject :: ModuleName -> FilePath -> IO ByteString +readObject m file = withArchive "readObject" file $ + withModuleObject ("readObject " ++ file) m (\h l -> B.hGet h $ fromIntegral l) + +-- | seeks to the starting position of the object in the file +withObject :: ModuleName -> FilePath -> (Handle -> Int64 -> IO a) -> IO a +withObject m file f = withArchive "withObject" file $ + withModuleObject ("withObject " ++ file) m f + + +withAllObjects :: FilePath -> (ModuleName -> Handle -> Int64 -> IO a) -> IO [a] +withAllObjects file f = withArchive "withAllObjects" file $ \sections index h -> + forM [ (o, l, mn) | IndexEntry (Object mn) o l <- index ] $ \(o, l, mn) -> do + hSeek h AbsoluteSeek (fromIntegral $ dataSectionStart sections + fromIntegral o) + f (mkModuleName (T.unpack mn)) h l + +--------------------------------------------------------------------------------- + +withArchive :: String -> FilePath -> (Sections -> Index -> Handle -> IO a) -> IO a +withArchive name file f = withBinaryFile file ReadMode $ \h -> do + let name' = name ++ " " ++ file + putStrLn ("reading archive: " ++ name ++ " -> " ++ file) + sections <- hReadHeader name' h + index <- hReadIndex name' sections h + f sections index h + +-- | seeks to start of entry data in file, then runs the action +-- exactly one matching entry is expected +withEntry :: String -> String + -> (Entry -> Bool) -> (Handle -> Int64 -> IO a) + -> Sections -> Index -> Handle + -> IO a +withEntry name entryName p f sections index h = + case filter (p . ieEntry) index of + [] -> error (name ++ ": cannot find " ++ entryName) + [IndexEntry _ o l] -> do + hSeek h AbsoluteSeek (dataSectionStart sections + toInteger o) + f h (fromIntegral l) + _ -> error (name ++ ": multiple matches for " ++ entryName) + +withModuleObject :: String -> ModuleName -> (Handle -> Int64 -> IO a) + -> Sections -> Index -> Handle + -> IO a +withModuleObject name m f = + withEntry name ("object for module " ++ ms) selectEntry f + where + ms = moduleNameString m + mt = T.pack ms + selectEntry (Object m') = mt == m' + selectEntry _ = False + +-- | expects Handle to be positioned at the start of the header +-- Handle is positioned at start of index after return +hReadHeader :: String -> Handle -> IO Sections +hReadHeader name h = do + ts <- B.hGet h (versionTagLength + sectionsLength) + when (B.take (fromIntegral versionTagLength) ts /= versionTag) + (error $ name ++ ": version tag mismatch") + return $! runGet get (B.drop (fromIntegral versionTagLength) ts) + +-- | expects Handle to be positioned at the start of the index +-- Handle is positioned at start of metadata section after return +hReadIndex :: String -> Sections -> Handle -> IO Index +hReadIndex _name s h = do + i <- B.hGet h (fromIntegral $ sectionIndex s) + return $! runGet get i + +-- start of data section in file +dataSectionStart :: Sections -> Integer +dataSectionStart s = toInteger (versionTagLength + sectionsLength) + + toInteger (sectionIndex s + sectionMeta s) diff --git a/compiler/GHC/StgToJS/Linker/Compactor.hs b/compiler/GHC/StgToJS/Linker/Compactor.hs new file mode 100644 index 0000000000..3348dc01da --- /dev/null +++ b/compiler/GHC/StgToJS/Linker/Compactor.hs @@ -0,0 +1,1437 @@ +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE OverloadedStrings #-} + +----------------------------------------------------------------------------- +-- | +-- Module : GHC.StgToJS.Linker +-- Copyright : (c) The University of Glasgow 2001 +-- License : BSD-style (see the file LICENSE) +-- +-- Maintainer : Luite Stegeman <luite.stegeman@iohk.io> +-- Sylvain Henry <sylvain.henry@iohk.io> +-- Jeffrey Young <jeffrey.young@iohk.io> +-- Josh Meredith <josh.meredith@iohk.io> +-- Stability : experimental +-- +-- The compactor does link-time optimization. It is much simpler than the +-- Optimizer, no fancy dataflow analysis here. +-- +-- Optimizations: +-- - rewrite all variables starting with h$$ to shorter names, these are internal names +-- - write all function metadata compactly +-- +-- TODO: Jeff (2022,03): I've adapted this to ghcHEAD but have not actually +-- implemented the compactor. The key work function is @packString@ which +-- currently explodes if called. The todo is to fix this, and actually implement +-- the compactor once we have a linker that actually works. +----------------------------------------------------------------------------- + +module GHC.StgToJS.Linker.Compactor + ( compact + ) where + + +import GHC.Utils.Panic +import GHC.Utils.Misc + + +import Control.Applicative +import GHC.Utils.Monad.State.Strict +import Data.Function + +import qualified Data.Binary.Get as DB +import qualified Data.Binary.Put as DB +import qualified Data.Bits as Bits +import Data.Bits (shiftL, shiftR) +import Data.Bifunctor (second) +import qualified Data.ByteString.Lazy as BL +import qualified Data.ByteString as BS +import qualified Data.ByteString.Char8 as BSC +import qualified Data.ByteString.Builder as BB +import Data.Char (chr) +import qualified Data.Graph as G +import qualified Data.Map.Strict as M +import Data.Map (Map) +import Data.Int +import Data.List +import Data.Maybe +import qualified Data.Set as S +import Data.Set (Set) +import GHC.Data.ShortText (ShortText) +import qualified GHC.Data.ShortText as T + + +import GHC.JS.Syntax +import GHC.JS.Make +import GHC.JS.Transform +import GHC.StgToJS.Printer (pretty) +import GHC.StgToJS.Types +import GHC.StgToJS.Linker.Types +import GHC.StgToJS.CoreUtils +import GHC.StgToJS.Closure +import GHC.StgToJS.Arg() + +import Prelude +import GHC.Utils.Encoding + + +-- | collect global objects (data / CAFs). rename them and add them to the table +collectGlobals :: [StaticInfo] + -> State CompactorState () +collectGlobals = mapM_ (\(StaticInfo i _ _) -> renameObj i) + +debugShowStat :: (JStat, [ClosureInfo], [StaticInfo]) -> String +debugShowStat (_s, cis, sis) = + "closures:\n" ++ + unlines (map show cis) ++ + "\nstatics:" ++ + unlines (map show sis) ++ + "\n\n" + +{- create a single string initializer for all StaticUnboxedString references + in the code, and rewrite all references to point to it + + if incremental linking is used, each increment gets its own packed string + blob. if a string value already exists in an earlier blob it is not added + again + -} +packStrings :: HasDebugCallStack + => JSLinkConfig + -> CompactorState + -> [LinkedUnit] + -> (CompactorState, [LinkedUnit]) +packStrings _settings _cstate _code = panic "Compactor.packstrings not yet implemented!" + -- let allStatics :: [StaticInfo] + -- allStatics = concatMap (\(_,_,x) -> x) code + + -- origStringTable :: StringTable + -- origStringTable = cstate ^. stringTable + + -- allStrings :: Set ByteString + -- allStrings = S.fromList $ + -- filter (not . isExisting) + -- (mapMaybe (staticString . siVal) allStatics) + -- where + -- isExisting bs = isJust (M.lookup bs $ stOffsets origStringTable) + + -- staticString :: StaticVal -> Maybe ByteString + -- staticString (StaticUnboxed (StaticUnboxedString bs)) = Just bs + -- staticString (StaticUnboxed (StaticUnboxedStringOffset bs)) = Just bs + -- staticString _ = Nothing + + -- allStringsList :: [ByteString] + -- allStringsList = S.toList allStrings + + -- -- we may see two kinds of null characters + -- -- - string separator, packed as \0 + -- -- - within a string, packed as \cz\0 + -- -- we transform the strings to + -- transformPackedLiteral :: ShortText -> ShortText + -- transformPackedLiteral = mconcat. fmap f + -- where + -- f :: Char -> ShortText + -- f '\0' = "\^Z\0" + -- f '\^Z' = "\^Z\^Z" + -- f x = x + + -- allStringsPacked :: ShortText + -- allStringsPacked = T.intercalate "\0" $ + -- map (\str -> maybe (packBase64 str) + -- transformPackedLiteral + -- (U.decodeModifiedUTF8 str)) + -- allStringsList + + -- packBase64 :: ByteString -> ShortText + -- packBase64 bs + -- | BS.null bs = mempty + -- | otherwise = + -- let (h,t) = BS.splitAt 128 bs + -- esc = T.singleton '\^Z' <> + -- T.singleton (chr . fromIntegral $ BS.length h + 0x1f) + -- b64 = esc <> fromJust (U.decodeModifiedUTF8 (B64.encode h)) + -- in maybe b64 transformPackedLiteral (U.decodeModifiedUTF8 h) <> + -- packBase64 t + + -- allStringsWithOffset :: [(ByteString, Int)] + -- allStringsWithOffset = snd $ + -- mapAccumL (\o b -> let o' = o + fromIntegral (BS.length b) + 1 + -- in o' `seq` (o', (b, o))) + -- 0 + -- allStringsList + + -- -- the offset of each of the strings in the big blob + -- offsetIndex :: HashMap ByteString Int + -- offsetIndex = M.fromList allStringsWithOffset + + -- stringSymbol :: Ident + -- stringSymbol = head $ cstate ^. identSupply + + -- stringSymbolT :: ShortText + -- stringSymbolT = let (TxtI t) = stringSymbol in t + + -- stringSymbolIdx :: Int + -- stringSymbolIdx = snd (bounds $ stTableIdents origStringTable) + 1 + + -- -- append the new string symbol + -- newTableIdents :: Array Int ShortText + -- newTableIdents = + -- listArray (0, stringSymbolIdx) + -- (elems (stTableIdents origStringTable) ++ [stringSymbolT]) + + -- newOffsetsMap :: Map ByteString (Int, Int) + -- newOffsetsMap = M.union (stOffsets origStringTable) + -- (fmap (stringSymbolIdx,) offsetIndex) + + -- newIdentsMap :: HashMap ShortText (Either Int Int) + -- newIdentsMap = + -- let f (StaticInfo s (StaticUnboxed (StaticUnboxedString bs)) _) + -- = Just (s, Left . fst $ newOffsetsMap M.! bs) + -- f (StaticInfo s (StaticUnboxed (StaticUnboxedStringOffset bs)) _) + -- = Just (s, Right . snd $ newOffsetsMap M.! bs) + -- f _ = Nothing + -- in M.union (stIdents origStringTable) + -- (M.fromList $ mapMaybe f allStatics) + + -- newStringTable :: StringTable + -- newStringTable = StringTable newTableIdents newOffsetsMap newIdentsMap + + -- newOffsetsInverted :: HashMap (Int, Int) ByteString + -- newOffsetsInverted = M.fromList . + -- map (\(x,y) -> (y,x)) . + -- M.toList $ + -- newOffsetsMap + + -- replaceSymbol :: ShortText -> Maybe JVal + -- replaceSymbol t = + -- let f (Left i) = JVar (TxtI $ newTableIdents ! i) + -- f (Right o) = JInt (fromIntegral o) + -- in fmap f (M.lookup t newIdentsMap) + + -- cstate0 :: CompactorState + -- cstate0 = cstate & identSupply %~ tail + -- & stringTable .~ newStringTable + + -- initStr :: JStat + -- initStr = + -- DeclStat stringSymbol <> + -- AssignStat (ValExpr $ JVar stringSymbol) + -- (ApplExpr (ApplExpr (ValExpr $ JVar (TxtI "h$pstr")) + -- [ValExpr (JStr allStringsPacked)]) + -- []) + + -- rewriteValsE :: JExpr -> JExpr + -- rewriteValsE (ApplExpr e xs) + -- | Just t <- appMatchStringLit e xs = ValExpr (JStr t) + -- rewriteValsE (ValExpr v) = ValExpr (rewriteVals v) + -- rewriteValsE e = e & exprsE %~ rewriteValsE + + -- rewriteVals :: JVal -> JVal + -- rewriteVals (JVar (TxtI t)) + -- | Just v <- replaceSymbol t = v + -- rewriteVals (JList es) = JList (map rewriteValsE es) + -- rewriteVals (JHash m) = JHash (fmap rewriteValsE m) + -- rewriteVals (JFunc args body) = JFunc args (body & exprsS %~ rewriteValsE) + -- rewriteVals v = v + + -- rewriteStat :: JStat -> JStat + -- rewriteStat st = st & exprsS %~ rewriteValsE + + -- appMatchStringLit :: JExpr -> [JExpr] -> Maybe ShortText + -- appMatchStringLit (ValExpr (JVar (TxtI "h$decodeUtf8z"))) + -- [ValExpr (JVar (TxtI x)), ValExpr (JVar (TxtI y))] + -- | Just (Left i) <- M.lookup x newIdentsMap + -- , Just (Right j) <- M.lookup y newIdentsMap + -- , Just bs <- M.lookup (i,j) newOffsetsInverted = + -- U.decodeModifiedUTF8 bs + -- appMatchStringLit _ _ = Nothing + + -- rewriteStatic :: StaticInfo -> Maybe StaticInfo + -- rewriteStatic (StaticInfo _i + -- (StaticUnboxed StaticUnboxedString{}) + -- _cc) = + -- Nothing + -- rewriteStatic (StaticInfo _i + -- (StaticUnboxed StaticUnboxedStringOffset {}) + -- _cc) = + -- Nothing + -- rewriteStatic si = Just (si & staticInfoArgs %~ rewriteStaticArg) + + -- rewriteStaticArg :: StaticArg -> StaticArg + -- rewriteStaticArg a@(StaticObjArg t) = + -- case M.lookup t newIdentsMap of + -- Just (Right v) -> StaticLitArg (IntLit $ fromIntegral v) + -- Just (Left idx) -> StaticObjArg (newTableIdents ! idx) + -- _ -> a + -- rewriteStaticArg (StaticConArg v es) + -- = StaticConArg v (map rewriteStaticArg es) + -- rewriteStaticArg x = x + + -- initStatic :: LinkedUnit + -- initStatic = + -- let (TxtI ss) = stringSymbol + -- in (initStr, [], [StaticInfo ss (StaticThunk Nothing) Nothing]) + + -- rewriteBlock :: LinkedUnit -> LinkedUnit + -- rewriteBlock (stat, ci, si) + -- = (rewriteStat stat, ci, mapMaybe rewriteStatic si) + + -- in (cstate0, initStatic : map rewriteBlock code) + +renameInternals :: HasDebugCallStack + => JSLinkConfig + -> StgToJSConfig + -> CompactorState + -> [ShortText] + -> [LinkedUnit] + -> (CompactorState, [JStat], JStat) +renameInternals ln_cfg cfg cs0 rtsDeps stats0a = (cs, stats, meta) + where + (stbs, stats0) = (if lcDedupe ln_cfg + then dedupeBodies rtsDeps . dedupe rtsDeps + else (mempty,)) stats0a + ((stats, meta), cs) = runState renamed cs0 + + renamed :: State CompactorState ([JStat], JStat) + renamed + | csDebugAlloc cfg || csProf cfg = do -- FIXME: Jeff (2022,03): Move these Way flags into JSLinkConfig + cs <- get + let renamedStats = map (\(s,_,_) -> identsS' (lookupRenamed cs) s) stats0 + statics = map (renameStaticInfo cs) $ + concatMap (\(_,_,x) -> x) stats0 + infos = map (renameClosureInfo cs) $ + concatMap (\(_,x,_) -> x) stats0 + -- render metadata as individual statements + meta = mconcat (map staticDeclStat statics) <> + identsS' (lookupRenamed cs) stbs <> + mconcat (map (staticInitStat $ csProf cfg) statics) <> + mconcat (map (closureInfoStat True) infos) + return (renamedStats, meta) + | otherwise = do + -- collect all global objects and entries, add them to the renaming table + mapM_ (\(_, cis, sis) -> do + mapM_ (renameEntry . TxtI . ciVar) cis + mapM_ (renameObj . siVar) sis + mapM_ collectLabels sis) stats0 + + -- sort our entries, store the results + -- propagate all renamings throughtout the code + cs <- get + -- FIXME: Jeff (2022,03): Is this workaround still needed? + -- Safari on iOS 10 (64 bit only?) crashes on very long arrays + -- safariCrashWorkaround :: [Ident] -> JExpr + -- safariCrashWorkaround xs = + -- case chunksOf 10000 xs of + -- (y:ys) | not (null ys) + -- -> ApplExpr (SelExpr (toJExpr y) (TxtI "concat")) + -- (map toJExpr ys) + -- _ -> toJExpr xs + let renamedStats = map (\(s,_,_) -> identsS' (lookupRenamed cs) s) + stats0 + sortedInfo = concatMap (\(_,xs,_) -> map (renameClosureInfo cs) + xs) + stats0 + -- entryArr = safariCrashWorkaround $ + entryArr = toJExpr + . map (TxtI . fst) + . sortBy (compare `on` snd) + . M.toList + $ csEntries cs + lblArr = map (TxtI . fst) + . sortBy (compare `on` snd) + . M.toList + $ csLabels cs + ss = concatMap (\(_,_,xs) -> map (renameStaticInfo cs) xs) + stats0 + infoBlock = encodeStr (concatMap (encodeInfo cs) sortedInfo) + staticBlock = encodeStr (concatMap (encodeStatic cs) ss) + stbs' = identsS' (lookupRenamed cs) stbs + staticDecls = mconcat (map staticDeclStat ss) <> stbs' + meta = staticDecls `mappend` + appS "h$scheduleInit" [ entryArr + , var "h$staticDelayed" + , toJExpr lblArr + , toJExpr infoBlock + , toJExpr staticBlock + ] + -- error "scheduleInit" + {- + [j| h$scheduleInit( `entryArr` + , h$staticDelayed + , `lblArr` + , `infoBlock` + , `staticBlock`); + h$staticDelayed = []; + |] -} + return (renamedStats, meta) + +-- | initialize a global object. all global objects have to be declared (staticInfoDecl) first +-- (this is only used with -debug, normal init would go through the static data table) +staticInitStat :: Bool -- ^ profiling enabled + -> StaticInfo + -> JStat +staticInitStat _prof (StaticInfo i sv cc) = + case sv of + StaticData con args -> appS "h$sti" ([var i, var con, toJExpr args] ++ ccArg) + StaticFun f args -> appS "h$sti" ([var i, var f, toJExpr args] ++ ccArg) + StaticList args mt -> + appS "h$stl" ([var i, toJExpr args, toJExpr $ maybe null_ (toJExpr . TxtI) mt] ++ ccArg) + StaticThunk (Just (f,args)) -> + appS "h$stc" ([var i, var f, toJExpr args] ++ ccArg) + _ -> mempty + where + ccArg = maybeToList (fmap toJExpr cc) + +-- | declare and do first-pass init of a global object (create JS object for heap objects) +staticDeclStat :: StaticInfo -> JStat +staticDeclStat (StaticInfo si sv _) = + let si' = TxtI si + ssv (StaticUnboxed u) = Just (ssu u) + ssv (StaticThunk Nothing) = Nothing + ssv _ = Just (app "h$d" []) -- error "StaticUnboxed" -- Just [je| h$d() |] + ssu (StaticUnboxedBool b) = app "h$p" [toJExpr b] -- error "StaticUnboxedBool" -- [je| h$p(`b`) |] + ssu (StaticUnboxedInt i) = app "h$p" [toJExpr i] -- error "StaticUnboxedInt" -- [je| h$p(`i`) |] + ssu (StaticUnboxedDouble d) = app "h$p" [toJExpr (unSaneDouble d)] -- error "StaticUnboxedDouble" -- [je| h$p(`unSaneDouble d`) |] + ssu (StaticUnboxedString str) = ApplExpr (initStr str) [] + ssu StaticUnboxedStringOffset {} = 0 + -- FIXME, we shouldn't do h$di, we need to record the statement to init the thunks + in maybe (appS "h$di" [toJExpr si']) (\v -> DeclStat si' `mappend` (toJExpr si' |= v)) (ssv sv) + -- error "staticDeclStat" -- maybe [j| h$di(`si'`); |] (\v -> DeclStat si' <> error "staticDeclStat" {- [j| `si'` = `v`; |]-}) (ssv sv) + +initStr :: BS.ByteString -> JExpr +initStr str = app "h$str" [ValExpr (JStr . T.pack . BSC.unpack $! str)] + --TODO: Jeff (2022,03): This function used to call @decodeModifiedUTF8 in + --Gen2.Utils. I've removed the call site and opted to keep the Just case. + --We'll need to double check to see if we indeed do need to decoded the + --UTF8 strings and implement a replace function on bytestrings once the + --Linker is up. + -- Nothing -> app "h$rstr" [toJExpr $ map toInteger (BS.unpack str)] + -- error "initStr" + -- [je| h$rstr(`map toInteger (B.unpack str)`) |] + +-- | rename a heap object, which means adding it to the +-- static init table in addition to the renamer +renameObj :: ShortText + -> State CompactorState ShortText +renameObj xs = do + (TxtI xs') <- renameVar (TxtI xs) -- added to the renamer + modify (addStaticEntry xs') -- and now the table + return xs' + +renameEntry :: Ident + -> State CompactorState Ident +renameEntry i = do + i'@(TxtI i'') <- renameVar i + modify (addEntry i'') + return i' + +collectLabels :: StaticInfo -> State CompactorState () +collectLabels si = mapM_ go (labelsV . siVal $ si) + where + go :: ShortText -> State CompactorState () + go = modify . addLabel + labelsV (StaticData _ args) = concatMap labelsA args + labelsV (StaticList args _) = concatMap labelsA args + labelsV _ = [] + labelsA (StaticLitArg l) = labelsL l + labelsA _ = [] + labelsL (LabelLit _ lbl) = [lbl] + labelsL _ = [] + +lookupRenamed :: CompactorState -> Ident -> Ident +lookupRenamed cs i@(TxtI t) = + fromMaybe i (M.lookup t (csNameMap cs)) + +renameVar :: Ident -- ^ text identifier to rename + -> State CompactorState Ident -- ^ the updated renamer state and the new ident +renameVar i@(TxtI t) + | "h$$" `isPrefixOf` T.unpack t = do + m <- gets csNameMap + case M.lookup t m of + Just r -> return r + Nothing -> do + y <- newIdent + let add_var cs' = cs' {csNameMap = M.insert t y (csNameMap cs')} + modify add_var + return y + | otherwise = return i + +newIdent :: State CompactorState Ident +newIdent = do + yys <- gets csIdentSupply + case yys of + (y:ys) -> do + modify (\cs -> cs {csIdentSupply = ys}) + return y + _ -> error "newIdent: empty list" + +-- | rename a compactor info entry according to the compactor state (no new renamings are added) +renameClosureInfo :: CompactorState + -> ClosureInfo + -> ClosureInfo +renameClosureInfo cs (ClosureInfo v rs n l t s) = + ClosureInfo (renameV v) rs n l t (f s) + where + renameV t = maybe t (\(TxtI t') -> t') (M.lookup t m) + m = csNameMap cs + f (CIStaticRefs rs) = CIStaticRefs (map renameV rs) + +-- | rename a static info entry according to the compactor state (no new renamings are added) +renameStaticInfo :: CompactorState + -> StaticInfo + -> StaticInfo +renameStaticInfo cs = staticIdents renameIdent + where + renameIdent t = maybe t (\(TxtI t') -> t') (M.lookup t $ csNameMap cs) + +staticIdents :: (ShortText -> ShortText) + -> StaticInfo + -> StaticInfo +staticIdents f (StaticInfo i v cc) = StaticInfo (f i) (staticIdentsV f v) cc + +staticIdentsV ::(ShortText -> ShortText) -> StaticVal -> StaticVal +staticIdentsV f (StaticFun i args) = StaticFun (f i) (staticIdentsA f <$> args) +staticIdentsV f (StaticThunk (Just (i, args))) = StaticThunk . Just $ + (f i, staticIdentsA f <$> args) +staticIdentsV f (StaticData con args) = StaticData (f con) (staticIdentsA f <$> args) +staticIdentsV f (StaticList xs t) = StaticList (staticIdentsA f <$> xs) (f <$> t) +staticIdentsV _ x = x + +-- staticIdentsA :: Traversal' StaticArg ShortText +staticIdentsA :: (ShortText -> ShortText) -> StaticArg -> StaticArg +staticIdentsA f (StaticObjArg t) = StaticObjArg $! f t +staticIdentsA _ x = x + + +{- + simple encoding of naturals using only printable low char points, + rely on gzip to compress repeating sequences, + most significant bits first + 1 byte: ascii code 32-123 (0-89), \ and " unused + 2 byte: 124 a b (90-8189) + 3 byte: 125 a b c (8190-737189) +-} +encodeStr :: HasDebugCallStack => [Int] -> String +encodeStr = concatMap encodeChr + where + c :: HasDebugCallStack => Int -> Char + c i | i > 90 || i < 0 = error ("encodeStr: c " ++ show i) + | i >= 59 = chr (34+i) + | i >= 2 = chr (33+i) + | otherwise = chr (32+i) + encodeChr :: HasDebugCallStack => Int -> String + encodeChr i + | i < 0 = panic "encodeStr: negative" + | i <= 89 = [c i] + | i <= 8189 = let (c1, c2) = (i - 90) `divMod` 90 in [chr 124, c c1, c c2] + | i <= 737189 = let (c2a, c3) = (i - 8190) `divMod` 90 + (c1, c2) = c2a `divMod` 90 + in [chr 125, c c1, c c2, c c3] + | otherwise = panic "encodeStr: overflow" + +entryIdx :: HasDebugCallStack + => String + -> CompactorState + -> ShortText + -> Int +entryIdx msg cs i = fromMaybe lookupParent (M.lookup i' (csEntries cs)) + where + (TxtI i') = lookupRenamed cs (TxtI i) + lookupParent = maybe err + (+ csNumEntries cs) + (M.lookup i' (csParentEntries cs)) + err = panic (msg ++ ": invalid entry: " ++ T.unpack i') + +objectIdx :: HasDebugCallStack + => String + -> CompactorState + -> ShortText + -> Int +objectIdx msg cs i = fromMaybe lookupParent (M.lookup i' (csStatics cs)) + where + (TxtI i') = lookupRenamed cs (TxtI i) + lookupParent = maybe err + (+ csNumStatics cs) + (M.lookup i' (csParentStatics cs)) + err = panic (msg ++ ": invalid static: " ++ T.unpack i') + +labelIdx :: HasDebugCallStack + => String + -> CompactorState + -> ShortText + -> Int +labelIdx msg cs l = fromMaybe lookupParent (M.lookup l (csLabels cs)) + where + lookupParent = maybe err + (+ csNumLabels cs) + (M.lookup l (csParentLabels cs)) + err = panic (msg ++ ": invalid label: " ++ T.unpack l) + +encodeInfo :: HasDebugCallStack + => CompactorState + -> ClosureInfo -- ^ information to encode + -> [Int] +encodeInfo cs (ClosureInfo _var regs name layout typ static) + | CIThunk <- typ = 0 : ls + | (CIFun _arity regs0) <- typ, regs0 /= argSize regs + = panic ("encodeInfo: inconsistent register metadata for " ++ T.unpack name) + | (CIFun arity _regs0) <- typ = [1, arity, encodeRegs regs] ++ ls + | (CICon tag) <- typ = [2, tag] ++ ls + | CIStackFrame <- typ = [3, encodeRegs regs] ++ ls +-- (CIPap ar) <- typ = [4, ar] ++ ls -- these should only appear during runtime + | otherwise = panic $ + "encodeInfo, unexpected closure type: " ++ show typ + where + ls = encodeLayout layout ++ encodeSrt static + encodeLayout CILayoutVariable = [0] + encodeLayout (CILayoutUnknown s) = [s+1] + encodeLayout (CILayoutFixed s _vs) = [s+1] + encodeSrt (CIStaticRefs rs) = length rs : map (objectIdx "encodeInfo" cs) rs + encodeRegs CIRegsUnknown = 0 + encodeRegs (CIRegs skip regTypes) = let nregs = sum (map varSize regTypes) + in encodeRegsTag skip nregs + encodeRegsTag skip nregs + | skip < 0 || skip > 1 = panic "encodeRegsTag: unexpected skip" + | otherwise = 1 + nregs `shiftL` 1 + skip + argSize (CIRegs skip regTypes) = sum (map varSize regTypes) - 1 + skip + argSize _ = 0 + +encodeStatic :: HasDebugCallStack + => CompactorState + -> StaticInfo + -> [Int] +encodeStatic cs si = + -- U.trace' ("encodeStatic: " ++ show si) + encodeStatic0 cs si + +encodeStatic0 :: HasDebugCallStack + => CompactorState + -> StaticInfo + -> [Int] +encodeStatic0 cs (StaticInfo _to sv _) + | StaticFun f args <- sv = + [1, entry f, length args] ++ concatMap encodeArg args + | StaticThunk (Just (t, args)) <- sv = + [2, entry t, length args] ++ concatMap encodeArg args + | StaticThunk Nothing <- sv = + [0] + | StaticUnboxed (StaticUnboxedBool b) <- sv = + [3 + fromEnum b] + | StaticUnboxed (StaticUnboxedInt _i) <- sv = + [5] -- ++ encodeInt i + | StaticUnboxed (StaticUnboxedDouble _d) <- sv = + [6] -- ++ encodeDouble d + | (StaticUnboxed _) <- sv = [] -- unboxed strings have their own table +-- | StaticString t <- sv = [7, T.length t] ++ map encodeChar (T.unpack t) +-- | StaticBin bs <- sv = [8, BS.length bs] ++ map fromIntegral (BS.unpack bs) + | StaticList [] Nothing <- sv = + [8] + | StaticList args t <- sv = + [9, length args] ++ + maybe [0] (\t' -> [1, obj t']) t ++ + concatMap encodeArg (reverse args) + | StaticData con args <- sv = + (if length args <= 6 + then [11+length args] + else [10,length args]) ++ + [entry con] ++ + concatMap encodeArg args + where + obj = objectIdx "encodeStatic" cs + entry = entryIdx "encodeStatic" cs + lbl = labelIdx "encodeStatic" cs + -- | an argument is either a reference to a heap object or a primitive value + encodeArg (StaticLitArg (BoolLit b)) = + [0 + fromEnum b] + encodeArg (StaticLitArg (IntLit 0)) = + [2] + encodeArg (StaticLitArg (IntLit 1)) = + [3] + encodeArg (StaticLitArg (IntLit i)) = + 4 : encodeInt i + encodeArg (StaticLitArg NullLit) = + [5] + encodeArg (StaticLitArg (DoubleLit d)) = + 6 : encodeDouble d + encodeArg (StaticLitArg (StringLit s)) = + 7 : encodeString s + encodeArg (StaticLitArg (BinLit b)) = + 8 : encodeBinary b + encodeArg (StaticLitArg (LabelLit b l)) = + [9, fromEnum b, lbl l] + encodeArg (StaticConArg con args) = + [10, entry con, length args] ++ concatMap encodeArg args + encodeArg (StaticObjArg t) = + [11 + obj t] + -- encodeArg x = panic ("encodeArg: unexpected: " ++ show x) + -- encodeChar = ord -- fixme make characters more readable + +-- FIXME: Jeff (2022,03): Use FastString or ShortByteString and remove this +-- serialization/deserialization +encodeString :: ShortText -> [Int] +encodeString = encodeBinary . BSC.pack . T.unpack + +-- ByteString is prefixed with length, then blocks of 4 numbers encoding 3 bytes +encodeBinary :: BS.ByteString -> [Int] +encodeBinary bs = BS.length bs : go bs + where + go b | BS.null b = [] + | l == 1 = let b0 = b `BS.index` 0 + in map fromIntegral [ b0 `shiftR` 2, (b0 Bits..&. 3) `shiftL` 4 ] + | l == 2 = let b0 = b `BS.index` 0 + b1 = b `BS.index` 1 + in map fromIntegral [ b0 `shiftR` 2 + , ((b0 Bits..&. 3) `shiftL` 4) Bits..|. (b1 `shiftR` 4) + , (b1 Bits..&. 15) `shiftL` 2 + ] + | otherwise = let b0 = b `BS.index` 0 + b1 = b `BS.index` 1 + b2 = b `BS.index` 2 + in map fromIntegral [ b0 `shiftR` 2 + , ((b0 Bits..&. 3) `shiftL` 4) Bits..|. (b1 `shiftR` 4) + , ((b1 Bits..&. 15) `shiftL` 2) Bits..|. (b2 `shiftR` 6) + , b2 Bits..&. 63 + ] ++ go (BS.drop 3 b) + where l = BS.length b + +encodeInt :: Integer -> [Int] +encodeInt i + | i >= -10 && i < encodeMax - 11 = [fromIntegral i + 12] + | i > 2^(31::Int)-1 || i < -2^(31::Int) + = panic "encodeInt: integer outside 32 bit range" + | otherwise = let i' :: Int32 = fromIntegral i + in [ 0 + , fromIntegral ((i' `shiftR` 16) Bits..&. 0xffff) + , fromIntegral (i' Bits..&. 0xffff) + ] + +-- encode a possibly 53 bit int +encodeSignificand :: Integer -> [Int] +encodeSignificand i + | i >= -10 && i < encodeMax - 11 = [fromIntegral i + 12] + | i > 2^(53::Int) || i < -2^(53::Int) + = panic ("encodeInt: integer outside 53 bit range: " ++ show i) + | otherwise = let i' = abs i + in (if i < 0 then 0 else 1) : + map (\r -> fromIntegral ((i' `shiftR` r) Bits..&. 0xffff)) + [48,32,16,0] + +encodeDouble :: SaneDouble -> [Int] +encodeDouble (SaneDouble d) + | isNegativeZero d = [0] + | d == 0 = [1] + | isInfinite d && d > 0 = [2] + | isInfinite d = [3] + | isNaN d = [4] + | abs exponent <= 30 + = (6 + fromIntegral exponent + 30) : encodeSignificand significand + | otherwise + = [5] ++ encodeInt (fromIntegral exponent) ++ encodeSignificand significand + where + (significand, exponent) = decodeFloat d + +encodeMax :: Integer +encodeMax = 737189 + +{- | + The Base data structure contains the information we need + to do incremental linking against a base bundle. + + base file format: + GHCJSBASE + [renamer state] + [linkedPackages] + [packages] + [modules] + [symbols] + -} + +renderBase :: Base -- ^ base metadata + -> BL.ByteString -- ^ rendered result +renderBase = DB.runPut . putBase + +loadBase :: FilePath -> IO Base +loadBase file = DB.runGet (getBase file) <$> BL.readFile file + +staticInfoArgs :: Applicative f => (StaticArg -> f StaticArg) -> StaticInfo -> f StaticInfo +staticInfoArgs f (StaticInfo si sv sa) = StaticInfo si <$> staticValArgs f sv <*> pure sa + +staticValArgs :: Applicative f => (StaticArg -> f StaticArg) -> StaticVal -> f StaticVal +staticValArgs f (StaticFun fn as) = StaticFun fn <$> traverse f as +staticValArgs f (StaticThunk (Just (t, as))) = StaticThunk . Just . (t,) <$> traverse f as +staticValArgs f (StaticData c as) = StaticData c <$> traverse f as +staticValArgs f (StaticList as mt) = StaticList <$> traverse f as <*> pure mt +staticValArgs _ x = pure x + +compact :: JSLinkConfig + -> StgToJSConfig + -> CompactorState + -> [ShortText] + -> [LinkedUnit] + -> (CompactorState, [JStat], JStat) +compact ln_cfg cfg cs0 rtsDeps0 input0 +-- | dumpHashes' input + = + let rtsDeps1 = rtsDeps0 ++ + map (<> "_e") rtsDeps0 ++ + map (<> "_con_e") rtsDeps0 + -- FIXME: Jeff (2022,03): I've removed the real worker @packStrings@ to + -- get the Linker compiling. This linker will be very slow, when the time + -- comes, we need to uncomment packStrings and actually implement it to do + -- the link time compiling + -- (cs1, input1) = packStrings settings dflags cs0 input0 + in renameInternals ln_cfg cfg cs0 rtsDeps1 input0 + + +-- hash compactification + +dedupeBodies :: [ShortText] + -> [(JStat, [ClosureInfo], [StaticInfo])] + -> (JStat, [(JStat, [ClosureInfo], [StaticInfo])]) +dedupeBodies rtsDeps input = (renderBuildFunctions bfN bfCB, input') + where + (bfN, bfCB, input') = rewriteBodies globals hdefsR hdefs input + hdefs = M.fromListWith (\(s,ks1) (_,ks2) -> (s, ks1++ks2)) + (map (\(k, s, bs) -> (bs, (s, [k]))) hdefs0) + hdefsR = M.fromList $ map (\(k, _, bs) -> (k, bs)) hdefs0 + hdefs0 :: [(ShortText, Int, BS.ByteString)] + hdefs0 = concatMap (\(b,_,_) -> + (map (\(k,h) -> + let (s,fh, _deps) = finalizeHash' h + in (k, s, fh)) + . hashDefinitions globals) b + ) + input + globals = foldl' (flip S.delete) (findAllGlobals input) rtsDeps + +renderBuildFunctions :: [BuildFunction] -> [BuildFunction] -> JStat +renderBuildFunctions normalBfs cycleBreakerBfs = + cycleBr1 <> mconcat (map renderBuildFunction normalBfs) <> cycleBr2 + where + renderCbr f = mconcat (zipWith f cycleBreakerBfs [1..]) + cbName :: Int -> ShortText + cbName = T.pack . ("h$$$cb"++) . show + cycleBr1 = renderCbr $ \bf n -> + let args = map (TxtI . T.pack . ('a':) . show) [1..bfArgs bf] + body = ReturnStat $ ApplExpr (ValExpr (JVar (TxtI $ cbName n))) + (map (ValExpr . JVar) args) + in DeclStat (TxtI (bfName bf)) <> + AssignStat (ValExpr (JVar (TxtI (bfName bf)))) + (ValExpr (JFunc args body)) + cycleBr2 = renderCbr $ \bf n -> renderBuildFunction (bf { bfName = cbName n }) + +data BuildFunction = BuildFunction + { bfName :: !ShortText + , bfBuilder :: !Ident + , bfDeps :: [ShortText] + , bfArgs :: !Int + } deriving (Eq, Ord, Show) + +{- + Stack frame initialization order is important when code is reused: + all dependencies have to be ready when the closure is built. + + This function sorts the initializers and returns an additional list + of cycle breakers, which are built in a two-step fashion + -} +sortBuildFunctions :: [BuildFunction] -> ([BuildFunction], [BuildFunction]) +sortBuildFunctions bfs = (map snd normBFs, map snd cbBFs) + where + (normBFs, cbBFs) = partition (not.fst) . concatMap fromSCC $ sccs bfs + bfm :: Map ShortText BuildFunction + bfm = M.fromList (map (\x -> (bfName x, x)) bfs) + fromSCC :: G.SCC ShortText -> [(Bool, BuildFunction)] + fromSCC (G.AcyclicSCC x) = [(False, bfm M.! x)] + fromSCC (G.CyclicSCC xs) = breakCycles xs + sccs :: [BuildFunction] -> [G.SCC ShortText] + sccs b = G.stronglyConnComp $ + map (\bf -> let n = bfName bf in (n, n, bfDeps bf)) b + {- + finding the maximum acyclic subgraph is the Minimum Feedback Arc Set problem, + which is NP-complete. We use an approximation here. + -} + breakCycles :: [ShortText] -> [(Bool, BuildFunction)] + breakCycles nodes = + (True, bfm M.! selected) + : concatMap fromSCC (sccs (map (bfm M.!) $ filter (/=selected) nodes)) + where + outDeg, inDeg :: Map ShortText Int + outDeg = M.fromList $ map (\n -> (n, length (bfDeps (bfm M.! n)))) nodes + inDeg = M.fromListWith (+) (map (,1) . concatMap (bfDeps . (bfm M.!)) $ nodes) + -- ELS heuristic (Eades et. al.) + selected :: ShortText + selected = maximumBy (compare `on` (\x -> outDeg M.! x - inDeg M.! x)) nodes + +rewriteBodies :: Set ShortText + -> Map ShortText BS.ByteString + -> Map BS.ByteString (Int, [ShortText]) + -> [LinkedUnit] + -> ([BuildFunction], [BuildFunction], [LinkedUnit]) +rewriteBodies globals idx1 idx2 input = (bfsNormal, bfsCycleBreaker, input') + where + (bfs1, input') = unzip (map rewriteBlock input) + (bfsNormal, bfsCycleBreaker) = sortBuildFunctions (concat bfs1) + + -- this index only contains the entries we actually want to dedupe + idx2' :: Map BS.ByteString (Int, [ShortText]) + idx2' = M.filter (\(s, xs) -> dedupeBody (length xs) s) idx2 + + rewriteBlock :: (JStat, [ClosureInfo], [StaticInfo]) + -> ([BuildFunction], LinkedUnit) + rewriteBlock (st, cis, sis) = + let (bfs, st') = rewriteFunctions st + -- remove the declarations for things that we just deduped + st'' = removeDecls (S.fromList $ map bfName bfs) st' + in (bfs, (st'', cis, sis)) + + removeDecls :: Set ShortText -> JStat -> JStat + removeDecls t (BlockStat ss) = BlockStat (map (removeDecls t) ss) + removeDecls t (DeclStat (TxtI i)) + | i `S.member` t = mempty + removeDecls _ s = s + + rewriteFunctions :: JStat -> ([BuildFunction], JStat) + rewriteFunctions (BlockStat ss) = + let (bfs, ss') = unzip (map rewriteFunctions ss) + in (concat bfs, BlockStat ss') + rewriteFunctions (AssignStat (ValExpr (JVar (TxtI i))) + (ValExpr (JFunc args st))) + | Just h <- M.lookup i idx1 + , Just (_s, his) <- M.lookup h idx2' = + let (bf, st') = rewriteFunction i h his args st in ([bf], st') + rewriteFunctions x = ([], x) + + rewriteFunction :: ShortText + -> BS.ByteString + -> [ShortText] + -> [Ident] + -> JStat + -> (BuildFunction, JStat) + rewriteFunction i h his args body + | i == iFirst = (bf, createFunction i idx g args body) + | otherwise = (bf, mempty) + where + bf :: BuildFunction + bf = BuildFunction i (buildFunId idx) g (length args) + g :: [ShortText] + g = findGlobals globals body + iFirst = head his + Just idx = M.lookupIndex h idx2' + + createFunction :: ShortText + -> Int + -> [ShortText] + -> [Ident] + -> JStat + -> JStat + createFunction _i idx g args body = + DeclStat bi <> + AssignStat (ValExpr (JVar bi)) + (ValExpr (JFunc bargs bbody)) + where + ng = length g + bi = buildFunId idx + bargs :: [Ident] + bargs = map (TxtI . T.pack . ("h$$$g"++) . show) [1..ng] + bgm :: Map ShortText Ident + bgm = M.fromList (zip g bargs) + bbody :: JStat + bbody = ReturnStat (ValExpr $ JFunc args ibody) + ibody :: JStat + ibody = identsS' (\ti@(TxtI i) -> fromMaybe ti (M.lookup i bgm)) body + +renderBuildFunction :: BuildFunction -> JStat +renderBuildFunction (BuildFunction i bfid deps _nargs) = + DeclStat (TxtI i) <> + AssignStat (ValExpr (JVar (TxtI i))) + (ApplExpr (ValExpr (JVar bfid)) (map (ValExpr . JVar . TxtI) deps)) + +dedupeBody :: Int -> Int -> Bool +dedupeBody n size + | n < 2 = False + | size * n > 200 = True + | n > 6 = True + | otherwise = False + +buildFunId :: Int -> Ident +buildFunId i = TxtI (T.pack $ "h$$$f" ++ show i) + +-- result is ordered, does not contain duplicates +findGlobals :: Set ShortText -> JStat -> [ShortText] +findGlobals globals stat = nub' + (filter isGlobal . map (\(TxtI i) -> i) $ identsS stat ) + where + locals = S.fromList (findLocals stat) + isGlobal i = i `S.member` globals && i `S.notMember` locals + +findLocals :: JStat -> [ShortText] +findLocals (BlockStat ss) = concatMap findLocals ss +findLocals (DeclStat (TxtI i)) = [i] +findLocals _ = [] + +nub' :: Ord a => [a] -> [a] +nub' = go S.empty + where + go _ [] = [] + go s (x:xs) + | x `S.member` s = go s xs + | otherwise = x : go (S.insert x s) xs + +data HashIdx = HashIdx (Map ShortText Hash) (Map Hash ShortText) + +dedupe :: [ShortText] + -> [(JStat, [ClosureInfo], [StaticInfo])] + -> [(JStat, [ClosureInfo], [StaticInfo])] +dedupe rtsDeps input +-- | dumpHashIdx idx + = + map (\(st,cis,sis) -> dedupeBlock idx st cis sis) input + where + idx = HashIdx hashes hr + hashes0 = buildHashes rtsDeps input + hashes = foldl' (flip M.delete) hashes0 rtsDeps + hr = fmap pickShortest + (M.fromListWith (++) $ + map (\(i, h) -> (h, [i])) (M.toList hashes)) + pickShortest :: [ShortText] -> ShortText + pickShortest = minimumBy (compare `on` T.codepointLength) + +dedupeBlock :: HashIdx + -> JStat + -> [ClosureInfo] + -> [StaticInfo] + -> LinkedUnit +dedupeBlock hi st ci si = + ( dedupeStat hi st + , mapMaybe (dedupeClosureInfo hi) ci + , mapMaybe (dedupeStaticInfo hi) si + ) + +dedupeStat :: HashIdx -> JStat -> JStat +dedupeStat hi = go + where + go (BlockStat ss) = BlockStat (map go ss) + go s@(DeclStat (TxtI i)) + | not (isCanon hi i) = mempty + | otherwise = s + go (AssignStat v@(ValExpr (JVar (TxtI i))) e) + | not (isCanon hi i) = mempty + | otherwise = AssignStat v (identsE' (toCanonI hi) e) + -- rewrite identifiers in e + go s = identsS' (toCanonI hi) s + +dedupeClosureInfo :: HashIdx -> ClosureInfo -> Maybe ClosureInfo +dedupeClosureInfo hi (ClosureInfo i rs n l ty st) + | isCanon hi i = Just (ClosureInfo i rs n l ty (dedupeCIStatic hi st)) +dedupeClosureInfo _ _ = Nothing + +dedupeStaticInfo :: HashIdx -> StaticInfo -> Maybe StaticInfo +dedupeStaticInfo hi (StaticInfo i val ccs) + | isCanon hi i = Just (StaticInfo i (dedupeStaticVal hi val) ccs) +dedupeStaticInfo _ _ = Nothing + +dedupeCIStatic :: HashIdx -> CIStatic -> CIStatic +dedupeCIStatic hi (CIStaticRefs refs) = CIStaticRefs (nub $ map (toCanon hi) refs) + +dedupeStaticVal :: HashIdx -> StaticVal -> StaticVal +dedupeStaticVal hi (StaticFun t args) = + StaticFun (toCanon hi t) (map (dedupeStaticArg hi) args) +dedupeStaticVal hi (StaticThunk (Just (o, args))) = + StaticThunk (Just (toCanon hi o, map (dedupeStaticArg hi) args)) +dedupeStaticVal hi (StaticData dcon args) = + StaticData (toCanon hi dcon) (map (dedupeStaticArg hi) args) +dedupeStaticVal hi (StaticList args lt) = + StaticList (map (dedupeStaticArg hi) args) (fmap (toCanon hi) lt) +dedupeStaticVal _ v = v -- unboxed value or thunk with alt init, no rewrite needed + +dedupeStaticArg :: HashIdx -> StaticArg -> StaticArg +dedupeStaticArg hi (StaticObjArg o) + = StaticObjArg (toCanon hi o) +dedupeStaticArg hi (StaticConArg c args) + = StaticConArg (toCanon hi c) + (map (dedupeStaticArg hi) args) +dedupeStaticArg _hi a@StaticLitArg{} = a + +isCanon :: HashIdx -> ShortText -> Bool +isCanon (HashIdx a b) t + | Nothing <- la = True + | Just h <- la + , Just t' <- M.lookup h b = t == t' + | otherwise = False + where la = M.lookup t a + +toCanon :: HashIdx -> ShortText -> ShortText +toCanon (HashIdx a b) t + | Just h <- M.lookup t a + , Just t' <- M.lookup h b = t' + | otherwise = t + +toCanonI :: HashIdx -> Ident -> Ident +toCanonI hi (TxtI x) = TxtI (toCanon hi x) + +type Hash = (BS.ByteString, [ShortText]) + +data HashBuilder = HashBuilder !BB.Builder ![ShortText] + +instance Monoid HashBuilder where + mempty = HashBuilder mempty mempty + +instance Semigroup HashBuilder where + (<>) (HashBuilder b1 l1) (HashBuilder b2 l2) = + HashBuilder (b1 <> b2) (l1 <> l2) + +{- +dumpHashIdx :: HashIdx -> Bool +dumpHashIdx hi@(HashIdx ma mb) = + let ks = M.keys ma + difCanon i = let i' = toCanon hi i + in if i == i' then Nothing else Just i' + writeHashIdx = do + putStrLn "writing hash idx" + T.writeFile "hashidx.txt" + (T.unlines . sort $ mapMaybe (\i -> fmap ((i <> " -> ") <>) (difCanon i)) ks) + putStrLn "writing full hash idx" + T.writeFile "hashIdxFull.txt" + (T.unlines . sort $ M.keys ma) + in unsafePerformIO writeHashIdx `seq` True +-} +-- debug thing +{- +dumpHashes' :: [(JStat, [ClosureInfo], [StaticInfo])] -> Bool +dumpHashes' input = + let hashes = buildHashes input + writeHashes = do + putStrLn "writing hashes" + BL.writeFile "hashes.json" (Aeson.encode $ dumpHashes hashes) + in unsafePerformIO writeHashes `seq` True +-} +buildHashes :: [ShortText] -> [LinkedUnit] -> Map ShortText Hash +buildHashes rtsDeps xss + -- - | dumpHashes0 hashes0 + = fixHashes (fmap finalizeHash hashes0) + where + globals = foldl' (flip S.delete) (findAllGlobals xss) rtsDeps + hashes0 = M.unions (map buildHashesBlock xss) + buildHashesBlock (st, cis, sis) = + let hdefs = hashDefinitions globals st + hcis = map hashClosureInfo cis + hsis = map hashStaticInfo (filter (not . ignoreStatic) sis) + in M.fromList (combineHashes hdefs hcis ++ hsis) + +findAllGlobals :: [LinkedUnit] -> Set ShortText +findAllGlobals xss = S.fromList $ concatMap f xss + where + f (_, cis, sis) = + map (\(ClosureInfo i _ _ _ _ _) -> i) cis ++ + map (\(StaticInfo i _ _) -> i) sis + +fixHashes :: Map ShortText Hash -> Map ShortText Hash +fixHashes hashes = fmap (second (map replaceHash)) hashes + where + replaceHash :: ShortText -> ShortText + replaceHash h = maybe h T.pack (M.lookup h finalHashes) + hashText bs = "h$$$" <> utf8DecodeByteString bs + sccs :: [[ShortText]] + sccs = map fromSCC $ + G.stronglyConnComp (map (\(k, (_bs, deps)) -> (k, k, deps)) (M.toList hashes)) + ks = M.keys hashes + invDeps = M.fromListWith (++) (concatMap mkInvDeps $ M.toList hashes) + mkInvDeps (k, (_, ds)) = map (,[k]) ds + finalHashes = fmap hashText (fixHashesIter 500 invDeps ks ks sccs hashes mempty) + +fromSCC :: G.SCC a -> [a] +fromSCC (G.AcyclicSCC x) = [x] +fromSCC (G.CyclicSCC xs) = xs + +fixHashesIter :: Int + -> Map ShortText [ShortText] + -> [ShortText] + -> [ShortText] + -> [[ShortText]] + -> Map ShortText Hash + -> Map ShortText BS.ByteString + -> Map ShortText BS.ByteString +fixHashesIter n invDeps allKeys checkKeys sccs hashes finalHashes + -- - | unsafePerformIO (putStrLn ("fixHashesIter: " ++ show n)) `seq` False = undefined + | n < 0 = finalHashes + | not (null newHashes) = fixHashesIter (n-1) invDeps allKeys checkKeys' sccs hashes + (M.union finalHashes $ M.fromList newHashes) + -- - | unsafePerformIO (putStrLn ("fixHashesIter killing cycles:\n" ++ show rootSCCs)) `seq` False = undefined + | not (null rootSCCs) = fixHashesIter n {- -1 -} invDeps allKeys allKeys sccs hashes + (M.union finalHashes (M.fromList $ concatMap hashRootSCC rootSCCs)) + | otherwise = finalHashes + where + checkKeys' | length newHashes > M.size hashes `div` 10 = allKeys + | otherwise = S.toList . S.fromList $ concatMap newHashDeps newHashes + newHashDeps (k, _) = fromMaybe [] (M.lookup k invDeps) + mkNewHash k | M.notMember k finalHashes + , Just (hb, htxt) <- M.lookup k hashes + , Just bs <- mapM (`M.lookup` finalHashes) htxt = + Just (k, makeFinalHash hb bs) + | otherwise = Nothing + newHashes :: [(ShortText, BS.ByteString)] + newHashes = mapMaybe mkNewHash checkKeys + rootSCCs :: [[ShortText]] + rootSCCs = filter isRootSCC sccs + isRootSCC :: [ShortText] -> Bool + isRootSCC scc = not (all (`M.member` finalHashes) scc) && all check scc + where + check n = let Just (_bs, out) = M.lookup n hashes + in all checkEdge out + checkEdge e = e `S.member` s || e `M.member` finalHashes + s = S.fromList scc + hashRootSCC :: [ShortText] -> [(ShortText,BS.ByteString)] + hashRootSCC scc + | any (`M.member` finalHashes) scc = panic "Gen2.Compactor.hashRootSCC: has finalized nodes" + | otherwise = map makeHash toHash + where + makeHash k = let Just (bs,deps) = M.lookup k hashes + luds = map lookupDep deps + in (k, makeFinalHash bs luds) + lookupDep :: ShortText -> BS.ByteString + lookupDep d + | Just b <- M.lookup d finalHashes = b + | Just i <- M.lookup d toHashIdx + = grpHash <> (utf8EncodeString . show $ i) + | otherwise + = panic $ "Gen2.Compactor.hashRootSCC: unknown key: " ++ + T.unpack d + toHashIdx :: M.Map ShortText Integer + toHashIdx = M.fromList $ zip toHash [1..] + grpHash :: BS.ByteString + grpHash = BL.toStrict + . BB.toLazyByteString + $ mconcat (map (mkGrpHash . (hashes M.!)) toHash) + mkGrpHash (h, deps) = + let deps' = mapMaybe (`M.lookup` finalHashes) deps + in BB.byteString h <> + BB.int64LE (fromIntegral $ length deps') <> + mconcat (map BB.byteString deps') + toHash :: [ShortText] + toHash = sortBy (compare `on` fst . (hashes M.!)) scc + +makeFinalHash :: BS.ByteString -> [BS.ByteString] -> BS.ByteString +makeFinalHash b bs = mconcat (b:bs) +-- FIXME: Jeff (2022,03): I've removed the SHA256.hash function which would be +-- producing this final bytestring. Do we need it? If so how to replace it? + +-- do not deduplicate thunks +ignoreStatic :: StaticInfo -> Bool +ignoreStatic (StaticInfo _ StaticThunk {} _) = True +ignoreStatic _ = False + +-- combine hashes from x and y, leaving only those which have an entry in both +combineHashes :: [(ShortText, HashBuilder)] + -> [(ShortText, HashBuilder)] + -> [(ShortText, HashBuilder)] +combineHashes x y = M.toList $ M.intersectionWith (<>) + (M.fromList x) + (M.fromList y) + +{- +dumpHashes0 :: Map ShortText HashBuilder -> Bool +dumpHashes0 hashes = unsafePerformIO writeHashes `seq` True + where + hashLine (n, HashBuilder bb txt) = + n <> " ->\n " <> + escapeBS (BB.toLazyByteString bb) <> "\n [" <> T.intercalate " " txt <> "]\n" + escapeBS :: BL.ByteString -> T.Text + escapeBS = T.pack . concatMap escapeCH . BL.unpack + escapeCH c | c < 32 || c > 127 = '\\' : show c + | c == 92 = "\\\\" + | otherwise = [chr (fromIntegral c)] + + writeHashes = do + putStrLn "writing hashes0" + T.writeFile "hashes0.dump" (T.unlines $ map hashLine (M.toList hashes)) + +dumpHashes :: Map ShortText Hash -> Value +dumpHashes idx = toJSON iidx + where + iidx :: Map ShortText [(Text, [ShortText])] + iidx = M.fromListWith (++) $ + map (\(t, (b, deps)) -> (TE.decodeUtf8 (B16.encode b), [(t,deps)])) (M.toList idx) +-} + +ht :: Int8 -> HashBuilder +ht x = HashBuilder (BB.int8 x) [] + +hi :: Int -> HashBuilder +hi x = HashBuilder (BB.int64LE $ fromIntegral x) [] + +hi' :: (Show a, Integral a) => a -> HashBuilder +hi' x | x' > toInteger (maxBound :: Int64) || x' < toInteger (minBound :: Int64) = + panic $ "Gen2.Compactor.hi': integer out of range: " ++ show x + | otherwise = HashBuilder (BB.int64LE $ fromInteger x') [] + where + x' = toInteger x + +hd :: Double -> HashBuilder +hd d = HashBuilder (BB.doubleLE d) [] + +htxt :: ShortText -> HashBuilder +htxt x = HashBuilder (BB.int64LE (fromIntegral $ BS.length bs) <> BB.byteString bs) [] + where + bs = utf8EncodeString $ T.unpack x + +hobj :: ShortText -> HashBuilder +hobj x = HashBuilder (BB.int8 127) [x] + +hb :: BS.ByteString -> HashBuilder +hb x = HashBuilder (BB.int64LE (fromIntegral $ BS.length x) <> BB.byteString x) [] + +hashDefinitions :: Set ShortText -> JStat -> [(ShortText, HashBuilder)] +hashDefinitions globals st = + let defs = findDefinitions st + in map (uncurry (hashSingleDefinition globals)) defs + +findDefinitions :: JStat -> [(Ident, JExpr)] +findDefinitions (BlockStat ss) = concatMap findDefinitions ss +findDefinitions (AssignStat (ValExpr (JVar i)) e) = [(i,e)] +findDefinitions _ = [] + +hashSingleDefinition :: Set ShortText -> Ident -> JExpr -> (ShortText, HashBuilder) +hashSingleDefinition globals (TxtI i) expr = (i, ht 0 <> render st <> mconcat (map hobj globalRefs)) + where + globalRefs = nub $ filter (`S.member` globals) (map (\(TxtI i) -> i) (identsE expr)) + globalMap = M.fromList $ zip globalRefs (map (T.pack . ("h$$$global_"++) . show) [(1::Int)..]) + expr' = identsE' (\i@(TxtI t) -> maybe i TxtI (M.lookup t globalMap)) expr + st = AssignStat (ValExpr (JVar (TxtI "dummy"))) expr' + render = htxt . T.pack. show . pretty + + +-- FIXME: Jeff (2022,03): reduce the redundancy between these idents functions +-- and the idents functions in GHC.JS.Transform These helper functions also +-- exist in non-ticked for, e.g., @identsE@ in GHC.JS.Transform. These are +-- essential Functor instances over the JS syntax tree. We rewrite them here for +-- consumers like hashSingleDefinition. Had we used the Transform version we'll +-- end up with a compiler error in @expr'@ since AssignStat takes an Expr, but +-- Transform.IdentsE returns [Ident] +identsE' :: (Ident -> Ident) -> JExpr -> JExpr +identsE' f (ValExpr v) = ValExpr $! identsV' f v +identsE' f (SelExpr e i) = SelExpr (identsE' f e) i -- do not rename properties +identsE' f (IdxExpr e1 e2) = IdxExpr (identsE' f e1) (identsE' f e2) +identsE' f (InfixExpr s e1 e2) = InfixExpr s (identsE' f e1) (identsE' f e2) +identsE' f (UOpExpr o e) = UOpExpr o $! identsE' f e +identsE' f (IfExpr e1 e2 e3) = IfExpr (identsE' f e1) (identsE' f e2) (identsE' f e3) +identsE' f (ApplExpr e es) = ApplExpr (identsE' f e) (identsE' f <$> es) +identsE' _ UnsatExpr{} = error "identsE': UnsatExpr" + +identsV' :: (Ident -> Ident) -> JVal -> JVal +identsV' f (JVar i) = JVar $! f i +identsV' f (JList xs) = JList $! (fmap . identsE') f xs +identsV' _ d@JDouble{} = d +identsV' _ i@JInt{} = i +identsV' _ s@JStr{} = s +identsV' _ r@JRegEx{} = r +identsV' f (JHash m) = JHash $! (fmap . identsE') f m +identsV' f (JFunc args s) = JFunc (fmap f args) (identsS' f s) +identsV' _ UnsatVal{} = error "identsV': UnsatVal" + +identsS' :: (Ident -> Ident) -> JStat -> JStat +identsS' f (DeclStat i) = DeclStat $! f i +identsS' f (ReturnStat e) = ReturnStat $! identsE' f e +identsS' f (IfStat e s1 s2) = IfStat (identsE' f e) (identsS' f s1) (identsS' f s2) +identsS' f (WhileStat b e s) = WhileStat b (identsE' f e) (identsS' f s) +identsS' f (ForInStat b i e s) = ForInStat b (f i) (identsE' f e) (identsS' f s) +identsS' f (SwitchStat e xs s) = SwitchStat (identsE' f e) (fmap (traverseCase f) xs) (identsS' f s) + where traverseCase g (e,s) = (identsE' g e, identsS' g s) +identsS' f (TryStat s1 i s2 s3) = TryStat (identsS' f s1) (f i) (identsS' f s2) (identsS' f s3) +identsS' f (BlockStat xs) = BlockStat $! identsS' f <$> xs +identsS' f (ApplStat e es) = ApplStat (identsE' f e) (identsE' f <$> es) +identsS' f (UOpStat op e) = UOpStat op $! identsE' f e +identsS' f (AssignStat e1 e2) = AssignStat (identsE' f e1) (identsE' f e2) +identsS' _ UnsatBlock{} = error "identsS': UnsatBlock" +identsS' f (LabelStat l s) = LabelStat l $! identsS' f s +identsS' _ b@BreakStat{} = b +identsS' _ c@ContinueStat{} = c + +hashClosureInfo :: ClosureInfo -> (ShortText, HashBuilder) +hashClosureInfo (ClosureInfo civ cir _cin cil cit cis) = + (civ, ht 1 <> hashCIRegs cir <> hashCILayout cil <> hashCIType cit <> hashCIStatic cis) + +hashStaticInfo :: StaticInfo -> (ShortText, HashBuilder) +hashStaticInfo (StaticInfo sivr sivl _sicc) = + (sivr, ht 2 <> hashStaticVal sivl) + +hashCIType :: CIType -> HashBuilder +hashCIType (CIFun a r) = ht 1 <> hi a <> hi r +hashCIType CIThunk = ht 2 +hashCIType (CICon c) = ht 3 <> hi c +hashCIType CIPap = ht 4 +hashCIType CIBlackhole = ht 5 +hashCIType CIStackFrame = ht 6 + + +hashCIRegs :: CIRegs -> HashBuilder +hashCIRegs CIRegsUnknown = ht 1 +hashCIRegs (CIRegs sk tys) = ht 2 <> hi sk <> hashList hashVT tys + +hashCILayout :: CILayout -> HashBuilder +hashCILayout CILayoutVariable = ht 1 +hashCILayout (CILayoutUnknown size) = ht 2 <> hi size +hashCILayout (CILayoutFixed n l) = ht 3 <> hi n <> hashList hashVT l + +hashCIStatic :: CIStatic -> HashBuilder +hashCIStatic CIStaticRefs{} = mempty -- hashList hobj xs -- we get these from the code + +hashList :: (a -> HashBuilder) -> [a] -> HashBuilder +hashList f xs = hi (length xs) <> mconcat (map f xs) + +hashVT :: VarType -> HashBuilder +hashVT = hi . fromEnum + +hashStaticVal :: StaticVal -> HashBuilder +hashStaticVal (StaticFun t args) = ht 1 <> hobj t <> hashList hashStaticArg args +hashStaticVal (StaticThunk mtn) = ht 2 <> hashMaybe htobj mtn + where + htobj (o, args) = hobj o <> hashList hashStaticArg args +hashStaticVal (StaticUnboxed su) = ht 3 <> hashStaticUnboxed su +hashStaticVal (StaticData dcon args) = ht 4 <> hobj dcon <> hashList hashStaticArg args +hashStaticVal (StaticList args lt) = ht 5 <> hashList hashStaticArg args <> hashMaybe hobj lt + +hashMaybe :: (a -> HashBuilder) -> Maybe a -> HashBuilder +hashMaybe _ Nothing = ht 1 +hashMaybe f (Just x) = ht 2 <> f x + +hashStaticUnboxed :: StaticUnboxed -> HashBuilder +hashStaticUnboxed (StaticUnboxedBool b) = ht 1 <> hi (fromEnum b) +hashStaticUnboxed (StaticUnboxedInt iv) = ht 2 <> hi' iv +hashStaticUnboxed (StaticUnboxedDouble sd) = ht 3 <> hashSaneDouble sd +hashStaticUnboxed (StaticUnboxedString str) = ht 4 <> hb str +hashStaticUnboxed (StaticUnboxedStringOffset str) = ht 5 <> hb str + + +hashStaticArg :: StaticArg -> HashBuilder +hashStaticArg (StaticObjArg t) = ht 1 <> hobj t +hashStaticArg (StaticLitArg sl) = ht 2 <> hashStaticLit sl +hashStaticArg (StaticConArg cn args) = ht 3 <> hobj cn <> hashList hashStaticArg args + +hashStaticLit :: StaticLit -> HashBuilder +hashStaticLit (BoolLit b) = ht 1 <> hi (fromEnum b) +hashStaticLit (IntLit iv) = ht 2 <> hi (fromIntegral iv) +hashStaticLit NullLit = ht 3 +hashStaticLit (DoubleLit d) = ht 4 <> hashSaneDouble d +hashStaticLit (StringLit tt) = ht 5 <> htxt tt +hashStaticLit (BinLit bs) = ht 6 <> hb bs +hashStaticLit (LabelLit bb ln) = ht 7 <> hi (fromEnum bb) <> htxt ln + +hashSaneDouble :: SaneDouble -> HashBuilder +hashSaneDouble (SaneDouble sd) = hd sd + +finalizeHash :: HashBuilder -> Hash +finalizeHash (HashBuilder hb tt) = +-- FIXME: Jeff (2022,03): I've removed the SHA256.hash function which would be +-- producing h. Do we need it? If so how to replace it? + let h = (BL.toStrict $ BB.toLazyByteString hb) + in h `seq` (h, tt) + +finalizeHash' :: HashBuilder -> (Int, BS.ByteString, [ShortText]) +finalizeHash' (HashBuilder hb tt) = + let b = BL.toStrict (BB.toLazyByteString hb) + bl = BS.length b +-- FIXME: Jeff (2022,03): I've removed the SHA256.hash function which would be +-- producing h. So it is purposeful that `h = b` looks unnecessary. Do we need +-- it? If so how to replace it? + h = b + in h `seq` bl `seq` (bl, h, tt) diff --git a/compiler/GHC/StgToJS/Linker/Dynamic.hs b/compiler/GHC/StgToJS/Linker/Dynamic.hs new file mode 100644 index 0000000000..d9dfb8380d --- /dev/null +++ b/compiler/GHC/StgToJS/Linker/Dynamic.hs @@ -0,0 +1,564 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE LambdaCase #-} + +----------------------------------------------------------------------------- +-- | +-- Module : GHC.StgToJS.Linker.Dynamic +-- Copyright : (c) The University of Glasgow 2001 +-- License : BSD-style (see the file LICENSE) +-- +-- Maintainer : Jeffrey Young <jeffrey.young@iohk.io> +-- Luite Stegeman <luite.stegeman@iohk.io> +-- Sylvain Henry <sylvain.henry@iohk.io> +-- Josh Meredith <josh.meredith@iohk.io> +-- Stability : experimental +-- +-- Various utilities for building and loading dynamic libraries, to make +-- Template Haskell work in GHCJS +-- +----------------------------- FIXMEs ------------------------------------------- +-- FIXME: Jeff (2022,04): This module may be completely redundant and consist of +-- duplicate code. Before we can remove it we must understand how it alters the +-- link code in the GHC.Linker directory. Thus for the time being we live with +-- it. In particular cases where we have duplicated functions in +-- GHC.Driver.Pipeline and GHC.Linker.Static, I've prefixed these with "js" +-- except for @link@ and @link'@, for example GHC.Linker.Static.linkStaticLib +-- becomes GHC.StgToJS.Linker.Dynamic.jsLinkStaticLib. +-- +-- FIXME: Jeff (2022,04): In jsLinkBinary I've commented out a line that +-- dispatches to different systools based on a boolean flag. This line seems to +-- be a relic of the old ghc api but I left it in since it will require +-- attention to be verified correct. I suspect that entire function is made +-- redundant by the corresponding GHC.Linker.Static.linkBinary anyhow. Please +-- see the fixme comment in jsLinkBinary +-- +-- FIXME: Jeff (2022,04): You'll notice that the APIs for the linking functions, +-- @link@, @link'@ etc are quite hairy with lots of inputs, and over half of +-- those inputs are environments of some sort including DynFlags. Of course this +-- is insanity. The API is forced due a let expression in +-- @GHC.StgToJS.Linker.Dynamic.link'@ which requires all linking functions to +-- have the same interface as GHC.Linker.Static.linkBinary. To Fix this we +-- should begin removing these environments by refining JSLinkConfig. For +-- example: +-- 1. Move any required flags from StgToJSConfig to JSLinkConfig +-- 2. Remove DynFlags by removing any opts needed for linking and add them to +-- JSLinkConfig +-- 3. Similar for HscEnv, we might need to decouple GHCs Linker from DynFlags in +-- order to have a proper api +----------------------------------------------------------------------------- + +module GHC.StgToJS.Linker.Dynamic + ( ghcjsLink + , ghcjsDoLink + ) where + +import GHC.StgToJS.Linker.Archive +import GHC.StgToJS.Linker.Types +import GHC.StgToJS.Linker.Utils +import qualified GHC.StgToJS.Linker.Linker as JSLink + +import GHC.Linker.Dynamic +import GHC.Linker.ExtraObj +import GHC.Linker.MacOS +import GHC.Linker.Static +import GHC.Linker.Static.Utils +import GHC.Linker.Types +import GHC.Linker.Unit +import GHC.Linker.Windows + +import GHC.Utils.Error +import GHC.Utils.Misc +import GHC.Unit.Module +import GHC.Unit.Module.ModIface +import GHC.Unit.Module.Deps +import GHC.Utils.Outputable hiding ((<>)) +import GHC.Driver.Phases +import GHC.Driver.Pipeline +import GHC.Driver.Session + +import GHC.Types.Unique.DFM +import GHC.Types.Basic +import qualified GHC.SysTools as SysTools + +import GHC.Unit.Home.ModInfo +import GHC.Unit.Info +import GHC.Unit.Env +import GHC.Unit.State +import GHC.Iface.Recomp + +import GHC.Platform +import Prelude + +import Control.Monad + +import qualified Data.ByteString.Lazy as B +import qualified Data.ByteString as BS +import Data.Functor ((<&>)) +import Data.List ( nub ) +import qualified GHC.Data.ShortText as T + +import System.Directory +import System.FilePath +import qualified GHC.Data.Maybe as Maybe +import GHC.Platform.Ways +import GHC.Utils.Logger +import GHC.Utils.Panic +import GHC.Driver.Env.Types +import GHC.StgToJS.Types +import GHC.Utils.TmpFs (TmpFs) +import qualified Data.Set as Set + +--------------------------------------------------------------------------------- +-- Link libraries + +ghcjsLink :: GhcjsEnv + -> JSLinkConfig + -> StgToJSConfig + -> HscEnv + -> [FilePath] -- ^ extra JS files + -> Bool -- ^ build JavaScript? + -> GhcLink -- ^ what to link + -> Logger + -> TmpFs + -> UnitEnv + -> Bool + -> HomePackageTable + -> IO SuccessFlag +ghcjsLink env lc_cfg cfg hsc_env extraJs buildJs ghcLink logger tmp_fs unit_env batch_attempt_linking pt + | ghcLink == LinkInMemory || ghcLink == NoLink = + return Succeeded + | ghcLink == LinkStaticLib || ghcLink == LinkDynLib = + if buildJs && Maybe.isJust (lcLinkJsLib lc_cfg) + then ghcjsLinkJsLib lc_cfg extraJs (hsc_dflags hsc_env) logger pt + else return Succeeded + | otherwise = do + when (buildJs && Maybe.isJust (lcLinkJsLib lc_cfg)) + (void $ ghcjsLinkJsLib lc_cfg extraJs (hsc_dflags hsc_env) logger pt) -- FIXME Jeff: (2022,04): use return value and remove void + link' env lc_cfg cfg hsc_env extraJs buildJs logger tmp_fs unit_env batch_attempt_linking pt + +ghcjsLinkJsLib :: JSLinkConfig + -> [FilePath] -- ^ extra JS files + -> DynFlags + -> Logger + -> HomePackageTable + -> IO SuccessFlag +ghcjsLinkJsLib settings jsFiles dflags _logger hpt + | Just jsLib <- lcLinkJsLib settings = do + let profSuff | WayProf `elem` ways dflags = "_p" + | otherwise = "" + libFileName = ("lib" ++ jsLib ++ profSuff) <.> "js_a" + inOutputDir file = + maybe file + (</>file) + (lcJsLibOutputDir settings `mplus` objectDir dflags) + outputFile = inOutputDir libFileName + jsFiles' = nub (lcJsLibSrcs settings ++ jsFiles) + meta = Meta (opt_P dflags) + jsEntries <- forM jsFiles' $ \file -> + (JsSource file,) . B.fromStrict <$> BS.readFile file + objEntries <- forM (eltsUDFM hpt) $ \hmi -> do + let mt = T.pack . moduleNameString . moduleName . mi_module . hm_iface $ hmi + files = maybe [] (\l -> [ o | DotO o <- linkableUnlinked l]) (hm_linkable hmi) + -- fixme archive does not handle multiple files for a module yet + forM files (fmap ((Object mt,) . B.fromStrict) . BS.readFile) + B.writeFile outputFile (buildArchive meta (concat objEntries ++ jsEntries)) + -- we don't use shared js_so libraries ourselves, but Cabal expects that we + -- generate one when building with --dynamic-too. Just write an empty file + when (gopt Opt_BuildDynamicToo dflags || WayDyn `elem` ways dflags) $ do + let sharedLibFileName = + "lib" ++ jsLib ++ "-ghcjs" ++ getCompilerVersion ++ profSuff <.> "js_so" + sharedOutputFile = inOutputDir sharedLibFileName + -- keep strip happy + BS.writeFile sharedOutputFile =<< BS.readFile (topDir dflags </> "empty.o") + return Succeeded + | otherwise = + return Succeeded + +ghcjsLinkJsBinary :: GhcjsEnv + -> JSLinkConfig + -> StgToJSConfig + -> [FilePath] + -> Logger + -> TmpFs + -> HscEnv + -> UnitEnv + -> [FilePath] + -> [UnitId] + -> IO () +ghcjsLinkJsBinary env lc_cfg cfg jsFiles _logger _tmpfs hsc_env _unit_env objs dep_pkgs = + void $ JSLink.link hsc_env env lc_cfg cfg exe mempty dep_pkgs objs' jsFiles isRoot mempty + where + objs' = map ObjFile objs + isRoot _ = True + exe = jsExeFileName (hsc_dflags hsc_env) + + +link' :: GhcjsEnv + -> JSLinkConfig + -> StgToJSConfig + -> HscEnv + -> [FilePath] -- extra js files + -> Bool -- building JavaScript + -> Logger -- Logger + -> TmpFs -- tmp file system + -> UnitEnv -- Unit Environment + -> Bool -- attempt linking in batch mode? + -> HomePackageTable -- what to link + -> IO SuccessFlag + +link' env lc_cfg cfg hsc_env extraJs buildJs logger tmpfs unit_env batch_attempt_linking hpt + | batch_attempt_linking + = do + let + dflags = hsc_dflags hsc_env + staticLink = case ghcLink dflags of + LinkStaticLib -> True + _ -> False + + home_mod_infos = eltsUDFM hpt + + -- the packages we depend on + pkg_deps = Set.toList . Set.unions + $ dep_direct_pkgs . mi_deps . hm_iface + <$> home_mod_infos + + -- the linkables to link + linkables = map (Maybe.expectJust "link".hm_linkable) home_mod_infos + debugTraceMsg logger 3 (text "link: hmi ..." $$ vcat (map (ppr . mi_module . hm_iface) home_mod_infos)) + debugTraceMsg logger 3 (text "link: pkgdeps ..." $$ vcat (map ppr pkg_deps)) + debugTraceMsg logger 3 (text "link: linkables are ..." $$ vcat (map ppr linkables)) + + -- check for the -no-link flag + if isNoLink (ghcLink dflags) + then + do debugTraceMsg logger 3 (text "link(batch): linking omitted (-c flag given).") + return Succeeded + else + do + let getOfiles (LM _ _ us) = map nameOfObject (filter isObject us) + obj_files = concatMap getOfiles linkables + exe_file = exeFileName (targetPlatform dflags) staticLink (outputFile_ dflags) + + linking_needed <- linkingNeeded logger dflags unit_env staticLink linkables pkg_deps + <&> (\case + NeedsRecompile _reason -> True + _ -> False) + + + if not (gopt Opt_ForceRecomp dflags) && not linking_needed + then + do + debugTraceMsg logger 2 (text exe_file <+> text "is up to date, linking not required.") + return Succeeded + else + do + unless buildJs $ compilationProgressMsg logger (text $ "ghcjs Linking " ++ exe_file ++ " ...") + -- Don't showPass in Batch mode; doLink will do that for us. + + -- FIXME: Jeff (2022,03): this let expression relies on all + -- these functions implementing the same interface. Which leads + -- to a lot of unused parameters. This is bad! We should be + -- employing the principle of least priviledge with these + -- functions. Untangle this later! + let link = case ghcLink dflags of + LinkBinary -> if buildJs + then ghcjsLinkJsBinary env lc_cfg cfg extraJs logger tmpfs hsc_env unit_env + else linkBinary logger tmpfs (hsc_dflags hsc_env) unit_env + LinkStaticLib -> jsLinkStaticLib logger tmpfs hsc_env unit_env + LinkDynLib -> linkDynLibCheck logger tmpfs hsc_env unit_env + other -> panicBadLink other + + _ <- link obj_files pkg_deps + debugTraceMsg logger 3 (text "link: done") + + -- linkBinary only returns if it succeeds + return Succeeded + + | otherwise + = do debugTraceMsg logger 3 (text "link(batch): upsweep (partially) failed OR" $$ + text " Main.main not exported; not linking.") + return Succeeded + + +panicBadLink :: GhcLink -> a +panicBadLink other = panic ("link: GHC not built to link this way: " ++ + show other) + +linkDynLibCheck :: Logger -> TmpFs -> HscEnv -> UnitEnv -> [FilePath] -> [UnitId] -> IO () +linkDynLibCheck logger tmpfs hsc_env unit_env o_files dep_packages + = do + let dflags = hsc_dflags hsc_env + when (haveRtsOptsFlags dflags) $ + logOutput logger (text "Warning: -rtsopts and -with-rtsopts have no effect with -shared." + $$ text " Call hs_init_ghc() from your main() function to set these options.") + + linkDynLib logger tmpfs (hsc_dflags hsc_env) unit_env o_files dep_packages + +-- FIXME: Jeff: (2022,04): This function is possibly redundant. Compare to +-- GHC.Linker.Static.linkStaticLib and decide and remove +jsLinkStaticLib ::Logger -> TmpFs -> HscEnv -> UnitEnv -> [FilePath] -> [UnitId] -> IO () +jsLinkStaticLib logger tmpfs hsc_env _unit_env o_files dep_packages + = -- XXX looks like this needs to be updated +{- + = do + when (platformOS (targetPlatform dflags) `notElem` [OSiOS, OSDarwin]) $ + throwGhcExceptionIO (ProgramError "Static archive creation only supported on Darwin/OS X/iOS") +-} + jsLinkBinary' True logger tmpfs (hsc_dflags hsc_env) (hsc_unit_env hsc_env) o_files dep_packages + +-- FIXME: Jeff: (2022,04): This function may be a duplicate functions from +-- GHC.Linker.Static.linkBinary. Decide if that is the case and remove +jsLinkBinary' :: Bool -> Logger -> TmpFs -> DynFlags -> UnitEnv -> [FilePath] -> [UnitId] -> IO () +jsLinkBinary' staticLink logger tmpfs dflags unit_env o_files dep_packages = do + let platform = targetPlatform dflags + mySettings = settings dflags + verbFlags = getVerbFlags dflags + output_file = outputFile_ dflags + output_fn = exeFileName platform staticLink output_file + + -- get the full list of packages to link with, by combining the + -- explicit packages with the auto packages and all of their + -- dependencies, and eliminating duplicates. + + full_output_fn <- if isAbsolute output_fn + then return output_fn + else do d <- getCurrentDirectory + return $ normalise (d </> output_fn) + pkg_lib_paths <- collectLibraryDirs (ways dflags) + <$> mayThrowUnitErr (preloadUnitsInfo' unit_env dep_packages) + + let pkg_lib_path_opts = concatMap get_pkg_lib_path_opts pkg_lib_paths + get_pkg_lib_path_opts l + | osElfTarget (platformOS platform) && + dynLibLoader dflags == SystemDependent && + WayDyn `elem` ways dflags + = let libpath = if gopt Opt_RelativeDynlibPaths dflags + then "$ORIGIN" </> + (l `makeRelativeTo` full_output_fn) + else l + -- See Note [-Xlinker -rpath vs -Wl,-rpath] + rpath = if gopt Opt_RPath dflags + then ["-Xlinker", "-rpath", "-Xlinker", libpath] + else [] + -- Solaris 11's linker does not support -rpath-link option. It silently + -- ignores it and then complains about next option which is -l<some + -- dir> as being a directory and not expected object file, E.g + -- ld: elf error: file + -- /tmp/ghc-src/libraries/base/dist-install/build: + -- elf_begin: I/O error: region read: Is a directory + rpathlink = if platformOS platform == OSSolaris2 + then [] + else ["-Xlinker", "-rpath-link", "-Xlinker", l] + in ["-L" ++ l] ++ rpathlink ++ rpath + | osMachOTarget (platformOS platform) && + dynLibLoader dflags == SystemDependent && + WayDyn `elem` ways dflags && + gopt Opt_RPath dflags + = let libpath = if gopt Opt_RelativeDynlibPaths dflags + then "@loader_path" </> + (l `makeRelativeTo` full_output_fn) + else l + in ("-L" ++ l) : ["-Xlinker", "-rpath", "-Xlinker", libpath] + | otherwise = ["-L" ++ l] + + let + dead_strip + | gopt Opt_WholeArchiveHsLibs dflags = [] + | otherwise = ["-Wl,-dead_strip" | osSubsectionsViaSymbols (platformOS platform)] + let lib_paths = libraryPaths dflags + let lib_path_opts = map ("-L"++) lib_paths + + extraLinkObj <- Maybe.fromMaybe mempty + <$> mkExtraObjToLinkIntoBinary logger tmpfs dflags (ue_units unit_env) + noteLinkObjs <- mkNoteObjsToLinkIntoBinary logger tmpfs dflags unit_env dep_packages + + let + (pre_hs_libs, post_hs_libs) + | gopt Opt_WholeArchiveHsLibs dflags + = if platformOS platform == OSDarwin + then (["-Wl,-all_load"], []) + -- OS X does not have a flag to turn off -all_load + else (["-Wl,--whole-archive"], ["-Wl,--no-whole-archive"]) + | otherwise + = ([],[]) + + pkg_link_opts <- do + (package_hs_libs, extra_libs, other_flags) <- getUnitLinkOpts dflags unit_env dep_packages + return $ if staticLink + then package_hs_libs -- If building an executable really means making a static + -- library (e.g. iOS), then we only keep the -l options for + -- HS packages, because libtool doesn't accept other options. + -- In the case of iOS these need to be added by hand to the + -- final link in Xcode. + else other_flags ++ dead_strip + ++ pre_hs_libs ++ package_hs_libs ++ post_hs_libs + ++ extra_libs + -- -Wl,-u,<sym> contained in other_flags + -- needs to be put before -l<package>, + -- otherwise Solaris linker fails linking + -- a binary with unresolved symbols in RTS + -- which are defined in base package + -- the reason for this is a note in ld(1) about + -- '-u' option: "The placement of this option + -- on the command line is significant. + -- This option must be placed before the library + -- that defines the symbol." + + -- frameworks + pkg_framework_opts <- getUnitFrameworkOpts unit_env dep_packages + let framework_opts = getFrameworkOpts dflags platform + + -- probably _stub.o files + let extra_ld_inputs = ldInputs dflags + + -- Here are some libs that need to be linked at the *end* of + -- the command line, because they contain symbols that are referred to + -- by the RTS. We can't therefore use the ordinary way opts for these. + let + debug_opts | WayDebug `elem` ways dflags = [ + + + + ] + | otherwise = [] + + thread_opts | WayThreaded `elem` ways dflags = [ + + + + ] + | otherwise = [] + + rc_objs <- maybeCreateManifest logger tmpfs dflags output_fn + + -- FIXME: Jeff (2022,04): jsLinkBinary' is only ever called with staticLink == + -- True. However, if it were False I'm unsure how to create the TmpFS + -- parameter required by SysTools.runLink. Is this necessary for the + -- js-backend? Fix this when we know more. + + -- let link = if staticLink + -- then SysTools.runLibtool + -- else SysTools.runLink + let link = SysTools.runLibtool -- <--- fix this line in particular + + link logger dflags ( + map SysTools.Option verbFlags + ++ [ SysTools.Option "-o" + , SysTools.FileOption "" output_fn + ] + ++ map SysTools.Option ( + [] + + -- See Note [No PIE when linking] + ++ picCCOpts dflags + + -- Permit the linker to auto link _symbol to _imp_symbol. + -- This lets us link against DLLs without needing an "import library". + ++ (["-Wl,--enable-auto-import" | platformOS platform == OSMinGW32]) + + -- '-no_compact_unwind' + -- C++/Objective-C exceptions cannot use optimised + -- stack unwinding code. The optimised form is the + -- default in Xcode 4 on at least x86_64, and + -- without this flag we're also seeing warnings + -- like + -- ld: warning: could not create compact unwind for .LFB3: non-standard register 5 being saved in prolog + -- on x86. + ++ ([ "-Wl,-no_compact_unwind" + | sLdSupportsCompactUnwind mySettings + && not staticLink + && (platformOS platform == OSDarwin) + && case platformArch platform of + ArchX86 -> True + ArchX86_64 -> True + ArchARM {} -> True + ArchAArch64 -> True + _ -> False]) + + -- '-Wl,-read_only_relocs,suppress' + -- ld gives loads of warnings like: + -- ld: warning: text reloc in _base_GHCziArr_unsafeArray_info to _base_GHCziArr_unsafeArray_closure + -- when linking any program. We're not sure + -- whether this is something we ought to fix, but + -- for now this flags silences them. + ++ (["-Wl,-read_only_relocs,suppress" + | platformOS platform == OSDarwin + && platformArch platform == ArchX86 + && not staticLink + ]) + + ++ (["-Wl,--gc-sections" + | sLdIsGnuLd mySettings + && not (gopt Opt_WholeArchiveHsLibs dflags) + ]) + + ++ o_files + ++ lib_path_opts) + ++ extra_ld_inputs + ++ map SysTools.Option ( + rc_objs + ++ framework_opts + ++ pkg_lib_path_opts + ++ extraLinkObj:noteLinkObjs + ++ pkg_link_opts + ++ pkg_framework_opts + ++ debug_opts + ++ thread_opts + )) + + +ghcjsDoLink :: GhcjsEnv -> JSLinkConfig -> StgToJSConfig -> HscEnv -> Phase -> [FilePath] -> IO () +ghcjsDoLink env lc_cfg cfg hsc_env stop_phase o_files + | not (isStopLn stop_phase) + = return () -- We stopped before the linking phase + {-| native + = case ghcLink dflags of + NoLink -> return () + LinkBinary -> linkBinary dflags o_files [] + LinkDynLib -> linkDynLibCheck dflags o_files [] + other -> panicBadLink other -} + -- | isJust (gsLinkJsLib settings) + -- = void $ ghcjsLinkJsLib settings o_files dflags emptyHomePackageTable + | otherwise = do + -- void $ ghcjsLink env settings o_files True (ghcLink dflags) dflags True emptyHomePackageTable + let dflags = hsc_dflags hsc_env + case ghcLink (hsc_dflags hsc_env) of + NoLink -> return () + LinkBinary -> do + putStrLn $ "ghcjsDoLink: " ++ show (ghcLink dflags) ++ " " ++ show o_files + let output_fn = exeFileName (targetPlatform dflags) False (outputFile_ dflags) + let extra_ld_inputs = [ f | FileOption _ f <- ldInputs dflags ] + putStrLn $ "ghcjsDoLink: " ++ show extra_ld_inputs + + full_output_fn <- if isAbsolute output_fn + then return output_fn + else do d <- getCurrentDirectory + return $ normalise (d </> output_fn) + let dep_packages = case preloadUnitsInfo $ hsc_unit_env hsc_env of + Maybe.Succeeded us -> us + Maybe.Failed err -> pprPanic "Panic in ghcjsDoLink: " (ppr err) + + + let isRoot _ = True + objs' = map ObjFile o_files + dep_package_ids = map unitId dep_packages + jsFiles = [] -- XXX check whether we need to sort the obj_files for js-files + + putStrLn $ "ghcjsDoLink: " ++ show (map unitIdString dep_package_ids) + + void $ + JSLink.link + hsc_env + env + lc_cfg + cfg + (full_output_fn <.> "jsexe") -- output file or dir + mempty -- include path for home package + dep_package_ids -- packages to link + objs' -- object we're currently linking + jsFiles -- extra js files to include + isRoot -- functions from the objects to use as roots + mempty -- extra symbols to link in + + _other -> panicBadLink _other diff --git a/compiler/GHC/StgToJS/Linker/Linker.hs b/compiler/GHC/StgToJS/Linker/Linker.hs new file mode 100644 index 0000000000..e8facd322a --- /dev/null +++ b/compiler/GHC/StgToJS/Linker/Linker.hs @@ -0,0 +1,887 @@ +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TupleSections #-} + +----------------------------------------------------------------------------- +-- | +-- Module : GHC.StgToJS.Linker.Linker +-- Copyright : (c) The University of Glasgow 2001 +-- License : BSD-style (see the file LICENSE) +-- +-- Maintainer : Jeffrey Young <jeffrey.young@iohk.io> +-- Luite Stegeman <luite.stegeman@iohk.io> +-- Sylvain Henry <sylvain.henry@iohk.io> +-- Josh Meredith <josh.meredith@iohk.io> +-- Stability : experimental +-- +-- GHCJS linker, collects dependencies from the object files (.js_o, js_p_o), +-- which contain linkable units with dependency information +-- +----------------------------- FIXMEs ------------------------------------------- +-- FIXME: Jeff (2022,03): Finish module description. Specifically: +-- 1. What are the important modules this module uses +-- 2. Who is the consumer for this module (hint: DynamicLinking) +-- 3. What features are missing due to the implementation in this module? For +-- example, Are we blocked from linking foreign imports due to some code in this +-- module? +-- +-- - add ForeignRefs imports in @link@ +-- - factor out helper functions in @link'@ +-- - remove @head@ function in @link'@ +-- - remove @ue_unsafeHomeUnit@ function in @link'@ +-- - use newtypes instead of strings for output directories in @writeRunner@ +-- - add support for windows in @writeRunner@ +-- - resolve strange unpack call in @writeExterns@ the right thing to do here +-- might be to just remove it +-- - fix: @collectDeps@ inputs a [UnitId], but [] is unordered yet comments in +-- @collectDeps@ indicate a specific ordering is needed. This ordering +-- should be enforced in some data structure other than [] which is +-- obviously ordered but in an undefined and ad-hoc way +-- - fix: For most of the Linker I pass around UnitIds, I (Jeff) am unsure if +-- these should really be modules. Or to say this another way is UnitId the +-- right abstraction level? Or is Module? Or some other unit thing? +-- - fix: Gen2.GHCJS used NFData instances over a lot of types. Replicating +-- these instances would mean adding a Generic and NFData instance to some +-- internal GHC types. I (Jeff) do not think we want to do that. Instead, we +-- should use strict data structures as a default and then implement lazy +-- ones where it makes sense and only if it makes sense. IMHO Gen2.GHCJS was +-- overly lazy and we should avoid repeating that here. Let profiling be our +-- guide during our performance refactoring. +-- - Employ the type system more effectively for @readSystemDeps'@, in +-- particular get rid of the string literals +-- - fix foldl' memory leak in @staticDeps@ +-- - move @mkSymb@ +----------------------------------------------------------------------------- + +module GHC.StgToJS.Linker.Linker where + +import GHC.StgToJS.Linker.Types +import GHC.StgToJS.Linker.Utils +import GHC.StgToJS.Linker.Compactor + +import GHC.StgToJS.Rts.Rts + +import GHC.JS.Syntax + +import GHC.StgToJS.Object +import GHC.StgToJS.Types hiding (LinkableUnit) +import GHC.StgToJS.UnitUtils +import GHC.StgToJS.Printer + +import qualified GHC.SysTools.Ar as Ar +import GHC.Utils.Encoding +import GHC.Utils.Outputable (ppr, text) +import GHC.Utils.Panic +import GHC.Unit.State +import GHC.Unit.Env +import GHC.Unit.Home +import GHC.Unit.Types +import GHC.Utils.Error +import GHC.Platform.Ways +import GHC.Driver.Env.Types +import GHC.Data.ShortText (ShortText) +import qualified GHC.Data.ShortText as T + +import Control.Concurrent.MVar +import Control.Monad + +import Data.Array +import qualified Data.ByteString as B +import qualified Data.ByteString.Char8 as BC +import qualified Data.ByteString.Lazy.Char8 as BLC +import qualified Data.ByteString.Lazy as BL +import Data.Function (on) +import Data.Int +import Data.IntSet (IntSet) +import qualified Data.IntSet as IS +import Data.IORef +import Data.List ( partition, nub, foldl', intercalate, group, sort + , groupBy, isSuffixOf, find, intersperse + ) +import Data.Map.Strict (Map) +import qualified Data.Map.Strict as M +import Data.Maybe +import Data.Set (Set) +import qualified Data.Set as S + +import GHC.Generics (Generic) + +import System.FilePath (splitPath, (<.>), (</>), dropExtension) +import System.IO +import System.Directory ( createDirectoryIfMissing + , doesFileExist + , getCurrentDirectory + , Permissions(..) + , setPermissions + , getPermissions + ) + +import Prelude +import GHC.Driver.Session (targetWays_, settings) +import GHC.Settings (sTopDir) +import GHC.Unit.Module.Name +import GHC.Unit.Module (moduleStableString) + +-- number of bytes linked per module +type LinkerStats = Map Module Int64 + +-- | result of a link pass +data LinkResult = LinkResult + { linkOut :: BL.ByteString -- ^ compiled Haskell code + , linkOutStats :: LinkerStats -- ^ statistics about generated code + , linkOutMetaSize :: Int64 -- ^ size of packed metadata in generated code + , linkForeignRefs :: [ForeignJSRef] -- ^ foreign code references in compiled haskell code + , linkLibRTS :: [FilePath] -- ^ library code to load with the RTS + , linkLibA :: [FilePath] -- ^ library code to load after RTS + , linkLibAArch :: [FilePath] -- ^ library code to load from archives after RTS + , linkBase :: Base -- ^ base metadata to use if we want to link incrementally against this result + } deriving (Generic) + +newtype ArchiveState = ArchiveState { loadedArchives :: IORef (Map FilePath Ar.Archive) } + +emptyArchiveState :: IO ArchiveState +emptyArchiveState = ArchiveState <$> newIORef M.empty + +-- | link and write result to disk (jsexe directory) +link :: HscEnv + -> GhcjsEnv + -> JSLinkConfig + -> StgToJSConfig + -> FilePath -- ^ output file/directory + -> [FilePath] -- ^ include path for home package + -> [UnitId] -- ^ packages to link + -> [LinkedObj] -- ^ the object files we're linking + -> [FilePath] -- ^ extra js files to include + -> (ExportedFun -> Bool) -- ^ functions from the objects to use as roots (include all their deps) + -> Set ExportedFun -- ^ extra symbols to link in + -> IO () +link hsc_env env lc_cfg cfg out include pkgs objFiles jsFiles isRootFun extraStaticDeps + | lcNoJSExecutables lc_cfg = return () + | otherwise = do + LinkResult lo lstats lmetasize _lfrefs llW lla llarch lbase <- + link' hsc_env env lc_cfg cfg out include pkgs objFiles jsFiles + isRootFun extraStaticDeps + let genBase = isJust (lcGenBase lc_cfg) + jsExt | genBase = "base.js" + | otherwise = "js" + createDirectoryIfMissing False out + BL.writeFile (out </> "out" <.> jsExt) lo + unless (lcOnlyOut lc_cfg) $ do + let frefsFile = if genBase then "out.base.frefs" else "out.frefs" + -- FIXME: Jeff (2022,03): GHCJS used Aeson to encode Foreign + -- references as StaticDeps to a Bytestring and then write these out + -- to a tmp file for linking. We do not have access to Aeson so + -- we'll need to find an alternative coding strategy to write these + -- out. See the commented instance for FromJSON StaticDeps below. + -- - this line called out to the FromJSon Instance + -- jsonFrefs = Aeson.encode lfrefs + jsonFrefs = mempty + dflags = hsc_dflags hsc_env + + BL.writeFile (out </> frefsFile <.> "json") jsonFrefs + BL.writeFile (out </> frefsFile <.> "js") + ("h$checkForeignRefs(" <> jsonFrefs <> ");") + unless (lcNoStats lc_cfg) $ do + let statsFile = if genBase then "out.base.stats" else "out.stats" + writeFile (out </> statsFile) (linkerStats lmetasize lstats) + unless (lcNoRts lc_cfg) $ do + withRts <- mapM (tryReadShimFile dflags) llW + BL.writeFile (out </> "rts.js") (BLC.pack (T.unpack rtsDeclsText) + <> BL.fromChunks withRts + <> BLC.pack (T.unpack $ rtsText cfg)) + lla' <- mapM (tryReadShimFile dflags) lla + llarch' <- mapM (readShimsArchive dflags) llarch + BL.writeFile (out </> "lib" <.> jsExt) + (BL.fromChunks $ llarch' ++ lla') + if genBase + then generateBase out lbase + else when ( not (lcOnlyOut lc_cfg) + && not (lcNoRts lc_cfg) + && not (usingBase lc_cfg) + ) + $ do + let top = sTopDir . settings . hsc_dflags $ hsc_env + _ <- combineFiles lc_cfg top out + writeHtml top out + writeRunMain top out + writeRunner lc_cfg out + writeWebAppManifest top out + writeExterns out + +-- | link in memory +link' :: HscEnv + -> GhcjsEnv + -> JSLinkConfig + -> StgToJSConfig + -> String -- ^ target (for progress message) + -> [FilePath] -- ^ include path for home package + -> [UnitId] -- ^ packages to link + -> [LinkedObj] -- ^ the object files we're linking + -> [FilePath] -- ^ extra js files to include + -> (ExportedFun -> Bool) -- ^ functions from the objects to use as roots (include all their deps) + -> Set ExportedFun -- ^ extra symbols to link in + -> IO LinkResult +link' hsc_env env lc_cfg cfg target _include pkgs objFiles jsFiles isRootFun extraStaticDeps + = do + -- FIXME: Jeff (2022,04): This function has several helpers that should be + -- factored out. In its current condition it is hard to read exactly whats + -- going on and why. + (objDepsMap, objRequiredUnits) <- loadObjDeps objFiles + + let rootSelector | Just baseMod <- lcGenBase lc_cfg = + \(ExportedFun m _s) -> m == baseMod + | otherwise = isRootFun + roots = S.fromList . filter rootSelector $ concatMap (M.keys . depsHaskellExported . fst) (M.elems objDepsMap) + -- FIXME: Jeff (2022,03): Remove head. opt for NonEmptyList. Every + -- head is a time bomb waiting to explode + rootMods = map (moduleNameString . moduleName . head) . group . sort . map funModule . S.toList $ roots + objPkgs = map moduleUnitId $ nub (M.keys objDepsMap) + + _ <- compilationProgressMsg (hsc_logger hsc_env) . text $ + case lcGenBase lc_cfg of + Just baseMod -> "Linking base bundle " ++ target ++ " (" ++ moduleNameString (moduleName baseMod) ++ ")" + _ -> "Linking " ++ target ++ " (" ++ intercalate "," rootMods ++ ")" + base <- case lcUseBase lc_cfg of + NoBase -> return emptyBase + BaseFile file -> loadBase file + BaseState b -> return b + (rdPkgs, rds) <- rtsDeps hsc_env pkgs + -- c <- newMVar M.empty + let rtsPkgs = map stringToUnitId ["@rts", "@rts_" ++ waysTag (targetWays_ $ hsc_dflags hsc_env)] + pkgs' :: [UnitId] + pkgs' = nub (rtsPkgs ++ rdPkgs ++ reverse objPkgs ++ reverse pkgs) + pkgs'' = filter (not . isAlreadyLinked base) pkgs' + ue_state = ue_units $ hsc_unit_env hsc_env + -- pkgLibPaths = mkPkgLibPaths pkgs' + -- getPkgLibPaths :: UnitId -> ([FilePath],[String]) + -- getPkgLibPaths k = fromMaybe ([],[]) (lookup k pkgLibPaths) + (archsDepsMap, archsRequiredUnits) <- loadArchiveDeps env =<< getPackageArchives cfg (map snd $ mkPkgLibPaths ue_state pkgs') + pkgArchs <- getPackageArchives cfg (map snd $ mkPkgLibPaths ue_state pkgs'') + (allDeps, code) <- + collectDeps (objDepsMap `M.union` archsDepsMap) + (pkgs' ++ [homeUnitId (ue_unsafeHomeUnit $ hsc_unit_env hsc_env)]) -- FIXME: dont use unsafe + (baseUnits base) + (roots `S.union` rds `S.union` extraStaticDeps) + (archsRequiredUnits ++ objRequiredUnits) + let (outJs, metaSize, compactorState, stats) = + renderLinker lc_cfg cfg (baseCompactorState base) rds code + base' = Base compactorState (nub $ basePkgs base ++ pkgs'') + (allDeps `S.union` baseUnits base) + (alreadyLinkedBefore, alreadyLinkedAfter) <- getShims [] (filter (isAlreadyLinked base) pkgs') + (shimsBefore, shimsAfter) <- getShims jsFiles pkgs'' + return $ LinkResult outJs stats metaSize + (concatMap (\(_,_,_,_,_,r) -> r) code) + (filter (`notElem` alreadyLinkedBefore) shimsBefore) + (filter (`notElem` alreadyLinkedAfter) shimsAfter) + pkgArchs base' + where + isAlreadyLinked :: Base -> UnitId -> Bool + isAlreadyLinked b uid = uid `elem` basePkgs b + + mkPkgLibPaths :: UnitState -> [UnitId] -> [(UnitId, ([FilePath],[String]))] + mkPkgLibPaths u_st + = map (\k -> ( k + , (getInstalledPackageLibDirs u_st k + , getInstalledPackageHsLibs u_st k) + )) + +renderLinker :: JSLinkConfig + -> StgToJSConfig + -> CompactorState + -> Set ExportedFun + -> [(Module, JStat, ShortText, [ClosureInfo], [StaticInfo], [ForeignJSRef])] -- ^ linked code per module + -> (BL.ByteString, Int64, CompactorState, LinkerStats) +renderLinker settings cfg renamerState rtsDeps code = + let + (_renamerState', compacted, meta) = compact settings cfg renamerState (map funSymbol $ S.toList rtsDeps) (map (\(_,s,_,ci,si,_) -> (s,ci,si)) code) + pe = (<>"\n") . show . pretty + rendered = fmap pe compacted + renderedMeta = pe meta + renderedExports = concatMap T.unpack . filter (not . T.null) $ map (\(_,_,rs,_,_,_) -> rs) code + mkStat (m,_,_,_,_,_) b = (m, BL.length . BLC.pack $ b) + in ( BL.fromStrict $ BC.pack $ mconcat [mconcat rendered, renderedMeta, renderedExports] + , BL.length $ BL.fromStrict $ BC.pack renderedMeta + , renamerState + , M.fromList $ zipWith mkStat code rendered + ) + +linkerStats :: Int64 -- ^ code size of packed metadata + -> LinkerStats -- ^ code size per module + -> String +linkerStats meta s = + intercalate "\n\n" [packageStats, moduleStats, metaStats] <> "\n\n" + where + ps = M.fromListWith (+) . map (\(m,s) -> (moduleName m,s)) . M.toList $ s + pad :: Int -> String -> String + pad n t = let l = length t + in if l < n then t <> replicate (n-l) ' ' else t + + pkgMods :: [[(Module,Int64)]] + pkgMods = groupBy ((==) `on` fst) (M.toList s) + + showMod :: (Module, Int64) -> String + showMod (m,s) = pad 40 (" " <> moduleStableString m <> ":") <> show s + + packageStats :: String + packageStats = "code size summary per package:\n\n" + <> concatMap (\(p,s) -> pad 25 (show p <> ":") <> show s) (M.toList ps) + + moduleStats :: String + moduleStats = "code size per module:\n\n" <> unlines (map (concatMap showMod) pkgMods) + + metaStats :: String + metaStats = "packed metadata: " <> show meta + +splitPath' :: FilePath -> [FilePath] +splitPath' = map (filter (`notElem` ("/\\"::String))) . splitPath + +getPackageArchives :: StgToJSConfig -> [([FilePath],[String])] -> IO [FilePath] +getPackageArchives cfg pkgs = + filterM doesFileExist [ p </> "lib" ++ l ++ profSuff <.> "a" + | (paths, libs) <- pkgs, p <- paths, l <- libs ] + where + -- XXX the profiling library name is probably wrong now + profSuff | csProf cfg = "_p" + | otherwise = "" + +-- fixme the wired-in package id's we get from GHC we have no version +getShims :: [FilePath] -> [UnitId] -> IO ([FilePath], [FilePath]) +getShims = panic "Panic from getShims: Shims not implemented! no to shims!" +-- getShims dflags extraFiles pkgDeps = do +-- (w,a) <- collectShims (getLibDir dflags </> "shims") +-- (map (convertPkg dflags) pkgDeps) +-- extraFiles' <- mapM canonicalizePath extraFiles +-- return (w, a++extraFiles') + +{- | convenience: combine rts.js, lib.js, out.js to all.js that can be run + directly with node.js or SpiderMonkey jsshell + -} +combineFiles :: JSLinkConfig + -> FilePath -- ^ top level dir + -> FilePath + -> IO () +combineFiles cfg top fp = do + files <- mapM (B.readFile.(fp</>)) ["rts.js", "lib.js", "out.js"] + runMain <- if lcNoHsMain cfg + then pure mempty + else B.readFile (top </> "runmain.js") + writeBinaryFile (fp</>"all.js") (mconcat (files ++ [runMain])) + +-- | write the index.html file that loads the program if it does not exit +writeHtml :: FilePath -- ^ top level library directory + -> FilePath -- ^ output directory + -> IO () +writeHtml top out = do + e <- doesFileExist htmlFile + unless e $ + B.readFile (top </>"template.html") >>= B.writeFile htmlFile + where + htmlFile = out </> "index.html" + +-- | write the runmain.js file that will be run with defer so that it runs after +-- index.html is loaded +writeRunMain :: FilePath -- ^ top level library directory + -> FilePath -- ^ output directory + -> IO () +writeRunMain top out = do + e <- doesFileExist runMainFile + unless e $ + B.readFile (top </> "runmain.js") >>= B.writeFile runMainFile + where + runMainFile = out </> "runmain.js" + +-- FIXME: Jeff (2022,03): Use Newtypes instead of Strings for these directories +writeRunner :: JSLinkConfig -- ^ Settings + -> FilePath -- ^ Output directory + -> IO () +writeRunner _settings out = + -- FIXME: Jeff (2022,03): why was the buildRunner check removed? If we don't + -- need to check then does the flag need to exist? + {-when (lcBuildRunner _settings) $ -} do + cd <- getCurrentDirectory + let runner = cd </> addExeExtension (dropExtension out) + srcFile = out </> "all" <.> "js" + -- nodeSettings <- readNodeSettings dflags + nodePgm :: B.ByteString + nodePgm = "node" -- XXX we don't read nodeSettings.json anymore, we should somehow know how to find node? + + --------------------------------------------- + -- FIXME: Jeff (2022,03): Add support for windows. Detect it and act on it here: + -- if Platform.isWindows + -- then do + -- copyFile (topDir dflags </> "bin" </> "wrapper" <.> "exe") + -- runner + -- writeFile (runner <.> "options") $ unlines + -- [ T.pack nodePgm -- T.pack (nodeProgram nodeSettings) + -- , T.pack ("{{EXEPATH}}" </> out </> "all" <.> "js") + -- ] + -- else do + --------------------------------------------- + src <- B.readFile (cd </> srcFile) + B.writeFile runner ("#!/usr/bin/env " <> nodePgm <> "\n" <> src) + perms <- getPermissions runner + setPermissions runner (perms {executable = True}) + +-- | write the manifest.webapp file that for firefox os +writeWebAppManifest :: FilePath -- ^ top directory + -> FilePath -- ^ output directory + -> IO () +writeWebAppManifest top out = do + e <- doesFileExist manifestFile + unless e $ B.readFile (top </> "manifest.webapp") >>= B.writeFile manifestFile + where + manifestFile = out </> "manifest.webapp" + +rtsExterns :: ShortText +rtsExterns = + "// GHCJS RTS externs for closure compiler ADVANCED_OPTIMIZATIONS\n\n" <> + mconcat (map (\x -> "/** @type {*} */\nObject.d" <> T.pack (show x) <> ";\n") + [(7::Int)..16384]) + +writeExterns :: FilePath -> IO () +writeExterns out = writeFile (out </> "all.js.externs") + $ T.unpack rtsExterns -- FIXME: Jeff (2022,03): Why write rtsExterns as + -- ShortText just to unpack? + +-- | get all functions in a module +modFuns :: Deps -> [ExportedFun] +modFuns (Deps _m _r e _b) = M.keys e + +-- | get all dependencies for a given set of roots +getDeps :: Map Module Deps -- ^ loaded deps + -> Set LinkableUnit -- ^ don't link these blocks + -> Set ExportedFun -- ^ start here + -> [LinkableUnit] -- ^ and also link these + -> IO (Set LinkableUnit) +getDeps loaded_deps base fun startlu = go' S.empty (S.fromList startlu) (S.toList fun) + where + go :: Set LinkableUnit + -> Set LinkableUnit + -> IO (Set LinkableUnit) + go result open = case S.minView open of + Nothing -> return result + Just (lu@(lmod,n), open') -> + case M.lookup lmod loaded_deps of + Nothing -> pprPanic "getDeps.go: object file not loaded for: " (pprModule lmod) + Just (Deps _ _ _ b) -> + let block = b!n + result' = S.insert lu result + in go' result' + (addOpen result' open' $ + map (lmod,) (blockBlockDeps block)) (blockFunDeps block) + + go' :: Set LinkableUnit + -> Set LinkableUnit + -> [ExportedFun] + -> IO (Set LinkableUnit) + go' result open [] = go result open + go' result open (f:fs) = + let key = funModule f + in case M.lookup key loaded_deps of + Nothing -> pprPanic "getDeps.go': object file not loaded for: " $ pprModule key + Just (Deps _m _r e _b) -> + let lun :: Int + lun = fromMaybe (pprPanic "exported function not found: " $ pprModule key) + (M.lookup f e) + lu = (key, lun) + in go' result (addOpen result open [lu]) fs + + addOpen :: Set LinkableUnit -> Set LinkableUnit -> [LinkableUnit] + -> Set LinkableUnit + addOpen result open newUnits = + let alreadyLinked s = S.member s result || + S.member s open || + S.member s base + in open `S.union` S.fromList (filter (not . alreadyLinked) newUnits) + +-- FIXME: Jeff: (2022,03): if the order of the [UnitId] list matters after +-- ghc-prim then we should be using an Ordered Set or something +-- similar since the implementation of this function uses a lot of +-- expensive operations on this list and a lot of +-- serialization/deserialization +-- FIXME: Jeff (2022,03): Should [UnitId] be [Module]? +-- | collect dependencies for a set of roots +collectDeps :: Map Module (Deps, DepsLocation) -- ^ Dependency map + -> [UnitId] -- ^ packages, code linked in this order + -> Set LinkableUnit -- ^ do not include these + -> Set ExportedFun -- ^ roots + -> [LinkableUnit] -- ^ more roots + -> IO ( Set LinkableUnit + , [(Module, JStat, ShortText, [ClosureInfo], [StaticInfo], [ForeignJSRef])] + ) +collectDeps mod_deps packages base roots units = do + allDeps <- getDeps (fmap fst mod_deps) base roots units + -- read ghc-prim first, since we depend on that for static initialization + let packages' = uncurry (++) $ partition (== primUnitId) (nub packages) + + units_by_module :: Map Module IntSet + units_by_module = M.fromListWith IS.union $ + map (\(m,n) -> (m, IS.singleton n)) (S.toList allDeps) + + mod_deps_bypkg :: Map UnitId [(Deps, DepsLocation)] + mod_deps_bypkg = M.mapKeys moduleUnitId + $ M.fromListWith (++) + (map (\(m,v) -> (m,[v])) (M.toList mod_deps)) + + ar_state <- emptyArchiveState + code <- fmap (catMaybes . concat) . forM packages' $ \pkg -> + mapM (uncurry $ extractDeps ar_state units_by_module) + (fromMaybe [] $ M.lookup pkg mod_deps_bypkg) + return (allDeps, code) + +extractDeps :: ArchiveState + -> Map Module IntSet + -> Deps + -> DepsLocation + -> IO (Maybe (Module, JStat, ShortText, [ClosureInfo], [StaticInfo], [ForeignJSRef])) +extractDeps ar_state units deps loc = + case M.lookup mod units of + Nothing -> return Nothing + Just modUnits -> do + let selector n _ = n `IS.member` modUnits || isGlobalUnit n + x <- case loc of + ObjectFile o -> collectCode =<< readObjectFileKeys selector o + ArchiveFile a -> collectCode + . readObjectKeys (a ++ ':':moduleNameString (moduleName mod)) selector + =<< readArObject ar_state mod a + -- error ("Ar.readObject: " ++ a ++ ':' : T.unpack mod)) + -- Ar.readObject (mkModuleName $ T.unpack mod) a) + InMemory n b -> collectCode $ readObjectKeys n selector b + -- evaluate (rnf x) -- See FIXME Re: NFData instance on Safety and + -- ForeignJSRefs below + return x + where + mod = depsModule deps + -- FIXME: Jeff (2022,03): remove this hacky reimplementation of unlines + newline = T.pack "\n" + unlines' = intersperse newline . map oiRaw + collectCode l = let x = ( mod + , mconcat (map oiStat l) + , mconcat (unlines' l) + , concatMap oiClInfo l + , concatMap oiStatic l + , concatMap oiFImports l) + -- FIXME: (2022,04): this evaluate and rnf require an NFData + -- instance on ForeignJSRef which in turn requries a NFData + -- instance on Safety. Does this even make sense? We'll skip + -- this for now. + + -- in evaluate (rnf x) >> return (Just x) + + in return (Just x) + +readArObject :: ArchiveState -> Module -> FilePath -> IO BL.ByteString +readArObject ar_state mod ar_file = do + loaded_ars <- readIORef (loadedArchives ar_state) + (Ar.Archive entries) <- case M.lookup ar_file loaded_ars of + Just a -> pure a + Nothing -> do + a <- Ar.loadAr ar_file + modifyIORef (loadedArchives ar_state) (M.insert ar_file a) + pure a + let tag = moduleNameTag $ moduleName mod + matchTag entry + | Right hdr <- getHeader (BL.fromStrict $ Ar.filedata entry) + = hdrModuleName hdr == tag + | otherwise + = False + + -- XXX this shouldn't be an exception probably + pure $ maybe (error $ "could not find object for module " + ++ moduleNameString (moduleName mod) + ++ " in " + ++ ar_file) + (BL.fromStrict . Ar.filedata) (find matchTag entries) + -- mapM_ (\e -> putStrLn ("found file: " ++ Ar.filename e)) entries + +{- | Static dependencies are symbols that need to be linked regardless + of whether the linked program refers to them. For example + dependencies that the RTS uses or symbols that the user program + refers to directly + -} +newtype StaticDeps = + StaticDeps { unStaticDeps :: [(ShortText, ShortText)] -- module/symbol + } + +noStaticDeps :: StaticDeps +noStaticDeps = StaticDeps [] + +{- | The input file format for static deps is a yaml document with a + package/module/symbol tree where symbols can be either a list or + just a single string, for example: + + base: + GHC.Conc.Sync: reportError + Control.Exception.Base: nonTermination + ghcjs-prim: + GHCJS.Prim: + - JSVal + - JSException + -} +-- instance FromJSON StaticDeps where +-- parseJSON (Object v) = StaticDeps . concat <$> mapM (uncurry parseMod) (HM.toList v) +-- where +-- parseMod p (Object v) = concat <$> mapM (uncurry (parseSymb p)) (HM.toList v) +-- parseMod _ _ = mempty +-- parseSymb p m (String s) = pure [(p,m,s)] +-- parseSymb p m (Array v) = mapM (parseSingleSymb p m) (V.toList v) +-- parseSymb _ _ _ = mempty +-- parseSingleSymb p m (String s) = pure (p,m,s) +-- parseSingleSymb _ _ _ = mempty +-- parseJSON _ = mempty + +-- | dependencies for the RTS, these need to be always linked +rtsDeps :: HscEnv -> [UnitId] -> IO ([UnitId], Set ExportedFun) +rtsDeps hsc_env pkgs = readSystemDeps hsc_env pkgs "rtsdeps.yaml" + +-- | dependencies for the Template Haskell, these need to be linked when running +-- Template Haskell (in addition to the RTS deps) +thDeps :: HscEnv -> [UnitId] -> IO ([UnitId], Set ExportedFun) +thDeps hsc_env pkgs = readSystemDeps hsc_env pkgs "thdeps.yaml" + +-- FIXME: Jeff (2022,03): fill in the ? +-- | A helper function to read system dependencies that are hardcoded via a file +-- path. +readSystemDeps :: HscEnv -- ^ HS Env + -> [UnitId] -- ^ Packages to ?? + -> FilePath -- ^ File to read + -> IO ([UnitId], Set ExportedFun) +readSystemDeps hsc_env pkgs file = do + (deps_pkgs, deps_funs) <- readSystemDeps' hsc_env file + pure ( filter (`S.member` linked_pkgs) deps_pkgs + , S.filter (\fun -> + moduleUnitId (funModule fun) `S.member` linked_pkgs) deps_funs + ) + + where + -- FIXME: Jeff (2022,03): Each time we _do not_ use a list like a stack we + -- gain evidence that we should be using a different data structure. @pkgs@ + -- is the list in question here + linked_pkgs = S.fromList pkgs + + +readSystemDeps' :: HscEnv + -> FilePath + -> IO ([UnitId], Set ExportedFun) +readSystemDeps' hsc_env file + -- hardcode contents to get rid of yaml dep + -- XXX move runTHServer to some suitable wired-in package + -- FIXME: Jeff (2022,03): Use types not string matches, These should be + -- wired-in just like in GHC and thus we should make them top level + -- definitions + | file == "thdeps.yaml" = pure ( [stringToUnitId "base"] + , S.fromList $ d "base" "GHCJS.Prim.TH.Eval" ["runTHServer"]) + | file == "rtsdeps.yaml" = pure ( [stringToUnitId "base" + , stringToUnitId "ghc-prim" + , stringToUnitId "integer-wired-in" + ] + , S.fromList $ concat + [ d "base" "GHC.Conc.Sync" ["reportError"] + , d "base" "Control.Exception.Base" ["nonTermination"] + , d "base" "GHC.Exception.Type" ["SomeException"] + , d "base" "GHC.TopHandler" ["runMainIO", "topHandler"] + , d "base" "GHC.Base" ["$fMonadIO"] + , d "base" "GHC.Maybe" ["Nothing", "Just"] + , d "base" "GHC.Ptr" ["Ptr"] + , d "ghc-prim" "GHC.Types" [":", "[]"] + , d "ghc-prim" "GHC.Tuple" ["(,)", "(,,)", "(,,,)", "(,,,,)", "(,,,,,)","(,,,,,,)", "(,,,,,,,)", "(,,,,,,,,)", "(,,,,,,,,,)"] + , d "integer-wired-in" "GHC.Integer.Type" ["S#", "Jp#", "Jn#"] + , d "ghc-prim" "GHC.Types" [ "JSVal" ] + , d "base" "GHCJS.Prim" ["JSException", "$fShowJSException", "$fExceptionJSException", "resolve", "resolveIO", "toIO"] + , d "base" "GHCJS.Prim.Internal" ["wouldBlock", "blockedIndefinitelyOnMVar", "blockedIndefinitelyOnSTM", "ignoreException", "setCurrentThreadResultException", "setCurrentThreadResultValue"] + ] + ) + | otherwise = pure (mempty, mempty) + where + + d :: String -> String -> [String] -> [ExportedFun] + d pkg mod symbols = map (let pkg_module = mkJsModule pkg + in ExportedFun pkg_module + . mkHaskellSym pkg_module (T.pack mod) + . T.pack) + symbols + zenc = T.pack . zEncodeString . T.unpack + + mkHaskellSym :: Module -> ShortText -> ShortText -> ShortText + mkHaskellSym mod m s = "h$" <> zenc (T.pack (encodeModule hsc_env mod) + <> ":" + <> m + <> "." + <> s) + mkJsModule :: String -> GenModule Unit + mkJsModule pkg = mkModule (RealUnit (Definite (stringToUnitId pkg))) (mkModuleName pkg) + +{- + b <- readBinaryFile (getLibDir dflags </> file) + wi <- readSystemWiredIn dflags + case Yaml.decodeEither b of + Left err -> panic $ "could not read " ++ depsName ++ + " dependencies from " ++ file ++ ":\n" ++ err + Right sdeps -> + let (StaticDeps unresolved, pkgs, funs) = staticDeps dflags wi sdeps + in case unresolved of + ((p,_,_):_) -> + panic $ "Package `" ++ T.unpack p ++ "' is required for " ++ + requiredFor ++ ", but was not found" + _ -> + -- putStrLn "system dependencies:" + -- print (map installedUnitIdString pkgs, funs) + return (pkgs, funs) + +-} + +readSystemWiredIn :: HscEnv -> IO [(ShortText, UnitId)] +readSystemWiredIn _ = pure [] -- XXX +{- +readSystemWiredIn dflags = do + b <- B.readFile filename + case Yaml.decodeEither b of + Left _err -> error $ "could not read wired-in package keys from " ++ filename + Right m -> return . M.toList + . M.union ghcWiredIn -- GHC wired-in package keys override those in the file + . fmap stringToUnitId $ m + where + filename = getLibDir dflags </> "wiredinkeys" <.> "yaml" + ghcWiredIn :: Map Text UnitId + ghcWiredIn = M.fromList $ map (\k -> (T.pack (installedUnitIdString k), k)) + (map toUnitId wiredInUnitIds) + -} +{- | read a static dependencies specification and give the roots + + if dependencies come from a versioned (non-hardwired) package + that is linked multiple times, then the returned dependencies + will all come from the same version, but it's undefined which one. + -} + +type SDep = (ShortText, ShortText) -- ^ module/symbol + +staticDeps :: HscEnv + -> [(ShortText, Module)] -- ^ wired-in package names / keys + -> StaticDeps -- ^ deps from yaml file + -> (StaticDeps, Set UnitId, Set ExportedFun) + -- ^ the StaticDeps contains the symbols + -- for which no package could be found +staticDeps hsc_env wiredin sdeps = mkDeps sdeps + where + zenc = T.pack . zEncodeString . T.unpack + u_st = ue_units $ hsc_unit_env hsc_env + mkDeps (StaticDeps ds) = + -- FIXME: Jeff (2022,03): this foldl' will leak memory due to the tuple + -- and in the list in the fst position because the list is neither spine + -- nor value strict. So the WHNF computed by foldl' will by a 3-tuple with + -- 3 thunks and the WHNF for the list will be a cons cell + let (u, p, r) = foldl' resolveDep ([], S.empty, S.empty) ds + in (StaticDeps u, closePackageDeps u_st p, r) + resolveDep :: ([SDep], Set UnitId, Set ExportedFun) + -> SDep + -> ([SDep], Set UnitId, Set ExportedFun) + resolveDep (unresolved, pkgs, resolved) dep@(mod_name, s) = + -- lookup our module in wiredin names + case lookup mod_name wiredin of + -- we didn't find the module in wiredin so add to unresolved + Nothing -> ( dep : unresolved, pkgs, resolved) + -- this is a wired in module + Just mod -> + let mod_uid = moduleUnitId mod + in case lookupUnitId u_st mod_uid of + -- couldn't find the uid for this wired in package so explode + Nothing -> pprPanic ("Package key for wired-in dependency could not be found.`" + ++ "I looked for: " + ++ T.unpack mod_name + ++ " receieved " ++ moduleNameString (moduleName mod) + ++ " but could not find: " ++ unitString mod_uid + ++ " in the UnitState." + ++ " Here is too much info for you: ") + $ pprWithUnitState u_st (ppr mod) + -- we are all good, add the uid to the package set, construct + -- its symbols on the fly and add the module to exported symbol + -- set + Just _ -> ( unresolved + , S.insert mod_uid pkgs + , S.insert (ExportedFun mod + $ mkSymb mod mod_name s) resolved + ) + -- confusingly with the new ghc api we now use Module where we formerly had + -- Package, so this becomes Module -> Module -> Symbol where the first + -- Module is GHC's module type and the second is the SDep Moudle read as a + -- ShortText + -- FIXME: Jeff (2022,03): should mkSymb be in the UnitUtils? + mkSymb :: Module -> ShortText -> ShortText -> ShortText + mkSymb p m s = + "h$" <> zenc (T.pack (encodeModule hsc_env p) <> ":" <> m <> "." <> s) + +closePackageDeps :: UnitState -> Set UnitId -> Set UnitId +closePackageDeps u_st pkgs + | S.size pkgs == S.size pkgs' = pkgs + | otherwise = closePackageDeps u_st pkgs' + where + pkgs' = pkgs `S.union` S.fromList (concatMap deps $ S.toList pkgs) + notFound = error "closePackageDeps: package not found" + deps :: UnitId -> [UnitId] + deps = unitDepends + . fromMaybe notFound + . lookupUnitId u_st + +-- read all dependency data from the to-be-linked files +loadObjDeps :: [LinkedObj] -- ^ object files to link + -> IO (Map Module (Deps, DepsLocation), [LinkableUnit]) +loadObjDeps objs = prepareLoadedDeps <$> mapM readDepsFile' objs + +loadArchiveDeps :: GhcjsEnv + -> [FilePath] + -> IO ( Map Module (Deps, DepsLocation) + , [LinkableUnit] + ) +loadArchiveDeps env archives = modifyMVar (linkerArchiveDeps env) $ \m -> + case M.lookup archives' m of + Just r -> return (m, r) + Nothing -> loadArchiveDeps' archives >>= \r -> return (M.insert archives' r m, r) + where + archives' = S.fromList archives + +loadArchiveDeps' :: [FilePath] + -> IO ( Map Module (Deps, DepsLocation) + , [LinkableUnit] + ) +loadArchiveDeps' archives = do + archDeps <- forM archives $ \file -> do + (Ar.Archive entries) <- Ar.loadAr file + pure (mapMaybe (readEntry file) entries) + return (prepareLoadedDeps $ concat archDeps) + where + readEntry :: FilePath -> Ar.ArchiveEntry -> Maybe (Deps, DepsLocation) + readEntry ar_file ar_entry + | isObjFile (Ar.filename ar_entry) = + fmap (,ArchiveFile ar_file) + (readDepsMaybe (ar_file ++ ':':Ar.filename ar_entry) (BL.fromStrict $ Ar.filedata ar_entry)) + | otherwise = Nothing + + +isObjFile :: FilePath -> Bool +isObjFile file = ".o" `isSuffixOf` file || -- vanilla + "_o" `isSuffixOf` file -- some "Way", like .p_o + +prepareLoadedDeps :: [(Deps, DepsLocation)] + -> ( Map Module (Deps, DepsLocation) + , [LinkableUnit] + ) +prepareLoadedDeps deps = + let req = concatMap (requiredUnits . fst) deps + depsMap = M.fromList $ map (\d -> (depsModule (fst d), d)) deps + in (depsMap, req) + +requiredUnits :: Deps -> [LinkableUnit] +requiredUnits d = map (depsModule d,) (IS.toList $ depsRequired d) + +-- read dependencies from an object that might have already been into memory +-- pulls in all Deps from an archive +readDepsFile' :: LinkedObj -> IO (Deps, DepsLocation) +readDepsFile' (ObjLoaded name bs) = pure . (,InMemory name bs) $ + readDeps name bs +readDepsFile' (ObjFile file) = + (,ObjectFile file) <$> readDepsFile file + +generateBase :: FilePath -> Base -> IO () +generateBase outDir b = + BL.writeFile (outDir </> "out.base.symbs") (renderBase b) + diff --git a/compiler/GHC/StgToJS/Linker/Types.hs b/compiler/GHC/StgToJS/Linker/Types.hs new file mode 100644 index 0000000000..742c0926c1 --- /dev/null +++ b/compiler/GHC/StgToJS/Linker/Types.hs @@ -0,0 +1,581 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE PartialTypeSignatures #-} + +----------------------------------------------------------------------------- +-- | +-- Module : GHC.StgToJS.Linker.Types +-- Copyright : (c) The University of Glasgow 2001 +-- License : BSD-style (see the file LICENSE) +-- +-- Maintainer : Jeffrey Young <jeffrey.young@iohk.io> +-- Luite Stegeman <luite.stegeman@iohk.io> +-- Sylvain Henry <sylvain.henry@iohk.io> +-- Josh Meredith <josh.meredith@iohk.io> +-- Stability : experimental +-- +-- A base bundle is used for incremental linking. it contains information about +-- the symbols that have already been linked. These symbols are not included +-- again in the incrementally linked program. +-- +-- The Base data structure contains the information we need to do incremental +-- linking against a base bundle. +-- +-- base file format: +-- - GHCJSBASE +-- - [renamer state] +-- - [linkedPackages] +-- - [packages] +-- - [modules] +-- - [symbols] +-- +-- The base contains a CompactorState for consistent renaming of private names +-- and packed initialization of info tables and static closures. + +----------------------------- FIXMEs ------------------------------------------- +-- - Find a better data structure for linkerArchiveDeps +-- - Specialize Functor instances for helpers +-- - Better name for Base +-- - Remove unsafeShowSDoc +-- - Better implementation for Monoid JSLinkConfig +-- - Should we use (Messages String) or parameterize over (Messages e) in ThRunner? +-- - Fix name collision between LinkableUnit type in this module and the LinkableUnit type in StgToJS.Types +----------------------------------------------------------------------------- + +module GHC.StgToJS.Linker.Types where + +import GHC.JS.Syntax +import GHC.StgToJS.Object +import GHC.StgToJS.Types (ClosureInfo, StaticInfo) + +import GHC.Unit.Types +import GHC.Utils.Panic +import GHC.Utils.Outputable hiding ((<>)) +import GHC.Data.ShortText (ShortText) +import qualified GHC.Data.ShortText as T +import GHC.Driver.Env.Types (HscEnv) +import GHC.Types.Error (Messages) + +import Control.Monad + +import Data.Array +import qualified Data.Binary as DB +import qualified Data.Binary.Get as DB +import qualified Data.Binary.Put as DB +import Data.ByteString (ByteString) +import qualified Data.ByteString.Lazy as BL +import Data.Map.Strict (Map) +import qualified Data.Map.Strict as M +import Data.Set (Set) +import qualified Data.Set as S +import qualified Data.IntMap as I + +import Control.Concurrent.MVar +import qualified Control.Exception as E + +import System.IO +import System.Process + +import Prelude + +newLocals :: [Ident] +newLocals = filter (not . isJsKeyword) $ + map (TxtI . T.pack) $ + map (:[]) chars0 ++ concatMap mkIdents [1..] + where + mkIdents n = [c0:cs | c0 <- chars0, cs <- replicateM n chars] + chars0 = ['a'..'z']++['A'..'Z'] + chars = chars0++['0'..'9'] + +renamedVars :: [Ident] +renamedVars = map (\(TxtI xs) -> TxtI ("h$$"<>xs)) newLocals + +-------------------------------------------------------------------------------- +-- CompactorState +-------------------------------------------------------------------------------- + +-- FIXME: Jeff (2022,03): These maps should be newtyped so we cannot confuse +-- them and thus accidently construct hard to understand bugs. When we newtype +-- we should use deriving via to avoid boilerplate +data CompactorState = CompactorState + { csIdentSupply :: [Ident] -- ^ ident supply for new names + , csNameMap :: !(M.Map ShortText Ident) -- ^ renaming mapping for internal names + , csEntries :: !(M.Map ShortText Int) -- ^ entry functions (these get listed in the metadata init + -- array) + , csNumEntries :: !Int + , csStatics :: !(M.Map ShortText Int) -- ^ mapping of global closure -> index in current block, + -- for static initialisation + , csNumStatics :: !Int -- ^ number of static entries + , csLabels :: !(M.Map ShortText Int) -- ^ non-Haskell JS labels + , csNumLabels :: !Int -- ^ number of labels + , csParentEntries :: !(M.Map ShortText Int) -- ^ entry functions we're not linking, offset where parent + -- gets [0..n], grandparent [n+1..k] etc + , csParentStatics :: !(M.Map ShortText Int) -- ^ objects we're not linking in base bundle + , csParentLabels :: !(M.Map ShortText Int) -- ^ non-Haskell JS labels in parent + , csStringTable :: !StringTable + } deriving (Show) + +data StringTable = StringTable + { stTableIdents :: !(Array Int ShortText) + , stOffsets :: !(M.Map ByteString (Int, Int)) -- ^ content of the table + , stIdents :: !(M.Map ShortText (Either Int Int)) -- ^ identifiers in the table + } deriving (Show) + +instance DB.Binary StringTable where + put (StringTable tids offs idents) = do + DB.put tids + DB.put (M.toList offs) + DB.put (M.toList idents) + get = StringTable <$> DB.get + <*> fmap M.fromList DB.get + <*> fmap M.fromList DB.get + +emptyStringTable :: StringTable +emptyStringTable = StringTable (listArray (0,-1) []) M.empty M.empty + +-- FIXME: Jeff: (2022,03): Each of these helper functions carry a Functor f +-- constraint. We should specialize these once we know how they are used +entries :: Functor f + => (M.Map ShortText Int -> f (M.Map ShortText Int)) + -> CompactorState + -> f CompactorState +entries f cs = fmap (\x -> cs { csEntries = x }) (f $ csEntries cs) +{-# INLINE entries #-} + +identSupply :: Functor f + => ([Ident] -> f [Ident]) + -> CompactorState + -> f CompactorState +identSupply f cs = fmap (\x -> cs { csIdentSupply = x }) (f $ csIdentSupply cs) +{-# INLINE identSupply #-} + +labels :: Functor f + => (M.Map ShortText Int -> f (M.Map ShortText Int)) + -> CompactorState + -> f CompactorState +labels f cs = fmap (\x -> cs { csLabels = x }) (f $ csLabels cs) +{-# INLINE labels #-} + +nameMap :: Functor f + => (M.Map ShortText Ident -> f (M.Map ShortText Ident)) + -> CompactorState + -> f CompactorState +nameMap f cs = fmap (\x -> cs { csNameMap = x }) (f $ csNameMap cs) +{-# INLINE nameMap #-} + +numEntries :: Functor f + => (Int -> f Int) + -> CompactorState + -> f CompactorState +numEntries f cs = fmap (\x -> cs { csNumEntries = x }) (f $ csNumEntries cs) +{-# INLINE numEntries #-} + +numLabels :: Functor f + => (Int -> f Int) + -> CompactorState + -> f CompactorState +numLabels f cs = fmap (\x -> cs { csNumLabels = x }) (f $ csNumLabels cs) +{-# INLINE numLabels #-} + +numStatics :: Functor f + => (Int -> f Int) + -> CompactorState + -> f CompactorState +numStatics f cs = fmap (\x -> cs { csNumStatics = x }) (f $ csNumStatics cs) +{-# INLINE numStatics #-} + +parentEntries :: Functor f + => (M.Map ShortText Int -> f (M.Map ShortText Int)) + -> CompactorState + -> f CompactorState +parentEntries f cs = fmap (\x -> cs { csParentEntries = x }) (f $ csParentEntries cs) +{-# INLINE parentEntries #-} + +parentLabels :: Functor f + => (M.Map ShortText Int -> f (M.Map ShortText Int)) + -> CompactorState + -> f CompactorState +parentLabels f cs = fmap (\x -> cs { csParentLabels = x }) (f $ csParentLabels cs) +{-# INLINE parentLabels #-} + +parentStatics :: Functor f + => (M.Map ShortText Int -> f (M.Map ShortText Int)) + -> CompactorState + -> f CompactorState +parentStatics f cs = fmap (\x -> cs { csParentStatics = x }) (f $ csParentStatics cs) +{-# INLINE parentStatics #-} + +statics :: Functor f + => (M.Map ShortText Int -> f (M.Map ShortText Int)) + -> CompactorState + -> f CompactorState +statics f cs = fmap (\x -> cs { csStatics = x }) (f $ csStatics cs) +{-# INLINE statics #-} + +stringTable :: Functor f + => (StringTable -> f StringTable) + -> CompactorState + -> f CompactorState +stringTable f cs = fmap (\x -> cs { csStringTable = x }) (f $ csStringTable cs) +{-# INLINE stringTable #-} + +emptyCompactorState :: CompactorState +emptyCompactorState = CompactorState renamedVars + mempty + mempty + 0 + mempty + 0 + mempty + 0 + mempty + mempty + mempty + emptyStringTable + +-- | make a base state from a CompactorState: empty the current symbols sets, +-- move everything to the parent +makeCompactorParent :: CompactorState -> CompactorState +makeCompactorParent (CompactorState is nm es nes ss nss ls nls pes pss pls sts) + = CompactorState is + nm + M.empty 0 + M.empty 0 + M.empty 0 + (M.union (fmap (+nes) pes) es) + (M.union (fmap (+nss) pss) ss) + (M.union (fmap (+nls) pls) ls) + sts + +-- Helper functions used in Linker.Compactor. We live with some redundant code +-- to avoid the lens mayhem in Gen2 GHCJS. TODO: refactor to avoid redundant +-- code +addStaticEntry :: ShortText -- ^ The static entry to add + -> CompactorState -- ^ the old state + -> CompactorState -- ^ the new state +addStaticEntry new cs = + -- check if we have seen new before + let cur_statics = csStatics cs + go = M.lookup new cur_statics >> M.lookup new (csParentStatics cs) + in case go of + Just _ -> cs -- we have so return + Nothing -> let cnt = csNumStatics cs -- we haven't so do the business + newStatics = M.insert new cnt cur_statics + newCnt = cnt + 1 + in cs {csStatics = newStatics, csNumStatics = newCnt} + +addEntry :: ShortText -- ^ The entry function to add + -> CompactorState -- ^ the old state + -> CompactorState -- ^ the new state +addEntry new cs = + let cur_entries = csEntries cs + go = M.lookup new cur_entries >> M.lookup new (csParentEntries cs) + in case go of + Just _ -> cs + Nothing -> let cnt = csNumEntries cs + newEntries = M.insert new cnt cur_entries + newCnt = cnt + 1 + in cs {csEntries = newEntries, csNumEntries = newCnt} + +addLabel :: ShortText -- ^ The label to add + -> CompactorState -- ^ the old state + -> CompactorState -- ^ the new state +addLabel new cs = + let cur_lbls = csLabels cs + go = M.lookup new cur_lbls >> M.lookup new (csParentLabels cs) + in case go of + Just _ -> cs + Nothing -> let cnt = csNumLabels cs + newLabels = M.insert new cnt cur_lbls + newCnt = cnt + 1 + in cs {csEntries = newLabels, csNumLabels = newCnt} +-------------------------------------------------------------------------------- +-- Base +-------------------------------------------------------------------------------- + +-- FIXME: Jeff (2022,03): Pick a better name than Base, and should baseUnits be +-- Set UnitId and basePkgs be [PackageId]? I'm unsure if this should hold +-- UnitIds or UnitInfos or PackageIds or PackageNames +-- | The Base bundle. Used for incremental linking it maintains the compactor +-- state the base packages and units. +data Base = Base { baseCompactorState :: CompactorState + , basePkgs :: [UnitId] + , baseUnits :: Set (Module, Int) + } + +instance DB.Binary Base where + get = getBase "<unknown file>" + put = putBase + +showBase :: Base -> String +showBase b = unlines + [ "Base:" + , " packages: " ++ showSDocUnsafe (ppr (basePkgs b)) -- FIXME: Jeff (2022,03): Either use the sdoc context in the StgToJS + -- config or find a better way than showSDocUnsafe + , " number of units: " ++ show (S.size $ baseUnits b) + , " renaming table size: " ++ + show (M.size . csNameMap . baseCompactorState $ b) + ] + +emptyBase :: Base +emptyBase = Base emptyCompactorState [] S.empty + +putBase :: Base -> DB.Put +putBase (Base cs packages funs) = do + DB.putByteString "GHCJSBASE" + DB.putLazyByteString versionTag + putCs cs + putList DB.put packages + -- putList putPkg pkgs + putList DB.put mods + putList putFun (S.toList funs) + where + pi :: Int -> DB.Put + pi = DB.putWord32le . fromIntegral + uniq :: Ord a => [a] -> [a] + uniq = S.toList . S.fromList -- FIXME: Ick! Just use the Set in the first place! + -- pkgs = uniq (map fst $ S.toList funs) + -- pkgsM = M.fromList (zip pkgs [(0::Int)..]) + mods = uniq (map fst $ S.toList funs) + modsM = M.fromList (zip mods [(0::Int)..]) + putList f xs = pi (length xs) >> mapM_ f xs + -- serialise the compactor state + putCs (CompactorState [] _ _ _ _ _ _ _ _ _ _ _) = + panic "putBase: putCs exhausted renamer symbol names" + putCs (CompactorState (ns:_) nm es _ ss _ ls _ pes pss pls sts) = do + DB.put ns + DB.put (M.toList nm) + DB.put (M.toList es) + DB.put (M.toList ss) + DB.put (M.toList ls) + DB.put (M.toList pes) + DB.put (M.toList pss) + DB.put (M.toList pls) + DB.put sts + -- putPkg mod = DB.put mod + -- fixme group things first + putFun (m,s) = --pi (pkgsM M.! p) >> + pi (modsM M.! m) >> DB.put s + +getBase :: FilePath -> DB.Get Base +getBase file = getBase' + where + gi :: DB.Get Int + gi = fromIntegral <$> DB.getWord32le + getList f = DB.getWord32le >>= \n -> replicateM (fromIntegral n) f + getFun ms = (,) <$> + -- ((ps!) <$> gi) <*> + ((ms!) <$> gi) <*> DB.get + la xs = listArray (0, length xs - 1) xs + -- getPkg = DB.get + getCs = do + n <- DB.get + nm <- M.fromList <$> DB.get + es <- M.fromList <$> DB.get + ss <- M.fromList <$> DB.get + ls <- M.fromList <$> DB.get + pes <- M.fromList <$> DB.get + pss <- M.fromList <$> DB.get + pls <- M.fromList <$> DB.get + CompactorState (dropWhile (/=n) renamedVars) + nm + es + (M.size es) + ss + (M.size ss) + ls + (M.size ls) + pes + pss + pls <$> DB.get + getBase' = do + hdr <- DB.getByteString 9 + when (hdr /= "GHCJSBASE") + (panic $ "getBase: invalid base file: " <> file) + vt <- DB.getLazyByteString (fromIntegral versionTagLength) + when (vt /= versionTag) + (panic $ "getBase: incorrect version: " <> file) + cs <- makeCompactorParent <$> getCs + linkedPackages <- getList DB.get + -- pkgs <- la <$> getList getPkg + mods <- la <$> getList DB.get + funs <- getList (getFun mods) + return (Base cs linkedPackages $ S.fromList funs) + +-- | lazily render the base metadata into a bytestring +renderBase :: Base -> BL.ByteString +renderBase = DB.runPut . putBase + +-- | lazily load base metadata from a file, see @UseBase@. +loadBase :: FilePath -> IO Base +loadBase file = DB.runGet (getBase file) <$> BL.readFile file + +-- | There are 3 ways the linker can use @Base@. We can not use it, and thus not +-- do any incremental linking. We can load it from a file, where we assume that +-- the symbols from the bundle and their dependencies have already been loaded. +-- In this case We must save the CompactorState so that we can do consistent +-- renaming. Or we can use a Base that is already in memory. +-- +-- Incremental linking greatly improves link time and can also be used in +-- multi-page or repl-type applications to serve most of the code from a static +-- location, reloading only the small parts that are actually different. +data UseBase = NoBase -- ^ don't use incremental linking + | BaseFile FilePath -- ^ load base from file + | BaseState Base -- ^ use this base + +instance Show UseBase where + show NoBase = "NoBase" + show BaseFile {} = "BaseFile" + show BaseState {} = "BaseState" + +instance Monoid UseBase where + mempty = NoBase + +instance Semigroup UseBase where + x <> NoBase = x + _ <> x = x + +-------------------------------------------------------------------------------- +-- Linker Config +-- TODO: Jeff: (2022,03): Move to separate module? Linker.Config? Or Merge with StgToJSConfig? +-------------------------------------------------------------------------------- + +data JSLinkConfig = + JSLinkConfig { lcNativeExecutables :: Bool + , lcNativeToo :: Bool + , lcBuildRunner :: Bool + , lcNoJSExecutables :: Bool + , lcNoHsMain :: Bool + , lcStripProgram :: Maybe FilePath + , lcLogCommandLine :: Maybe FilePath + , lcGhc :: Maybe FilePath + , lcOnlyOut :: Bool + , lcNoRts :: Bool + , lcNoStats :: Bool + , lcGenBase :: Maybe Module -- ^ base module + , lcUseBase :: UseBase + , lcLinkJsLib :: Maybe String + , lcJsLibOutputDir :: Maybe FilePath + , lcJsLibSrcs :: [FilePath] + , lcDedupe :: Bool + } + +usingBase :: JSLinkConfig -> Bool +usingBase s | NoBase <- lcUseBase s = False + | otherwise = True + +-- | we generate a runnable all.js only if we link a complete application, +-- no incremental linking and no skipped parts +generateAllJs :: JSLinkConfig -> Bool +generateAllJs s + | NoBase <- lcUseBase s = not (lcOnlyOut s) && not (lcNoRts s) + | otherwise = False + +{- + -- FIXME: Jeff (2022,03): This instance is supposed to capture overriding + -- settings, where one group comes from the environment (env vars, config + -- files) and the other from the command line. (env `mappend` cmdLine) should + -- give the combined settings, but it doesn't work very well. find something + -- better. + -} +instance Monoid JSLinkConfig where + -- FIXME: Jeff (2022,03): Adding no hs main to config, should False be default + -- here? + mempty = JSLinkConfig False False False False False + Nothing Nothing Nothing False + False False Nothing NoBase + Nothing Nothing mempty False + +instance Semigroup JSLinkConfig where + (<>) (JSLinkConfig ne1 nn1 bc1 nj1 noHs1 sp1 lc1 gh1 oo1 nr1 ns1 gb1 ub1 ljsl1 jslo1 jslsrc1 dd1) + (JSLinkConfig ne2 nn2 bc2 nj2 noHs2 sp2 lc2 gh2 oo2 nr2 ns2 gb2 ub2 ljsl2 jslo2 jslsrc2 dd2) = + JSLinkConfig (ne1 || ne2) + (nn1 || nn2) + (bc1 || bc2) + (nj1 || nj2) + (noHs1 || noHs2) + (sp1 `mplus` sp2) + (lc1 `mplus` lc2) + (gh1 `mplus` gh2) + (oo1 || oo2) + (nr1 || nr2) + (ns1 || ns2) + (gb1 `mplus` gb2) + (ub1 <> ub2) + (ljsl1 <> ljsl2) + (jslo1 <> jslo2) + (jslsrc1 <> jslsrc2) + (dd1 || dd2) + +-------------------------------------------------------------------------------- +-- Linker Environment +-- TODO: Jeff: (2022,03): Move to separate module, same as Config? +-------------------------------------------------------------------------------- +-- | A LinkableUnit is a pair of a module and the index of the block in the +-- object file +-- FIXME: Jeff: (2022,03): Refactor to avoid name collision between +-- StgToJS.Linker.Types.LinkableUnit and StgToJS.Types.LinkableUnit +type LinkableUnit = (Module, Int) + +type LinkedUnit = (JStat, [ClosureInfo], [StaticInfo]) + +-- TODO: Jeff: (2022,03): Where to move LinkedObj +-- | An object file that's either already in memory (with name) or on disk +data LinkedObj = ObjFile FilePath -- ^ load from this file + | ObjLoaded String BL.ByteString -- ^ already loaded: description and payload + deriving (Eq, Ord, Show) + +data GhcjsEnv = GhcjsEnv + { compiledModules :: MVar (Map Module ByteString) -- ^ keep track of already compiled modules so we don't compile twice for dynamic-too + , thRunners :: MVar THRunnerState -- (Map String ThRunner) -- ^ template haskell runners + , thSplice :: MVar Int + -- FIXME: Jeff a Map keyed on a Set is going to be quite costly. The Eq + -- instance over Sets _can_ be fast if the sets are different sizes, this + -- would be O(1), however if they are equal size then we incur a costly + -- converstion to an Ascending List O(n) and then perform the element wise + -- check hence O(mn) where m is the cost of the element check. Thus, we should + -- fix this data structure and use something more efficient, HashMap if + -- available, IntMap if possible. Nested maps, in particular, seem like a + -- design smell. + , linkerArchiveDeps :: MVar (Map (Set FilePath) + (Map Module (Deps, DepsLocation) + , [LinkableUnit] + ) + ) + , pluginState :: MVar (Maybe HscEnv) + } + +data THRunnerState = THRunnerState + { activeRunners :: Map String THRunner + , idleRunners :: [THRunner] + } + +data THRunner = + THRunner { thrProcess :: ProcessHandle + , thrHandleIn :: Handle + , thrHandleErr :: Handle + , thrBase :: MVar Base + -- FIXME: Jeff (2022,03): Is String the right type here? I chose it + -- because it was easy but I am unsure what the needs of its consumer + -- are. + , thrRecover :: MVar [Messages String] + , thrExceptions :: MVar (I.IntMap E.SomeException) + } + +consIdleRunner :: THRunner -> THRunnerState -> THRunnerState +consIdleRunner r s = s { idleRunners = r : idleRunners s } + +unconsIdleRunner :: THRunnerState -> Maybe (THRunner, THRunnerState) +unconsIdleRunner s + | (r:xs) <- idleRunners s = Just (r, s { idleRunners = xs }) + | otherwise = Nothing + +deleteActiveRunner :: String -> THRunnerState -> THRunnerState +deleteActiveRunner m s = + s { activeRunners = M.delete m (activeRunners s) } + +insertActiveRunner :: String -> THRunner -> THRunnerState -> THRunnerState +insertActiveRunner m runner s = + s { activeRunners = M.insert m runner (activeRunners s) } + +emptyTHRunnerState :: THRunnerState +emptyTHRunnerState = THRunnerState mempty mempty diff --git a/compiler/GHC/StgToJS/Linker/Utils.hs b/compiler/GHC/StgToJS/Linker/Utils.hs new file mode 100644 index 0000000000..81598f07a4 --- /dev/null +++ b/compiler/GHC/StgToJS/Linker/Utils.hs @@ -0,0 +1,101 @@ +----------------------------------------------------------------------------- +-- | +-- Module : GHC.StgToJS.Linker.Utils +-- Copyright : (c) The University of Glasgow 2001 +-- License : BSD-style (see the file LICENSE) +-- +-- Maintainer : Jeffrey Young <jeffrey.young@iohk.io> +-- Luite Stegeman <luite.stegeman@iohk.io> +-- Sylvain Henry <sylvain.henry@iohk.io> +-- Josh Meredith <josh.meredith@iohk.io> +-- Stability : experimental +-- +-- Various utilies used in the JS Linker +-- +----------------------------- FIXMEs ------------------------------------------- +-- - implement Windows check for @addExeExtension@ +-- - resolve macOS comment in @writeBinaryFile@ +-- - remove redundant function @jsExeFileName@ +----------------------------------------------------------------------------- + +module GHC.StgToJS.Linker.Utils where + +import System.FilePath +import qualified Data.ByteString as B +import Data.ByteString (ByteString) +import System.IO (withBinaryFile, IOMode(WriteMode)) + +import GHC.Driver.Session +import GHC.Settings.Config (cProjectVersion) + +import GHC.Data.ShortText +import GHC.Unit.State +import GHC.Unit.Types +import GHC.Utils.Panic + +import Prelude +import GHC.Platform +import Data.List (isPrefixOf) + +addExeExtension :: FilePath -> FilePath +addExeExtension = id +{- FIXME: Jeff (2022,03): with FIXME: after Windows FIXME in Linker, fix this too + | Platform.isWindows = (<.> "exe") + | otherwise = id +-} + +{- + macOS has trouble writing more than 2GiB at once to a file + (tested with 10.14.6), and the base library doesn't work around this + problem yet (tested with GHC 8.6), so we work around it here. + + in this workaround we write a binary file in chunks of 1 GiB + FIXME: Jeff (2022,03): Is this still true? + -} +writeBinaryFile :: FilePath -> ByteString -> IO () +writeBinaryFile file bs = + withBinaryFile file WriteMode $ \h -> mapM_ (B.hPut h) (chunks bs) + where + -- split the ByteString into a nonempty list of chunks of at most 1GiB + chunks :: ByteString -> [ByteString] + chunks b = + let (b1, b2) = B.splitAt 1073741824 b + in b1 : if B.null b1 then [] else chunks b2 + +getInstalledPackageLibDirs :: UnitState -> UnitId -> [FilePath] +getInstalledPackageLibDirs us = fmap unpack . maybe mempty unitLibraryDirs . lookupUnitId us + +getInstalledPackageHsLibs :: UnitState -> UnitId -> [String] +getInstalledPackageHsLibs us = fmap unpack . maybe mempty unitLibraries . lookupUnitId us + +tryReadShimFile :: DynFlags -> FilePath -> IO B.ByteString +tryReadShimFile = panic "tryReadShimFile: Shims not yet implemented!" + +readShimsArchive :: DynFlags -> FilePath -> IO B.ByteString +readShimsArchive = panic "readShimsArchive: Shims not yet implemented!" + +getCompilerVersion :: String +getCompilerVersion = cProjectVersion + +jsexeExtension :: String +jsexeExtension = "jsexe" + +-- FIXME: Jeff (2022,04): remove this function since it is a duplicate of +-- GHC.Linker.Static.Utils.exeFileName +jsExeFileName :: DynFlags -> FilePath +jsExeFileName dflags + | Just s <- outputFile_ dflags = + -- unmunge the extension + let s' = dropPrefix "js_" (drop 1 $ takeExtension s) + -- FIXME: add this check when support for Windows check is added + in if Prelude.null s' -- || (Platform.isWindows && map toLower s' == "exe") + then dropExtension s <.> jsexeExtension + else dropExtension s <.> s' + | otherwise = + if platformOS (targetPlatform dflags) == OSMinGW32 + then "main.jsexe" + else "a.jsexe" + where + dropPrefix prefix xs + | prefix `isPrefixOf` xs = drop (length prefix) xs + | otherwise = xs diff --git a/compiler/GHC/StgToJS/Object.hs b/compiler/GHC/StgToJS/Object.hs index bb986926b1..11e62e2eb1 100644 --- a/compiler/GHC/StgToJS/Object.hs +++ b/compiler/GHC/StgToJS/Object.hs @@ -1,32 +1,53 @@ -{-# LANGUAGE StandaloneDeriving #-} -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE LambdaCase #-} -{-# LANGUAGE TypeSynonymInstances #-} -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE TupleSections #-} -{-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE DeriveGeneric #-} -{-# LANGUAGE Rank2Types #-} -{-# LANGUAGE GeneralizedNewtypeDeriving #-} - --- | Serialization/deserialization of binary .o files for the JavaScript backend +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE Rank2Types #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TupleSections #-} + +-- only for DB.Binary instances on Module see FIXME below +{-# OPTIONS_GHC -fno-warn-orphans #-} + +----------------------------------------------------------------------------- +-- | +-- Module : GHC.StgToJS.Object +-- Copyright : (c) The University of Glasgow 2001 +-- License : BSD-style (see the file LICENSE) -- --- The .o files contain dependency information and generated code. +-- Maintainer : Sylvain Henry <sylvain.henry@iohk.io> +-- Jeffrey Young <jeffrey.young@iohk.io> +-- Luite Stegeman <luite.stegeman@iohk.io> +-- Josh Meredith <josh.meredith@iohk.io> +-- Stability : experimental -- --- All strings are mapped to a central string table, which helps reduce --- file size and gives us efficient hash consing on read +-- Serialization/deserialization of binary .o files for the JavaScript backend +-- The .o files contain dependency information and generated code. +-- All strings are mapped to a central string table, which helps reduce +-- file size and gives us efficient hash consing on read -- --- Binary intermediate JavaScript object files: --- serialized [Text] -> ([ClosureInfo], JStat) blocks --- --- file layout: --- - header ["GHCJSOBJ", length of symbol table, length of dependencies, length of index] --- - compiler version tag --- - symbol table --- - dependency info --- - closureinfo index --- - closureinfo data (offsets described by index) +-- Binary intermediate JavaScript object files: +-- serialized [Text] -> ([ClosureInfo], JStat) blocks -- +-- file layout: +-- - header ["GHCJSOBJ", length of symbol table, length of dependencies, length of index] +-- - compiler version tag +-- - symbol table +-- - dependency info +-- - closureinfo index +-- - closureinfo data (offsets described by index) + +-- FIXME: Jeff (2022,03): There are orphan instances for DB.Binary Module and +-- ModuleName. These are needed in StgToJS.Linker.Types for @Base@ serialization +-- in @putBase@. We end up in this situation because Base now holds a @Module@ +-- type instead of GHCJS's previous @Package@ type. In addition to this GHC uses +-- GHC.Utils.Binary for binary instances rather than Data.Binary (even though +-- Data.Binary is a boot lib) so to fix the situation we must: +-- - 1. Choose to use GHC.Utils.Binary or Data.Binary +-- - 2. Remove Objectable since this is redundant +-- - 3. Adapt the Linker types, like Base to the new Binary methods +----------------------------------------------------------------------------- + module GHC.StgToJS.Object ( object , object' @@ -47,7 +68,11 @@ module GHC.StgToJS.Object , Header(..), getHeader, moduleNameTag , SymbolTable , ObjUnit (..) - , Deps (..), BlockDeps (..) + -- FIXME: Jeff (2022,03): These exports are just for Base use in Linker.Types + , Objectable(..) + , PutS + -- end exports for Linker.Types + , Deps (..), BlockDeps (..), DepsLocation (..) , ExpFun (..), ExportedFun (..) , versionTag, versionTagLength ) @@ -112,9 +137,15 @@ data Deps = Deps { depsModule :: !Module -- ^ module , depsRequired :: !IntSet -- ^ blocks that always need to be linked when this object is loaded (e.g. everything that contains initializer code or foreign exports) , depsHaskellExported :: !(Map ExportedFun Int) -- ^ exported Haskell functions -> block - , depsBlocks :: !(Array Int BlockDeps) -- ^ info about each block + , depsBlocks :: !(Array Int BlockDeps) -- ^ info about each block } deriving (Generic) +-- | Where are the dependencies +data DepsLocation = ObjectFile FilePath -- ^ In an object file at path + | ArchiveFile FilePath -- ^ In a Ar file at path + | InMemory String ByteString -- ^ In memory + deriving (Eq, Show) + data BlockDeps = BlockDeps { blockBlockDeps :: [Int] -- ^ dependencies on blocks in this object , blockFunDeps :: [ExportedFun] -- ^ dependencies on exported symbols in other objects @@ -368,7 +399,7 @@ readDepsEither name bs = -- | call with contents of the file -readDeps :: String -> ByteString -> Deps +readDeps :: String -> B.ByteString -> Deps readDeps name bs = case readDepsEither name bs of Left err -> error ("readDeps: not a valid GHCJS object: " ++ name ++ "\n " ++ err) @@ -736,6 +767,34 @@ instance Objectable Module where put (Module unit mod_name) = put unit >> put mod_name get = Module <$> get <*> get +instance DB.Binary Module where + put (Module unit mod_name) = DB.put unit >> DB.put mod_name + get = Module <$> DB.get <*> DB.get + +instance DB.Binary ModuleName where + put (ModuleName fs) = DB.put fs + get = ModuleName <$> DB.get + +instance DB.Binary Unit where + put = \case + RealUnit (Definite uid) -> DB.put (0 :: Int) >> DB.put uid + VirtUnit uid -> DB.put (1 :: Int) >> DB.put uid + HoleUnit -> DB.put (2 :: Int) + get = DB.get >>= \case + (0 :: Int) -> RealUnit . Definite <$> DB.get + 1 -> VirtUnit <$> DB.get + _ -> pure HoleUnit + +instance DB.Binary UnitId where + put (UnitId fs) = DB.put fs + get = UnitId <$> DB.get + +instance DB.Binary InstantiatedUnit where + put indef = do + DB.put (instUnitInstanceOf indef) + DB.put (instUnitInsts indef) + get = mkInstantiatedUnitSorted <$> DB.get <*> DB.get + instance Objectable ModuleName where put (ModuleName fs) = put fs get = ModuleName <$> get @@ -754,6 +813,10 @@ instance Objectable FastString where put fs = put (unpackFS fs) get = mkFastString <$> get +instance DB.Binary FastString where + put fs = DB.put (unpackFS fs) + get = mkFastString <$> DB.get + instance Objectable UnitId where put (UnitId fs) = put fs get = UnitId <$> get diff --git a/compiler/GHC/StgToJS/Printer.hs b/compiler/GHC/StgToJS/Printer.hs new file mode 100644 index 0000000000..21e0dfa252 --- /dev/null +++ b/compiler/GHC/StgToJS/Printer.hs @@ -0,0 +1,165 @@ +{-# LANGUAGE OverloadedStrings #-} + +----------------------------------------------------------------------------- +-- | +-- Module : GHC.StgToJS.Printer +-- Copyright : (c) The University of Glasgow 2001 +-- License : BSD-style (see the file LICENSE) +-- +-- Maintainer : Jeffrey Young <jeffrey.young@iohk.io> +-- Luite Stegeman <luite.stegeman@iohk.io> +-- Sylvain Henry <sylvain.henry@iohk.io> +-- Stability : experimental +-- +-- Custom prettyprinter for JS AST uses the JS PPr module for most of +-- the work +-- +-- +----------------------------------------------------------------------------- +module GHC.StgToJS.Printer + ( pretty + , ghcjsRenderJs + , prettyBlock + ) where + + +import GHC.JS.Syntax +import GHC.JS.Ppr + +import qualified GHC.Data.ShortText as T +import GHC.Utils.Ppr as PP + +import qualified Data.Map as M + +import Data.Char (isAlpha,isDigit) + +import GHC.Prelude + +pretty :: JStat -> Doc +pretty = jsToDocR ghcjsRenderJs + +ghcjsRenderJs :: RenderJs +ghcjsRenderJs = defaultRenderJs { renderJsV = ghcjsRenderJsV + , renderJsS = ghcjsRenderJsS + } + +-- attempt to resugar some of the common constructs +ghcjsRenderJsS :: RenderJs -> JStat -> Doc +ghcjsRenderJsS r (BlockStat xs) = prettyBlock r (flattenBlocks xs) +ghcjsRenderJsS r s = renderJsS defaultRenderJs r s + +-- don't quote keys in our object literals, so closure compiler works +ghcjsRenderJsV :: RenderJs -> JVal -> Doc +ghcjsRenderJsV r (JHash m) + | M.null m = text "{}" + | otherwise = braceNest . PP.fsep . punctuate comma . + map (\(x,y) -> quoteIfRequired x <> PP.colon <+> jsToDocR r y) $ M.toList m + where + quoteIfRequired :: T.ShortText -> Doc + quoteIfRequired x + | isUnquotedKey x' = text x' + | otherwise = PP.squotes (text x') + -- FIXME: Jeff (2022,03): remove the deserialization to String. We are only + -- converting from ShortText to String here to call @all@ and @tail@. + where x' = T.unpack x + + isUnquotedKey :: String -> Bool + isUnquotedKey x | null x = False + | all isDigit x = True + | otherwise = validFirstIdent (head x) + && all validOtherIdent (tail x) + + + -- fixme, this will quote some idents that don't really need to be quoted + validFirstIdent c = c == '_' || c == '$' || isAlpha c + validOtherIdent c = isAlpha c || isDigit c +ghcjsRenderJsV r v = renderJsV defaultRenderJs r v + +prettyBlock :: RenderJs -> [JStat] -> Doc +prettyBlock r xs = vcat $ map addSemi (prettyBlock' r xs) + +-- recognize common patterns in a block and convert them to more idiomatic/concise javascript +prettyBlock' :: RenderJs -> [JStat] -> [Doc] +-- resugar for loops with/without var declaration +prettyBlock' r ( (DeclStat i) + : (AssignStat (ValExpr (JVar i')) v0) + : (WhileStat False p (BlockStat bs)) + : xs + ) + | i == i' && not (null flat) && isForUpdStat (last flat) + = mkFor r True i v0 p (last flat) (init flat) : prettyBlock' r xs + where + flat = flattenBlocks bs +prettyBlock' r ( (AssignStat (ValExpr (JVar i)) v0) + : (WhileStat False p (BlockStat bs)) + : xs + ) + | not (null flat) && isForUpdStat (last flat) + = mkFor r False i v0 p (last flat) (init flat) : prettyBlock' r xs + where + flat = flattenBlocks bs + +-- global function (does not preserve semantics but works for GHCJS) +prettyBlock' r ( (DeclStat i) + : (AssignStat (ValExpr (JVar i')) (ValExpr (JFunc is b))) + : xs + ) + | i == i' = (text "function" <+> jsToDocR r i + <> parens (fsep . punctuate comma . map (jsToDocR r) $ is) + $$ braceNest' (jsToDocR r b) + ) : prettyBlock' r xs +-- declare/assign +prettyBlock' r ( (DeclStat i) + : (AssignStat (ValExpr (JVar i')) v) + : xs + ) + | i == i' = (text "var" <+> jsToDocR r i <+> char '=' <+> jsToDocR r v) : prettyBlock' r xs + +-- modify/assign operators (fixme this should be more general, but beware of side effects like PPostExpr) +prettyBlock' r ( (AssignStat (ValExpr (JVar i)) (InfixExpr AddOp (ValExpr (JVar i')) (ValExpr (JInt 1)))) + : xs + ) + | i == i' = (text "++" <> jsToDocR r i) : prettyBlock' r xs +prettyBlock' r ( (AssignStat (ValExpr (JVar i)) (InfixExpr SubOp (ValExpr (JVar i')) (ValExpr (JInt 1)))) + : xs + ) + | i == i' = (text "--" <> jsToDocR r i) : prettyBlock' r xs +prettyBlock' r ( (AssignStat (ValExpr (JVar i)) (InfixExpr AddOp (ValExpr (JVar i')) e)) + : xs + ) + | i == i' = (jsToDocR r i <+> text "+=" <+> jsToDocR r e) : prettyBlock' r xs +prettyBlock' r ( (AssignStat (ValExpr (JVar i)) (InfixExpr SubOp (ValExpr (JVar i')) e)) + : xs + ) + | i == i' = (jsToDocR r i <+> text "-=" <+> jsToDocR r e) : prettyBlock' r xs + + +prettyBlock' r (x:xs) = jsToDocR r x : prettyBlock' r xs +prettyBlock' _ [] = [] + +-- build the for block +mkFor :: RenderJs -> Bool -> Ident -> JExpr -> JExpr -> JStat -> [JStat] -> Doc +mkFor r decl i v0 p s1 sb = text "for" <> forCond <+> braceNest'' (jsToDocR r $ BlockStat sb) + where + c0 | decl = text "var" <+> jsToDocR r i <+> char '=' <+> jsToDocR r v0 + | otherwise = jsToDocR r i <+> char '=' <+> jsToDocR r v0 + forCond = parens $ hcat $ interSemi + [ c0 + , jsToDocR r p + , parens (jsToDocR r s1) + ] + +-- check if a statement is suitable to be converted to something in the for(;;x) position +isForUpdStat :: JStat -> Bool +isForUpdStat UOpStat {} = True +isForUpdStat AssignStat {} = True +isForUpdStat ApplStat {} = True +isForUpdStat _ = False + +interSemi :: [Doc] -> [Doc] +interSemi [] = [PP.empty] +interSemi [s] = [s] +interSemi (x:xs) = x <> text ";" : interSemi xs + +addSemi :: Doc -> Doc +addSemi x = x <> text ";" diff --git a/compiler/GHC/StgToJS/Rts/Rts.hs b/compiler/GHC/StgToJS/Rts/Rts.hs index 91dd117e78..57f24ea86c 100644 --- a/compiler/GHC/StgToJS/Rts/Rts.hs +++ b/compiler/GHC/StgToJS/Rts/Rts.hs @@ -38,6 +38,7 @@ import GHC.StgToJS.Apply import GHC.StgToJS.Closure import GHC.StgToJS.Heap import GHC.StgToJS.Monad +import GHC.StgToJS.Printer import GHC.StgToJS.Profiling import GHC.StgToJS.Regs import GHC.StgToJS.Types @@ -380,6 +381,12 @@ rtsDecls = jsSaturate (Just "h$RTSD") $ , declRegs , declRets] +rtsText :: StgToJSConfig -> T.ShortText +rtsText = T.pack . show . pretty . rts + +rtsDeclsText :: T.ShortText +rtsDeclsText = T.pack . show . pretty $ rtsDecls + rts :: StgToJSConfig -> JStat rts = jsSaturate (Just "h$RTS") . rts' diff --git a/compiler/GHC/StgToJS/Types.hs b/compiler/GHC/StgToJS/Types.hs index 02b09a0a7d..1c32e60555 100644 --- a/compiler/GHC/StgToJS/Types.hs +++ b/compiler/GHC/StgToJS/Types.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE DerivingStrategies #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} @@ -26,9 +27,12 @@ import GHC.Data.ShortText import GHC.Unit.Module import qualified Data.Map as M -import Data.Set (Set) +import Data.Set (Set) import qualified Data.ByteString as BS -import Data.Monoid +import Data.Monoid +import Data.Typeable (Typeable) +import GHC.Generics (Generic) +import Control.DeepSeq type G = State GenState @@ -79,14 +83,18 @@ data ClosureInfo = ClosureInfo , ciType :: CIType -- ^ type of the object, with extra info where required , ciStatic :: CIStatic -- ^ static references of this object } - deriving (Eq, Ord) + deriving stock (Eq, Ord, Show, Generic) + +instance NFData ClosureInfo data CIRegs = CIRegsUnknown | CIRegs { ciRegsSkip :: Int -- ^ unused registers before actual args start , ciRegsTypes :: [VarType] -- ^ args } - deriving (Eq, Ord) + deriving stock (Eq, Ord, Show, Generic) + +instance NFData CIRegs data CILayout = CILayoutVariable -- layout stored in object itself, first position from the start @@ -97,7 +105,9 @@ data CILayout { layoutSize :: !Int -- closure size in array positions, including entry , layout :: [VarType] } - deriving (Eq, Ord) + deriving stock (Eq, Ord, Show, Generic) + +instance NFData CILayout data CIType = CIFun { citArity :: !Int -- ^ function arity @@ -108,15 +118,16 @@ data CIType | CIPap | CIBlackhole | CIStackFrame - deriving (Eq, Ord) + deriving stock (Eq, Ord, Show, Generic) + +instance NFData CIType -- | Static references that must be kept alive newtype CIStatic = CIStaticRefs { staticRefs :: [ShortText] } - deriving stock (Eq, Ord) - deriving newtype (Semigroup, Monoid) + deriving stock (Eq, Ord, Generic) + deriving newtype (Semigroup, Monoid, Show) --- TODO: Jeff (2022,03): Make ToJExpr derivable? will need Default Signatures --- and depends on the increase in compilation time +instance NFData CIStatic -- | static refs: array = references, null = nothing to report -- note: only works after all top-level objects have been created @@ -136,7 +147,9 @@ data VarType | RtsObjV -- some RTS object from GHCJS (for example TVar#, MVar#, MutVar#, Weak#) | ObjV -- some JS object, user supplied, be careful around these, can be anything | ArrV -- boxed array - deriving (Eq, Ord, Enum, Bounded) + deriving stock (Eq, Ord, Enum, Bounded, Show, Generic) + +instance NFData VarType instance ToJExpr VarType where toJExpr = toJExpr . fromEnum @@ -172,7 +185,9 @@ data StaticInfo = StaticInfo { siVar :: !ShortText -- ^ global object , siVal :: !StaticVal -- ^ static initialization , siCC :: !(Maybe Ident) -- ^ optional CCS name - } + } deriving stock (Eq, Ord, Show, Typeable, Generic) + +instance NFData StaticInfo data StaticVal = StaticFun !ShortText [StaticArg] @@ -186,7 +201,9 @@ data StaticVal -- ^ regular datacon app | StaticList [StaticArg] (Maybe ShortText) -- ^ list initializer (with optional tail) - deriving (Eq, Ord) + deriving stock (Eq, Ord, Show, Generic) + +instance NFData StaticVal data StaticUnboxed = StaticUnboxedBool !Bool @@ -194,13 +211,17 @@ data StaticUnboxed | StaticUnboxedDouble !SaneDouble | StaticUnboxedString !BS.ByteString | StaticUnboxedStringOffset !BS.ByteString - deriving (Eq, Ord) + deriving stock (Eq, Ord, Show, Generic) + +instance NFData StaticUnboxed data StaticArg = StaticObjArg !ShortText -- ^ reference to a heap object | StaticLitArg !StaticLit -- ^ literal | StaticConArg !ShortText [StaticArg] -- ^ unfloated constructor - deriving (Eq, Ord, Show) + deriving stock (Eq, Ord, Show, Generic) + +instance NFData StaticArg instance Outputable StaticArg where ppr x = text (show x) @@ -213,11 +234,23 @@ data StaticLit | StringLit !ShortText | BinLit !BS.ByteString | LabelLit !Bool !ShortText -- ^ is function pointer, label (also used for string / binary init) - deriving (Eq, Ord, Show) + deriving (Eq, Ord, Show, Generic) + +instance NFData StaticLit instance Outputable StaticLit where ppr x = text (show x) + +instance ToJExpr StaticLit where + toJExpr (BoolLit b) = toJExpr b + toJExpr (IntLit i) = toJExpr i + toJExpr NullLit = null_ + toJExpr (DoubleLit d) = toJExpr (unSaneDouble d) + toJExpr (StringLit t) = app (pack "h$str") [toJExpr t] + toJExpr (BinLit b) = app (pack "h$rstr") [toJExpr (map toInteger (BS.unpack b))] + toJExpr (LabelLit _isFun lbl) = var lbl + data ForeignJSRef = ForeignJSRef { foreignRefSrcSpan :: !ShortText , foreignRefPattern :: !ShortText @@ -225,7 +258,7 @@ data ForeignJSRef = ForeignJSRef , foreignRefCConv :: !CCallConv , foreignRefArgs :: ![ShortText] , foreignRefResult :: !ShortText - } + } deriving stock (Generic) -- | data used to generate one ObjUnit in our object file data LinkableUnit = LinkableUnit @@ -268,8 +301,6 @@ data ExprResult newtype ExprValData = ExprValData [JExpr] deriving newtype (Eq, Ord, Show) - - -- closure types data ClosureType = Thunk | Fun | Pap | Con | Blackhole | StackFrame deriving (Show, Eq, Ord, Enum, Bounded) diff --git a/compiler/GHC/StgToJS/UnitUtils.hs b/compiler/GHC/StgToJS/UnitUtils.hs index 61886f43f0..75c3c616d5 100644 --- a/compiler/GHC/StgToJS/UnitUtils.hs +++ b/compiler/GHC/StgToJS/UnitUtils.hs @@ -4,13 +4,22 @@ module GHC.StgToJS.UnitUtils ( unitModuleString , moduleGlobalSymbol , moduleExportsSymbol - ) -where + , getPackageName + , encodeModule + , ghcjsPrimUnit + , ghcjsThUnit + ) where import GHC.Data.ShortText as ST import GHC.Unit.Module +import GHC.Unit.Info +import GHC.Unit.State +import GHC.Unit.Env +import GHC.Unit.Home import GHC.Utils.Encoding +import GHC.Driver.Env.Types +import GHC.Driver.Session import GHC.Prelude @@ -36,3 +45,75 @@ moduleExportsSymbol m = mconcat , ST.pack (zEncodeString $ unitModuleString m) , "_<exports>" ] + +-- FIXME: Use FastString +encodeModule :: HscEnv -> Module -> String +encodeModule env k + | isGhcjsPrimUnit env (moduleUnitId k) = "ghcjs-prim" + | isGhcjsThUnit env (moduleUnitId k) = "ghcjs-th" + | otherwise = unitModuleString k + +{- + some packages are wired into GHCJS, but not GHC + make sure we don't version them in the output + since the RTS uses thins from them +-} + +-- FIXME: Jeff (2022,03): I've swapped DynFlags for HscEnv to gain access to the +-- UnitState for these checks. Unsure if this is a great idea or even workable. +-- In either case it will proliferate DynFlags throughout the Linker. So the fix +-- should be to add flags to the Linker config so we do not need to carry HscEnv +-- or DynFlags around. +isGhcjsPrimUnit :: HscEnv -> UnitId -> Bool +isGhcjsPrimUnit env pkgKey + = pn == "ghcjs-prim" || -- FIXME: Jeff (2022,03): use UnitID only instead of + -- a hacky String comparison, same for + -- @isGhcjsThUnit@ + (GHC.Prelude.null pn && pkgKey == home_uid && + elem "-DBOOTING_PACKAGE=ghcjs-prim" (opt_P $ hsc_dflags env)) + where + pn = unitIdString . ue_current_unit $ hsc_unit_env env + -- FIXME: Jeff (2022,03): remove call to unsafe. Only using this because I + -- am unsure when exactly the home unit for the GhcJS prims gets + -- instantiated + home_uid = homeUnitId . ue_unsafeHomeUnit $ hsc_unit_env env + +isGhcjsThUnit :: HscEnv -> UnitId -> Bool +isGhcjsThUnit env pkgKey + = pn == "ghcjs-th" || + (GHC.Prelude.null pn && pkgKey == home_uid && + elem "-DBOOTING_PACKAGE=ghcjs-th" (opt_P $ hsc_dflags env)) + where + home_uid = homeUnitId . ue_unsafeHomeUnit $ hsc_unit_env env + pn = unitIdString . ue_current_unit $ hsc_unit_env env + +-- FIXME: Jeff (2022,03): These return a UnitId, but I think they should be +-- @RealUnit (Definite UnitId). Per the description of @GenUnit@ in +-- Ghc.Unit.Types: a RealUnit is a UnitId that is closed or fully instantiated. +-- These should be fully instantiated, and Definite. See Note [Wired-in units] +-- in GHC.Unit.Types for a similar scenario for the NCG +ghcjsPrimUnit :: UnitState -> UnitId +ghcjsPrimUnit env = + case prims of + ((_,k):_) -> k + _ -> error "Package `ghcjs-prim' is required to link executables" + where + prims = filter ((=="ghcjs-prim").fst) + (searchModule env (mkModuleName "GHCJS.Prim")) + +ghcjsThUnit :: UnitState -> UnitId +ghcjsThUnit env = + case prims of + ((_,k):_) -> k + _ -> error "Package `ghcjs-th' is required to link executables" + where + prims = filter ((=="ghcjs-th").fst) + (searchModule env (mkModuleName "GHCJS.Prim.TH.Eval")) + +searchModule :: UnitState -> ModuleName -> [(String, UnitId)] +searchModule env = + fmap ((\k -> (getPackageName env k, k)) . moduleUnitId . fst) + . lookupModuleInAllUnits env + +getPackageName :: UnitState -> UnitId -> String +getPackageName u_st = maybe "" unitPackageNameString . lookupUnitId u_st diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in index a9f9570e1e..9be8f83da0 100644 --- a/compiler/ghc.cabal.in +++ b/compiler/ghc.cabal.in @@ -642,6 +642,7 @@ Library GHC.StgToJS.Object GHC.StgToJS.Prim GHC.StgToJS.Profiling + GHC.StgToJS.Printer GHC.StgToJS.Regs GHC.StgToJS.Rts.Types GHC.StgToJS.Rts.Rts @@ -651,6 +652,12 @@ Library GHC.StgToJS.Types GHC.StgToJS.UnitUtils GHC.StgToJS.Utils + GHC.StgToJS.Linker.Compactor + GHC.StgToJS.Linker.Dynamic + GHC.StgToJS.Linker.Linker + GHC.StgToJS.Linker.Types + GHC.StgToJS.Linker.Utils + GHC.StgToJS.Linker.Archive GHC.Stg.Unarise GHC.SysTools GHC.SysTools.Ar |