diff options
author | Edward Z. Yang <ezyang@cs.stanford.edu> | 2015-06-23 13:15:17 -0700 |
---|---|---|
committer | Edward Z. Yang <ezyang@cs.stanford.edu> | 2015-07-23 13:35:45 -0700 |
commit | f9687caf337d409e4735d5bb4cf73a7dc629a58c (patch) | |
tree | 3f4d0bc7fcd74b66ad750eed4d134c4afdcb7803 /compiler | |
parent | 5ff4daddd9bc8f424d8f71fb01ebbbae9d608cdf (diff) | |
download | haskell-f9687caf337d409e4735d5bb4cf73a7dc629a58c.tar.gz |
Library names, with Cabal submodule update
A library name is a package name, package version, and hash of the
version names of all textual dependencies (i.e. packages which were included.) A library
name is a coarse approximation of installed package IDs, which are suitable for
inclusion in package keys (you don't want to put an IPID in a package key, since
it means the key will change any time the source changes.)
- We define ShPackageKey, which is the semantic object which
is hashed into a PackageKey. You can use 'newPackageKey'
to hash a ShPackageKey to a PackageKey
- Given a PackageKey, we can lookup its ShPackageKey with
'lookupPackageKey'. The way we can do this is by consulting
the 'pkgKeyCache', which records a reverse mapping from
every hash to the ShPackageKey. This means that if you
load in PackageKeys from external sources (e.g. interface
files), you also need to load in a mapping of PackageKeys
to their ShPackageKeys so we can populate the cache.
- We define a 'LibraryName' which encapsulates the full
depenency resolution that Cabal may have selected; this
is opaque to GHC but can be used to distinguish different
versions of a package.
- Definite packages don't have an interesting PackageKey,
so we rely on Cabal to pass them to us.
- We can pretty-print package keys while displaying the
instantiation, but it's not wired up to anything (e.g.
the Outputable instance of PackageKey).
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
Test Plan: validate
Reviewers: austin, bgamari
Subscribers: thomie
Differential Revision: https://phabricator.haskell.org/D1056
GHC Trac Issues: #10566
Diffstat (limited to 'compiler')
-rw-r--r-- | compiler/backpack/ShPackageKey.hs | 280 | ||||
-rw-r--r-- | compiler/ghc.cabal.in | 2 | ||||
-rw-r--r-- | compiler/main/DynFlags.hs | 43 | ||||
-rw-r--r-- | compiler/main/PackageConfig.hs | 20 | ||||
-rw-r--r-- | compiler/main/Packages.hs | 8 |
5 files changed, 349 insertions, 4 deletions
diff --git a/compiler/backpack/ShPackageKey.hs b/compiler/backpack/ShPackageKey.hs new file mode 100644 index 0000000000..9fc44ae5cb --- /dev/null +++ b/compiler/backpack/ShPackageKey.hs @@ -0,0 +1,280 @@ +{-# LANGUAGE CPP #-} +module ShPackageKey( + ShFreeHoles, + calcModuleFreeHoles, + + newPackageKey, + newPackageKeyWithScope, + lookupPackageKey, + + generalizeHoleModule, + canonicalizeModule, + + pprPackageKey +) where + +#include "HsVersions.h" + +import Module +import Packages +import FastString +import UniqFM +import UniqSet +import Outputable +import Util +import DynFlags + +import System.IO.Unsafe ( unsafePerformIO ) +import Control.Monad +import Numeric +import Data.IORef +import GHC.Fingerprint +import Data.Word +import qualified Data.Char as Char +import Data.List +import Data.Function + +-- NB: didn't put this in Module, that seems a bit too low in the +-- hierarchy, need to refer to DynFlags + +{- +************************************************************************ +* * + Package Keys +* * +************************************************************************ +-} + +-- Note: [PackageKey cache] +-- ~~~~~~~~~~~~~~~~~~~~~~~~ +-- The built-in PackageKey type (used by Module, Name, etc) +-- records the instantiation of the package as an MD5 hash +-- which is not reversible without some extra information. +-- However, the shape merging process requires us to be able +-- to substitute Module occurrences /inside/ the package key. +-- +-- Thus, we maintain the invariant: for every PackageKey +-- in our system, either: +-- +-- 1. It is in the installed package database (lookupPackage) +-- so we can lookup the recorded instantiatedWith +-- 2. We've recorded the associated mapping in the +-- PackageKeyCache. +-- +-- A PackageKey can be expanded into a ShPackageKey which has +-- the instance mapping. In the mapping, we don't bother +-- expanding a 'Module'; depending on 'shPackageKeyFreeHoles', +-- it may not be necessary to do a substitution (you only +-- need to drill down when substituing HOLE:H if H is in scope. + +-- Note: [Module name in scope set] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- Similar to InScopeSet, ShFreeHoles is an optimization that +-- allows us to avoid expanding a PackageKey into an ShPackageKey +-- if there isn't actually anything in the module expression that +-- we can substitute. + +-- | Given a Name or Module, the 'ShFreeHoles' contains the set +-- of free variables, i.e. HOLE:A modules, which may be substituted. +-- If this set is empty no substitutions are possible. +type ShFreeHoles = UniqSet ModuleName + +-- | Calculate the free holes of a 'Module'. +calcModuleFreeHoles :: DynFlags -> Module -> IO ShFreeHoles +calcModuleFreeHoles dflags m + | modulePackageKey m == holePackageKey = return (unitUniqSet (moduleName m)) + | otherwise = do + shpk <- lookupPackageKey dflags (modulePackageKey m) + return $ case shpk of + ShDefinitePackageKey{} -> emptyUniqSet + ShPackageKey{ shPackageKeyFreeHoles = in_scope } -> in_scope + +-- | Calculate the free holes of the hole map @[('ModuleName', 'Module')]@. +calcInstsFreeHoles :: DynFlags -> [(ModuleName, Module)] -> IO ShFreeHoles +calcInstsFreeHoles dflags insts = + fmap unionManyUniqSets (mapM (calcModuleFreeHoles dflags . snd) insts) + +-- | Given a 'UnitName', a 'LibraryName', and sorted mapping of holes to +-- their implementations, compute the 'PackageKey' associated with it, as well +-- as the recursively computed 'ShFreeHoles' of holes that may be substituted. +newPackageKeyWithScope :: DynFlags + -> UnitName + -> LibraryName + -> [(ModuleName, Module)] + -> IO (PackageKey, ShFreeHoles) +newPackageKeyWithScope dflags pn vh insts = do + fhs <- calcInstsFreeHoles dflags insts + pk <- newPackageKey' dflags (ShPackageKey pn vh insts fhs) + return (pk, fhs) + +-- | Given a 'UnitName' and sorted mapping of holes to +-- their implementations, compute the 'PackageKey' associated with it. +-- (Analogous to 'newGlobalBinder'). +newPackageKey :: DynFlags + -> UnitName + -> LibraryName + -> [(ModuleName, Module)] + -> IO PackageKey +newPackageKey dflags pn vh insts = do + (pk, _) <- newPackageKeyWithScope dflags pn vh insts + return pk + +-- | Given a 'ShPackageKey', compute the 'PackageKey' associated with it. +-- This function doesn't calculate the 'ShFreeHoles', because it is +-- provided with 'ShPackageKey'. +newPackageKey' :: DynFlags -> ShPackageKey -> IO PackageKey +newPackageKey' _ (ShDefinitePackageKey pk) = return pk +newPackageKey' dflags + shpk@(ShPackageKey pn vh insts fhs) = do + ASSERTM( fmap (==fhs) (calcInstsFreeHoles dflags insts) ) + let pk = mkPackageKey pn vh insts + pkt_var = pkgKeyCache dflags + pk_cache <- readIORef pkt_var + let consistent pk_cache = maybe True (==shpk) (lookupUFM pk_cache pk) + MASSERT( consistent pk_cache ) + when (not (elemUFM pk pk_cache)) $ + atomicModifyIORef' pkt_var (\pk_cache -> + -- Could race, but it's guaranteed to be the same + ASSERT( consistent pk_cache ) (addToUFM pk_cache pk shpk, ())) + return pk + +-- | Given a 'PackageKey', reverse lookup the 'ShPackageKey' associated +-- with it. This only gives useful information for keys which are +-- created using 'newPackageKey' or the associated functions, or that are +-- already in the installed package database, since we generally cannot reverse +-- MD5 hashes. +lookupPackageKey :: DynFlags + -> PackageKey + -> IO ShPackageKey +lookupPackageKey dflags pk + | pk `elem` wiredInPackageKeys + || pk == mainPackageKey + || pk == holePackageKey + = return (ShDefinitePackageKey pk) + | otherwise = do + let pkt_var = pkgKeyCache dflags + pk_cache <- readIORef pkt_var + case lookupUFM pk_cache pk of + Just r -> return r + _ -> return (ShDefinitePackageKey pk) + +pprPackageKey :: PackageKey -> SDoc +pprPackageKey pk = sdocWithDynFlags $ \dflags -> + -- name cache is a memotable + let shpk = unsafePerformIO (lookupPackageKey dflags pk) + in case shpk of + shpk@ShPackageKey{} -> + ppr (shPackageKeyUnitName shpk) <> + parens (hsep + (punctuate comma [ ppUnless (moduleName m == modname) + (ppr modname <+> text "->") + <+> ppr m + | (modname, m) <- shPackageKeyInsts shpk])) + <> ifPprDebug (braces (ftext (packageKeyFS pk))) + ShDefinitePackageKey pk -> ftext (packageKeyFS pk) + +-- NB: newPackageKey and lookupPackageKey are mutually recursive; this +-- recursion is guaranteed to bottom out because you can't set up cycles +-- of PackageKeys. + + +{- +************************************************************************ +* * + Package key hashing +* * +************************************************************************ +-} + +-- | Generates a 'PackageKey'. Don't call this directly; you probably +-- want to cache the result. +mkPackageKey :: UnitName + -> LibraryName + -> [(ModuleName, Module)] -- hole instantiations + -> PackageKey +mkPackageKey (UnitName fsUnitName) + (LibraryName fsLibraryName) unsorted_holes = + -- NB: don't use concatFS here, it's not much of an improvement + fingerprintPackageKey . fingerprintString $ + unpackFS fsUnitName ++ "\n" ++ + unpackFS fsLibraryName ++ "\n" ++ + concat [ moduleNameString m + ++ " " ++ packageKeyString (modulePackageKey b) + ++ ":" ++ moduleNameString (moduleName b) ++ "\n" + | (m, b) <- sortBy (stableModuleNameCmp `on` fst) unsorted_holes] + +-- | Generalize a 'Module' into one where all the holes are indefinite. +-- @p(A -> ...):C@ generalizes to @p(A -> HOLE:A):C@. Useful when +-- you need to figure out if you've already type-checked the generalized +-- version of this module, so you don't have to do the whole rigamarole. +generalizeHoleModule :: DynFlags -> Module -> IO Module +generalizeHoleModule dflags m = do + pk <- generalizeHolePackageKey dflags (modulePackageKey m) + return (mkModule pk (moduleName m)) + +-- | Generalize a 'PackageKey' into one where all the holes are indefinite. +-- @p(A -> q():A) generalizes to p(A -> HOLE:A)@. +generalizeHolePackageKey :: DynFlags -> PackageKey -> IO PackageKey +generalizeHolePackageKey dflags pk = do + shpk <- lookupPackageKey dflags pk + case shpk of + ShDefinitePackageKey _ -> return pk + ShPackageKey { shPackageKeyUnitName = pn, + shPackageKeyLibraryName = vh, + shPackageKeyInsts = insts0 } + -> let insts = map (\(x, _) -> (x, mkModule holePackageKey x)) insts0 + in newPackageKey dflags pn vh insts + +-- | Canonicalize a 'Module' so that it uniquely identifies a module. +-- For example, @p(A -> M):A@ canonicalizes to @M@. Useful for making +-- sure the interface you've loaded as the right @mi_module@. +canonicalizeModule :: DynFlags -> Module -> IO Module +canonicalizeModule dflags m = do + let pk = modulePackageKey m + shpk <- lookupPackageKey dflags pk + return $ case shpk of + ShPackageKey { shPackageKeyInsts = insts } + | Just m' <- lookup (moduleName m) insts -> m' + _ -> m + +{- +************************************************************************ +* * + Base 62 +* * +************************************************************************ +-} + +-------------------------------------------------------------------------- +-- Base 62 + +-- The base-62 code is based off of 'locators' +-- ((c) Operational Dynamics Consulting, BSD3 licensed) + +-- Note: Instead of base-62 encoding a single 128-bit integer +-- (ceil(21.49) characters), we'll base-62 a pair of 64-bit integers +-- (2 * ceil(10.75) characters). Luckily for us, it's the same number of +-- characters! In the long term, this should go in GHC.Fingerprint, +-- but not now... + +-- | Size of a 64-bit word when written as a base-62 string +word64Base62Len :: Int +word64Base62Len = 11 + +-- | Converts a 64-bit word into a base-62 string +toBase62 :: Word64 -> String +toBase62 w = pad ++ str + where + pad = replicate len '0' + len = word64Base62Len - length str -- 11 == ceil(64 / lg 62) + str = showIntAtBase 62 represent w "" + represent :: Int -> Char + represent x + | x < 10 = Char.chr (48 + x) + | x < 36 = Char.chr (65 + x - 10) + | x < 62 = Char.chr (97 + x - 36) + | otherwise = error ("represent (base 62): impossible!") + +fingerprintPackageKey :: Fingerprint -> PackageKey +fingerprintPackageKey (Fingerprint a b) + = stringToPackageKey (toBase62 a ++ toBase62 b) diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in index 536c536995..28227f32bf 100644 --- a/compiler/ghc.cabal.in +++ b/compiler/ghc.cabal.in @@ -124,6 +124,7 @@ Library cbits/genSym.c hs-source-dirs: + backpack basicTypes cmm codeGen @@ -500,6 +501,7 @@ Library Vectorise Hoopl.Dataflow Hoopl + ShPackageKey -- CgInfoTbls used in ghci/DebuggerUtils -- CgHeapery mkVirtHeapOffsets used in ghci diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs index ad604c8cfc..74e9bf303d 100644 --- a/compiler/main/DynFlags.hs +++ b/compiler/main/DynFlags.hs @@ -100,6 +100,10 @@ module DynFlags ( parseDynamicFilePragma, parseDynamicFlagsFull, + -- ** Package key cache + PackageKeyCache, + ShPackageKey(..), + -- ** Available DynFlags allFlags, flagsAll, @@ -177,6 +181,8 @@ import Foreign.C ( CInt(..) ) import System.IO.Unsafe ( unsafeDupablePerformIO ) #endif import {-# SOURCE #-} ErrUtils ( Severity(..), MsgDoc, mkLocMessage ) +import UniqFM +import UniqSet import System.IO.Unsafe ( unsafePerformIO ) import Data.IORef @@ -654,6 +660,29 @@ type SigOf = Map ModuleName Module getSigOf :: DynFlags -> ModuleName -> Maybe Module getSigOf dflags n = Map.lookup n (sigOf dflags) +-- NameCache updNameCache +type PackageKeyEnv = UniqFM +type PackageKeyCache = PackageKeyEnv ShPackageKey + +-- | An elaborated representation of a 'PackageKey', which records +-- all of the components that go into the hashed 'PackageKey'. +data ShPackageKey + = ShPackageKey { + shPackageKeyUnitName :: !UnitName, + shPackageKeyLibraryName :: !LibraryName, + shPackageKeyInsts :: ![(ModuleName, Module)], + shPackageKeyFreeHoles :: UniqSet ModuleName + } + | ShDefinitePackageKey { + shPackageKey :: !PackageKey + } + deriving Eq + +instance Outputable ShPackageKey where + ppr (ShPackageKey pn vh insts fh) + = ppr pn <+> ppr vh <+> ppr insts <+> parens (ppr fh) + ppr (ShDefinitePackageKey pk) = ppr pk + -- | Contains not only a collection of 'GeneralFlag's but also a plethora of -- information relating to the compilation of a single file or GHC session data DynFlags = DynFlags { @@ -698,7 +727,10 @@ data DynFlags = DynFlags { solverIterations :: IntWithInf, -- ^ Number of iterations in the constraints solver -- Typically only 1 is needed - thisPackage :: PackageKey, -- ^ name of package currently being compiled + thisPackage :: PackageKey, -- ^ key of package currently being compiled + thisLibraryName :: LibraryName, + -- ^ the version hash which identifies the textual + -- package being compiled. -- ways ways :: [Way], -- ^ Way flags from the command line @@ -785,6 +817,7 @@ data DynFlags = DynFlags { -- Packages.initPackages pkgDatabase :: Maybe [PackageConfig], pkgState :: PackageState, + pkgKeyCache :: {-# UNPACK #-} !(IORef PackageKeyCache), -- Temporary files -- These have to be IORefs, because the defaultCleanupHandler needs to @@ -1437,6 +1470,7 @@ defaultDynFlags mySettings = solverIterations = treatZeroAsInf mAX_SOLVER_ITERATIONS, thisPackage = mainPackageKey, + thisLibraryName = LibraryName nilFS, objectDir = Nothing, dylibInstallName = Nothing, @@ -1482,6 +1516,7 @@ defaultDynFlags mySettings = pkgDatabase = Nothing, -- This gets filled in with GHC.setSessionDynFlags pkgState = emptyPackageState, + pkgKeyCache = v_unsafePkgKeyCache, ways = defaultWays mySettings, buildTag = mkBuildTag (defaultWays mySettings), rtsBuildTag = mkBuildTag (defaultWays mySettings), @@ -2730,6 +2765,7 @@ package_flags = [ upd (setPackageKey name) deprecate "Use -this-package-key instead") , defGhcFlag "this-package-key" (hasArg setPackageKey) + , defGhcFlag "library-name" (hasArg setLibraryName) , defFlag "package-id" (HasArg exposePackageId) , defFlag "package" (HasArg exposePackage) , defFlag "package-key" (HasArg exposePackageKey) @@ -3725,6 +3761,9 @@ exposePackage' p dflags setPackageKey :: String -> DynFlags -> DynFlags setPackageKey p s = s{ thisPackage = stringToPackageKey p } +setLibraryName :: String -> DynFlags -> DynFlags +setLibraryName v s = s{ thisLibraryName = LibraryName (mkFastString v) } + -- ----------------------------------------------------------------------------- -- | Find the package environment (if one exists) -- @@ -4179,6 +4218,8 @@ unsafeGlobalDynFlags = unsafePerformIO $ readIORef v_unsafeGlobalDynFlags setUnsafeGlobalDynFlags :: DynFlags -> IO () setUnsafeGlobalDynFlags = writeIORef v_unsafeGlobalDynFlags +GLOBAL_VAR(v_unsafePkgKeyCache, emptyUFM, PackageKeyCache) + -- ----------------------------------------------------------------------------- -- SSE and AVX diff --git a/compiler/main/PackageConfig.hs b/compiler/main/PackageConfig.hs index 3c41151c11..71a84d8622 100644 --- a/compiler/main/PackageConfig.hs +++ b/compiler/main/PackageConfig.hs @@ -12,13 +12,18 @@ module PackageConfig ( -- * PackageKey packageConfigId, + -- * LibraryName + LibraryName(..), + -- * The PackageConfig type: information about a package PackageConfig, InstalledPackageInfo(..), InstalledPackageId(..), SourcePackageId(..), PackageName(..), + UnitName(..), Version(..), + packageUnitName, defaultPackageConfig, installedPackageIdString, sourcePackageIdString, @@ -54,6 +59,8 @@ type PackageConfig = InstalledPackageInfo newtype InstalledPackageId = InstalledPackageId FastString deriving (Eq, Ord) newtype SourcePackageId = SourcePackageId FastString deriving (Eq, Ord) newtype PackageName = PackageName FastString deriving (Eq, Ord) +newtype UnitName = UnitName FastString deriving (Eq, Ord) +newtype LibraryName = LibraryName FastString deriving (Eq, Ord) instance BinaryStringRep InstalledPackageId where fromStringRep = InstalledPackageId . mkFastStringByteString @@ -67,6 +74,10 @@ instance BinaryStringRep PackageName where fromStringRep = PackageName . mkFastStringByteString toStringRep (PackageName s) = fastStringToByteString s +instance BinaryStringRep LibraryName where + fromStringRep = LibraryName . mkFastStringByteString + toStringRep (LibraryName s) = fastStringToByteString s + instance Uniquable InstalledPackageId where getUnique (InstalledPackageId n) = getUnique n @@ -79,6 +90,12 @@ instance Uniquable PackageName where instance Outputable InstalledPackageId where ppr (InstalledPackageId str) = ftext str +instance Outputable UnitName where + ppr (UnitName str) = ftext str + +instance Outputable LibraryName where + ppr (LibraryName str) = ftext str + instance Outputable SourcePackageId where ppr (SourcePackageId str) = ftext str @@ -172,3 +189,6 @@ pprPackageConfig InstalledPackageInfo {..} = packageConfigId :: PackageConfig -> PackageKey packageConfigId = packageKey +packageUnitName :: PackageConfig -> UnitName +packageUnitName pkg = let PackageName fs = packageName pkg + in UnitName fs diff --git a/compiler/main/Packages.hs b/compiler/main/Packages.hs index 16ee352243..20822476cd 100644 --- a/compiler/main/Packages.hs +++ b/compiler/main/Packages.hs @@ -363,7 +363,7 @@ initPackages dflags = do Nothing -> readPackageConfigs dflags Just db -> return $ setBatchPackageFlags dflags db (pkg_state, preload, this_pkg) - <- mkPackageState dflags pkg_db [] (thisPackage dflags) + <- mkPackageState dflags pkg_db [] return (dflags{ pkgDatabase = Just pkg_db, pkgState = pkg_state, thisPackage = this_pkg }, @@ -885,15 +885,17 @@ mkPackageState :: DynFlags -> [PackageConfig] -- initial database -> [PackageKey] -- preloaded packages - -> PackageKey -- this package -> IO (PackageState, [PackageKey], -- new packages to preload PackageKey) -- this package, might be modified if the current -- package is a wired-in package. -mkPackageState dflags0 pkgs0 preload0 this_package = do +mkPackageState dflags0 pkgs0 preload0 = do dflags <- interpretPackageEnv dflags0 + -- Compute the package key + let this_package = thisPackage dflags + {- Plan. |