diff options
author | Bartosz Nitka <niteria@gmail.com> | 2015-09-21 19:30:41 -0500 |
---|---|---|
committer | Austin Seipp <austin@well-typed.com> | 2015-09-21 19:31:44 -0500 |
commit | d4d34a73aacc225a8f28d7138137bf548c9e51cc (patch) | |
tree | 567269a18feeda56eb136d93a6817a3b66fc29e7 | |
parent | 0b852fcf74c65291aeb6357973ecb715735d6383 (diff) | |
download | haskell-d4d34a73aacc225a8f28d7138137bf548c9e51cc.tar.gz |
Make derived names deterministic
The names of auxiliary bindings end up in the interface file, and since uniques
are nondeterministic, we end up with nondeterministic interface files.
This uses the package and module name in the generated name, so I believe it
should avoid problems from #7947 and be deterministic as well.
The generated names look like this now:
`$cLrlbmVwI3gpI8G2E6Hg3mO`
and with `-ppr-debug`:
`$c$aeson_70dylHtv1FFGeai1IoxcQr$Data.Aeson.Types.Internal$String`.
Reviewed By: simonmar, austin, ezyang
Differential Revision: https://phabricator.haskell.org/D1133
GHC Trac Issues: #4012
-rw-r--r-- | compiler/backpack/ShPackageKey.hs | 45 | ||||
-rw-r--r-- | compiler/basicTypes/Module.hs | 9 | ||||
-rw-r--r-- | compiler/basicTypes/Name.hs | 17 | ||||
-rw-r--r-- | compiler/typecheck/TcGenDeriv.hs | 34 | ||||
-rw-r--r-- | compiler/utils/Encoding.hs | 51 |
5 files changed, 97 insertions, 59 deletions
diff --git a/compiler/backpack/ShPackageKey.hs b/compiler/backpack/ShPackageKey.hs index 9fc44ae5cb..f0d7c6575c 100644 --- a/compiler/backpack/ShPackageKey.hs +++ b/compiler/backpack/ShPackageKey.hs @@ -17,6 +17,7 @@ module ShPackageKey( import Module import Packages +import Encoding import FastString import UniqFM import UniqSet @@ -26,11 +27,8 @@ import DynFlags import System.IO.Unsafe ( unsafePerformIO ) import Control.Monad -import Numeric import Data.IORef import GHC.Fingerprint -import Data.Word -import qualified Data.Char as Char import Data.List import Data.Function @@ -237,44 +235,7 @@ canonicalizeModule dflags m = do | Just m' <- lookup (moduleName m) insts -> m' _ -> m -{- -************************************************************************ -* * - Base 62 -* * -************************************************************************ --} - --------------------------------------------------------------------------- --- Base 62 - --- The base-62 code is based off of 'locators' --- ((c) Operational Dynamics Consulting, BSD3 licensed) - --- Note: Instead of base-62 encoding a single 128-bit integer --- (ceil(21.49) characters), we'll base-62 a pair of 64-bit integers --- (2 * ceil(10.75) characters). Luckily for us, it's the same number of --- characters! In the long term, this should go in GHC.Fingerprint, --- but not now... - --- | Size of a 64-bit word when written as a base-62 string -word64Base62Len :: Int -word64Base62Len = 11 - --- | Converts a 64-bit word into a base-62 string -toBase62 :: Word64 -> String -toBase62 w = pad ++ str - where - pad = replicate len '0' - len = word64Base62Len - length str -- 11 == ceil(64 / lg 62) - str = showIntAtBase 62 represent w "" - represent :: Int -> Char - represent x - | x < 10 = Char.chr (48 + x) - | x < 36 = Char.chr (65 + x - 10) - | x < 62 = Char.chr (97 + x - 36) - | otherwise = error ("represent (base 62): impossible!") - fingerprintPackageKey :: Fingerprint -> PackageKey fingerprintPackageKey (Fingerprint a b) - = stringToPackageKey (toBase62 a ++ toBase62 b) + = stringToPackageKey (toBase62Padded a ++ toBase62Padded b) + -- See Note [Base 62 encoding 128-bit integers] diff --git a/compiler/basicTypes/Module.hs b/compiler/basicTypes/Module.hs index 2c60463c04..7725633447 100644 --- a/compiler/basicTypes/Module.hs +++ b/compiler/basicTypes/Module.hs @@ -10,6 +10,7 @@ the keys. -} {-# LANGUAGE DeriveDataTypeable #-} +{-# LANGUAGE RecordWildCards #-} module Module ( @@ -19,6 +20,7 @@ module Module moduleNameFS, moduleNameString, moduleNameSlashes, moduleNameColons, + moduleStableString, mkModuleName, mkModuleNameFS, stableModuleNameCmp, @@ -209,6 +211,13 @@ moduleNameFS (ModuleName mod) = mod moduleNameString :: ModuleName -> String moduleNameString (ModuleName mod) = unpackFS mod +-- | Get a string representation of a 'Module' that's unique and stable +-- across recompilations. +-- eg. "$aeson_70dylHtv1FFGeai1IoxcQr$Data.Aeson.Types.Internal" +moduleStableString :: Module -> String +moduleStableString Module{..} = + "$" ++ packageKeyString modulePackageKey ++ "$" ++ moduleNameString moduleName + mkModuleName :: String -> ModuleName mkModuleName s = ModuleName (mkFastString s) diff --git a/compiler/basicTypes/Name.hs b/compiler/basicTypes/Name.hs index 79f14ab93c..46c23b91bf 100644 --- a/compiler/basicTypes/Name.hs +++ b/compiler/basicTypes/Name.hs @@ -6,6 +6,7 @@ -} {-# LANGUAGE DeriveDataTypeable #-} +{-# LANGUAGE RecordWildCards #-} -- | -- #name_types# @@ -70,6 +71,7 @@ module Name ( getSrcLoc, getSrcSpan, getOccString, pprInfixName, pprPrefixName, pprModulePrefix, + nameStableString, -- Re-export the OccName stuff module OccName @@ -598,6 +600,21 @@ pprNameDefnLoc name | otherwise -> ptext (sLit "in") <+> quotes (ppr (nameModule name)) + +-- | Get a string representation of a 'Name' that's unique and stable +-- across recompilations. Used for deterministic generation of binds for +-- derived instances. +-- eg. "$aeson_70dylHtv1FFGeai1IoxcQr$Data.Aeson.Types.Internal$String" +nameStableString :: Name -> String +nameStableString Name{..} = + nameSortStableString n_sort ++ "$" ++ occNameString n_occ + +nameSortStableString :: NameSort -> String +nameSortStableString System = "$_sys" +nameSortStableString Internal = "$_in" +nameSortStableString (External mod) = moduleStableString mod +nameSortStableString (WiredIn mod _ _) = moduleStableString mod + {- ************************************************************************ * * diff --git a/compiler/typecheck/TcGenDeriv.hs b/compiler/typecheck/TcGenDeriv.hs index 4a1ce4f815..5f6a021a4c 100644 --- a/compiler/typecheck/TcGenDeriv.hs +++ b/compiler/typecheck/TcGenDeriv.hs @@ -36,6 +36,8 @@ import RdrName import BasicTypes import DataCon import Name +import Fingerprint +import Encoding import DynFlags import PrelInfo @@ -2295,20 +2297,20 @@ mkAuxBinderName :: Name -> (OccName -> OccName) -> RdrName -- ^ Make a top-level binder name for an auxiliary binding for a parent name -- See Note [Auxiliary binders] mkAuxBinderName parent occ_fun - = mkRdrUnqual (occ_fun uniq_parent_occ) + = mkRdrUnqual (occ_fun stable_parent_occ) where - uniq_parent_occ = mkOccName (occNameSpace parent_occ) uniq_string - - uniq_string - | opt_PprStyle_Debug - = showSDocUnsafe (ppr parent_occ <> underscore <> ppr parent_uniq) - | otherwise - = show parent_uniq - -- The debug thing is just to generate longer, but perhaps more perspicuous, names - - parent_uniq = nameUnique parent + stable_parent_occ = mkOccName (occNameSpace parent_occ) stable_string + stable_string + | opt_PprStyle_Debug = parent_stable + | otherwise = parent_stable_hash + parent_stable = nameStableString parent + parent_stable_hash = + let Fingerprint high low = fingerprintString parent_stable + in toBase62 high ++ toBase62Padded low + -- See Note [Base 62 encoding 128-bit integers] parent_occ = nameOccName parent + {- Note [Auxiliary binders] ~~~~~~~~~~~~~~~~~~~~~~~~ @@ -2325,12 +2327,12 @@ generating RdrNames here. We can't just use the TyCon or DataCon to distinguish because with standalone deriving two imported TyCons might both be called T! (See Trac #7947.) -So we use the *unique* from the parent name (T in this example) as part of the -OccName we generate for the new binding. +So we use package name, module name and the name of the parent +(T in this example) as part of the OccName we generate for the new binding. +To make the symbol names short we take a base62 hash of the full name. -In the past we used mkDerivedRdrName name occ_fun, which made an original name -But: (a) that does not work well for standalone-deriving either - (b) an unqualified name is just fine, provided it can't clash with user code +In the past we used the *unique* from the parent, but that's not stable across +recompilations as uniques are nondeterministic. Note [DeriveFoldable with ExistentialQuantification] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/compiler/utils/Encoding.hs b/compiler/utils/Encoding.hs index c8dcea24a7..712de6ca82 100644 --- a/compiler/utils/Encoding.hs +++ b/compiler/utils/Encoding.hs @@ -25,11 +25,16 @@ module Encoding ( -- * Z-encoding zEncodeString, - zDecodeString + zDecodeString, + + -- * Base62-encoding + toBase62, + toBase62Padded ) where import Foreign import Data.Char +import qualified Data.Char as Char import Numeric import GHC.Exts @@ -385,3 +390,47 @@ maybe_tuple _ = Nothing count_commas :: Int -> String -> (Int, String) count_commas n (',' : cs) = count_commas (n+1) cs count_commas n cs = (n,cs) + + +{- +************************************************************************ +* * + Base 62 +* * +************************************************************************ + +Note [Base 62 encoding 128-bit integers] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Instead of base-62 encoding a single 128-bit integer +(ceil(21.49) characters), we'll base-62 a pair of 64-bit integers +(2 * ceil(10.75) characters). Luckily for us, it's the same number of +characters! +-} + +-------------------------------------------------------------------------- +-- Base 62 + +-- The base-62 code is based off of 'locators' +-- ((c) Operational Dynamics Consulting, BSD3 licensed) + +-- | Size of a 64-bit word when written as a base-62 string +word64Base62Len :: Int +word64Base62Len = 11 + +-- | Converts a 64-bit word into a base-62 string +toBase62Padded :: Word64 -> String +toBase62Padded w = pad ++ str + where + pad = replicate len '0' + len = word64Base62Len - length str -- 11 == ceil(64 / lg 62) + str = toBase62 w + +toBase62 :: Word64 -> String +toBase62 w = showIntAtBase 62 represent w "" + where + represent :: Int -> Char + represent x + | x < 10 = Char.chr (48 + x) + | x < 36 = Char.chr (65 + x - 10) + | x < 62 = Char.chr (97 + x - 36) + | otherwise = error "represent (base 62): impossible!" |