diff options
-rw-r--r-- | compiler/backpack/ShPackageKey.hs | 45 | ||||
-rw-r--r-- | compiler/basicTypes/Module.hs | 9 | ||||
-rw-r--r-- | compiler/basicTypes/Name.hs | 17 | ||||
-rw-r--r-- | compiler/typecheck/TcGenDeriv.hs | 34 | ||||
-rw-r--r-- | compiler/utils/Encoding.hs | 51 |
5 files changed, 97 insertions, 59 deletions
diff --git a/compiler/backpack/ShPackageKey.hs b/compiler/backpack/ShPackageKey.hs index 9fc44ae5cb..f0d7c6575c 100644 --- a/compiler/backpack/ShPackageKey.hs +++ b/compiler/backpack/ShPackageKey.hs @@ -17,6 +17,7 @@ module ShPackageKey( import Module import Packages +import Encoding import FastString import UniqFM import UniqSet @@ -26,11 +27,8 @@ import DynFlags import System.IO.Unsafe ( unsafePerformIO ) import Control.Monad -import Numeric import Data.IORef import GHC.Fingerprint -import Data.Word -import qualified Data.Char as Char import Data.List import Data.Function @@ -237,44 +235,7 @@ canonicalizeModule dflags m = do | Just m' <- lookup (moduleName m) insts -> m' _ -> m -{- -************************************************************************ -* * - Base 62 -* * -************************************************************************ --} - --------------------------------------------------------------------------- --- Base 62 - --- The base-62 code is based off of 'locators' --- ((c) Operational Dynamics Consulting, BSD3 licensed) - --- Note: Instead of base-62 encoding a single 128-bit integer --- (ceil(21.49) characters), we'll base-62 a pair of 64-bit integers --- (2 * ceil(10.75) characters). Luckily for us, it's the same number of --- characters! In the long term, this should go in GHC.Fingerprint, --- but not now... - --- | Size of a 64-bit word when written as a base-62 string -word64Base62Len :: Int -word64Base62Len = 11 - --- | Converts a 64-bit word into a base-62 string -toBase62 :: Word64 -> String -toBase62 w = pad ++ str - where - pad = replicate len '0' - len = word64Base62Len - length str -- 11 == ceil(64 / lg 62) - str = showIntAtBase 62 represent w "" - represent :: Int -> Char - represent x - | x < 10 = Char.chr (48 + x) - | x < 36 = Char.chr (65 + x - 10) - | x < 62 = Char.chr (97 + x - 36) - | otherwise = error ("represent (base 62): impossible!") - fingerprintPackageKey :: Fingerprint -> PackageKey fingerprintPackageKey (Fingerprint a b) - = stringToPackageKey (toBase62 a ++ toBase62 b) + = stringToPackageKey (toBase62Padded a ++ toBase62Padded b) + -- See Note [Base 62 encoding 128-bit integers] diff --git a/compiler/basicTypes/Module.hs b/compiler/basicTypes/Module.hs index 2c60463c04..7725633447 100644 --- a/compiler/basicTypes/Module.hs +++ b/compiler/basicTypes/Module.hs @@ -10,6 +10,7 @@ the keys. -} {-# LANGUAGE DeriveDataTypeable #-} +{-# LANGUAGE RecordWildCards #-} module Module ( @@ -19,6 +20,7 @@ module Module moduleNameFS, moduleNameString, moduleNameSlashes, moduleNameColons, + moduleStableString, mkModuleName, mkModuleNameFS, stableModuleNameCmp, @@ -209,6 +211,13 @@ moduleNameFS (ModuleName mod) = mod moduleNameString :: ModuleName -> String moduleNameString (ModuleName mod) = unpackFS mod +-- | Get a string representation of a 'Module' that's unique and stable +-- across recompilations. +-- eg. "$aeson_70dylHtv1FFGeai1IoxcQr$Data.Aeson.Types.Internal" +moduleStableString :: Module -> String +moduleStableString Module{..} = + "$" ++ packageKeyString modulePackageKey ++ "$" ++ moduleNameString moduleName + mkModuleName :: String -> ModuleName mkModuleName s = ModuleName (mkFastString s) diff --git a/compiler/basicTypes/Name.hs b/compiler/basicTypes/Name.hs index 79f14ab93c..46c23b91bf 100644 --- a/compiler/basicTypes/Name.hs +++ b/compiler/basicTypes/Name.hs @@ -6,6 +6,7 @@ -} {-# LANGUAGE DeriveDataTypeable #-} +{-# LANGUAGE RecordWildCards #-} -- | -- #name_types# @@ -70,6 +71,7 @@ module Name ( getSrcLoc, getSrcSpan, getOccString, pprInfixName, pprPrefixName, pprModulePrefix, + nameStableString, -- Re-export the OccName stuff module OccName @@ -598,6 +600,21 @@ pprNameDefnLoc name | otherwise -> ptext (sLit "in") <+> quotes (ppr (nameModule name)) + +-- | Get a string representation of a 'Name' that's unique and stable +-- across recompilations. Used for deterministic generation of binds for +-- derived instances. +-- eg. "$aeson_70dylHtv1FFGeai1IoxcQr$Data.Aeson.Types.Internal$String" +nameStableString :: Name -> String +nameStableString Name{..} = + nameSortStableString n_sort ++ "$" ++ occNameString n_occ + +nameSortStableString :: NameSort -> String +nameSortStableString System = "$_sys" +nameSortStableString Internal = "$_in" +nameSortStableString (External mod) = moduleStableString mod +nameSortStableString (WiredIn mod _ _) = moduleStableString mod + {- ************************************************************************ * * diff --git a/compiler/typecheck/TcGenDeriv.hs b/compiler/typecheck/TcGenDeriv.hs index 4a1ce4f815..5f6a021a4c 100644 --- a/compiler/typecheck/TcGenDeriv.hs +++ b/compiler/typecheck/TcGenDeriv.hs @@ -36,6 +36,8 @@ import RdrName import BasicTypes import DataCon import Name +import Fingerprint +import Encoding import DynFlags import PrelInfo @@ -2295,20 +2297,20 @@ mkAuxBinderName :: Name -> (OccName -> OccName) -> RdrName -- ^ Make a top-level binder name for an auxiliary binding for a parent name -- See Note [Auxiliary binders] mkAuxBinderName parent occ_fun - = mkRdrUnqual (occ_fun uniq_parent_occ) + = mkRdrUnqual (occ_fun stable_parent_occ) where - uniq_parent_occ = mkOccName (occNameSpace parent_occ) uniq_string - - uniq_string - | opt_PprStyle_Debug - = showSDocUnsafe (ppr parent_occ <> underscore <> ppr parent_uniq) - | otherwise - = show parent_uniq - -- The debug thing is just to generate longer, but perhaps more perspicuous, names - - parent_uniq = nameUnique parent + stable_parent_occ = mkOccName (occNameSpace parent_occ) stable_string + stable_string + | opt_PprStyle_Debug = parent_stable + | otherwise = parent_stable_hash + parent_stable = nameStableString parent + parent_stable_hash = + let Fingerprint high low = fingerprintString parent_stable + in toBase62 high ++ toBase62Padded low + -- See Note [Base 62 encoding 128-bit integers] parent_occ = nameOccName parent + {- Note [Auxiliary binders] ~~~~~~~~~~~~~~~~~~~~~~~~ @@ -2325,12 +2327,12 @@ generating RdrNames here. We can't just use the TyCon or DataCon to distinguish because with standalone deriving two imported TyCons might both be called T! (See Trac #7947.) -So we use the *unique* from the parent name (T in this example) as part of the -OccName we generate for the new binding. +So we use package name, module name and the name of the parent +(T in this example) as part of the OccName we generate for the new binding. +To make the symbol names short we take a base62 hash of the full name. -In the past we used mkDerivedRdrName name occ_fun, which made an original name -But: (a) that does not work well for standalone-deriving either - (b) an unqualified name is just fine, provided it can't clash with user code +In the past we used the *unique* from the parent, but that's not stable across +recompilations as uniques are nondeterministic. Note [DeriveFoldable with ExistentialQuantification] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/compiler/utils/Encoding.hs b/compiler/utils/Encoding.hs index c8dcea24a7..712de6ca82 100644 --- a/compiler/utils/Encoding.hs +++ b/compiler/utils/Encoding.hs @@ -25,11 +25,16 @@ module Encoding ( -- * Z-encoding zEncodeString, - zDecodeString + zDecodeString, + + -- * Base62-encoding + toBase62, + toBase62Padded ) where import Foreign import Data.Char +import qualified Data.Char as Char import Numeric import GHC.Exts @@ -385,3 +390,47 @@ maybe_tuple _ = Nothing count_commas :: Int -> String -> (Int, String) count_commas n (',' : cs) = count_commas (n+1) cs count_commas n cs = (n,cs) + + +{- +************************************************************************ +* * + Base 62 +* * +************************************************************************ + +Note [Base 62 encoding 128-bit integers] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Instead of base-62 encoding a single 128-bit integer +(ceil(21.49) characters), we'll base-62 a pair of 64-bit integers +(2 * ceil(10.75) characters). Luckily for us, it's the same number of +characters! +-} + +-------------------------------------------------------------------------- +-- Base 62 + +-- The base-62 code is based off of 'locators' +-- ((c) Operational Dynamics Consulting, BSD3 licensed) + +-- | Size of a 64-bit word when written as a base-62 string +word64Base62Len :: Int +word64Base62Len = 11 + +-- | Converts a 64-bit word into a base-62 string +toBase62Padded :: Word64 -> String +toBase62Padded w = pad ++ str + where + pad = replicate len '0' + len = word64Base62Len - length str -- 11 == ceil(64 / lg 62) + str = toBase62 w + +toBase62 :: Word64 -> String +toBase62 w = showIntAtBase 62 represent w "" + where + represent :: Int -> Char + represent x + | x < 10 = Char.chr (48 + x) + | x < 36 = Char.chr (65 + x - 10) + | x < 62 = Char.chr (97 + x - 36) + | otherwise = error "represent (base 62): impossible!" |