summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZubin Duggal <zubin@cmi.ac.in>2019-05-23 17:13:33 +0530
committerMatthew Pickering <matthewtpickering@gmail.com>2019-05-31 07:34:57 +0100
commit0e0d87da2fd25e2fb255417fcb15f93f508c1250 (patch)
tree38966454a6f12a01773c2c92fe7437e3fb37095b
parent495a65cbc48d5209f30fd4248fc11ab06b05d4c3 (diff)
downloadhaskell-0e0d87da2fd25e2fb255417fcb15f93f508c1250.tar.gz
Fix and enforce validation of header for .hie files
Implements #16686 The files version is automatically generated from the current GHC version in the same manner as normal interface files. This means that clients can first read the version and then decide how to read the rest of the file.
-rw-r--r--compiler/hieFile/HieAst.hs9
-rw-r--r--compiler/hieFile/HieBin.hs123
-rw-r--r--compiler/hieFile/HieDebug.hs3
-rw-r--r--compiler/hieFile/HieTypes.hs23
-rw-r--r--compiler/main/HscMain.hs4
m---------utils/haddock0
6 files changed, 134 insertions, 28 deletions
diff --git a/compiler/hieFile/HieAst.hs b/compiler/hieFile/HieAst.hs
index 4f1b2a3a6d..7c3ceb6138 100644
--- a/compiler/hieFile/HieAst.hs
+++ b/compiler/hieFile/HieAst.hs
@@ -1,3 +1,6 @@
+{-
+Main functions for .hie file generation
+-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE UndecidableInstances #-}
@@ -20,7 +23,6 @@ import BooleanFormula
import Class ( FunDep )
import CoreUtils ( exprType )
import ConLike ( conLikeName )
-import Config ( cProjectVersion )
import Desugar ( deSugarExpr )
import FieldLabel
import HsSyn
@@ -42,7 +44,6 @@ import HieUtils
import qualified Data.Array as A
import qualified Data.ByteString as BS
-import qualified Data.ByteString.Char8 as BSC
import qualified Data.Map as M
import qualified Data.Set as S
import Data.Data ( Data, Typeable )
@@ -98,9 +99,7 @@ mkHieFile ms ts rs = do
let Just src_file = ml_hs_file $ ms_location ms
src <- liftIO $ BS.readFile src_file
return $ HieFile
- { hie_version = curHieVersion
- , hie_ghc_version = BSC.pack cProjectVersion
- , hie_hs_file = src_file
+ { hie_hs_file = src_file
, hie_module = ms_mod ms
, hie_types = arr
, hie_asts = asts'
diff --git a/compiler/hieFile/HieBin.hs b/compiler/hieFile/HieBin.hs
index 2734a9fce9..6c72dca034 100644
--- a/compiler/hieFile/HieBin.hs
+++ b/compiler/hieFile/HieBin.hs
@@ -1,8 +1,11 @@
+{-
+Binary serialization for .hie files.
+-}
{-# LANGUAGE ScopedTypeVariables #-}
-module HieBin ( readHieFile, writeHieFile, HieName(..), toHieName ) where
+module HieBin ( readHieFile, readHieFileWithVersion, HieHeader, writeHieFile, HieName(..), toHieName, HieFileResult(..), hieMagic) where
+import Config ( cProjectVersion )
import GhcPrelude
-
import Binary
import BinIface ( getDictFastString )
import FastMutInt
@@ -14,17 +17,23 @@ import Outputable
import PrelInfo
import SrcLoc
import UniqSupply ( takeUniqFromSupply )
+import Util ( maybeRead )
import Unique
import UniqFM
import qualified Data.Array as A
import Data.IORef
+import Data.ByteString ( ByteString )
+import qualified Data.ByteString as BS
+import qualified Data.ByteString.Char8 as BSC
import Data.List ( mapAccumR )
-import Data.Word ( Word32 )
-import Control.Monad ( replicateM )
+import Data.Word ( Word8, Word32 )
+import Control.Monad ( replicateM, when )
import System.Directory ( createDirectoryIfMissing )
import System.FilePath ( takeDirectory )
+import HieTypes
+
-- | `Name`'s get converted into `HieName`'s before being written into @.hie@
-- files. See 'toHieName' and 'fromHieName' for logic on how to convert between
-- these two types.
@@ -63,10 +72,33 @@ data HieDictionary = HieDictionary
initBinMemSize :: Int
initBinMemSize = 1024*1024
-writeHieFile :: Binary a => FilePath -> a -> IO ()
+-- | The header for HIE files - Capital ASCII letters "HIE".
+hieMagic :: [Word8]
+hieMagic = [72,73,69]
+
+hieMagicLen :: Int
+hieMagicLen = length hieMagic
+
+ghcVersion :: ByteString
+ghcVersion = BSC.pack cProjectVersion
+
+putBinLine :: BinHandle -> ByteString -> IO ()
+putBinLine bh xs = do
+ mapM_ (putByte bh) $ BS.unpack xs
+ putByte bh 10 -- newline char
+
+-- | Write a `HieFile` to the given `FilePath`, with a proper header and
+-- symbol tables for `Name`s and `FastString`s
+writeHieFile :: FilePath -> HieFile -> IO ()
writeHieFile hie_file_path hiefile = do
bh0 <- openBinMem initBinMemSize
+ -- Write the header: hieHeader followed by the
+ -- hieVersion and the GHC version used to generate this file
+ mapM_ (putByte bh0) hieMagic
+ putBinLine bh0 $ BSC.pack $ show hieVersion
+ putBinLine bh0 $ ghcVersion
+
-- remember where the dictionary pointer will go
dict_p_p <- tellBin bh0
put_ bh0 dict_p_p
@@ -105,7 +137,7 @@ writeHieFile hie_file_path hiefile = do
symtab_map' <- readIORef symtab_map
putSymbolTable bh symtab_next' symtab_map'
- -- write the dictionary pointer at the fornt of the file
+ -- write the dictionary pointer at the front of the file
dict_p <- tellBin bh
putAt bh dict_p_p dict_p
seekBin bh dict_p
@@ -120,10 +152,87 @@ writeHieFile hie_file_path hiefile = do
writeBinMem bh hie_file_path
return ()
-readHieFile :: Binary a => NameCache -> FilePath -> IO (a, NameCache)
+data HieFileResult
+ = HieFileResult
+ { hie_file_result_version :: Integer
+ , hie_file_result_ghc_version :: ByteString
+ , hie_file_result :: HieFile
+ }
+
+type HieHeader = (Integer, ByteString)
+
+-- | Read a `HieFile` from a `FilePath`. Can use
+-- an existing `NameCache`. Allows you to specify
+-- which versions of hieFile to attempt to read.
+-- `Left` case returns the failing header versions.
+readHieFileWithVersion :: (HieHeader -> Bool) -> NameCache -> FilePath -> IO (Either HieHeader (HieFileResult, NameCache))
+readHieFileWithVersion readVersion nc file = do
+ bh0 <- readBinMem file
+
+ (hieVersion, ghcVersion) <- readHieFileHeader file bh0
+
+ if readVersion (hieVersion, ghcVersion)
+ then do
+ (hieFile, nc') <- readHieFileContents bh0 nc
+ return $ Right (HieFileResult hieVersion ghcVersion hieFile, nc')
+ else return $ Left (hieVersion, ghcVersion)
+
+
+-- | Read a `HieFile` from a `FilePath`. Can use
+-- an existing `NameCache`.
+readHieFile :: NameCache -> FilePath -> IO (HieFileResult, NameCache)
readHieFile nc file = do
+
bh0 <- readBinMem file
+ (readHieVersion, ghcVersion) <- readHieFileHeader file bh0
+
+ -- Check if the versions match
+ when (readHieVersion /= hieVersion) $
+ panic $ unwords ["readHieFile: hie file versions don't match for file:"
+ , file
+ , "Expected"
+ , show hieVersion
+ , "but got", show readHieVersion
+ ]
+ (hieFile, nc') <- readHieFileContents bh0 nc
+ return $ (HieFileResult hieVersion ghcVersion hieFile, nc')
+
+readBinLine :: BinHandle -> IO ByteString
+readBinLine bh = BS.pack . reverse <$> loop []
+ where
+ loop acc = do
+ char <- get bh :: IO Word8
+ if char == 10 -- ASCII newline '\n'
+ then return acc
+ else loop (char : acc)
+
+readHieFileHeader :: FilePath -> BinHandle -> IO HieHeader
+readHieFileHeader file bh0 = do
+ -- Read the header
+ magic <- replicateM hieMagicLen (get bh0)
+ version <- BSC.unpack <$> readBinLine bh0
+ case maybeRead version of
+ Nothing ->
+ panic $ unwords ["readHieFileHeader: hieVersion isn't an Integer:"
+ , show version
+ ]
+ Just readHieVersion -> do
+ ghcVersion <- readBinLine bh0
+
+ -- Check if the header is valid
+ when (magic /= hieMagic) $
+ panic $ unwords ["readHieFileHeader: headers don't match for file:"
+ , file
+ , "Expected"
+ , show hieMagic
+ , "but got", show magic
+ ]
+ return (readHieVersion, ghcVersion)
+
+readHieFileContents :: BinHandle -> NameCache -> IO (HieFile, NameCache)
+readHieFileContents bh0 nc = do
+
dict <- get_dictionary bh0
-- read the symbol table so we are capable of reading the actual data
diff --git a/compiler/hieFile/HieDebug.hs b/compiler/hieFile/HieDebug.hs
index 7896cf7720..ffdfe431d3 100644
--- a/compiler/hieFile/HieDebug.hs
+++ b/compiler/hieFile/HieDebug.hs
@@ -1,3 +1,6 @@
+{-
+Functions to validate and check .hie file ASTs generated by GHC.
+-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleContexts #-}
diff --git a/compiler/hieFile/HieTypes.hs b/compiler/hieFile/HieTypes.hs
index 1b1d8c5275..7f500a7453 100644
--- a/compiler/hieFile/HieTypes.hs
+++ b/compiler/hieFile/HieTypes.hs
@@ -1,3 +1,8 @@
+{-
+Types for the .hie file format are defined here.
+
+For more information see https://gitlab.haskell.org/ghc/ghc/wikis/hie-files
+-}
{-# LANGUAGE DeriveTraversable #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE TypeSynonymInstances #-}
@@ -7,6 +12,7 @@ module HieTypes where
import GhcPrelude
+import Config
import Binary
import FastString ( FastString )
import IfaceType
@@ -28,8 +34,8 @@ import Control.Applicative ( (<|>) )
type Span = RealSrcSpan
-- | Current version of @.hie@ files
-curHieVersion :: Word8
-curHieVersion = 0
+hieVersion :: Integer
+hieVersion = read (cProjectVersionInt ++ cProjectPatchLevel) :: Integer
{- |
GHC builds up a wealth of information about Haskell source as it compiles it.
@@ -48,13 +54,7 @@ Besides saving compilation cycles, @.hie@ files also offer a more stable
interface than the GHC API.
-}
data HieFile = HieFile
- { hie_version :: Word8
- -- ^ version of the HIE format
-
- , hie_ghc_version :: ByteString
- -- ^ Version of GHC that produced this file
-
- , hie_hs_file :: FilePath
+ { hie_hs_file :: FilePath
-- ^ Initial Haskell source file path
, hie_module :: Module
@@ -74,11 +74,8 @@ data HieFile = HieFile
, hie_hs_src :: ByteString
-- ^ Raw bytes of the initial Haskell source
}
-
instance Binary HieFile where
put_ bh hf = do
- put_ bh $ hie_version hf
- put_ bh $ hie_ghc_version hf
put_ bh $ hie_hs_file hf
put_ bh $ hie_module hf
put_ bh $ hie_types hf
@@ -93,8 +90,6 @@ instance Binary HieFile where
<*> get bh
<*> get bh
<*> get bh
- <*> get bh
- <*> get bh
{-
diff --git a/compiler/main/HscMain.hs b/compiler/main/HscMain.hs
index 26d794e819..90b5ef594e 100644
--- a/compiler/main/HscMain.hs
+++ b/compiler/main/HscMain.hs
@@ -174,7 +174,7 @@ import Data.Set (Set)
import HieAst ( mkHieFile )
import HieTypes ( getAsts, hie_asts )
-import HieBin ( readHieFile, writeHieFile )
+import HieBin ( readHieFile, writeHieFile , hie_file_result)
import HieDebug ( diffFile, validateScopes )
#include "HsVersions.h"
@@ -434,7 +434,7 @@ extract_renamed_stuff mod_summary tc_result = do
-- Roundtrip testing
nc <- readIORef $ hsc_NC hs_env
(file', _) <- readHieFile nc out_file
- case diffFile hieFile file' of
+ case diffFile hieFile (hie_file_result file') of
[] ->
putMsg dflags $ text "Got no roundtrip errors"
xs -> do
diff --git a/utils/haddock b/utils/haddock
-Subproject f01473ed28e7c2700ff8e87b00ab87a802c9edd
+Subproject 83bb9870a117f9426e6f6cff6fec3bb6e93a7c1