diff options
| author | Austin Seipp <austin@well-typed.com> | 2016-05-20 03:25:08 +0000 | 
|---|---|---|
| committer | Austin Seipp <austin@well-typed.com> | 2016-05-21 16:55:16 +0000 | 
| commit | d9cb7a8a94daa4d20aa042cd053e20b491315633 (patch) | |
| tree | aedce747b5202ab49da01a17c8dbebc13e313116 /compiler/utils | |
| parent | a1f3bb8ca454f05fa35cb6b5c64e92f640380802 (diff) | |
| download | haskell-d9cb7a8a94daa4d20aa042cd053e20b491315633.tar.gz | |
compiler/iface: compress .hi files
Compress all interface files generated by the compiler with LZ4. While
being only a tiny amount of code, LZ4 is both fast at compression and
decompression, and has good compression ratios.
Non-scientific size test: size of stage2 compiler .hi files:
`find ./compiler/stage2 -type f -iname '*.hi' -exec du -ch {} + | grep total$`
Without this patch: 22MB of .hi files for stage2.
With this patch: 9.2MB of .hi files for stage2.
Signed-off-by: Austin Seipp <austin@well-typed.com>
Reviewed By: bgamari
Differential Revision: https://phabricator.haskell.org/D1159
Diffstat (limited to 'compiler/utils')
| -rw-r--r-- | compiler/utils/Binary.hs | 37 | ||||
| -rw-r--r-- | compiler/utils/LZ4.hs | 126 | 
2 files changed, 144 insertions, 19 deletions
| diff --git a/compiler/utils/Binary.hs b/compiler/utils/Binary.hs index 8800d98f9c..684cdc6bc4 100644 --- a/compiler/utils/Binary.hs +++ b/compiler/utils/Binary.hs @@ -66,18 +66,19 @@ import FastMutInt  import Fingerprint  import BasicTypes  import SrcLoc +import qualified LZ4 as LZ4  import Foreign  import Data.Array  import Data.ByteString (ByteString) +import qualified Data.ByteString          as B  import qualified Data.ByteString.Internal as BS  import qualified Data.ByteString.Unsafe   as BS  import Data.IORef  import Data.Char                ( ord, chr )  import Data.Time  import Data.Typeable -import Control.Monad            ( when ) -import System.IO as IO +import Control.Monad            ( when, liftM )  import System.IO.Unsafe         ( unsafeInterleaveIO )  import System.IO.Error          ( mkIOError, eofErrorType )  import GHC.Real                 ( Ratio(..) ) @@ -176,29 +177,27 @@ isEOFBin (BinMem _ ix_r sz_r _) = do  writeBinMem :: BinHandle -> FilePath -> IO ()  writeBinMem (BinMem _ ix_r _ arr_r) fn = do -  h <- openBinaryFile fn WriteMode    arr <- readIORef arr_r    ix  <- readFastMutInt ix_r -  withForeignPtr arr $ \p -> hPutBuf h p ix -  hClose h +  let bs = LZ4.compress $ BS.fromForeignPtr arr 0 ix +  case bs of +    Nothing -> error "Binary.writeBinMem: compression failed" +    Just x  -> B.writeFile fn x  readBinMem :: FilePath -> IO BinHandle  -- Return a BinHandle with a totally undefined State  readBinMem filename = do -  h <- openBinaryFile filename ReadMode -  filesize' <- hFileSize h -  let filesize = fromIntegral filesize' -  arr <- mallocForeignPtrBytes (filesize*2) -  count <- withForeignPtr arr $ \p -> hGetBuf h p filesize -  when (count /= filesize) $ -       error ("Binary.readBinMem: only read " ++ show count ++ " bytes") -  hClose h -  arr_r <- newIORef arr -  ix_r <- newFastMutInt -  writeFastMutInt ix_r 0 -  sz_r <- newFastMutInt -  writeFastMutInt sz_r filesize -  return (BinMem noUserData ix_r sz_r arr_r) +  bs <- liftM LZ4.decompress (B.readFile filename) +  case bs of +    Nothing -> error "Binary.readBinMem: decompression failed" +    Just x  -> do +      let (arr, ix, size) = BS.toForeignPtr x +      arr_r <- newIORef arr +      ix_r <- newFastMutInt +      writeFastMutInt ix_r ix +      sz_r <- newFastMutInt +      writeFastMutInt sz_r size +      return (BinMem noUserData ix_r sz_r arr_r)  fingerprintBinMem :: BinHandle -> IO Fingerprint  fingerprintBinMem (BinMem _ ix_r _ arr_r) = do diff --git a/compiler/utils/LZ4.hs b/compiler/utils/LZ4.hs new file mode 100644 index 0000000000..2e413d79e1 --- /dev/null +++ b/compiler/utils/LZ4.hs @@ -0,0 +1,126 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE ScopedTypeVariables #-} +-- | +-- Module      : LZ4 +-- Copyright   : (c) Mark Wotton, Austin Seipp 2012-2015 +-- License     : BSD3 +-- +-- Compression utilities (currently utilizing @LZ4 r127@). +-- +module LZ4 +       ( compress            -- :: S.ByteString -> S.ByteString +       , decompress          -- :: S.ByteString -> Maybe S.ByteString +       ) where + +import Prelude hiding (max) +import Data.Word +import Data.Bits +import Foreign.Ptr +import Foreign.C +import System.IO.Unsafe (unsafePerformIO) +#if __GLASGOW_HASKELL__ < 709 +import Control.Applicative +#endif +--import Control.Monad +import Data.Monoid ((<>)) + +import qualified Data.ByteString as S +import qualified Data.ByteString.Internal as SI +import qualified Data.ByteString.Unsafe as U + +-------------------------------------------------------------------------------- +-- Compression + +-- | Compresses the input 'ByteString'. +-- +-- Will return 'Nothing' if the compression fails. Otherwise, returns +-- @Just xs@ with the compressed string. +compress :: S.ByteString -> Maybe S.ByteString +compress xs | S.null xs = Just S.empty +compress xs = unsafePerformIO $ +  U.unsafeUseAsCStringLen xs $ \(cstr,len) -> do +    let len' = fromIntegral len :: CInt +    let max = c_LZ4_compressBound len' +    bs <- SI.createAndTrim (fromIntegral max) $ \output -> +            fromIntegral <$> c_LZ4_compress cstr output len' +    case (S.null bs) of +      True -> return Nothing +      -- Prefix the compressed string with the uncompressed length +      False -> return $ Just (format (fromIntegral len) bs) +{-# INLINEABLE compress #-} + +-------------------------------------------------------------------------------- +-- Decompression + +-- | Decompress the input 'ByteString'. +decompress :: S.ByteString -> Maybe S.ByteString +decompress xs | S.null xs = Just S.empty +-- Get the length of the uncompressed buffer and do our thing +decompress xs = maybe Nothing (unsafePerformIO . go) (unformat xs) +  where +    go (l, str) = +      U.unsafeUseAsCString str $ \cstr -> do +        out <- SI.createAndTrim l $ \p -> do +          r :: Int <- fromIntegral <$> c_LZ4_uncompress cstr p (fromIntegral l) +          --- NOTE: r is the count of bytes c_LZ4_uncompress read from +          --- input buffer, and NOT the count of bytes used in result +          --- buffer +          return $! if (r <= 0) then 0 else l +        return $! if (S.null out) then Nothing else (Just out) +{-# INLINEABLE decompress #-} + +-------------------------------------------------------------------------------- +-- Utilities + +-- | Pushes a Word32 and a ByteString into the format we use to correctly +-- encode/decode. +format :: Word32 -> S.ByteString -> S.ByteString +format l xs = write32LE l <> write32LE (fromIntegral $ S.length xs) <> xs + +write32LE :: Word32 -> S.ByteString +write32LE w +  = S.pack [ fromIntegral (w `shiftR` 0)  :: Word8 +           , fromIntegral (w `shiftR` 8)  :: Word8 +           , fromIntegral (w `shiftR` 16) :: Word8 +           , fromIntegral (w `shiftR` 24) :: Word8 +           ] + +-- | Gets a ByteString and it's length from the compressed format. +unformat :: S.ByteString -> Maybe (Int, S.ByteString) +unformat xs +  | S.length xs < 8        = Nothing -- Need at least 8 bytes +  | bsLen /= S.length rest = Nothing -- Header doesn't match real size +  | otherwise              = Just (fromIntegral origLen, rest) +  where +    origLen = fromIntegral (read32LE l0 l1 l2 l3) :: Int +    bsLen   = fromIntegral (read32LE s0 s1 s2 s3) :: Int + +    [l0,l1,l2,l3] = S.unpack (S.take 4 xs) +    [s0,s1,s2,s3] = S.unpack (S.take 4 $ S.drop 4 xs) +    rest = S.drop 8 xs + +read32LE :: Word8 -> Word8 -> Word8 -> Word8 -> Word32 +read32LE x0 x1 x2 x3 +  = fi x0 + (fi x1 `shiftL` 8) + (fi x2 `shiftL` 16) + (fi x3 `shiftL` 24) +  where fi = fromIntegral :: Word8 -> Word32 + +-------------------------------------------------------------------------------- +-- FFI Bindings + +-- | Worst case compression bounds on an input string. +foreign import ccall unsafe "LZ4_compressBound" +  c_LZ4_compressBound :: CInt -> CInt + +-- | Compresses a string. +foreign import ccall unsafe "LZ4_compress" +  c_LZ4_compress :: Ptr a   -- ^ Source +                 -> Ptr b   -- ^ Dest +                 -> CInt    -- ^ Input size +                 -> IO CInt -- ^ Result + +-- | Decompresses a string. +foreign import ccall unsafe "LZ4_decompress_fast" +  c_LZ4_uncompress :: Ptr a   -- ^ Source +                   -> Ptr b   -- ^ Dest +                   -> CInt    -- ^ Size of ORIGINAL INPUT +                   -> IO CInt -- ^ Result | 
