diff options
| author | Austin Seipp <austin@well-typed.com> | 2016-05-20 03:25:08 +0000 |
|---|---|---|
| committer | Austin Seipp <austin@well-typed.com> | 2016-05-21 16:55:16 +0000 |
| commit | d9cb7a8a94daa4d20aa042cd053e20b491315633 (patch) | |
| tree | aedce747b5202ab49da01a17c8dbebc13e313116 /compiler/utils | |
| parent | a1f3bb8ca454f05fa35cb6b5c64e92f640380802 (diff) | |
| download | haskell-d9cb7a8a94daa4d20aa042cd053e20b491315633.tar.gz | |
compiler/iface: compress .hi files
Compress all interface files generated by the compiler with LZ4. While
being only a tiny amount of code, LZ4 is both fast at compression and
decompression, and has good compression ratios.
Non-scientific size test: size of stage2 compiler .hi files:
`find ./compiler/stage2 -type f -iname '*.hi' -exec du -ch {} + | grep total$`
Without this patch: 22MB of .hi files for stage2.
With this patch: 9.2MB of .hi files for stage2.
Signed-off-by: Austin Seipp <austin@well-typed.com>
Reviewed By: bgamari
Differential Revision: https://phabricator.haskell.org/D1159
Diffstat (limited to 'compiler/utils')
| -rw-r--r-- | compiler/utils/Binary.hs | 37 | ||||
| -rw-r--r-- | compiler/utils/LZ4.hs | 126 |
2 files changed, 144 insertions, 19 deletions
diff --git a/compiler/utils/Binary.hs b/compiler/utils/Binary.hs index 8800d98f9c..684cdc6bc4 100644 --- a/compiler/utils/Binary.hs +++ b/compiler/utils/Binary.hs @@ -66,18 +66,19 @@ import FastMutInt import Fingerprint import BasicTypes import SrcLoc +import qualified LZ4 as LZ4 import Foreign import Data.Array import Data.ByteString (ByteString) +import qualified Data.ByteString as B import qualified Data.ByteString.Internal as BS import qualified Data.ByteString.Unsafe as BS import Data.IORef import Data.Char ( ord, chr ) import Data.Time import Data.Typeable -import Control.Monad ( when ) -import System.IO as IO +import Control.Monad ( when, liftM ) import System.IO.Unsafe ( unsafeInterleaveIO ) import System.IO.Error ( mkIOError, eofErrorType ) import GHC.Real ( Ratio(..) ) @@ -176,29 +177,27 @@ isEOFBin (BinMem _ ix_r sz_r _) = do writeBinMem :: BinHandle -> FilePath -> IO () writeBinMem (BinMem _ ix_r _ arr_r) fn = do - h <- openBinaryFile fn WriteMode arr <- readIORef arr_r ix <- readFastMutInt ix_r - withForeignPtr arr $ \p -> hPutBuf h p ix - hClose h + let bs = LZ4.compress $ BS.fromForeignPtr arr 0 ix + case bs of + Nothing -> error "Binary.writeBinMem: compression failed" + Just x -> B.writeFile fn x readBinMem :: FilePath -> IO BinHandle -- Return a BinHandle with a totally undefined State readBinMem filename = do - h <- openBinaryFile filename ReadMode - filesize' <- hFileSize h - let filesize = fromIntegral filesize' - arr <- mallocForeignPtrBytes (filesize*2) - count <- withForeignPtr arr $ \p -> hGetBuf h p filesize - when (count /= filesize) $ - error ("Binary.readBinMem: only read " ++ show count ++ " bytes") - hClose h - arr_r <- newIORef arr - ix_r <- newFastMutInt - writeFastMutInt ix_r 0 - sz_r <- newFastMutInt - writeFastMutInt sz_r filesize - return (BinMem noUserData ix_r sz_r arr_r) + bs <- liftM LZ4.decompress (B.readFile filename) + case bs of + Nothing -> error "Binary.readBinMem: decompression failed" + Just x -> do + let (arr, ix, size) = BS.toForeignPtr x + arr_r <- newIORef arr + ix_r <- newFastMutInt + writeFastMutInt ix_r ix + sz_r <- newFastMutInt + writeFastMutInt sz_r size + return (BinMem noUserData ix_r sz_r arr_r) fingerprintBinMem :: BinHandle -> IO Fingerprint fingerprintBinMem (BinMem _ ix_r _ arr_r) = do diff --git a/compiler/utils/LZ4.hs b/compiler/utils/LZ4.hs new file mode 100644 index 0000000000..2e413d79e1 --- /dev/null +++ b/compiler/utils/LZ4.hs @@ -0,0 +1,126 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE ScopedTypeVariables #-} +-- | +-- Module : LZ4 +-- Copyright : (c) Mark Wotton, Austin Seipp 2012-2015 +-- License : BSD3 +-- +-- Compression utilities (currently utilizing @LZ4 r127@). +-- +module LZ4 + ( compress -- :: S.ByteString -> S.ByteString + , decompress -- :: S.ByteString -> Maybe S.ByteString + ) where + +import Prelude hiding (max) +import Data.Word +import Data.Bits +import Foreign.Ptr +import Foreign.C +import System.IO.Unsafe (unsafePerformIO) +#if __GLASGOW_HASKELL__ < 709 +import Control.Applicative +#endif +--import Control.Monad +import Data.Monoid ((<>)) + +import qualified Data.ByteString as S +import qualified Data.ByteString.Internal as SI +import qualified Data.ByteString.Unsafe as U + +-------------------------------------------------------------------------------- +-- Compression + +-- | Compresses the input 'ByteString'. +-- +-- Will return 'Nothing' if the compression fails. Otherwise, returns +-- @Just xs@ with the compressed string. +compress :: S.ByteString -> Maybe S.ByteString +compress xs | S.null xs = Just S.empty +compress xs = unsafePerformIO $ + U.unsafeUseAsCStringLen xs $ \(cstr,len) -> do + let len' = fromIntegral len :: CInt + let max = c_LZ4_compressBound len' + bs <- SI.createAndTrim (fromIntegral max) $ \output -> + fromIntegral <$> c_LZ4_compress cstr output len' + case (S.null bs) of + True -> return Nothing + -- Prefix the compressed string with the uncompressed length + False -> return $ Just (format (fromIntegral len) bs) +{-# INLINEABLE compress #-} + +-------------------------------------------------------------------------------- +-- Decompression + +-- | Decompress the input 'ByteString'. +decompress :: S.ByteString -> Maybe S.ByteString +decompress xs | S.null xs = Just S.empty +-- Get the length of the uncompressed buffer and do our thing +decompress xs = maybe Nothing (unsafePerformIO . go) (unformat xs) + where + go (l, str) = + U.unsafeUseAsCString str $ \cstr -> do + out <- SI.createAndTrim l $ \p -> do + r :: Int <- fromIntegral <$> c_LZ4_uncompress cstr p (fromIntegral l) + --- NOTE: r is the count of bytes c_LZ4_uncompress read from + --- input buffer, and NOT the count of bytes used in result + --- buffer + return $! if (r <= 0) then 0 else l + return $! if (S.null out) then Nothing else (Just out) +{-# INLINEABLE decompress #-} + +-------------------------------------------------------------------------------- +-- Utilities + +-- | Pushes a Word32 and a ByteString into the format we use to correctly +-- encode/decode. +format :: Word32 -> S.ByteString -> S.ByteString +format l xs = write32LE l <> write32LE (fromIntegral $ S.length xs) <> xs + +write32LE :: Word32 -> S.ByteString +write32LE w + = S.pack [ fromIntegral (w `shiftR` 0) :: Word8 + , fromIntegral (w `shiftR` 8) :: Word8 + , fromIntegral (w `shiftR` 16) :: Word8 + , fromIntegral (w `shiftR` 24) :: Word8 + ] + +-- | Gets a ByteString and it's length from the compressed format. +unformat :: S.ByteString -> Maybe (Int, S.ByteString) +unformat xs + | S.length xs < 8 = Nothing -- Need at least 8 bytes + | bsLen /= S.length rest = Nothing -- Header doesn't match real size + | otherwise = Just (fromIntegral origLen, rest) + where + origLen = fromIntegral (read32LE l0 l1 l2 l3) :: Int + bsLen = fromIntegral (read32LE s0 s1 s2 s3) :: Int + + [l0,l1,l2,l3] = S.unpack (S.take 4 xs) + [s0,s1,s2,s3] = S.unpack (S.take 4 $ S.drop 4 xs) + rest = S.drop 8 xs + +read32LE :: Word8 -> Word8 -> Word8 -> Word8 -> Word32 +read32LE x0 x1 x2 x3 + = fi x0 + (fi x1 `shiftL` 8) + (fi x2 `shiftL` 16) + (fi x3 `shiftL` 24) + where fi = fromIntegral :: Word8 -> Word32 + +-------------------------------------------------------------------------------- +-- FFI Bindings + +-- | Worst case compression bounds on an input string. +foreign import ccall unsafe "LZ4_compressBound" + c_LZ4_compressBound :: CInt -> CInt + +-- | Compresses a string. +foreign import ccall unsafe "LZ4_compress" + c_LZ4_compress :: Ptr a -- ^ Source + -> Ptr b -- ^ Dest + -> CInt -- ^ Input size + -> IO CInt -- ^ Result + +-- | Decompresses a string. +foreign import ccall unsafe "LZ4_decompress_fast" + c_LZ4_uncompress :: Ptr a -- ^ Source + -> Ptr b -- ^ Dest + -> CInt -- ^ Size of ORIGINAL INPUT + -> IO CInt -- ^ Result |
