summaryrefslogtreecommitdiff
path: root/compiler/utils
diff options
context:
space:
mode:
authorAustin Seipp <austin@well-typed.com>2016-05-20 03:25:08 +0000
committerAustin Seipp <austin@well-typed.com>2016-05-21 16:55:16 +0000
commitd9cb7a8a94daa4d20aa042cd053e20b491315633 (patch)
treeaedce747b5202ab49da01a17c8dbebc13e313116 /compiler/utils
parenta1f3bb8ca454f05fa35cb6b5c64e92f640380802 (diff)
downloadhaskell-d9cb7a8a94daa4d20aa042cd053e20b491315633.tar.gz
compiler/iface: compress .hi files
Compress all interface files generated by the compiler with LZ4. While being only a tiny amount of code, LZ4 is both fast at compression and decompression, and has good compression ratios. Non-scientific size test: size of stage2 compiler .hi files: `find ./compiler/stage2 -type f -iname '*.hi' -exec du -ch {} + | grep total$` Without this patch: 22MB of .hi files for stage2. With this patch: 9.2MB of .hi files for stage2. Signed-off-by: Austin Seipp <austin@well-typed.com> Reviewed By: bgamari Differential Revision: https://phabricator.haskell.org/D1159
Diffstat (limited to 'compiler/utils')
-rw-r--r--compiler/utils/Binary.hs37
-rw-r--r--compiler/utils/LZ4.hs126
2 files changed, 144 insertions, 19 deletions
diff --git a/compiler/utils/Binary.hs b/compiler/utils/Binary.hs
index 8800d98f9c..684cdc6bc4 100644
--- a/compiler/utils/Binary.hs
+++ b/compiler/utils/Binary.hs
@@ -66,18 +66,19 @@ import FastMutInt
import Fingerprint
import BasicTypes
import SrcLoc
+import qualified LZ4 as LZ4
import Foreign
import Data.Array
import Data.ByteString (ByteString)
+import qualified Data.ByteString as B
import qualified Data.ByteString.Internal as BS
import qualified Data.ByteString.Unsafe as BS
import Data.IORef
import Data.Char ( ord, chr )
import Data.Time
import Data.Typeable
-import Control.Monad ( when )
-import System.IO as IO
+import Control.Monad ( when, liftM )
import System.IO.Unsafe ( unsafeInterleaveIO )
import System.IO.Error ( mkIOError, eofErrorType )
import GHC.Real ( Ratio(..) )
@@ -176,29 +177,27 @@ isEOFBin (BinMem _ ix_r sz_r _) = do
writeBinMem :: BinHandle -> FilePath -> IO ()
writeBinMem (BinMem _ ix_r _ arr_r) fn = do
- h <- openBinaryFile fn WriteMode
arr <- readIORef arr_r
ix <- readFastMutInt ix_r
- withForeignPtr arr $ \p -> hPutBuf h p ix
- hClose h
+ let bs = LZ4.compress $ BS.fromForeignPtr arr 0 ix
+ case bs of
+ Nothing -> error "Binary.writeBinMem: compression failed"
+ Just x -> B.writeFile fn x
readBinMem :: FilePath -> IO BinHandle
-- Return a BinHandle with a totally undefined State
readBinMem filename = do
- h <- openBinaryFile filename ReadMode
- filesize' <- hFileSize h
- let filesize = fromIntegral filesize'
- arr <- mallocForeignPtrBytes (filesize*2)
- count <- withForeignPtr arr $ \p -> hGetBuf h p filesize
- when (count /= filesize) $
- error ("Binary.readBinMem: only read " ++ show count ++ " bytes")
- hClose h
- arr_r <- newIORef arr
- ix_r <- newFastMutInt
- writeFastMutInt ix_r 0
- sz_r <- newFastMutInt
- writeFastMutInt sz_r filesize
- return (BinMem noUserData ix_r sz_r arr_r)
+ bs <- liftM LZ4.decompress (B.readFile filename)
+ case bs of
+ Nothing -> error "Binary.readBinMem: decompression failed"
+ Just x -> do
+ let (arr, ix, size) = BS.toForeignPtr x
+ arr_r <- newIORef arr
+ ix_r <- newFastMutInt
+ writeFastMutInt ix_r ix
+ sz_r <- newFastMutInt
+ writeFastMutInt sz_r size
+ return (BinMem noUserData ix_r sz_r arr_r)
fingerprintBinMem :: BinHandle -> IO Fingerprint
fingerprintBinMem (BinMem _ ix_r _ arr_r) = do
diff --git a/compiler/utils/LZ4.hs b/compiler/utils/LZ4.hs
new file mode 100644
index 0000000000..2e413d79e1
--- /dev/null
+++ b/compiler/utils/LZ4.hs
@@ -0,0 +1,126 @@
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+-- |
+-- Module : LZ4
+-- Copyright : (c) Mark Wotton, Austin Seipp 2012-2015
+-- License : BSD3
+--
+-- Compression utilities (currently utilizing @LZ4 r127@).
+--
+module LZ4
+ ( compress -- :: S.ByteString -> S.ByteString
+ , decompress -- :: S.ByteString -> Maybe S.ByteString
+ ) where
+
+import Prelude hiding (max)
+import Data.Word
+import Data.Bits
+import Foreign.Ptr
+import Foreign.C
+import System.IO.Unsafe (unsafePerformIO)
+#if __GLASGOW_HASKELL__ < 709
+import Control.Applicative
+#endif
+--import Control.Monad
+import Data.Monoid ((<>))
+
+import qualified Data.ByteString as S
+import qualified Data.ByteString.Internal as SI
+import qualified Data.ByteString.Unsafe as U
+
+--------------------------------------------------------------------------------
+-- Compression
+
+-- | Compresses the input 'ByteString'.
+--
+-- Will return 'Nothing' if the compression fails. Otherwise, returns
+-- @Just xs@ with the compressed string.
+compress :: S.ByteString -> Maybe S.ByteString
+compress xs | S.null xs = Just S.empty
+compress xs = unsafePerformIO $
+ U.unsafeUseAsCStringLen xs $ \(cstr,len) -> do
+ let len' = fromIntegral len :: CInt
+ let max = c_LZ4_compressBound len'
+ bs <- SI.createAndTrim (fromIntegral max) $ \output ->
+ fromIntegral <$> c_LZ4_compress cstr output len'
+ case (S.null bs) of
+ True -> return Nothing
+ -- Prefix the compressed string with the uncompressed length
+ False -> return $ Just (format (fromIntegral len) bs)
+{-# INLINEABLE compress #-}
+
+--------------------------------------------------------------------------------
+-- Decompression
+
+-- | Decompress the input 'ByteString'.
+decompress :: S.ByteString -> Maybe S.ByteString
+decompress xs | S.null xs = Just S.empty
+-- Get the length of the uncompressed buffer and do our thing
+decompress xs = maybe Nothing (unsafePerformIO . go) (unformat xs)
+ where
+ go (l, str) =
+ U.unsafeUseAsCString str $ \cstr -> do
+ out <- SI.createAndTrim l $ \p -> do
+ r :: Int <- fromIntegral <$> c_LZ4_uncompress cstr p (fromIntegral l)
+ --- NOTE: r is the count of bytes c_LZ4_uncompress read from
+ --- input buffer, and NOT the count of bytes used in result
+ --- buffer
+ return $! if (r <= 0) then 0 else l
+ return $! if (S.null out) then Nothing else (Just out)
+{-# INLINEABLE decompress #-}
+
+--------------------------------------------------------------------------------
+-- Utilities
+
+-- | Pushes a Word32 and a ByteString into the format we use to correctly
+-- encode/decode.
+format :: Word32 -> S.ByteString -> S.ByteString
+format l xs = write32LE l <> write32LE (fromIntegral $ S.length xs) <> xs
+
+write32LE :: Word32 -> S.ByteString
+write32LE w
+ = S.pack [ fromIntegral (w `shiftR` 0) :: Word8
+ , fromIntegral (w `shiftR` 8) :: Word8
+ , fromIntegral (w `shiftR` 16) :: Word8
+ , fromIntegral (w `shiftR` 24) :: Word8
+ ]
+
+-- | Gets a ByteString and it's length from the compressed format.
+unformat :: S.ByteString -> Maybe (Int, S.ByteString)
+unformat xs
+ | S.length xs < 8 = Nothing -- Need at least 8 bytes
+ | bsLen /= S.length rest = Nothing -- Header doesn't match real size
+ | otherwise = Just (fromIntegral origLen, rest)
+ where
+ origLen = fromIntegral (read32LE l0 l1 l2 l3) :: Int
+ bsLen = fromIntegral (read32LE s0 s1 s2 s3) :: Int
+
+ [l0,l1,l2,l3] = S.unpack (S.take 4 xs)
+ [s0,s1,s2,s3] = S.unpack (S.take 4 $ S.drop 4 xs)
+ rest = S.drop 8 xs
+
+read32LE :: Word8 -> Word8 -> Word8 -> Word8 -> Word32
+read32LE x0 x1 x2 x3
+ = fi x0 + (fi x1 `shiftL` 8) + (fi x2 `shiftL` 16) + (fi x3 `shiftL` 24)
+ where fi = fromIntegral :: Word8 -> Word32
+
+--------------------------------------------------------------------------------
+-- FFI Bindings
+
+-- | Worst case compression bounds on an input string.
+foreign import ccall unsafe "LZ4_compressBound"
+ c_LZ4_compressBound :: CInt -> CInt
+
+-- | Compresses a string.
+foreign import ccall unsafe "LZ4_compress"
+ c_LZ4_compress :: Ptr a -- ^ Source
+ -> Ptr b -- ^ Dest
+ -> CInt -- ^ Input size
+ -> IO CInt -- ^ Result
+
+-- | Decompresses a string.
+foreign import ccall unsafe "LZ4_decompress_fast"
+ c_LZ4_uncompress :: Ptr a -- ^ Source
+ -> Ptr b -- ^ Dest
+ -> CInt -- ^ Size of ORIGINAL INPUT
+ -> IO CInt -- ^ Result