diff options
author | Edward Z. Yang <ezyang@cs.stanford.edu> | 2017-02-23 13:46:02 -0800 |
---|---|---|
committer | Edward Z. Yang <ezyang@cs.stanford.edu> | 2017-02-26 01:23:35 -0800 |
commit | a0b4a2ac5015e9accd4fb71290a68ce1a1d3d630 (patch) | |
tree | a7c762f501bc072c81d27c71e0640f9490a36819 /libraries/ghc-compact | |
parent | 8f20844d3435094583db92a30550ca319d2be863 (diff) | |
download | haskell-a0b4a2ac5015e9accd4fb71290a68ce1a1d3d630.tar.gz |
Rename compact to ghc-compact.
Summary:
The plan is to release a separate library, 'compact', which gives a
friendly user-facing interface. This library is just enough so that we
can make sure the functionality is working in GHC.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
Test Plan: validate
Reviewers: bgamari, dfeuer, austin, simonmar, hvr
Subscribers: thomie, erikd, snowleopard
Differential Revision: https://phabricator.haskell.org/D3206
Diffstat (limited to 'libraries/ghc-compact')
36 files changed, 1058 insertions, 0 deletions
diff --git a/libraries/ghc-compact/.gitignore b/libraries/ghc-compact/.gitignore new file mode 100644 index 0000000000..89cf73d0b3 --- /dev/null +++ b/libraries/ghc-compact/.gitignore @@ -0,0 +1,4 @@ +GNUmakefile +/dist-install/ +/dist/ +ghc.mk diff --git a/libraries/ghc-compact/GHC/Compact.hs b/libraries/ghc-compact/GHC/Compact.hs new file mode 100644 index 0000000000..e3efaf24bc --- /dev/null +++ b/libraries/ghc-compact/GHC/Compact.hs @@ -0,0 +1,264 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE UnboxedTuples #-} +{-# OPTIONS_GHC -Wno-redundant-constraints -Wno-name-shadowing #-} + +----------------------------------------------------------------------------- +-- | +-- Module : GHC.Compact +-- Copyright : (c) The University of Glasgow 2001-2009 +-- (c) Giovanni Campagna <gcampagn@cs.stanford.edu> 2014 +-- License : BSD-style (see the file LICENSE) +-- +-- Maintainer : libraries@haskell.org +-- Stability : unstable +-- Portability : non-portable (GHC Extensions) +-- +-- This module provides a data structure, called a 'Compact', for +-- holding immutable, fully evaluated data in a consecutive block of memory. +-- Compact regions are good for two things: +-- +-- 1. Data in a compact region is not traversed during GC; any +-- incoming pointer to a compact region keeps the entire region +-- live. Thus, if you put a long-lived data structure in a compact +-- region, you may save a lot of cycles during major collections, +-- since you will no longer be (uselessly) retraversing this +-- data structure. +-- +-- 2. Because the data is stored contiguously, you can easily +-- dump the memory to disk and/or send it over the network. +-- For applications that are not bandwidth bound (GHC's heap +-- representation can be as much of a x4 expansion over a +-- binary serialization), this can lead to substantial speed ups. +-- +-- For example, suppose you have a function @loadBigStruct :: IO BigStruct@, +-- which loads a large data structure from the file system. You can "compact" +-- the structure with the following code: +-- +-- @ +-- do r <- 'compact' =<< loadBigStruct +-- let x = 'getCompact' r :: BigStruct +-- -- Do things with x +-- @ +-- +-- Note that 'compact' will not preserve internal sharing; use +-- 'compactWithSharing' (which is 10x slower) if you have cycles and/or +-- must preserve sharing. The 'Compact' pointer @r@ can be used +-- to add more data to a compact region; see 'compactAdd' or +-- 'compactAddWithSharing'. +-- +-- The implementation of compact regions is described by: +-- +-- * Edward Z. Yang, Giovanni Campagna, Ömer Ağacan, Ahmed El-Hassany, Abhishek +-- Kulkarni, Ryan Newton. \"/Efficient communication and Collection with Compact +-- Normal Forms/\". In Proceedings of the 20th ACM SIGPLAN International +-- Conference on Functional Programming. September 2015. <http://ezyang.com/compact.html> +-- +-- This library is supported by GHC 8.2 and later. + +module GHC.Compact ( + -- * The Compact type + Compact(..), + + -- * Compacting data + compact, + compactWithSharing, + compactAdd, + compactAddWithSharing, + + -- * Inspecting a Compact + getCompact, + inCompact, + isCompact, + compactSize, + + -- * Other utilities + compactResize, + + -- * Internal operations + mkCompact, + compactSized, + ) where + +import Control.Concurrent.MVar +import GHC.Prim +import GHC.Types + +-- | A 'Compact' contains fully evaluated, pure, immutable data. +-- +-- 'Compact' serves two purposes: +-- +-- * Data stored in a 'Compact' has no garbage collection overhead. +-- The garbage collector considers the whole 'Compact' to be alive +-- if there is a reference to any object within it. +-- +-- * A 'Compact' can be serialized, stored, and deserialized again. +-- The serialized data can only be deserialized by the exact binary +-- that created it, but it can be stored indefinitely before +-- deserialization. +-- +-- Compacts are self-contained, so compacting data involves copying +-- it; if you have data that lives in two 'Compact's, each will have a +-- separate copy of the data. +-- +-- The cost of compaction is similar to the cost of GC for the same +-- data, but it is perfomed only once. However, because +-- "Data.Compact.compact" does not stop-the-world, retaining internal +-- sharing during the compaction process is very costly. The user +-- can choose wether to 'compact' or 'compactWithSharing'. +-- +-- When you have a @'Compact' a@, you can get a pointer to the actual object +-- in the region using "Data.Compact.getCompact". The 'Compact' type +-- serves as handle on the region itself; you can use this handle +-- to add data to a specific 'Compact' with 'compactAdd' or +-- 'compactAddWithSharing' (giving you a new handle which corresponds +-- to the same compact region, but points to the newly added object +-- in the region). At the moment, due to technical reasons, +-- it's not possible to get the @'Compact' a@ if you only have an @a@, +-- so make sure you hold on to the handle as necessary. +-- +-- Data in a compact doesn't ever move, so compacting data is also a +-- way to pin arbitrary data structures in memory. +-- +-- There are some limitations on what can be compacted: +-- +-- * Functions. Compaction only applies to data. +-- +-- * Pinned 'ByteArray#' objects cannot be compacted. This is for a +-- good reason: the memory is pinned so that it can be referenced by +-- address (the address might be stored in a C data structure, for +-- example), so we can't make a copy of it to store in the 'Compact'. +-- +-- * Objects with mutable pointer fields also cannot be compacted, +-- because subsequent mutation would destroy the property that a compact is +-- self-contained. +-- +-- If compaction encounters any of the above, a 'CompactionFailed' +-- exception will be thrown by the compaction operation. +-- +data Compact a = Compact Compact# a (MVar ()) + -- we can *read* from a Compact without taking a lock, but only + -- one thread can be writing to the compact at any given time. + -- The MVar here is to enforce mutual exclusion among writers. + -- Note: the MVar protects the Compact# only, not the pure value 'a' + +-- | Make a new 'Compact' object, given a pointer to the true +-- underlying region. You must uphold the invariant that @a@ lives +-- in the compact region. +-- +mkCompact + :: Compact# -> a -> State# RealWorld -> (# State# RealWorld, Compact a #) +mkCompact compact# a s = + case unIO (newMVar ()) s of { (# s1, lock #) -> + (# s1, Compact compact# a lock #) } + where + unIO (IO a) = a + +-- | Transfer @a@ into a new compact region, with a preallocated size, +-- possibly preserving sharing or not. If you know how big the data +-- structure in question is, you can save time by picking an appropriate +-- block size for the compact region. +-- +compactSized :: Int -> Bool -> a -> IO (Compact a) +compactSized (I# size) share a = IO $ \s0 -> + case compactNew# (int2Word# size) s0 of { (# s1, compact# #) -> + case compactAddPrim compact# a s1 of { (# s2, pk #) -> + mkCompact compact# pk s2 }} + where + compactAddPrim + | share = compactAddWithSharing# + | otherwise = compactAdd# + +-- | Retrieve a direct pointer to the value pointed at by a 'Compact' reference. +-- If you have used 'compactAdd', there may be multiple 'Compact' references +-- into the same compact region. Upholds the property: +-- +-- > inCompact c (getCompact c) == True +-- +getCompact :: Compact a -> a +getCompact (Compact _ obj _) = obj + +-- | Compact a value. /O(size of unshared data)/ +-- +-- If the structure contains any internal sharing, the shared data +-- will be duplicated during the compaction process. This will +-- not terminate if the structure contains cycles (use 'compactWithSharing' +-- instead). +-- +-- The object in question must not contain any functions or mutable data; if it +-- does, 'compact' will raise an exception. In the future, we may add a type +-- class which will help statically check if this is the case or not. +-- +compact :: a -> IO (Compact a) +compact = compactSized 31268 False + +-- | Compact a value, retaining any internal sharing and +-- cycles. /O(size of data)/ +-- +-- This is typically about 10x slower than 'compact', because it works +-- by maintaining a hash table mapping uncompacted objects to +-- compacted objects. +-- +-- The object in question must not contain any functions or mutable data; if it +-- does, 'compact' will raise an exception. In the future, we may add a type +-- class which will help statically check if this is the case or not. +-- +compactWithSharing :: a -> IO (Compact a) +compactWithSharing = compactSized 31268 True + +-- | Add a value to an existing 'Compact'. This will help you avoid +-- copying when the value contains pointers into the compact region, +-- but remember that after compaction this value will only be deallocated +-- with the entire compact region. +-- +-- Behaves exactly like 'compact' with respect to sharing and what data +-- it accepts. +-- +compactAdd :: Compact b -> a -> IO (Compact a) +compactAdd (Compact compact# _ lock) a = withMVar lock $ \_ -> IO $ \s -> + case compactAdd# compact# a s of { (# s1, pk #) -> + (# s1, Compact compact# pk lock #) } + +-- | Add a value to an existing 'Compact', like 'compactAdd', +-- but behaving exactly like 'compactWithSharing' with respect to sharing and +-- what data it accepts. +-- +compactAddWithSharing :: Compact b -> a -> IO (Compact a) +compactAddWithSharing (Compact compact# _ lock) a = + withMVar lock $ \_ -> IO $ \s -> + case compactAddWithSharing# compact# a s of { (# s1, pk #) -> + (# s1, Compact compact# pk lock #) } + +-- | Check if the second argument is inside the passed 'Compact'. +-- +inCompact :: Compact b -> a -> IO Bool +inCompact (Compact buffer _ _) !val = + IO (\s -> case compactContains# buffer val s of + (# s', v #) -> (# s', isTrue# v #) ) + +-- | Check if the argument is in any 'Compact'. If true, the value in question +-- is also fully evaluated, since any value in a compact region must +-- be fully evaluated. +-- +isCompact :: a -> IO Bool +isCompact !val = + IO (\s -> case compactContainsAny# val s of + (# s', v #) -> (# s', isTrue# v #) ) + +-- | Returns the size in bytes of the compact region. +-- +compactSize :: Compact a -> IO Word +compactSize (Compact buffer _ lock) = withMVar lock $ \_ -> IO $ \s0 -> + case compactSize# buffer s0 of (# s1, sz #) -> (# s1, W# sz #) + +-- | *Experimental.* This function doesn't actually resize a compact +-- region; rather, it changes the default block size which we allocate +-- when the current block runs out of space, and also appends a block +-- to the compact region. +-- +compactResize :: Compact a -> Word -> IO () +compactResize (Compact oldBuffer _ lock) (W# new_size) = + withMVar lock $ \_ -> IO $ \s -> + case compactResize# oldBuffer new_size s of + s' -> (# s', () #) diff --git a/libraries/ghc-compact/GHC/Compact/Serialized.hs b/libraries/ghc-compact/GHC/Compact/Serialized.hs new file mode 100644 index 0000000000..0263cdf9f1 --- /dev/null +++ b/libraries/ghc-compact/GHC/Compact/Serialized.hs @@ -0,0 +1,210 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE UnboxedTuples #-} + +----------------------------------------------------------------------------- +-- | +-- Module : GHC.Compact.Serialized +-- Copyright : (c) The University of Glasgow 2001-2009 +-- (c) Giovanni Campagna <gcampagn@cs.stanford.edu> 2015 +-- License : BSD-style (see the file LICENSE) +-- +-- Maintainer : libraries@haskell.org +-- Stability : unstable +-- Portability : non-portable (GHC Extensions) +-- +-- This module contains support for serializing a Compact for network +-- transmission and on-disk storage. +-- +-- /Since: 1.0.0/ + +module GHC.Compact.Serialized( + SerializedCompact(..), + withSerializedCompact, + importCompact, + importCompactByteStrings, +) where + +import GHC.Prim +import GHC.Types +import GHC.Word (Word8) + +import GHC.Ptr (Ptr(..), plusPtr) + +import Control.Concurrent +import qualified Data.ByteString as ByteString +import Data.ByteString.Internal(toForeignPtr) +import Data.IORef(newIORef, readIORef, writeIORef) +import Foreign.ForeignPtr(withForeignPtr) +import Foreign.Marshal.Utils(copyBytes) + +import GHC.Compact + +-- | A serialized version of the 'Compact' metadata (each block with +-- address and size and the address of the root). This structure is +-- meant to be sent alongside the actual 'Compact' data. It can be +-- sent out of band in advance if the data is to be sent over RDMA +-- (which requires both sender and receiver to have pinned buffers). +data SerializedCompact a = SerializedCompact + { serializedCompactBlockList :: [(Ptr a, Word)] + , serializedCompactRoot :: Ptr a + } + +addrIsNull :: Addr# -> Bool +addrIsNull addr = isTrue# (nullAddr# `eqAddr#` addr) + +compactGetFirstBlock :: Compact# -> IO (Ptr a, Word) +compactGetFirstBlock buffer = + IO (\s -> case compactGetFirstBlock# buffer s of + (# s', addr, size #) -> (# s', (Ptr addr, W# size) #) ) + +compactGetNextBlock :: Compact# -> Addr# -> IO (Ptr a, Word) +compactGetNextBlock buffer block = + IO (\s -> case compactGetNextBlock# buffer block s of + (# s', addr, size #) -> (# s', (Ptr addr, W# size) #) ) + +mkBlockList :: Compact# -> IO [(Ptr a, Word)] +mkBlockList buffer = compactGetFirstBlock buffer >>= go + where + go :: (Ptr a, Word) -> IO [(Ptr a, Word)] + go (Ptr block, _) | addrIsNull block = return [] + go item@(Ptr block, _) = do + next <- compactGetNextBlock buffer block + rest <- go next + return $ item : rest + +-- We MUST mark withSerializedCompact as NOINLINE +-- Otherwise the compiler will eliminate the call to touch# +-- causing the Compact# to be potentially GCed too eagerly, +-- before func had a chance to copy everything into its own +-- buffers/sockets/whatever + +-- | Serialize the 'Compact', and call the provided function with +-- with the 'Compact' serialized representation. It is not safe +-- to return the pointer from the action and use it after +-- the action completes: all uses must be inside this bracket, +-- since we cannot guarantee that the compact region will stay +-- live from the 'Ptr' object. For example, it would be +-- unsound to use 'unsafeInterleaveIO' to lazily construct +-- a lazy bytestring from the 'Ptr'. +-- +{-# NOINLINE withSerializedCompact #-} +withSerializedCompact :: Compact a -> + (SerializedCompact a -> IO c) -> IO c +withSerializedCompact (Compact buffer root lock) func = withMVar lock $ \_ -> do + rootPtr <- IO (\s -> case anyToAddr# root s of + (# s', rootAddr #) -> (# s', Ptr rootAddr #) ) + blockList <- mkBlockList buffer + let serialized = SerializedCompact blockList rootPtr + r <- func serialized + IO (\s -> case touch# buffer s of + s' -> (# s', r #) ) + +fixupPointers :: Addr# -> Addr# -> State# RealWorld -> + (# State# RealWorld, Maybe (Compact a) #) +fixupPointers firstBlock rootAddr s = + case compactFixupPointers# firstBlock rootAddr s of + (# s', buffer, adjustedRoot #) -> + if addrIsNull adjustedRoot then (# s', Nothing #) + else case addrToAny# adjustedRoot of + (# root #) -> case mkCompact buffer root s' of + (# s'', c #) -> (# s'', Just c #) + +-- | Deserialize a 'SerializedCompact' into a in-memory 'Compact'. The +-- provided function will be called with the address and size of each +-- newly allocated block in succession, and should fill the memory +-- from the external source (eg. by reading from a socket or from disk) +-- 'importCompact' can return Nothing if the 'Compact' was corrupt +-- or it had pointers that could not be adjusted. +importCompact :: SerializedCompact a -> (Ptr b -> Word -> IO ()) -> + IO (Maybe (Compact a)) + +-- what we would like is +{- + importCompactPtrs ((firstAddr, firstSize):rest) = do + (firstBlock, compact) <- compactAllocateAt firstAddr firstSize + #nullAddr + fillBlock firstBlock firstAddr firstSize + let go prev [] = return () + go prev ((addr, size):rest) = do + (block, _) <- compactAllocateAt addr size prev + fillBlock block addr size + go block rest + go firstBlock rest + if isTrue# (compactFixupPointers compact) then + return $ Just compact + else + return Nothing + +But we can't do that because IO Addr# is not valid (kind mismatch) +This check exists to prevent a polymorphic data constructor from using +an unlifted type (which would break GC) - it would not a problem for IO +because IO stores a function, not a value, but the kind check is there +anyway. +Note that by the reasoning, we cannot do IO (# Addr#, Word# #), nor +we can do IO (Addr#, Word#) (that would break the GC for real!) + +And therefore we need to do everything with State# explicitly. +-} + +-- just do shut up GHC +importCompact (SerializedCompact [] _) _ = return Nothing +importCompact (SerializedCompact blocks root) filler = do + -- I'm not sure why we need a bang pattern here, given that + -- these are obviously strict lets, but ghc complains otherwise + let !((_, W# firstSize):otherBlocks) = blocks + let !(Ptr rootAddr) = root + IO $ \s0 -> + case compactAllocateBlock# firstSize nullAddr# s0 of { + (# s1, firstBlock #) -> + case fillBlock firstBlock firstSize s1 of { s2 -> + case go firstBlock otherBlocks s2 of { s3 -> + fixupPointers firstBlock rootAddr s3 + }}} + where + -- note that the case statements above are strict even though + -- they don't seem to inspect their argument because State# + -- is an unlifted type + fillBlock :: Addr# -> Word# -> State# RealWorld -> State# RealWorld + fillBlock addr size s = case filler (Ptr addr) (W# size) of + IO action -> case action s of + (# s', _ #) -> s' + + go :: Addr# -> [(Ptr a, Word)] -> State# RealWorld -> State# RealWorld + go _ [] s = s + go previous ((_, W# size):rest) s = + case compactAllocateBlock# size previous s of + (# s', block #) -> case fillBlock block size s' of + s'' -> go block rest s'' + +sanityCheckByteStrings :: SerializedCompact a -> [ByteString.ByteString] -> Bool +sanityCheckByteStrings (SerializedCompact scl _) bsl = go scl bsl + where + go [] [] = True + go (_:_) [] = False + go [] (_:_) = False + go ((_, size):scs) (bs:bss) = + fromIntegral size == ByteString.length bs && go scs bss + +-- | Convenience function for importing a compact region that is represented +-- by a list of strict 'ByteString's. +-- +importCompactByteStrings :: SerializedCompact a -> [ByteString.ByteString] -> + IO (Maybe (Compact a)) +importCompactByteStrings serialized stringList = + -- sanity check stringList first - if we throw an exception later we leak + -- memory! + if not (sanityCheckByteStrings serialized stringList) then + return Nothing + else do + state <- newIORef stringList + let filler :: Ptr Word8 -> Word -> IO () + filler to size = do + -- this pattern match will never fail + (next:rest) <- readIORef state + let (fp, off, _) = toForeignPtr next + withForeignPtr fp $ \from -> do + copyBytes to (from `plusPtr` off) (fromIntegral size) + writeIORef state rest + importCompact serialized filler diff --git a/libraries/ghc-compact/LICENSE b/libraries/ghc-compact/LICENSE new file mode 100644 index 0000000000..06b2599694 --- /dev/null +++ b/libraries/ghc-compact/LICENSE @@ -0,0 +1,41 @@ +This library (compact) is derived from code from the GHC project which +is largely (c) The University of Glasgow, and distributable under a +BSD-style license (see below). +Portions of this library were written by Giovanni Campagna +(gcampagn@cs.stanford.edu). They are available under the same license. + +----------------------------------------------------------------------------- + +The Glasgow Haskell Compiler License + +Copyright 2001-2014, The University Court of the University of Glasgow. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + +- Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + +- Neither name of the University nor the names of its contributors may be +used to endorse or promote products derived from this software without +specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY COURT OF THE UNIVERSITY OF +GLASGOW AND THE CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, +INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +UNIVERSITY COURT OF THE UNIVERSITY OF GLASGOW OR THE CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +DAMAGE. + +----------------------------------------------------------------------------- diff --git a/libraries/ghc-compact/README.md b/libraries/ghc-compact/README.md new file mode 100644 index 0000000000..0b7d197c88 --- /dev/null +++ b/libraries/ghc-compact/README.md @@ -0,0 +1,5 @@ +The `compact` Package +===================== + +Exposes a single data structure, called a Compact, which contains +fully evaluated data closed under pointer reachability. diff --git a/libraries/ghc-compact/Setup.hs b/libraries/ghc-compact/Setup.hs new file mode 100644 index 0000000000..6fa548caf7 --- /dev/null +++ b/libraries/ghc-compact/Setup.hs @@ -0,0 +1,6 @@ +module Main (main) where + +import Distribution.Simple + +main :: IO () +main = defaultMain diff --git a/libraries/ghc-compact/ghc-compact.cabal b/libraries/ghc-compact/ghc-compact.cabal new file mode 100644 index 0000000000..829e56c4f1 --- /dev/null +++ b/libraries/ghc-compact/ghc-compact.cabal @@ -0,0 +1,45 @@ +name: ghc-compact +version: 0.1.0.0 +-- NOTE: Don't forget to update ./changelog.md +license: BSD3 +license-file: LICENSE +maintainer: libraries@haskell.org +bug-reports: http://ghc.haskell.org/trac/ghc/newticket?component=libraries/ghc-compact +synopsis: In memory storage of deeply evaluated data structure +category: Data +description: + This package provides minimal functionality for working with + "compact regions", which hold a fully evaluated Haskell object graph. + These regions maintain the invariant that no pointers live inside the struct + that point outside it, which ensures efficient garbage collection without + ever reading the structure contents (effectively, it works as a manually + managed "oldest generation" which is never freed until the whole is + released). + + Internally, the struct is stored a single contiguous block of memory, + which allows efficient serialization and deserialization of structs + for distributed computing. +build-type: Simple +cabal-version: >=1.10 +tested-with: GHC==7.11 + +source-repository head + type: git + location: http://git.haskell.org/ghc.git + subdir: libraries/ghc-compact + +library + default-language: Haskell2010 + other-extensions: + MagicHash + BangPatterns + UnboxedTuples + CPP + + build-depends: ghc-prim == 0.5.0.0, + base >= 4.9.0 && < 4.11, + bytestring >= 0.10.6.0 + ghc-options: -Wall + + exposed-modules: GHC.Compact + GHC.Compact.Serialized diff --git a/libraries/ghc-compact/tests/.gitignore b/libraries/ghc-compact/tests/.gitignore new file mode 100644 index 0000000000..8887a1bbea --- /dev/null +++ b/libraries/ghc-compact/tests/.gitignore @@ -0,0 +1,18 @@ +.hpc.* +*.eventlog +*.genscript +compact_append +compact_simple +compact_nospace +compact_noshare +compact_loop +compact_resize +compact_inc_append +compact_inc_simple +compact_inc_nospace +compact_inc_noshare +compact_autoexpand +compact_inc_custom +compact_inc_incremental +compact_inc_monad +compact_simple_symbols diff --git a/libraries/ghc-compact/tests/Makefile b/libraries/ghc-compact/tests/Makefile new file mode 100644 index 0000000000..6a0abcf1cf --- /dev/null +++ b/libraries/ghc-compact/tests/Makefile @@ -0,0 +1,7 @@ +# This Makefile runs the tests using GHC's testsuite framework. It +# assumes the package is part of a GHC build tree with the testsuite +# installed in ../../../testsuite. + +TOP=../../../testsuite +include $(TOP)/mk/boilerplate.mk +include $(TOP)/mk/test.mk diff --git a/libraries/ghc-compact/tests/all.T b/libraries/ghc-compact/tests/all.T new file mode 100644 index 0000000000..753592e733 --- /dev/null +++ b/libraries/ghc-compact/tests/all.T @@ -0,0 +1,19 @@ +setTestOpts(extra_ways(['sanity'])) + +test('compact_simple', normal, compile_and_run, ['']) +test('compact_loop', normal, compile_and_run, ['']) +test('compact_append', normal, compile_and_run, ['']) +test('compact_autoexpand', normal, compile_and_run, ['']) +test('compact_simple_array', normal, compile_and_run, ['']) +test('compact_huge_array', normal, compile_and_run, ['']) +test('compact_serialize', normal, compile_and_run, ['']) +test('compact_largemap', normal, compile_and_run, ['']) +test('compact_threads', [ extra_run_opts('1000') ], compile_and_run, ['']) +test('compact_cycle', extra_run_opts('+RTS -K1m'), compile_and_run, ['']) +test('compact_function', exit_code(1), compile_and_run, ['']) +test('compact_mutable', exit_code(1), compile_and_run, ['']) +test('compact_pinned', exit_code(1), compile_and_run, ['']) +test('compact_gc', ignore_stdout, compile_and_run, ['']) +test('compact_share', normal, compile_and_run, ['']) +test('compact_bench', [ ignore_stdout, extra_run_opts('100') ], + compile_and_run, ['']) diff --git a/libraries/ghc-compact/tests/compact_append.hs b/libraries/ghc-compact/tests/compact_append.hs new file mode 100644 index 0000000000..274c0bf429 --- /dev/null +++ b/libraries/ghc-compact/tests/compact_append.hs @@ -0,0 +1,38 @@ +module Main where + +import Control.Exception +import System.Mem + +import GHC.Compact + +assertFail :: String -> IO () +assertFail msg = throwIO $ AssertionFailed msg + +assertEquals :: (Eq a, Show a) => a -> a -> IO () +assertEquals expected actual = + if expected == actual then return () + else assertFail $ "expected " ++ (show expected) + ++ ", got " ++ (show actual) + +main = do + let val = ("hello", Just 42) :: (String, Maybe Int) + str <- compactWithSharing val + + let val2 = ("world", 42) :: (String, Int) + str2 <- compactAddWithSharing str val2 + + -- check that values where not corrupted + assertEquals ("hello", Just 42) val + assertEquals ("world", 42) val2 + -- check the values in the compact + assertEquals ("hello", Just 42) (getCompact str) + assertEquals ("world", 42) (getCompact str2) + + performMajorGC + + -- same checks again + assertEquals ("hello", Just 42) val + assertEquals ("world", 42) val2 + -- check the values in the compact + assertEquals ("hello", Just 42) (getCompact str) + assertEquals ("world", 42) (getCompact str2) diff --git a/libraries/ghc-compact/tests/compact_autoexpand.hs b/libraries/ghc-compact/tests/compact_autoexpand.hs new file mode 100644 index 0000000000..c4d27d08f6 --- /dev/null +++ b/libraries/ghc-compact/tests/compact_autoexpand.hs @@ -0,0 +1,27 @@ +module Main where + +import Control.Exception +import System.Mem + +import GHC.Compact + +assertFail :: String -> IO () +assertFail msg = throwIO $ AssertionFailed msg + +assertEquals :: (Eq a, Show a) => a -> a -> IO () +assertEquals expected actual = + if expected == actual then return () + else assertFail $ "expected " ++ (show expected) + ++ ", got " ++ (show actual) + +main = do + -- create a compact large 4096 bytes (minus the size of header) + -- add a value that is 1024 cons cells, pointing to 7 INTLIKE + -- each cons cell is 1 word header, 1 word data, 1 word next + -- so total 3072 words, 12288 bytes on x86, 24576 on x86_64 + -- it should not fit in one block + let val = replicate 4096 7 :: [Int] + str <- compactSized 1 True val + assertEquals val (getCompact str) + performMajorGC + assertEquals val (getCompact str) diff --git a/libraries/ghc-compact/tests/compact_bench.hs b/libraries/ghc-compact/tests/compact_bench.hs new file mode 100644 index 0000000000..fa249dcc36 --- /dev/null +++ b/libraries/ghc-compact/tests/compact_bench.hs @@ -0,0 +1,27 @@ +import Control.Exception +import GHC.Compact +import qualified Data.Map as Map +import Data.Time.Clock +import Text.Printf +import System.Environment +import System.Mem +import Control.DeepSeq + +-- Benchmark compact against compactWithSharing. e.g. +-- ./compact_bench 1000000 + +main = do + [n] <- map read <$> getArgs + let m = Map.fromList [(x,[x*1000..x*1000+10]) | x <- [1..(n::Integer)]] + evaluate (force m) + timeIt "compact" $ compact m >>= compactSize >>= print + timeIt "compactWithSharing" $ compactWithSharing m >>= compactSize >>= print + +timeIt :: String -> IO a -> IO a +timeIt str io = do + performMajorGC + t0 <- getCurrentTime + a <- io + t1 <- getCurrentTime + printf "%s: %.2f\n" str (realToFrac (t1 `diffUTCTime` t0) :: Double) + return a diff --git a/libraries/ghc-compact/tests/compact_bytestring.hs b/libraries/ghc-compact/tests/compact_bytestring.hs new file mode 100644 index 0000000000..61a50df9c2 --- /dev/null +++ b/libraries/ghc-compact/tests/compact_bytestring.hs @@ -0,0 +1,7 @@ +import qualified Data.ByteString.Char8 as B +import GHC.Compact +import qualified Data.Map as Map + +main = do + c <- compact (Map.fromList [(B.pack (show x), x) | x <- [1..(10000::Int)]]) + print (getCompact c) diff --git a/libraries/ghc-compact/tests/compact_cycle.hs b/libraries/ghc-compact/tests/compact_cycle.hs new file mode 100644 index 0000000000..54047e0c76 --- /dev/null +++ b/libraries/ghc-compact/tests/compact_cycle.hs @@ -0,0 +1,9 @@ +import Control.Exception +import GHC.Compact +import qualified Data.Map as Map +import System.Exit + +main = do + c <- compactWithSharing (cycle "abc") -- magic! + print (length (show (take 100 (getCompact c)))) + print =<< compactSize c diff --git a/libraries/ghc-compact/tests/compact_cycle.stdout b/libraries/ghc-compact/tests/compact_cycle.stdout new file mode 100644 index 0000000000..6fc8a53046 --- /dev/null +++ b/libraries/ghc-compact/tests/compact_cycle.stdout @@ -0,0 +1,2 @@ +102 +32768 diff --git a/libraries/ghc-compact/tests/compact_function.hs b/libraries/ghc-compact/tests/compact_function.hs new file mode 100644 index 0000000000..166f345552 --- /dev/null +++ b/libraries/ghc-compact/tests/compact_function.hs @@ -0,0 +1,6 @@ +import Control.Exception +import GHC.Compact + +data HiddenFunction = HiddenFunction (Int -> Int) + +main = compact (HiddenFunction (+1)) diff --git a/libraries/ghc-compact/tests/compact_function.stderr b/libraries/ghc-compact/tests/compact_function.stderr new file mode 100644 index 0000000000..197da0460b --- /dev/null +++ b/libraries/ghc-compact/tests/compact_function.stderr @@ -0,0 +1 @@ +compact_function: compaction failed: cannot compact functions diff --git a/libraries/ghc-compact/tests/compact_gc.hs b/libraries/ghc-compact/tests/compact_gc.hs new file mode 100644 index 0000000000..2e13bafdbe --- /dev/null +++ b/libraries/ghc-compact/tests/compact_gc.hs @@ -0,0 +1,11 @@ +import Control.Monad +import GHC.Compact +import qualified Data.Map as Map + +main = do + let m = Map.fromList [(x,show x) | x <- [1..(10000::Int)]] + c <- compactWithSharing m + print =<< compactSize c + c <- foldM (\c _ -> do c <- compactWithSharing (getCompact c); print =<< compactSize c; return c) c [1..10] + print (length (show (getCompact c))) + print =<< compactSize c diff --git a/libraries/ghc-compact/tests/compact_huge_array.hs b/libraries/ghc-compact/tests/compact_huge_array.hs new file mode 100644 index 0000000000..85694f5d9a --- /dev/null +++ b/libraries/ghc-compact/tests/compact_huge_array.hs @@ -0,0 +1,56 @@ +module Main where + +import Control.Exception +import Control.Monad +import System.Mem + +import Control.Monad.ST +import Data.Array +import Data.Array.ST +import qualified Data.Array.Unboxed as U + +import GHC.Compact + +assertFail :: String -> IO () +assertFail msg = throwIO $ AssertionFailed msg + +assertEquals :: (Eq a, Show a) => a -> a -> IO () +assertEquals expected actual = + if expected == actual then return () + else assertFail $ "expected " ++ (show expected) + ++ ", got " ++ (show actual) + +arrTest :: (Monad m, MArray a e m, Num e) => m (a Int e) +arrTest = do + arr <- newArray (1, 10) 0 + forM_ [1..10] $ \j -> do + writeArray arr j (fromIntegral $ 2*j + 1) + return arr + +-- test :: (Word -> a -> IO (Maybe (Compact a))) -> IO () +test func = do + let fromList :: Array Int Int + fromList = listArray (1, 300000) [1..] + frozen :: Array Int Int + frozen = runST $ do + arr <- arrTest :: ST s (STArray s Int Int) + freeze arr + stFrozen :: Array Int Int + stFrozen = runSTArray arrTest + unboxedFrozen :: U.UArray Int Int + unboxedFrozen = runSTUArray arrTest + + let val = (fromList, frozen, stFrozen, unboxedFrozen) + str <- func val + + -- check that val is still good + assertEquals (fromList, frozen, stFrozen, unboxedFrozen) val + -- check the value in the compact + assertEquals val (getCompact str) + performMajorGC + -- check again the value in the compact + assertEquals val (getCompact str) + +main = do + test (compactSized 4096 True) + test (compactSized 4096 False) diff --git a/libraries/ghc-compact/tests/compact_largemap.hs b/libraries/ghc-compact/tests/compact_largemap.hs new file mode 100644 index 0000000000..bc918c905b --- /dev/null +++ b/libraries/ghc-compact/tests/compact_largemap.hs @@ -0,0 +1,9 @@ +import GHC.Compact +import qualified Data.Map as Map + +main = do + let m = Map.fromList [(x,show x) | x <- [1..(10000::Integer)]] + c <- compactWithSharing m + print (length (show (getCompact c))) + c <- compact m + print (length (show (getCompact c))) diff --git a/libraries/ghc-compact/tests/compact_largemap.stdout b/libraries/ghc-compact/tests/compact_largemap.stdout new file mode 100644 index 0000000000..4825984a93 --- /dev/null +++ b/libraries/ghc-compact/tests/compact_largemap.stdout @@ -0,0 +1,2 @@ +137798 +137798 diff --git a/libraries/ghc-compact/tests/compact_loop.hs b/libraries/ghc-compact/tests/compact_loop.hs new file mode 100644 index 0000000000..40e0817dfe --- /dev/null +++ b/libraries/ghc-compact/tests/compact_loop.hs @@ -0,0 +1,42 @@ +module Main where + +import Control.Exception +import System.Mem +import Text.Show + +import GHC.Compact + +assertFail :: String -> IO () +assertFail msg = throwIO $ AssertionFailed msg + +assertEquals :: (Eq a, Show a) => a -> a -> IO () +assertEquals expected actual = + if expected == actual then return () + else assertFail $ "expected " ++ (show expected) + ++ ", got " ++ (show actual) + +data Tree = Nil | Node Tree Tree Tree + +instance Eq Tree where + Nil == Nil = True + Node _ l1 r1 == Node _ l2 r2 = l1 == l2 && r1 == r2 + _ == _ = False + +instance Show Tree where + showsPrec _ Nil = showString "Nil" + showsPrec _ (Node _ l r) = showString "(Node " . shows l . + showString " " . shows r . showString ")" + +{-# NOINLINE test #-} +test x = do + let a = Node Nil x b + b = Node a Nil Nil + str <- compactSized 4096 True a + + -- check the value in the compact + assertEquals a (getCompact str) + performMajorGC + -- check again the value in the compact + assertEquals a (getCompact str) + +main = test Nil diff --git a/libraries/ghc-compact/tests/compact_mutable.hs b/libraries/ghc-compact/tests/compact_mutable.hs new file mode 100644 index 0000000000..33a405452d --- /dev/null +++ b/libraries/ghc-compact/tests/compact_mutable.hs @@ -0,0 +1,9 @@ +import Control.Concurrent +import Control.Exception +import GHC.Compact + +data HiddenMVar = HiddenMVar (MVar ()) + +main = do + m <- newEmptyMVar + compact (HiddenMVar m) diff --git a/libraries/ghc-compact/tests/compact_mutable.stderr b/libraries/ghc-compact/tests/compact_mutable.stderr new file mode 100644 index 0000000000..9a4bd2892e --- /dev/null +++ b/libraries/ghc-compact/tests/compact_mutable.stderr @@ -0,0 +1 @@ +compact_mutable: compaction failed: cannot compact mutable objects diff --git a/libraries/ghc-compact/tests/compact_pinned.hs b/libraries/ghc-compact/tests/compact_pinned.hs new file mode 100644 index 0000000000..16eff0da8a --- /dev/null +++ b/libraries/ghc-compact/tests/compact_pinned.hs @@ -0,0 +1,5 @@ +import Control.Exception +import qualified Data.ByteString.Char8 as B +import GHC.Compact + +main = compact (B.pack ['a'..'c']) diff --git a/libraries/ghc-compact/tests/compact_pinned.stderr b/libraries/ghc-compact/tests/compact_pinned.stderr new file mode 100644 index 0000000000..1f470a0d49 --- /dev/null +++ b/libraries/ghc-compact/tests/compact_pinned.stderr @@ -0,0 +1 @@ +compact_pinned: compaction failed: cannot compact pinned objects diff --git a/libraries/ghc-compact/tests/compact_serialize.hs b/libraries/ghc-compact/tests/compact_serialize.hs new file mode 100644 index 0000000000..ff8e0cfa14 --- /dev/null +++ b/libraries/ghc-compact/tests/compact_serialize.hs @@ -0,0 +1,52 @@ +module Main where + +import Control.Exception +import Control.Monad +import System.Mem + +import Data.IORef +import Data.ByteString (ByteString, packCStringLen) +import Foreign.Ptr + +import GHC.Compact +import GHC.Compact.Serialized + +assertFail :: String -> IO () +assertFail msg = throwIO $ AssertionFailed msg + +assertEquals :: (Eq a, Show a) => a -> a -> IO () +assertEquals expected actual = + if expected == actual then return () + else assertFail $ "expected " ++ (show expected) + ++ ", got " ++ (show actual) + +serialize :: a -> IO (SerializedCompact a, [ByteString]) +serialize val = do + cnf <- compactSized 4096 True val + + bytestrref <- newIORef undefined + scref <- newIORef undefined + withSerializedCompact cnf $ \sc -> do + writeIORef scref sc + performMajorGC + bytestrs <- forM (serializedCompactBlockList sc) $ \(ptr, size) -> do + packCStringLen (castPtr ptr, fromIntegral size) + writeIORef bytestrref bytestrs + + performMajorGC + + bytestrs <- readIORef bytestrref + sc <- readIORef scref + return (sc, bytestrs) + +main = do + let val = ("hello", 1, 42, 42, Just 42) :: + (String, Int, Int, Integer, Maybe Int) + + (sc, bytestrs) <- serialize val + performMajorGC + + mcnf <- importCompactByteStrings sc bytestrs + case mcnf of + Nothing -> assertFail "import failed" + Just cnf -> assertEquals val (getCompact cnf) diff --git a/libraries/ghc-compact/tests/compact_serialize.stderr b/libraries/ghc-compact/tests/compact_serialize.stderr new file mode 100644 index 0000000000..2483efa009 --- /dev/null +++ b/libraries/ghc-compact/tests/compact_serialize.stderr @@ -0,0 +1 @@ +Compact imported at the wrong address, will fix up internal pointers diff --git a/libraries/ghc-compact/tests/compact_share.hs b/libraries/ghc-compact/tests/compact_share.hs new file mode 100644 index 0000000000..323c179cca --- /dev/null +++ b/libraries/ghc-compact/tests/compact_share.hs @@ -0,0 +1,13 @@ +import GHC.Compact +import qualified Data.Map as Map + +main = do + let m1 = Map.fromList [(x,show x) | x <- [1..(10000::Integer)]] + m2 = Map.fromList [(x,y) | x <- [1..(10000::Integer)], + Just y <- [Map.lookup x m1]] + c <- compact (m1,m2) + print (length (show (getCompact c))) + print =<< compactSize c + c <- compactWithSharing (m1,m2) + print (length (show (getCompact c))) + print =<< compactSize c diff --git a/libraries/ghc-compact/tests/compact_share.stdout b/libraries/ghc-compact/tests/compact_share.stdout new file mode 100644 index 0000000000..0969fdf956 --- /dev/null +++ b/libraries/ghc-compact/tests/compact_share.stdout @@ -0,0 +1,4 @@ +275599 +3801088 +275599 +2228224 diff --git a/libraries/ghc-compact/tests/compact_simple.hs b/libraries/ghc-compact/tests/compact_simple.hs new file mode 100644 index 0000000000..28575d20d0 --- /dev/null +++ b/libraries/ghc-compact/tests/compact_simple.hs @@ -0,0 +1,37 @@ +module Main where + +import Control.Exception +import System.Mem + +import GHC.Compact + +assertFail :: String -> IO () +assertFail msg = throwIO $ AssertionFailed msg + +assertEquals :: (Eq a, Show a) => a -> a -> IO () +assertEquals expected actual = + if expected == actual then return () + else assertFail $ "expected " ++ (show expected) + ++ ", got " ++ (show actual) + +-- test :: (Word -> a -> IO (Maybe (Compact a))) -> IO () +test func = do + let val = ("hello", 1, 42, 42, Just 42) :: + (String, Int, Int, Integer, Maybe Int) + str <- func val + + -- check that val is still good + assertEquals ("hello", 1, 42, 42, Just 42) val + -- check the value in the compact + assertEquals ("hello", 1, 42, 42, Just 42) (getCompact str) + performMajorGC + -- check again val + assertEquals ("hello", 1, 42, 42, Just 42) val + -- check again the value in the compact + assertEquals ("hello", 1, 42, 42, Just 42) (getCompact str) + + print =<< compactSize str + +main = do + test compactWithSharing + test compact diff --git a/libraries/ghc-compact/tests/compact_simple.stdout b/libraries/ghc-compact/tests/compact_simple.stdout new file mode 100644 index 0000000000..5549a58580 --- /dev/null +++ b/libraries/ghc-compact/tests/compact_simple.stdout @@ -0,0 +1,2 @@ +32768 +32768 diff --git a/libraries/ghc-compact/tests/compact_simple_array.hs b/libraries/ghc-compact/tests/compact_simple_array.hs new file mode 100644 index 0000000000..b897e610f4 --- /dev/null +++ b/libraries/ghc-compact/tests/compact_simple_array.hs @@ -0,0 +1,56 @@ +module Main where + +import Control.Exception +import Control.Monad +import System.Mem + +import Control.Monad.ST +import Data.Array +import Data.Array.ST +import qualified Data.Array.Unboxed as U + +import GHC.Compact + +assertFail :: String -> IO () +assertFail msg = throwIO $ AssertionFailed msg + +assertEquals :: (Eq a, Show a) => a -> a -> IO () +assertEquals expected actual = + if expected == actual then return () + else assertFail $ "expected " ++ (show expected) + ++ ", got " ++ (show actual) + +arrTest :: (Monad m, MArray a e m, Num e) => m (a Int e) +arrTest = do + arr <- newArray (1, 10) 0 + forM_ [1..10] $ \j -> do + writeArray arr j (fromIntegral $ 2*j + 1) + return arr + +-- test :: (Word -> a -> IO (Maybe (Compact a))) -> IO () +test func = do + let fromList :: Array Int Int + fromList = listArray (1, 10) [1..] + frozen :: Array Int Int + frozen = runST $ do + arr <- arrTest :: ST s (STArray s Int Int) + freeze arr + stFrozen :: Array Int Int + stFrozen = runSTArray arrTest + unboxedFrozen :: U.UArray Int Int + unboxedFrozen = runSTUArray arrTest + + let val = (fromList, frozen, stFrozen, unboxedFrozen) + str <- func val + + -- check that val is still good + assertEquals (fromList, frozen, stFrozen, unboxedFrozen) val + -- check the value in the compact + assertEquals val (getCompact str) + performMajorGC + -- check again the value in the compact + assertEquals val (getCompact str) + +main = do + test (compactSized 4096 True) + test (compactSized 4096 False) diff --git a/libraries/ghc-compact/tests/compact_threads.hs b/libraries/ghc-compact/tests/compact_threads.hs new file mode 100644 index 0000000000..162612d034 --- /dev/null +++ b/libraries/ghc-compact/tests/compact_threads.hs @@ -0,0 +1,20 @@ +import Control.Concurrent +import Control.Monad +import GHC.Compact +import qualified Data.Map as Map +import Data.Maybe +import System.Environment + +main = do + [n] <- map read <$> getArgs + c <- compact () + as <- forM [1..(n::Int)] $ \i -> async (compactAdd c (Just i)) + bs <- forM as $ \a -> async (getCompact <$> takeMVar a) + xs <- mapM takeMVar bs + print (sum (catMaybes xs)) + +async :: IO a -> IO (MVar a) +async io = do + m <- newEmptyMVar + forkIO (io >>= putMVar m) + return m diff --git a/libraries/ghc-compact/tests/compact_threads.stdout b/libraries/ghc-compact/tests/compact_threads.stdout new file mode 100644 index 0000000000..837e12b406 --- /dev/null +++ b/libraries/ghc-compact/tests/compact_threads.stdout @@ -0,0 +1 @@ +500500 |