summaryrefslogtreecommitdiff
path: root/libraries/compact/Data/Compact.hs
diff options
context:
space:
mode:
authorSimon Marlow <marlowsd@gmail.com>2016-07-29 14:11:03 +0100
committerSimon Marlow <marlowsd@gmail.com>2016-12-07 10:59:35 +0000
commit7036fde9df61b6eae9719c7f6c656778c756bec9 (patch)
treea9d8eeaaf0d611dc7f29f2d5734b5be8218f32fc /libraries/compact/Data/Compact.hs
parent4dd6b37fd540ad0243057f4aa29a93590d98de88 (diff)
downloadhaskell-7036fde9df61b6eae9719c7f6c656778c756bec9.tar.gz
Overhaul of Compact Regions (#12455)
Summary: This commit makes various improvements and addresses some issues with Compact Regions (aka Compact Normal Forms). This was the most important thing I wanted to fix. Compaction previously prevented GC from running until it was complete, which would be a problem in a multicore setting. Now, we compact using a hand-written Cmm routine that can be interrupted at any point. When a GC is triggered during a sharing-enabled compaction, the GC has to traverse and update the hash table, so this hash table is now stored in the StgCompactNFData object. Previously, compaction consisted of a deepseq using the NFData class, followed by a traversal in C code to copy the data. This is now done in a single pass with hand-written Cmm (see rts/Compact.cmm). We no longer use the NFData instances, instead the Cmm routine evaluates components directly as it compacts. The new compaction is about 50% faster than the old one with no sharing, and a little faster on average with sharing (the cost of the hash table dominates when we're doing sharing). Static objects that don't (transitively) refer to any CAFs don't need to be copied into the compact region. In particular this means we often avoid copying Char values and small Int values, because these are static closures in the runtime. Each Compact# object can support a single compactAdd# operation at any given time, so the Data.Compact library now enforces mutual exclusion using an MVar stored in the Compact object. We now get exceptions rather than killing everything with a barf() when we encounter an object that cannot be compacted (a function, or a mutable object). We now also detect pinned objects, which can't be compacted either. The Data.Compact API has been refactored and cleaned up. A new compactSize operation returns the size (in bytes) of the compact object. Most of the documentation is in the Haddock docs for the compact library, which I've expanded and improved here. Various comments in the code have been improved, especially the main Note [Compact Normal Forms] in rts/sm/CNF.c. I've added a few tests, and expanded a few of the tests that were there. We now also run the tests with GHCi, and in a new test way that enables sanity checking (+RTS -DS). There's a benchmark in libraries/compact/tests/compact_bench.hs for measuring compaction speed and comparing sharing vs. no sharing. The field totalDataW in StgCompactNFData was unnecessary. Test Plan: * new unit tests * validate * tested manually that we can compact Data.Aeson data Reviewers: gcampax, bgamari, ezyang, austin, niteria, hvr, erikd Subscribers: thomie, simonpj Differential Revision: https://phabricator.haskell.org/D2751 GHC Trac Issues: #12455
Diffstat (limited to 'libraries/compact/Data/Compact.hs')
-rw-r--r--libraries/compact/Data/Compact.hs151
1 files changed, 91 insertions, 60 deletions
diff --git a/libraries/compact/Data/Compact.hs b/libraries/compact/Data/Compact.hs
index 7cedd1c27a..85d1b623b4 100644
--- a/libraries/compact/Data/Compact.hs
+++ b/libraries/compact/Data/Compact.hs
@@ -2,6 +2,7 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE UnboxedTuples #-}
+{-# OPTIONS_GHC -Wno-redundant-constraints -Wno-name-shadowing #-}
-----------------------------------------------------------------------------
-- |
@@ -18,72 +19,102 @@
-- holding fully evaluated data in a consecutive block of memory.
--
-- /Since: 1.0.0/
+
module Data.Compact (
+ -- * The Compact type
Compact,
+
+ -- * Compacting data
+ compact,
+ compactWithSharing,
+ compactAdd,
+ compactAddWithSharing,
+
+ -- * Inspecting a Compact
getCompact,
inCompact,
isCompact,
+ compactSize,
- newCompact,
- newCompactNoShare,
- appendCompact,
- appendCompactNoShare,
+ -- * Other utilities
+ compactResize,
) where
--- Write down all GHC.Prim deps explicitly to keep them at minimum
-import GHC.Prim (Compact#,
- compactNew#,
- State#,
- RealWorld,
- Int#,
- )
--- We need to import Word from GHC.Types to see the representation
--- and to able to access the Word# to pass down the primops
-import GHC.Types (IO(..), Word(..))
-
-import Control.DeepSeq (NFData, force)
-
-import Data.Compact.Internal(Compact(..),
- isCompact,
- inCompact,
- compactAppendEvaledInternal)
-
--- |Retrieve the object that was stored in a Compact
+import Control.Concurrent
+import Control.DeepSeq (NFData)
+import GHC.Prim
+import GHC.Types
+
+import Data.Compact.Internal as Internal
+
+-- | Retrieve the object that was stored in a 'Compact'
getCompact :: Compact a -> a
-getCompact (Compact _ obj) = obj
-
-compactAppendInternal :: NFData a => Compact# -> a -> Int# ->
- State# RealWorld -> (# State# RealWorld, Compact a #)
-compactAppendInternal buffer root share s =
- case force root of
- !eval -> compactAppendEvaledInternal buffer eval share s
-
-compactAppendInternalIO :: NFData a => Int# -> Compact b -> a -> IO (Compact a)
-compactAppendInternalIO share (Compact buffer _) root =
- IO (\s -> compactAppendInternal buffer root share s)
-
--- |Append a value to a 'Compact', and return a new 'Compact'
--- that shares the same buffer but a different root object.
-appendCompact :: NFData a => Compact b -> a -> IO (Compact a)
-appendCompact = compactAppendInternalIO 1#
-
--- |Append a value to a 'Compact'. This function differs from
--- 'appendCompact' in that it will not preserve internal sharing
--- in the passed in value (and it will diverge on cyclic structures).
-appendCompactNoShare :: NFData a => Compact b -> a -> IO (Compact a)
-appendCompactNoShare = compactAppendInternalIO 0#
-
-compactNewInternal :: NFData a => Int# -> Word -> a -> IO (Compact a)
-compactNewInternal share (W# size) root =
- IO (\s -> case compactNew# size s of
- (# s', buffer #) -> compactAppendInternal buffer root share s' )
-
--- |Create a new 'Compact', with the provided value as suggested block
--- size (which will be adjusted if unsuitable), and append the given
--- value to it, as if calling 'appendCompact'
-newCompact :: NFData a => Word -> a -> IO (Compact a)
-newCompact = compactNewInternal 1#
-
--- |Create a new 'Compact', but append the value using 'appendCompactNoShare'
-newCompactNoShare :: NFData a => Word -> a -> IO (Compact a)
-newCompactNoShare = compactNewInternal 0#
+getCompact (Compact _ obj _) = obj
+
+-- | Compact a value. /O(size of unshared data)/
+--
+-- If the structure contains any internal sharing, the shared data
+-- will be duplicated during the compaction process. Loops if the
+-- structure constains cycles.
+--
+-- The NFData constraint is just to ensure that the object contains no
+-- functions, 'compact' does not actually use it. If your object
+-- contains any functions, then 'compact' will fail. (and your
+-- 'NFData' instance is lying).
+--
+compact :: NFData a => a -> IO (Compact a)
+compact = Internal.compactSized 31268 False
+
+-- | Compact a value, retaining any internal sharing and
+-- cycles. /O(size of data)/
+--
+-- This is typically about 10x slower than 'compact', because it works
+-- by maintaining a hash table mapping uncompacted objects to
+-- compacted objects.
+--
+-- The 'NFData' constraint is just to ensure that the object contains no
+-- functions, `compact` does not actually use it. If your object
+-- contains any functions, then 'compactWithSharing' will fail. (and
+-- your 'NFData' instance is lying).
+--
+compactWithSharing :: NFData a => a -> IO (Compact a)
+compactWithSharing = Internal.compactSized 31268 True
+
+-- | Add a value to an existing 'Compact'. Behaves exactly like
+-- 'compact' with respect to sharing and the 'NFData' constraint.
+compactAdd :: NFData a => Compact b -> a -> IO (Compact a)
+compactAdd (Compact compact# _ lock) a = withMVar lock $ \_ -> IO $ \s ->
+ case compactAdd# compact# a s of { (# s1, pk #) ->
+ (# s1, Compact compact# pk lock #) }
+
+-- | Add a value to an existing 'Compact'. Behaves exactly like
+-- 'compactWithSharing' with respect to sharing and the 'NFData'
+-- constraint.
+compactAddWithSharing :: NFData a => Compact b -> a -> IO (Compact a)
+compactAddWithSharing (Compact compact# _ lock) a =
+ withMVar lock $ \_ -> IO $ \s ->
+ case compactAddWithSharing# compact# a s of { (# s1, pk #) ->
+ (# s1, Compact compact# pk lock #) }
+
+
+-- | Check if the second argument is inside the 'Compact'
+inCompact :: Compact b -> a -> IO Bool
+inCompact (Compact buffer _ _) !val =
+ IO (\s -> case compactContains# buffer val s of
+ (# s', v #) -> (# s', isTrue# v #) )
+
+-- | Check if the argument is in any 'Compact'
+isCompact :: a -> IO Bool
+isCompact !val =
+ IO (\s -> case compactContainsAny# val s of
+ (# s', v #) -> (# s', isTrue# v #) )
+
+compactSize :: Compact a -> IO Word
+compactSize (Compact buffer _ lock) = withMVar lock $ \_ -> IO $ \s0 ->
+ case compactSize# buffer s0 of (# s1, sz #) -> (# s1, W# sz #)
+
+compactResize :: Compact a -> Word -> IO ()
+compactResize (Compact oldBuffer _ lock) (W# new_size) =
+ withMVar lock $ \_ -> IO $ \s ->
+ case compactResize# oldBuffer new_size s of
+ s' -> (# s', () #)