summaryrefslogtreecommitdiff
path: root/compiler/utils/FastString.lhs
diff options
context:
space:
mode:
authorPatrick Palka <patrick@parcs.ath.cx>2013-08-21 16:03:37 -0400
committerPatrick Palka <patrick@parcs.ath.cx>2013-08-26 22:21:16 -0400
commitd295a9446975e1ed789ac98e17dd9d2435cd8210 (patch)
tree598a89b8b3bb0cd755ee08f5bb4fb8caadbfdce6 /compiler/utils/FastString.lhs
parentb0a20f26a777566a6f87f2e597682004513ef186 (diff)
downloadhaskell-d295a9446975e1ed789ac98e17dd9d2435cd8210.tar.gz
FastString: make the string table thread-safe
While we're at it, consolidate duplicate code into a helper function and strictify a few arguments.
Diffstat (limited to 'compiler/utils/FastString.lhs')
-rw-r--r--compiler/utils/FastString.lhs188
1 files changed, 102 insertions, 86 deletions
diff --git a/compiler/utils/FastString.lhs b/compiler/utils/FastString.lhs
index 25f98021f4..4e4a46800d 100644
--- a/compiler/utils/FastString.lhs
+++ b/compiler/utils/FastString.lhs
@@ -102,6 +102,7 @@ import FastFunctions
import Panic
import Util
+import Control.Monad
import Data.ByteString (ByteString)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as BSC
@@ -112,11 +113,12 @@ import GHC.Exts
import System.IO
import System.IO.Unsafe ( unsafePerformIO )
import Data.Data
-import Data.IORef ( IORef, newIORef, readIORef, writeIORef )
+import Data.IORef ( IORef, newIORef, readIORef, atomicModifyIORef )
import Data.Maybe ( isJust )
import Data.Char
+import Data.List ( elemIndex )
-import GHC.IO ( IO(..) )
+import GHC.IO ( IO(..), unsafeDupablePerformIO )
import Foreign.Safe
@@ -218,30 +220,37 @@ foreign import ccall unsafe "ghc_memcmp"
-- Construction
{-
-Internally, the compiler will maintain a fast string symbol
-table, providing sharing and fast comparison. Creation of
-new @FastString@s then covertly does a lookup, re-using the
-@FastString@ if there was a hit.
--}
+Internally, the compiler will maintain a fast string symbol table, providing
+sharing and fast comparison. Creation of new @FastString@s then covertly does a
+lookup, re-using the @FastString@ if there was a hit.
+
+The design of the FastString hash table allows for lockless concurrent reads
+and updates to multiple buckets with low synchronization overhead.
+See Note [Updating the FastString table] on how it's updated.
+-}
data FastStringTable =
FastStringTable
- {-# UNPACK #-} !Int
- (MutableArray# RealWorld [FastString])
+ {-# UNPACK #-} !(IORef Int) -- the unique ID counter shared with all buckets
+ (MutableArray# RealWorld (IORef [FastString])) -- the array of mutable buckets
-string_table :: IORef FastStringTable
+string_table :: FastStringTable
{-# NOINLINE string_table #-}
string_table = unsafePerformIO $ do
- tab <- IO $ \s1# -> case newArray# hASH_TBL_SIZE_UNBOXED [] s1# of
+ uid <- newIORef 0
+ tab <- IO $ \s1# -> case newArray# hASH_TBL_SIZE_UNBOXED (panic "string_table") s1# of
(# s2#, arr# #) ->
- (# s2#, FastStringTable 0 arr# #)
- ref <- newIORef tab
+ (# s2#, FastStringTable uid arr# #)
+ forM_ [0..hASH_TBL_SIZE-1] $ \i -> do
+ bucket <- newIORef []
+ updTbl tab i bucket
+
-- use the support wired into the RTS to share this CAF among all images of
-- libHSghc
#if STAGE < 2
- return ref
+ return tab
#else
- sharedCAF ref getOrSetLibHSghcFastStringTable
+ sharedCAF tab getOrSetLibHSghcFastStringTable
-- from the RTS; thus we cannot use this mechanism when STAGE<2; the previous
-- RTS might not have this symbol
@@ -287,87 +296,92 @@ lower-level `sharedCAF` mechanism that relies on Globals.c.
-}
-lookupTbl :: FastStringTable -> Int -> IO [FastString]
+lookupTbl :: FastStringTable -> Int -> IO (IORef [FastString])
lookupTbl (FastStringTable _ arr#) (I# i#) =
IO $ \ s# -> readArray# arr# i# s#
-updTbl :: IORef FastStringTable -> FastStringTable -> Int -> [FastString] -> IO ()
-updTbl fs_table_var (FastStringTable uid arr#) (I# i#) ls = do
+updTbl :: FastStringTable -> Int -> IORef [FastString] -> IO ()
+updTbl (FastStringTable _uid arr#) (I# i#) ls = do
(IO $ \ s# -> case writeArray# arr# i# ls s# of { s2# -> (# s2#, () #) })
- writeIORef fs_table_var (FastStringTable (uid+1) arr#)
mkFastString# :: Addr# -> FastString
mkFastString# a# = mkFastStringBytes ptr (ptrStrLength ptr)
where ptr = Ptr a#
+{- Note [Updating the FastString table]
+
+The procedure goes like this:
+
+1. Read the relevant bucket and perform a look up of the string.
+2. If it exists, return it.
+3. Otherwise grab a unique ID, create a new FastString and atomically attempt
+ to update the relevant bucket with this FastString:
+
+ * Double check that the string is not in the bucket. Another thread may have
+ inserted it while we were creating our string.
+ * Return the existing FastString if it exists. The one we preemptively
+ created will get GCed.
+ * Otherwise, insert and return the string we created.
+-}
+
+{- Note [Double-checking the bucket]
+
+It is not necessary to check the entire bucket the second time. We only have to
+check the strings that are new to the bucket since the last time we read it.
+-}
+
+mkFastStringWith :: (Int -> IO FastString) -> Ptr Word8 -> Int -> IO FastString
+mkFastStringWith mk_fs !ptr !len = do
+ let hash = hashStr ptr len
+ bucket <- lookupTbl string_table hash
+ ls1 <- readIORef bucket
+ res <- bucket_match ls1 len ptr
+ case res of
+ Just v -> return v
+ Nothing -> do
+ n <- get_uid
+ new_fs <- mk_fs n
+
+ atomicModifyIORef bucket $ \ls2 ->
+ -- Note [Double-checking the bucket]
+ let delta_ls = case ls1 of
+ [] -> ls2
+ l:_ -> case l `elemIndex` ls2 of
+ Nothing -> panic "mkFastStringWith"
+ Just idx -> take idx ls2
+
+ -- NB: Might as well use inlinePerformIO, since the call to
+ -- bucket_match doesn't perform any IO that could be floated
+ -- out of this closure or erroneously duplicated.
+ in case inlinePerformIO (bucket_match delta_ls len ptr) of
+ Nothing -> (new_fs:ls2, new_fs)
+ Just fs -> (ls2,fs)
+ where
+ !(FastStringTable uid _arr) = string_table
+
+ get_uid = atomicModifyIORef uid $ \n -> (n+1,n)
+
mkFastStringBytes :: Ptr Word8 -> Int -> FastString
-mkFastStringBytes ptr len = unsafePerformIO $ do
- ft@(FastStringTable uid _) <- readIORef string_table
- let
- h = hashStr ptr len
- add_it ls = do
- fs <- copyNewFastString uid ptr len
- updTbl string_table ft h (fs:ls)
- {- _trace ("new: " ++ show f_str) $ -}
- return fs
- --
- lookup_result <- lookupTbl ft h
- case lookup_result of
- [] -> add_it []
- ls -> do
- b <- bucket_match ls len ptr
- case b of
- Nothing -> add_it ls
- Just v -> {- _trace ("re-use: "++show v) $ -} return v
+mkFastStringBytes !ptr !len =
+ -- NB: Might as well use unsafeDupablePerformIO, since mkFastStringWith is
+ -- idempotent.
+ unsafeDupablePerformIO $
+ mkFastStringWith (copyNewFastString ptr len) ptr len
-- | Create a 'FastString' from an existing 'ForeignPtr'; the difference
-- between this and 'mkFastStringBytes' is that we don't have to copy
-- the bytes if the string is new to the table.
mkFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
-mkFastStringForeignPtr ptr fp len = do
- ft@(FastStringTable uid _) <- readIORef string_table
--- _trace ("hashed: "++show (I# h)) $
- let
- h = hashStr ptr len
- add_it ls = do
- fs <- mkNewFastString uid ptr fp len
- updTbl string_table ft h (fs:ls)
- {- _trace ("new: " ++ show f_str) $ -}
- return fs
- --
- lookup_result <- lookupTbl ft h
- case lookup_result of
- [] -> add_it []
- ls -> do
- b <- bucket_match ls len ptr
- case b of
- Nothing -> add_it ls
- Just v -> {- _trace ("re-use: "++show v) $ -} return v
+mkFastStringForeignPtr ptr !fp len
+ = mkFastStringWith (mkNewFastString fp ptr len) ptr len
-- | Create a 'FastString' from an existing 'ForeignPtr'; the difference
-- between this and 'mkFastStringBytes' is that we don't have to copy
-- the bytes if the string is new to the table.
mkFastStringByteString :: ByteString -> IO FastString
mkFastStringByteString bs = BS.unsafeUseAsCStringLen bs $ \(ptr, len) -> do
- ft@(FastStringTable uid _) <- readIORef string_table
--- _trace ("hashed: "++show (I# h)) $
- let
- ptr' = castPtr ptr
- h = hashStr ptr' len
- add_it ls = do
- fs <- mkNewFastStringByteString uid ptr' len bs
- updTbl string_table ft h (fs:ls)
- {- _trace ("new: " ++ show f_str) $ -}
- return fs
- --
- lookup_result <- lookupTbl ft h
- case lookup_result of
- [] -> add_it []
- ls -> do
- b <- bucket_match ls len ptr'
- case b of
- Nothing -> add_it ls
- Just v -> {- _trace ("re-use: "++show v) $ -} return v
+ let ptr' = castPtr ptr
+ mkFastStringWith (mkNewFastStringByteString bs ptr' len) ptr' len
-- | Creates a UTF-8 encoded 'FastString' from a 'String'
mkFastString :: String -> FastString
@@ -404,22 +418,22 @@ bucket_match (v@(FastString _ _ bs _):ls) len ptr
| otherwise =
bucket_match ls len ptr
-mkNewFastString :: Int -> Ptr Word8 -> ForeignPtr Word8 -> Int
+mkNewFastString :: ForeignPtr Word8 -> Ptr Word8 -> Int -> Int
-> IO FastString
-mkNewFastString uid ptr fp len = do
+mkNewFastString fp ptr len uid = do
ref <- newIORef Nothing
n_chars <- countUTF8Chars ptr len
return (FastString uid n_chars (BS.fromForeignPtr fp 0 len) ref)
-mkNewFastStringByteString :: Int -> Ptr Word8 -> Int -> ByteString
+mkNewFastStringByteString :: ByteString -> Ptr Word8 -> Int -> Int
-> IO FastString
-mkNewFastStringByteString uid ptr len bs = do
+mkNewFastStringByteString bs ptr len uid = do
ref <- newIORef Nothing
n_chars <- countUTF8Chars ptr len
return (FastString uid n_chars bs ref)
-copyNewFastString :: Int -> Ptr Word8 -> Int -> IO FastString
-copyNewFastString uid ptr len = do
+copyNewFastString :: Ptr Word8 -> Int -> Int -> IO FastString
+copyNewFastString ptr len uid = do
fp <- copyBytesToForeignPtr ptr len
ref <- newIORef Nothing
n_chars <- countUTF8Chars ptr len
@@ -488,9 +502,10 @@ zEncodeFS fs@(FastString _ _ _ ref) =
case m of
Just zfs -> return zfs
Nothing -> do
- let zfs = mkZFastString (zEncodeString (unpackFS fs))
- writeIORef ref (Just zfs)
- return zfs
+ atomicModifyIORef ref $ \m' -> case m' of
+ Nothing -> let zfs = mkZFastString (zEncodeString (unpackFS fs))
+ in (Just zfs, zfs)
+ Just zfs -> (m', zfs)
appendFS :: FastString -> FastString -> FastString
appendFS fs1 fs2 = inlinePerformIO
@@ -529,8 +544,9 @@ nilFS = mkFastString ""
getFastStringTable :: IO [[FastString]]
getFastStringTable = do
- tbl <- readIORef string_table
- buckets <- mapM (lookupTbl tbl) [0 .. hASH_TBL_SIZE]
+ buckets <- forM [0..hASH_TBL_SIZE-1] $ \idx -> do
+ bucket <- lookupTbl string_table idx
+ readIORef bucket
return buckets
-- -----------------------------------------------------------------------------