diff options
author | Patrick Palka <patrick@parcs.ath.cx> | 2013-08-21 16:03:37 -0400 |
---|---|---|
committer | Patrick Palka <patrick@parcs.ath.cx> | 2013-08-26 22:21:16 -0400 |
commit | d295a9446975e1ed789ac98e17dd9d2435cd8210 (patch) | |
tree | 598a89b8b3bb0cd755ee08f5bb4fb8caadbfdce6 /compiler/utils/FastString.lhs | |
parent | b0a20f26a777566a6f87f2e597682004513ef186 (diff) | |
download | haskell-d295a9446975e1ed789ac98e17dd9d2435cd8210.tar.gz |
FastString: make the string table thread-safe
While we're at it, consolidate duplicate code into a helper function and
strictify a few arguments.
Diffstat (limited to 'compiler/utils/FastString.lhs')
-rw-r--r-- | compiler/utils/FastString.lhs | 188 |
1 files changed, 102 insertions, 86 deletions
diff --git a/compiler/utils/FastString.lhs b/compiler/utils/FastString.lhs index 25f98021f4..4e4a46800d 100644 --- a/compiler/utils/FastString.lhs +++ b/compiler/utils/FastString.lhs @@ -102,6 +102,7 @@ import FastFunctions import Panic import Util +import Control.Monad import Data.ByteString (ByteString) import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as BSC @@ -112,11 +113,12 @@ import GHC.Exts import System.IO import System.IO.Unsafe ( unsafePerformIO ) import Data.Data -import Data.IORef ( IORef, newIORef, readIORef, writeIORef ) +import Data.IORef ( IORef, newIORef, readIORef, atomicModifyIORef ) import Data.Maybe ( isJust ) import Data.Char +import Data.List ( elemIndex ) -import GHC.IO ( IO(..) ) +import GHC.IO ( IO(..), unsafeDupablePerformIO ) import Foreign.Safe @@ -218,30 +220,37 @@ foreign import ccall unsafe "ghc_memcmp" -- Construction {- -Internally, the compiler will maintain a fast string symbol -table, providing sharing and fast comparison. Creation of -new @FastString@s then covertly does a lookup, re-using the -@FastString@ if there was a hit. --} +Internally, the compiler will maintain a fast string symbol table, providing +sharing and fast comparison. Creation of new @FastString@s then covertly does a +lookup, re-using the @FastString@ if there was a hit. + +The design of the FastString hash table allows for lockless concurrent reads +and updates to multiple buckets with low synchronization overhead. +See Note [Updating the FastString table] on how it's updated. +-} data FastStringTable = FastStringTable - {-# UNPACK #-} !Int - (MutableArray# RealWorld [FastString]) + {-# UNPACK #-} !(IORef Int) -- the unique ID counter shared with all buckets + (MutableArray# RealWorld (IORef [FastString])) -- the array of mutable buckets -string_table :: IORef FastStringTable +string_table :: FastStringTable {-# NOINLINE string_table #-} string_table = unsafePerformIO $ do - tab <- IO $ \s1# -> case newArray# hASH_TBL_SIZE_UNBOXED [] s1# of + uid <- newIORef 0 + tab <- IO $ \s1# -> case newArray# hASH_TBL_SIZE_UNBOXED (panic "string_table") s1# of (# s2#, arr# #) -> - (# s2#, FastStringTable 0 arr# #) - ref <- newIORef tab + (# s2#, FastStringTable uid arr# #) + forM_ [0..hASH_TBL_SIZE-1] $ \i -> do + bucket <- newIORef [] + updTbl tab i bucket + -- use the support wired into the RTS to share this CAF among all images of -- libHSghc #if STAGE < 2 - return ref + return tab #else - sharedCAF ref getOrSetLibHSghcFastStringTable + sharedCAF tab getOrSetLibHSghcFastStringTable -- from the RTS; thus we cannot use this mechanism when STAGE<2; the previous -- RTS might not have this symbol @@ -287,87 +296,92 @@ lower-level `sharedCAF` mechanism that relies on Globals.c. -} -lookupTbl :: FastStringTable -> Int -> IO [FastString] +lookupTbl :: FastStringTable -> Int -> IO (IORef [FastString]) lookupTbl (FastStringTable _ arr#) (I# i#) = IO $ \ s# -> readArray# arr# i# s# -updTbl :: IORef FastStringTable -> FastStringTable -> Int -> [FastString] -> IO () -updTbl fs_table_var (FastStringTable uid arr#) (I# i#) ls = do +updTbl :: FastStringTable -> Int -> IORef [FastString] -> IO () +updTbl (FastStringTable _uid arr#) (I# i#) ls = do (IO $ \ s# -> case writeArray# arr# i# ls s# of { s2# -> (# s2#, () #) }) - writeIORef fs_table_var (FastStringTable (uid+1) arr#) mkFastString# :: Addr# -> FastString mkFastString# a# = mkFastStringBytes ptr (ptrStrLength ptr) where ptr = Ptr a# +{- Note [Updating the FastString table] + +The procedure goes like this: + +1. Read the relevant bucket and perform a look up of the string. +2. If it exists, return it. +3. Otherwise grab a unique ID, create a new FastString and atomically attempt + to update the relevant bucket with this FastString: + + * Double check that the string is not in the bucket. Another thread may have + inserted it while we were creating our string. + * Return the existing FastString if it exists. The one we preemptively + created will get GCed. + * Otherwise, insert and return the string we created. +-} + +{- Note [Double-checking the bucket] + +It is not necessary to check the entire bucket the second time. We only have to +check the strings that are new to the bucket since the last time we read it. +-} + +mkFastStringWith :: (Int -> IO FastString) -> Ptr Word8 -> Int -> IO FastString +mkFastStringWith mk_fs !ptr !len = do + let hash = hashStr ptr len + bucket <- lookupTbl string_table hash + ls1 <- readIORef bucket + res <- bucket_match ls1 len ptr + case res of + Just v -> return v + Nothing -> do + n <- get_uid + new_fs <- mk_fs n + + atomicModifyIORef bucket $ \ls2 -> + -- Note [Double-checking the bucket] + let delta_ls = case ls1 of + [] -> ls2 + l:_ -> case l `elemIndex` ls2 of + Nothing -> panic "mkFastStringWith" + Just idx -> take idx ls2 + + -- NB: Might as well use inlinePerformIO, since the call to + -- bucket_match doesn't perform any IO that could be floated + -- out of this closure or erroneously duplicated. + in case inlinePerformIO (bucket_match delta_ls len ptr) of + Nothing -> (new_fs:ls2, new_fs) + Just fs -> (ls2,fs) + where + !(FastStringTable uid _arr) = string_table + + get_uid = atomicModifyIORef uid $ \n -> (n+1,n) + mkFastStringBytes :: Ptr Word8 -> Int -> FastString -mkFastStringBytes ptr len = unsafePerformIO $ do - ft@(FastStringTable uid _) <- readIORef string_table - let - h = hashStr ptr len - add_it ls = do - fs <- copyNewFastString uid ptr len - updTbl string_table ft h (fs:ls) - {- _trace ("new: " ++ show f_str) $ -} - return fs - -- - lookup_result <- lookupTbl ft h - case lookup_result of - [] -> add_it [] - ls -> do - b <- bucket_match ls len ptr - case b of - Nothing -> add_it ls - Just v -> {- _trace ("re-use: "++show v) $ -} return v +mkFastStringBytes !ptr !len = + -- NB: Might as well use unsafeDupablePerformIO, since mkFastStringWith is + -- idempotent. + unsafeDupablePerformIO $ + mkFastStringWith (copyNewFastString ptr len) ptr len -- | Create a 'FastString' from an existing 'ForeignPtr'; the difference -- between this and 'mkFastStringBytes' is that we don't have to copy -- the bytes if the string is new to the table. mkFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString -mkFastStringForeignPtr ptr fp len = do - ft@(FastStringTable uid _) <- readIORef string_table --- _trace ("hashed: "++show (I# h)) $ - let - h = hashStr ptr len - add_it ls = do - fs <- mkNewFastString uid ptr fp len - updTbl string_table ft h (fs:ls) - {- _trace ("new: " ++ show f_str) $ -} - return fs - -- - lookup_result <- lookupTbl ft h - case lookup_result of - [] -> add_it [] - ls -> do - b <- bucket_match ls len ptr - case b of - Nothing -> add_it ls - Just v -> {- _trace ("re-use: "++show v) $ -} return v +mkFastStringForeignPtr ptr !fp len + = mkFastStringWith (mkNewFastString fp ptr len) ptr len -- | Create a 'FastString' from an existing 'ForeignPtr'; the difference -- between this and 'mkFastStringBytes' is that we don't have to copy -- the bytes if the string is new to the table. mkFastStringByteString :: ByteString -> IO FastString mkFastStringByteString bs = BS.unsafeUseAsCStringLen bs $ \(ptr, len) -> do - ft@(FastStringTable uid _) <- readIORef string_table --- _trace ("hashed: "++show (I# h)) $ - let - ptr' = castPtr ptr - h = hashStr ptr' len - add_it ls = do - fs <- mkNewFastStringByteString uid ptr' len bs - updTbl string_table ft h (fs:ls) - {- _trace ("new: " ++ show f_str) $ -} - return fs - -- - lookup_result <- lookupTbl ft h - case lookup_result of - [] -> add_it [] - ls -> do - b <- bucket_match ls len ptr' - case b of - Nothing -> add_it ls - Just v -> {- _trace ("re-use: "++show v) $ -} return v + let ptr' = castPtr ptr + mkFastStringWith (mkNewFastStringByteString bs ptr' len) ptr' len -- | Creates a UTF-8 encoded 'FastString' from a 'String' mkFastString :: String -> FastString @@ -404,22 +418,22 @@ bucket_match (v@(FastString _ _ bs _):ls) len ptr | otherwise = bucket_match ls len ptr -mkNewFastString :: Int -> Ptr Word8 -> ForeignPtr Word8 -> Int +mkNewFastString :: ForeignPtr Word8 -> Ptr Word8 -> Int -> Int -> IO FastString -mkNewFastString uid ptr fp len = do +mkNewFastString fp ptr len uid = do ref <- newIORef Nothing n_chars <- countUTF8Chars ptr len return (FastString uid n_chars (BS.fromForeignPtr fp 0 len) ref) -mkNewFastStringByteString :: Int -> Ptr Word8 -> Int -> ByteString +mkNewFastStringByteString :: ByteString -> Ptr Word8 -> Int -> Int -> IO FastString -mkNewFastStringByteString uid ptr len bs = do +mkNewFastStringByteString bs ptr len uid = do ref <- newIORef Nothing n_chars <- countUTF8Chars ptr len return (FastString uid n_chars bs ref) -copyNewFastString :: Int -> Ptr Word8 -> Int -> IO FastString -copyNewFastString uid ptr len = do +copyNewFastString :: Ptr Word8 -> Int -> Int -> IO FastString +copyNewFastString ptr len uid = do fp <- copyBytesToForeignPtr ptr len ref <- newIORef Nothing n_chars <- countUTF8Chars ptr len @@ -488,9 +502,10 @@ zEncodeFS fs@(FastString _ _ _ ref) = case m of Just zfs -> return zfs Nothing -> do - let zfs = mkZFastString (zEncodeString (unpackFS fs)) - writeIORef ref (Just zfs) - return zfs + atomicModifyIORef ref $ \m' -> case m' of + Nothing -> let zfs = mkZFastString (zEncodeString (unpackFS fs)) + in (Just zfs, zfs) + Just zfs -> (m', zfs) appendFS :: FastString -> FastString -> FastString appendFS fs1 fs2 = inlinePerformIO @@ -529,8 +544,9 @@ nilFS = mkFastString "" getFastStringTable :: IO [[FastString]] getFastStringTable = do - tbl <- readIORef string_table - buckets <- mapM (lookupTbl tbl) [0 .. hASH_TBL_SIZE] + buckets <- forM [0..hASH_TBL_SIZE-1] $ \idx -> do + bucket <- lookupTbl string_table idx + readIORef bucket return buckets -- ----------------------------------------------------------------------------- |