summaryrefslogtreecommitdiff
path: root/compiler/utils/FastString.hs
diff options
context:
space:
mode:
authorMatthew Pickering <matthewtpickering@gmail.com>2019-08-17 18:35:09 +0100
committerMatthew Pickering <matthewtpickering@gmail.com>2019-08-17 18:35:09 +0100
commit5cff43d301b0773e2deeedb0a397da5e47cb89e1 (patch)
tree074d42d6a9cd8e82703b298d7f55b1078416dcd6 /compiler/utils/FastString.hs
parent47e162374051ed3e874ed7916cc811df288cbd95 (diff)
downloadhaskell-wip/faststring-allocs.tar.gz
FastString: Reimplement in terms of pointer equalitywip/faststring-allocs
GHC allocates a lot of FastStrings when compiling so reducing the overhead of a FastString will result in a large decrease in allocations. Therefore we remove all the unecessary fields from the FastString. 1. Unique: Computed from the pointer 2. Equality: Computed directly by comparing the pointer 3. Length: Only used in 3 places, so the overhead is not worth it. 4. HasZEncoding: Caching the zencoding for a string should be localised to code generation rather than any consumer having to pay a large allocation penalty. Comparing pointer equality is safe as the string table guarantees that each string is only allocated once. As the memory is pinned, we don't have to worry about GC moving around the locations of the strings. Thanks to Jamie (Hanue) Willis for suggesting the idea of using pointer equality.
Diffstat (limited to 'compiler/utils/FastString.hs')
-rw-r--r--compiler/utils/FastString.hs107
1 files changed, 41 insertions, 66 deletions
diff --git a/compiler/utils/FastString.hs b/compiler/utils/FastString.hs
index 0db61ec93f..0023b2ee53 100644
--- a/compiler/utils/FastString.hs
+++ b/compiler/utils/FastString.hs
@@ -1,7 +1,7 @@
-- (c) The University of Glasgow, 1997-2006
{-# LANGUAGE BangPatterns, CPP, MagicHash, UnboxedTuples,
- GeneralizedNewtypeDeriving #-}
+ GeneralizedNewtypeDeriving, RecordWildCards, BangPatterns #-}
{-# OPTIONS_GHC -O2 -funbox-strict-fields #-}
-- We always optimise this, otherwise performance of a non-optimised
-- compiler is severely affected
@@ -62,7 +62,6 @@ module FastString
zEncodeFS,
-- ** Operations
- uniqueOfFS,
lengthFS,
nullFS,
appendFS,
@@ -78,7 +77,7 @@ module FastString
-- ** Internal
getFastStringTable,
- hasZEncoding,
+ uniqueOfFS,
-- * PtrStrings
PtrString (..),
@@ -117,7 +116,6 @@ import GHC.Exts
import System.IO
import Data.Data
import Data.IORef
-import Data.Maybe ( isJust )
import Data.Char
import Data.Semigroup as Semi
@@ -130,6 +128,7 @@ import GHC.Conc.Sync (sharedCAF)
#endif
import GHC.Base ( unpackCString#, unpackNBytes# )
+import GHC.ForeignPtr
-- | Gives the UTF-8 encoded bytes corresponding to a 'FastString'
@@ -148,7 +147,7 @@ unsafeMkByteString :: String -> ByteString
unsafeMkByteString = BSC.pack
hashFastString :: FastString -> Int
-hashFastString (FastString _ _ bs _)
+hashFastString (FastString bs)
= inlinePerformIO $ BS.unsafeUseAsCStringLen bs $ \(ptr, len) ->
return $ hashStr (castPtr ptr) len
@@ -181,15 +180,18 @@ Z-encoding used by the compiler internally.
'FastString's support a memoized conversion to the Z-encoding via zEncodeFS.
-}
-data FastString = FastString {
- uniq :: {-# UNPACK #-} !Int, -- unique id
- n_chars :: {-# UNPACK #-} !Int, -- number of chars
- fs_bs :: {-# UNPACK #-} !ByteString,
- fs_ref :: {-# UNPACK #-} !(IORef (Maybe FastZString))
+newtype FastString = FastString {
+ fs_bs :: ByteString
}
+
+-- It is sufficient to test pointer equality as we guarantee that
+-- each string is uniquely allocated.
instance Eq FastString where
- f1 == f2 = uniq f1 == uniq f2
+ f1 == f2 =
+ case (fs_bs f1, fs_bs f2) of
+ ((BS.PS fp i _len), (BS.PS fp' i' _len1)) -> fp == fp' && i == i'
+ {-# NOINLINE (==) #-}
instance Ord FastString where
-- Compares lexicographically, not by unique
@@ -224,7 +226,7 @@ instance Data FastString where
dataTypeOf _ = mkNoRepType "FastString"
cmpFS :: FastString -> FastString -> Ordering
-cmpFS f1@(FastString u1 _ _ _) f2@(FastString u2 _ _ _) =
+cmpFS f1@(FastString u1) f2@(FastString u2) =
if u1 == u2 then EQ else
compare (bytesFS f1) (bytesFS f2)
@@ -396,7 +398,7 @@ The procedure goes like this:
* Otherwise, insert and return the string we created.
-}
-mkFastStringWith :: (Int -> IO FastString) -> Ptr Word8 -> Int -> IO FastString
+mkFastStringWith :: IO FastString -> Ptr Word8 -> Int -> IO FastString
mkFastStringWith mk_fs !ptr !len = do
FastStringTableSegment lock _ buckets# <- readIORef segmentRef
let idx# = hashToIndex# buckets# hash#
@@ -408,16 +410,14 @@ mkFastStringWith mk_fs !ptr !len = do
-- The withMVar below is not dupable. It can lead to deadlock if it is
-- only run partially and putMVar is not called after takeMVar.
noDuplicate
- n <- get_uid
- new_fs <- mk_fs n
+ !new_fs <- mk_fs
withMVar lock $ \_ -> insert new_fs
where
- !(FastStringTable uid segments#) = stringTable
- get_uid = atomicModifyIORef' uid $ \n -> (n+1,n)
+ !(FastStringTable _uid segments#) = stringTable
!(I# hash#) = hashStr ptr len
(# segmentRef #) = indexArray# segments# (hashToSegment# hash#)
- insert fs = do
+ insert !fs = do
FastStringTableSegment _ counter buckets# <- maybeResizeSegment segmentRef
let idx# = hashToIndex# buckets# hash#
bucket <- IO $ readArray# buckets# idx#
@@ -435,7 +435,7 @@ mkFastStringWith mk_fs !ptr !len = do
bucket_match :: [FastString] -> Int -> Ptr Word8 -> IO (Maybe FastString)
bucket_match [] _ _ = return Nothing
-bucket_match (v@(FastString _ _ bs _):ls) len ptr
+bucket_match (v@(FastString bs):ls) len ptr
| len == BS.length bs = do
b <- BS.unsafeUseAsCString bs $ \buf ->
cmpStringPrefix ptr (castPtr buf) len
@@ -456,7 +456,7 @@ mkFastStringBytes !ptr !len =
-- the bytes if the string is new to the table.
mkFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
mkFastStringForeignPtr ptr !fp len
- = mkFastStringWith (mkNewFastString fp ptr len) ptr len
+ = mkFastStringWith (mkNewFastString fp len) ptr len
-- | Create a 'FastString' from an existing 'ForeignPtr'; the difference
-- between this and 'mkFastStringBytes' is that we don't have to copy
@@ -466,7 +466,7 @@ mkFastStringByteString bs =
inlinePerformIO $
BS.unsafeUseAsCStringLen bs $ \(ptr, len) -> do
let ptr' = castPtr ptr
- mkFastStringWith (mkNewFastStringByteString bs ptr' len) ptr' len
+ mkFastStringWith (mkNewFastStringByteString bs) ptr' len
-- | Creates a UTF-8 encoded 'FastString' from a 'String'
mkFastString :: String -> FastString
@@ -486,26 +486,20 @@ mkFastStringByteList str = mkFastStringByteString (BS.pack str)
mkZFastString :: String -> FastZString
mkZFastString = mkFastZStringString
-mkNewFastString :: ForeignPtr Word8 -> Ptr Word8 -> Int -> Int
+mkNewFastString :: ForeignPtr Word8 -> Int
-> IO FastString
-mkNewFastString fp ptr len uid = do
- ref <- newIORef Nothing
- n_chars <- countUTF8Chars ptr len
- return (FastString uid n_chars (BS.fromForeignPtr fp 0 len) ref)
+mkNewFastString fp len = do
+ return (FastString (BS.fromForeignPtr fp 0 len))
-mkNewFastStringByteString :: ByteString -> Ptr Word8 -> Int -> Int
+mkNewFastStringByteString :: ByteString
-> IO FastString
-mkNewFastStringByteString bs ptr len uid = do
- ref <- newIORef Nothing
- n_chars <- countUTF8Chars ptr len
- return (FastString uid n_chars bs ref)
+mkNewFastStringByteString bs = do
+ return (FastString bs)
-copyNewFastString :: Ptr Word8 -> Int -> Int -> IO FastString
-copyNewFastString ptr len uid = do
+copyNewFastString :: Ptr Word8 -> Int -> IO FastString
+copyNewFastString ptr len = do
fp <- copyBytesToForeignPtr ptr len
- ref <- newIORef Nothing
- n_chars <- countUTF8Chars ptr len
- return (FastString uid n_chars (BS.fromForeignPtr fp 0 len) ref)
+ return (FastString (BS.fromForeignPtr fp 0 len))
copyBytesToForeignPtr :: Ptr Word8 -> Int -> IO (ForeignPtr Word8)
copyBytesToForeignPtr ptr len = do
@@ -534,15 +528,7 @@ hashStr (Ptr a#) (I# len#) = loop 0# 0#
-- | Returns the length of the 'FastString' in characters
lengthFS :: FastString -> Int
-lengthFS f = n_chars f
-
--- | Returns @True@ if this 'FastString' is not Z-encoded but already has
--- a Z-encoding cached (used in producing stats).
-hasZEncoding :: FastString -> Bool
-hasZEncoding (FastString _ _ _ ref) =
- inlinePerformIO $ do
- m <- readIORef ref
- return (isJust m)
+lengthFS (FastString fs) = BS.length fs
-- | Returns @True@ if the 'FastString' is empty
nullFS :: FastString -> Bool
@@ -550,24 +536,10 @@ nullFS f = BS.null (fs_bs f)
-- | Unpacks and decodes the FastString
unpackFS :: FastString -> String
-unpackFS (FastString _ _ bs _) = utf8DecodeByteString bs
+unpackFS (FastString bs) = utf8DecodeByteString bs
--- | Returns a Z-encoded version of a 'FastString'. This might be the
--- original, if it was already Z-encoded. The first time this
--- function is applied to a particular 'FastString', the results are
--- memoized.
---
zEncodeFS :: FastString -> FastZString
-zEncodeFS fs@(FastString _ _ _ ref) =
- inlinePerformIO $ do
- m <- readIORef ref
- case m of
- Just zfs -> return zfs
- Nothing -> do
- atomicModifyIORef' ref $ \m' -> case m' of
- Nothing -> let zfs = mkZFastString (zEncodeString (unpackFS fs))
- in (Just zfs, zfs)
- Just zfs -> (m', zfs)
+zEncodeFS fs = mkZFastString (zEncodeString (unpackFS fs))
appendFS :: FastString -> FastString -> FastString
appendFS fs1 fs2 = mkFastStringByteString
@@ -577,14 +549,13 @@ concatFS :: [FastString] -> FastString
concatFS = mkFastStringByteString . BS.concat . map fs_bs
headFS :: FastString -> Char
-headFS (FastString _ 0 _ _) = panic "headFS: Empty FastString"
-headFS (FastString _ _ bs _) =
+headFS (FastString bs) =
inlinePerformIO $ BS.unsafeUseAsCString bs $ \ptr ->
return (fst (utf8DecodeChar (castPtr ptr)))
tailFS :: FastString -> FastString
-tailFS (FastString _ 0 _ _) = panic "tailFS: Empty FastString"
-tailFS (FastString _ _ bs _) =
+--tailFS (FastString _ 0 _ _) = panic "tailFS: Empty FastString"
+tailFS (FastString bs) =
inlinePerformIO $ BS.unsafeUseAsCString bs $ \ptr ->
do let (_, n) = utf8DecodeChar (castPtr ptr)
return $! mkFastStringByteString (BS.drop n bs)
@@ -592,8 +563,12 @@ tailFS (FastString _ _ bs _) =
consFS :: Char -> FastString -> FastString
consFS c fs = mkFastString (c : unpackFS fs)
+-- MP: I think this is safe because the bytestring will take up
+-- the start position + (length * words) space, so adding the offset will
+-- still be a memory position within the bytestring
uniqueOfFS :: FastString -> Int
-uniqueOfFS (FastString u _ _ _) = u
+uniqueOfFS (FastString (BS.PS (ForeignPtr a _) o _)) =
+ (I# (addr2Int# a)) + o
nilFS :: FastString
nilFS = mkFastString ""