FastString: Reimplement in terms of pointer equalitywip/faststring-allocs

GHC allocates a lot of FastStrings when compiling so reducing the overhead of a FastString will result in a large decrease in allocations. Therefore we remove all the unecessary fields from the FastString. 1. Unique: Computed from the pointer 2. Equality: Computed directly by comparing the pointer 3. Length: Only used in 3 places, so the overhead is not worth it. 4. HasZEncoding: Caching the zencoding for a string should be localised to code generation rather than any consumer having to pay a large allocation penalty. Comparing pointer equality is safe as the string table guarantees that each string is only allocated once. As the memory is pinned, we don't have to worry about GC moving around the locations of the strings. Thanks to Jamie (Hanue) Willis for suggesting the idea of using pointer equality.
author: Matthew Pickering <matthewtpickering@gmail.com> 2019-08-17 18:35:09 +0100
committer: Matthew Pickering <matthewtpickering@gmail.com> 2019-08-17 18:35:09 +0100
commit: 5cff43d301b0773e2deeedb0a397da5e47cb89e1 (patch)
tree: 074d42d6a9cd8e82703b298d7f55b1078416dcd6
parent: 47e162374051ed3e874ed7916cc811df288cbd95 (diff)
download: haskell-wip/faststring-allocs.tar.gz
2 files changed, 41 insertions, 70 deletions
diff --git a/compiler/utils/FastString.hs b/compiler/utils/FastString.hs
index 0db61ec93f..0023b2ee53 100644
--- a/compiler/utils/FastString.hs
+++ b/compiler/utils/FastString.hs
@@ -1,7 +1,7 @@
 -- (c) The University of Glasgow, 1997-2006
 
 {-# LANGUAGE BangPatterns, CPP, MagicHash, UnboxedTuples,
-    GeneralizedNewtypeDeriving #-}
+    GeneralizedNewtypeDeriving, RecordWildCards, BangPatterns #-}
 {-# OPTIONS_GHC -O2 -funbox-strict-fields #-}
 -- We always optimise this, otherwise performance of a non-optimised
 -- compiler is severely affected
@@ -62,7 +62,6 @@ module FastString
         zEncodeFS,
 
         -- ** Operations
-        uniqueOfFS,
         lengthFS,
         nullFS,
         appendFS,
@@ -78,7 +77,7 @@ module FastString
 
         -- ** Internal
         getFastStringTable,
-        hasZEncoding,
+        uniqueOfFS,
 
         -- * PtrStrings
         PtrString (..),
@@ -117,7 +116,6 @@ import GHC.Exts
 import System.IO
 import Data.Data
 import Data.IORef
-import Data.Maybe       ( isJust )
 import Data.Char
 import Data.Semigroup as Semi
 
@@ -130,6 +128,7 @@ import GHC.Conc.Sync    (sharedCAF)
 #endif
 
 import GHC.Base         ( unpackCString#, unpackNBytes# )
+import GHC.ForeignPtr
 
 
 -- | Gives the UTF-8 encoded bytes corresponding to a 'FastString'
@@ -148,7 +147,7 @@ unsafeMkByteString :: String -> ByteString
 unsafeMkByteString = BSC.pack
 
 hashFastString :: FastString -> Int
-hashFastString (FastString _ _ bs _)
+hashFastString (FastString bs)
     = inlinePerformIO $ BS.unsafeUseAsCStringLen bs $ \(ptr, len) ->
       return $ hashStr (castPtr ptr) len
 
@@ -181,15 +180,18 @@ Z-encoding used by the compiler internally.
 'FastString's support a memoized conversion to the Z-encoding via zEncodeFS.
 -}
 
-data FastString = FastString {
-      uniq    :: {-# UNPACK #-} !Int, -- unique id
-      n_chars :: {-# UNPACK #-} !Int, -- number of chars
-      fs_bs   :: {-# UNPACK #-} !ByteString,
-      fs_ref  :: {-# UNPACK #-} !(IORef (Maybe FastZString))
+newtype FastString = FastString {
+      fs_bs   :: ByteString
   }
 
+
+-- It is sufficient to test pointer equality as we guarantee that
+-- each string is uniquely allocated.
 instance Eq FastString where
-  f1 == f2  =  uniq f1 == uniq f2
+  f1 == f2  =
+    case (fs_bs f1, fs_bs f2) of
+      ((BS.PS fp i _len), (BS.PS fp' i' _len1)) -> fp == fp' && i == i'
+  {-# NOINLINE (==) #-}
 
 instance Ord FastString where
     -- Compares lexicographically, not by unique
@@ -224,7 +226,7 @@ instance Data FastString where
   dataTypeOf _ = mkNoRepType "FastString"
 
 cmpFS :: FastString -> FastString -> Ordering
-cmpFS f1@(FastString u1 _ _ _) f2@(FastString u2 _ _ _) =
+cmpFS f1@(FastString u1) f2@(FastString u2) =
   if u1 == u2 then EQ else
   compare (bytesFS f1) (bytesFS f2)
 
@@ -396,7 +398,7 @@ The procedure goes like this:
    * Otherwise, insert and return the string we created.
 -}
 
-mkFastStringWith :: (Int -> IO FastString) -> Ptr Word8 -> Int -> IO FastString
+mkFastStringWith :: IO FastString -> Ptr Word8 -> Int -> IO FastString
 mkFastStringWith mk_fs !ptr !len = do
   FastStringTableSegment lock _ buckets# <- readIORef segmentRef
   let idx# = hashToIndex# buckets# hash#
@@ -408,16 +410,14 @@ mkFastStringWith mk_fs !ptr !len = do
       -- The withMVar below is not dupable. It can lead to deadlock if it is
       -- only run partially and putMVar is not called after takeMVar.
       noDuplicate
-      n <- get_uid
-      new_fs <- mk_fs n
+      !new_fs <- mk_fs
       withMVar lock $ \_ -> insert new_fs
   where
-    !(FastStringTable uid segments#) = stringTable
-    get_uid = atomicModifyIORef' uid $ \n -> (n+1,n)
+    !(FastStringTable _uid segments#) = stringTable
 
     !(I# hash#) = hashStr ptr len
     (# segmentRef #) = indexArray# segments# (hashToSegment# hash#)
-    insert fs = do
+    insert !fs = do
       FastStringTableSegment _ counter buckets# <- maybeResizeSegment segmentRef
       let idx# = hashToIndex# buckets# hash#
       bucket <- IO $ readArray# buckets# idx#
@@ -435,7 +435,7 @@ mkFastStringWith mk_fs !ptr !len = do
 
 bucket_match :: [FastString] -> Int -> Ptr Word8 -> IO (Maybe FastString)
 bucket_match [] _ _ = return Nothing
-bucket_match (v@(FastString _ _ bs _):ls) len ptr
+bucket_match (v@(FastString bs):ls) len ptr
       | len == BS.length bs = do
          b <- BS.unsafeUseAsCString bs $ \buf ->
              cmpStringPrefix ptr (castPtr buf) len
@@ -456,7 +456,7 @@ mkFastStringBytes !ptr !len =
 -- the bytes if the string is new to the table.
 mkFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
 mkFastStringForeignPtr ptr !fp len
-    = mkFastStringWith (mkNewFastString fp ptr len) ptr len
+    = mkFastStringWith (mkNewFastString fp len) ptr len
 
 -- | Create a 'FastString' from an existing 'ForeignPtr'; the difference
 -- between this and 'mkFastStringBytes' is that we don't have to copy
@@ -466,7 +466,7 @@ mkFastStringByteString bs =
     inlinePerformIO $
       BS.unsafeUseAsCStringLen bs $ \(ptr, len) -> do
         let ptr' = castPtr ptr
-        mkFastStringWith (mkNewFastStringByteString bs ptr' len) ptr' len
+        mkFastStringWith (mkNewFastStringByteString bs) ptr' len
 
 -- | Creates a UTF-8 encoded 'FastString' from a 'String'
 mkFastString :: String -> FastString
@@ -486,26 +486,20 @@ mkFastStringByteList str = mkFastStringByteString (BS.pack str)
 mkZFastString :: String -> FastZString
 mkZFastString = mkFastZStringString
 
-mkNewFastString :: ForeignPtr Word8 -> Ptr Word8 -> Int -> Int
+mkNewFastString :: ForeignPtr Word8 -> Int
                 -> IO FastString
-mkNewFastString fp ptr len uid = do
-  ref <- newIORef Nothing
-  n_chars <- countUTF8Chars ptr len
-  return (FastString uid n_chars (BS.fromForeignPtr fp 0 len) ref)
+mkNewFastString fp len = do
+  return (FastString (BS.fromForeignPtr fp 0 len))
 
-mkNewFastStringByteString :: ByteString -> Ptr Word8 -> Int -> Int
+mkNewFastStringByteString :: ByteString
                           -> IO FastString
-mkNewFastStringByteString bs ptr len uid = do
-  ref <- newIORef Nothing
-  n_chars <- countUTF8Chars ptr len
-  return (FastString uid n_chars bs ref)
+mkNewFastStringByteString bs = do
+  return (FastString bs)
 
-copyNewFastString :: Ptr Word8 -> Int -> Int -> IO FastString
-copyNewFastString ptr len uid = do
+copyNewFastString :: Ptr Word8 -> Int -> IO FastString
+copyNewFastString ptr len = do
   fp <- copyBytesToForeignPtr ptr len
-  ref <- newIORef Nothing
-  n_chars <- countUTF8Chars ptr len
-  return (FastString uid n_chars (BS.fromForeignPtr fp 0 len) ref)
+  return (FastString (BS.fromForeignPtr fp 0 len))
 
 copyBytesToForeignPtr :: Ptr Word8 -> Int -> IO (ForeignPtr Word8)
 copyBytesToForeignPtr ptr len = do
@@ -534,15 +528,7 @@ hashStr (Ptr a#) (I# len#) = loop 0# 0#
 
 -- | Returns the length of the 'FastString' in characters
 lengthFS :: FastString -> Int
-lengthFS f = n_chars f
-
--- | Returns @True@ if this 'FastString' is not Z-encoded but already has
--- a Z-encoding cached (used in producing stats).
-hasZEncoding :: FastString -> Bool
-hasZEncoding (FastString _ _ _ ref) =
-      inlinePerformIO $ do
-        m <- readIORef ref
-        return (isJust m)
+lengthFS (FastString fs) = BS.length fs
 
 -- | Returns @True@ if the 'FastString' is empty
 nullFS :: FastString -> Bool
@@ -550,24 +536,10 @@ nullFS f = BS.null (fs_bs f)
 
 -- | Unpacks and decodes the FastString
 unpackFS :: FastString -> String
-unpackFS (FastString _ _ bs _) = utf8DecodeByteString bs
+unpackFS (FastString bs) = utf8DecodeByteString bs
 
--- | Returns a Z-encoded version of a 'FastString'.  This might be the
--- original, if it was already Z-encoded.  The first time this
--- function is applied to a particular 'FastString', the results are
--- memoized.
---
 zEncodeFS :: FastString -> FastZString
-zEncodeFS fs@(FastString _ _ _ ref) =
-      inlinePerformIO $ do
-        m <- readIORef ref
-        case m of
-          Just zfs -> return zfs
-          Nothing -> do
-            atomicModifyIORef' ref $ \m' -> case m' of
-              Nothing  -> let zfs = mkZFastString (zEncodeString (unpackFS fs))
-                          in (Just zfs, zfs)
-              Just zfs -> (m', zfs)
+zEncodeFS fs = mkZFastString (zEncodeString (unpackFS fs))
 
 appendFS :: FastString -> FastString -> FastString
 appendFS fs1 fs2 = mkFastStringByteString
@@ -577,14 +549,13 @@ concatFS :: [FastString] -> FastString
 concatFS = mkFastStringByteString . BS.concat . map fs_bs
 
 headFS :: FastString -> Char
-headFS (FastString _ 0 _ _) = panic "headFS: Empty FastString"
-headFS (FastString _ _ bs _) =
+headFS (FastString bs) =
   inlinePerformIO $ BS.unsafeUseAsCString bs $ \ptr ->
          return (fst (utf8DecodeChar (castPtr ptr)))
 
 tailFS :: FastString -> FastString
-tailFS (FastString _ 0 _ _) = panic "tailFS: Empty FastString"
-tailFS (FastString _ _ bs _) =
+--tailFS (FastString _ 0 _ _) = panic "tailFS: Empty FastString"
+tailFS (FastString bs) =
     inlinePerformIO $ BS.unsafeUseAsCString bs $ \ptr ->
     do let (_, n) = utf8DecodeChar (castPtr ptr)
        return $! mkFastStringByteString (BS.drop n bs)
@@ -592,8 +563,12 @@ tailFS (FastString _ _ bs _) =
 consFS :: Char -> FastString -> FastString
 consFS c fs = mkFastString (c : unpackFS fs)
 
+-- MP: I think this is safe because the bytestring will take up
+-- the start position + (length * words) space, so adding the offset will
+-- still be a memory position within the bytestring
 uniqueOfFS :: FastString -> Int
-uniqueOfFS (FastString u _ _ _) = u
+uniqueOfFS (FastString (BS.PS (ForeignPtr a _) o _)) =
+  (I# (addr2Int# a)) + o
 
 nilFS :: FastString
 nilFS = mkFastString ""
diff --git a/ghc/Main.hs b/ghc/Main.hs
index 614b45f277..3b32c2de99 100644
--- a/ghc/Main.hs
+++ b/ghc/Main.hs
@@ -819,7 +819,6 @@ dumpFastStringStats dflags = do
       bucketsPerSegment = map length segments
       entriesPerBucket = map length buckets
       entries = sum entriesPerBucket
-      hasZ = sum $ map (length . filter hasZEncoding) buckets
       msg = text "FastString stats:" $$ nest 4 (vcat
         [ text "segments:         " <+> int (length segments)
         , text "buckets:          " <+> int (sum bucketsPerSegment)
@@ -827,7 +826,6 @@ dumpFastStringStats dflags = do
         , text "largest segment:  " <+> int (maximum bucketsPerSegment)
         , text "smallest segment: " <+> int (minimum bucketsPerSegment)
         , text "longest bucket:   " <+> int (maximum entriesPerBucket)
-        , text "has z-encoding:   " <+> (hasZ `pcntOf` entries)
         ])
         -- we usually get more "has z-encoding" than "z-encoded", because
         -- when we z-encode a string it might hash to the exact same string,
@@ -835,8 +833,6 @@ dumpFastStringStats dflags = do
         -- Z-encoding is different from the original string are counted in
         -- the "z-encoded" total.
   putMsg dflags msg
-  where
-   x `pcntOf` y = int ((x * 100) `quot` y) Outputable.<> char '%'
 
 showPackages, dumpPackages, dumpPackagesSimple :: DynFlags -> IO ()
 showPackages       dflags = putStrLn (showSDoc dflags (pprPackages dflags))
author	Matthew Pickering <matthewtpickering@gmail.com>	2019-08-17 18:35:09 +0100
committer	Matthew Pickering <matthewtpickering@gmail.com>	2019-08-17 18:35:09 +0100
commit	5cff43d301b0773e2deeedb0a397da5e47cb89e1 (patch)
tree	074d42d6a9cd8e82703b298d7f55b1078416dcd6
parent	47e162374051ed3e874ed7916cc811df288cbd95 (diff)
download	haskell-wip/faststring-allocs.tar.gz