summaryrefslogtreecommitdiff
path: root/compiler/utils/FastString.hs
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/utils/FastString.hs')
-rw-r--r--compiler/utils/FastString.hs107
1 files changed, 41 insertions, 66 deletions
diff --git a/compiler/utils/FastString.hs b/compiler/utils/FastString.hs
index 0db61ec93f..0023b2ee53 100644
--- a/compiler/utils/FastString.hs
+++ b/compiler/utils/FastString.hs
@@ -1,7 +1,7 @@
-- (c) The University of Glasgow, 1997-2006
{-# LANGUAGE BangPatterns, CPP, MagicHash, UnboxedTuples,
- GeneralizedNewtypeDeriving #-}
+ GeneralizedNewtypeDeriving, RecordWildCards, BangPatterns #-}
{-# OPTIONS_GHC -O2 -funbox-strict-fields #-}
-- We always optimise this, otherwise performance of a non-optimised
-- compiler is severely affected
@@ -62,7 +62,6 @@ module FastString
zEncodeFS,
-- ** Operations
- uniqueOfFS,
lengthFS,
nullFS,
appendFS,
@@ -78,7 +77,7 @@ module FastString
-- ** Internal
getFastStringTable,
- hasZEncoding,
+ uniqueOfFS,
-- * PtrStrings
PtrString (..),
@@ -117,7 +116,6 @@ import GHC.Exts
import System.IO
import Data.Data
import Data.IORef
-import Data.Maybe ( isJust )
import Data.Char
import Data.Semigroup as Semi
@@ -130,6 +128,7 @@ import GHC.Conc.Sync (sharedCAF)
#endif
import GHC.Base ( unpackCString#, unpackNBytes# )
+import GHC.ForeignPtr
-- | Gives the UTF-8 encoded bytes corresponding to a 'FastString'
@@ -148,7 +147,7 @@ unsafeMkByteString :: String -> ByteString
unsafeMkByteString = BSC.pack
hashFastString :: FastString -> Int
-hashFastString (FastString _ _ bs _)
+hashFastString (FastString bs)
= inlinePerformIO $ BS.unsafeUseAsCStringLen bs $ \(ptr, len) ->
return $ hashStr (castPtr ptr) len
@@ -181,15 +180,18 @@ Z-encoding used by the compiler internally.
'FastString's support a memoized conversion to the Z-encoding via zEncodeFS.
-}
-data FastString = FastString {
- uniq :: {-# UNPACK #-} !Int, -- unique id
- n_chars :: {-# UNPACK #-} !Int, -- number of chars
- fs_bs :: {-# UNPACK #-} !ByteString,
- fs_ref :: {-# UNPACK #-} !(IORef (Maybe FastZString))
+newtype FastString = FastString {
+ fs_bs :: ByteString
}
+
+-- It is sufficient to test pointer equality as we guarantee that
+-- each string is uniquely allocated.
instance Eq FastString where
- f1 == f2 = uniq f1 == uniq f2
+ f1 == f2 =
+ case (fs_bs f1, fs_bs f2) of
+ ((BS.PS fp i _len), (BS.PS fp' i' _len1)) -> fp == fp' && i == i'
+ {-# NOINLINE (==) #-}
instance Ord FastString where
-- Compares lexicographically, not by unique
@@ -224,7 +226,7 @@ instance Data FastString where
dataTypeOf _ = mkNoRepType "FastString"
cmpFS :: FastString -> FastString -> Ordering
-cmpFS f1@(FastString u1 _ _ _) f2@(FastString u2 _ _ _) =
+cmpFS f1@(FastString u1) f2@(FastString u2) =
if u1 == u2 then EQ else
compare (bytesFS f1) (bytesFS f2)
@@ -396,7 +398,7 @@ The procedure goes like this:
* Otherwise, insert and return the string we created.
-}
-mkFastStringWith :: (Int -> IO FastString) -> Ptr Word8 -> Int -> IO FastString
+mkFastStringWith :: IO FastString -> Ptr Word8 -> Int -> IO FastString
mkFastStringWith mk_fs !ptr !len = do
FastStringTableSegment lock _ buckets# <- readIORef segmentRef
let idx# = hashToIndex# buckets# hash#
@@ -408,16 +410,14 @@ mkFastStringWith mk_fs !ptr !len = do
-- The withMVar below is not dupable. It can lead to deadlock if it is
-- only run partially and putMVar is not called after takeMVar.
noDuplicate
- n <- get_uid
- new_fs <- mk_fs n
+ !new_fs <- mk_fs
withMVar lock $ \_ -> insert new_fs
where
- !(FastStringTable uid segments#) = stringTable
- get_uid = atomicModifyIORef' uid $ \n -> (n+1,n)
+ !(FastStringTable _uid segments#) = stringTable
!(I# hash#) = hashStr ptr len
(# segmentRef #) = indexArray# segments# (hashToSegment# hash#)
- insert fs = do
+ insert !fs = do
FastStringTableSegment _ counter buckets# <- maybeResizeSegment segmentRef
let idx# = hashToIndex# buckets# hash#
bucket <- IO $ readArray# buckets# idx#
@@ -435,7 +435,7 @@ mkFastStringWith mk_fs !ptr !len = do
bucket_match :: [FastString] -> Int -> Ptr Word8 -> IO (Maybe FastString)
bucket_match [] _ _ = return Nothing
-bucket_match (v@(FastString _ _ bs _):ls) len ptr
+bucket_match (v@(FastString bs):ls) len ptr
| len == BS.length bs = do
b <- BS.unsafeUseAsCString bs $ \buf ->
cmpStringPrefix ptr (castPtr buf) len
@@ -456,7 +456,7 @@ mkFastStringBytes !ptr !len =
-- the bytes if the string is new to the table.
mkFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
mkFastStringForeignPtr ptr !fp len
- = mkFastStringWith (mkNewFastString fp ptr len) ptr len
+ = mkFastStringWith (mkNewFastString fp len) ptr len
-- | Create a 'FastString' from an existing 'ForeignPtr'; the difference
-- between this and 'mkFastStringBytes' is that we don't have to copy
@@ -466,7 +466,7 @@ mkFastStringByteString bs =
inlinePerformIO $
BS.unsafeUseAsCStringLen bs $ \(ptr, len) -> do
let ptr' = castPtr ptr
- mkFastStringWith (mkNewFastStringByteString bs ptr' len) ptr' len
+ mkFastStringWith (mkNewFastStringByteString bs) ptr' len
-- | Creates a UTF-8 encoded 'FastString' from a 'String'
mkFastString :: String -> FastString
@@ -486,26 +486,20 @@ mkFastStringByteList str = mkFastStringByteString (BS.pack str)
mkZFastString :: String -> FastZString
mkZFastString = mkFastZStringString
-mkNewFastString :: ForeignPtr Word8 -> Ptr Word8 -> Int -> Int
+mkNewFastString :: ForeignPtr Word8 -> Int
-> IO FastString
-mkNewFastString fp ptr len uid = do
- ref <- newIORef Nothing
- n_chars <- countUTF8Chars ptr len
- return (FastString uid n_chars (BS.fromForeignPtr fp 0 len) ref)
+mkNewFastString fp len = do
+ return (FastString (BS.fromForeignPtr fp 0 len))
-mkNewFastStringByteString :: ByteString -> Ptr Word8 -> Int -> Int
+mkNewFastStringByteString :: ByteString
-> IO FastString
-mkNewFastStringByteString bs ptr len uid = do
- ref <- newIORef Nothing
- n_chars <- countUTF8Chars ptr len
- return (FastString uid n_chars bs ref)
+mkNewFastStringByteString bs = do
+ return (FastString bs)
-copyNewFastString :: Ptr Word8 -> Int -> Int -> IO FastString
-copyNewFastString ptr len uid = do
+copyNewFastString :: Ptr Word8 -> Int -> IO FastString
+copyNewFastString ptr len = do
fp <- copyBytesToForeignPtr ptr len
- ref <- newIORef Nothing
- n_chars <- countUTF8Chars ptr len
- return (FastString uid n_chars (BS.fromForeignPtr fp 0 len) ref)
+ return (FastString (BS.fromForeignPtr fp 0 len))
copyBytesToForeignPtr :: Ptr Word8 -> Int -> IO (ForeignPtr Word8)
copyBytesToForeignPtr ptr len = do
@@ -534,15 +528,7 @@ hashStr (Ptr a#) (I# len#) = loop 0# 0#
-- | Returns the length of the 'FastString' in characters
lengthFS :: FastString -> Int
-lengthFS f = n_chars f
-
--- | Returns @True@ if this 'FastString' is not Z-encoded but already has
--- a Z-encoding cached (used in producing stats).
-hasZEncoding :: FastString -> Bool
-hasZEncoding (FastString _ _ _ ref) =
- inlinePerformIO $ do
- m <- readIORef ref
- return (isJust m)
+lengthFS (FastString fs) = BS.length fs
-- | Returns @True@ if the 'FastString' is empty
nullFS :: FastString -> Bool
@@ -550,24 +536,10 @@ nullFS f = BS.null (fs_bs f)
-- | Unpacks and decodes the FastString
unpackFS :: FastString -> String
-unpackFS (FastString _ _ bs _) = utf8DecodeByteString bs
+unpackFS (FastString bs) = utf8DecodeByteString bs
--- | Returns a Z-encoded version of a 'FastString'. This might be the
--- original, if it was already Z-encoded. The first time this
--- function is applied to a particular 'FastString', the results are
--- memoized.
---
zEncodeFS :: FastString -> FastZString
-zEncodeFS fs@(FastString _ _ _ ref) =
- inlinePerformIO $ do
- m <- readIORef ref
- case m of
- Just zfs -> return zfs
- Nothing -> do
- atomicModifyIORef' ref $ \m' -> case m' of
- Nothing -> let zfs = mkZFastString (zEncodeString (unpackFS fs))
- in (Just zfs, zfs)
- Just zfs -> (m', zfs)
+zEncodeFS fs = mkZFastString (zEncodeString (unpackFS fs))
appendFS :: FastString -> FastString -> FastString
appendFS fs1 fs2 = mkFastStringByteString
@@ -577,14 +549,13 @@ concatFS :: [FastString] -> FastString
concatFS = mkFastStringByteString . BS.concat . map fs_bs
headFS :: FastString -> Char
-headFS (FastString _ 0 _ _) = panic "headFS: Empty FastString"
-headFS (FastString _ _ bs _) =
+headFS (FastString bs) =
inlinePerformIO $ BS.unsafeUseAsCString bs $ \ptr ->
return (fst (utf8DecodeChar (castPtr ptr)))
tailFS :: FastString -> FastString
-tailFS (FastString _ 0 _ _) = panic "tailFS: Empty FastString"
-tailFS (FastString _ _ bs _) =
+--tailFS (FastString _ 0 _ _) = panic "tailFS: Empty FastString"
+tailFS (FastString bs) =
inlinePerformIO $ BS.unsafeUseAsCString bs $ \ptr ->
do let (_, n) = utf8DecodeChar (castPtr ptr)
return $! mkFastStringByteString (BS.drop n bs)
@@ -592,8 +563,12 @@ tailFS (FastString _ _ bs _) =
consFS :: Char -> FastString -> FastString
consFS c fs = mkFastString (c : unpackFS fs)
+-- MP: I think this is safe because the bytestring will take up
+-- the start position + (length * words) space, so adding the offset will
+-- still be a memory position within the bytestring
uniqueOfFS :: FastString -> Int
-uniqueOfFS (FastString u _ _ _) = u
+uniqueOfFS (FastString (BS.PS (ForeignPtr a _) o _)) =
+ (I# (addr2Int# a)) + o
nilFS :: FastString
nilFS = mkFastString ""