summaryrefslogtreecommitdiff
path: root/compiler/GHC/Data/FastString.hs
diff options
context:
space:
mode:
authorSylvain Henry <sylvain@haskus.fr>2020-04-20 16:54:38 +0200
committerMarge Bot <ben+marge-bot@smart-cactus.org>2020-04-26 13:55:14 -0400
commitaf332442123878c1b61d236dce46418efcbe8750 (patch)
treeec4b332843cdd4fedb4aa60b11b7b8dba82a0764 /compiler/GHC/Data/FastString.hs
parentb0fbfc7582fb81314dc28a056536737fb5eeaa6e (diff)
downloadhaskell-af332442123878c1b61d236dce46418efcbe8750.tar.gz
Modules: Utils and Data (#13009)
Update Haddock submodule Metric Increase: haddock.compiler
Diffstat (limited to 'compiler/GHC/Data/FastString.hs')
-rw-r--r--compiler/GHC/Data/FastString.hs693
1 files changed, 693 insertions, 0 deletions
diff --git a/compiler/GHC/Data/FastString.hs b/compiler/GHC/Data/FastString.hs
new file mode 100644
index 0000000000..82f38601f5
--- /dev/null
+++ b/compiler/GHC/Data/FastString.hs
@@ -0,0 +1,693 @@
+-- (c) The University of Glasgow, 1997-2006
+
+{-# LANGUAGE BangPatterns, CPP, MagicHash, UnboxedTuples,
+ GeneralizedNewtypeDeriving #-}
+{-# OPTIONS_GHC -O2 -funbox-strict-fields #-}
+-- We always optimise this, otherwise performance of a non-optimised
+-- compiler is severely affected
+
+-- |
+-- There are two principal string types used internally by GHC:
+--
+-- ['FastString']
+--
+-- * A compact, hash-consed, representation of character strings.
+-- * Comparison is O(1), and you can get a 'Unique.Unique' from them.
+-- * Generated by 'fsLit'.
+-- * Turn into 'Outputable.SDoc' with 'Outputable.ftext'.
+--
+-- ['PtrString']
+--
+-- * Pointer and size of a Latin-1 encoded string.
+-- * Practically no operations.
+-- * Outputting them is fast.
+-- * Generated by 'sLit'.
+-- * Turn into 'Outputable.SDoc' with 'Outputable.ptext'
+-- * Requires manual memory management.
+-- Improper use may lead to memory leaks or dangling pointers.
+-- * It assumes Latin-1 as the encoding, therefore it cannot represent
+-- arbitrary Unicode strings.
+--
+-- Use 'PtrString' unless you want the facilities of 'FastString'.
+module GHC.Data.FastString
+ (
+ -- * ByteString
+ bytesFS, -- :: FastString -> ByteString
+ fastStringToByteString, -- = bytesFS (kept for haddock)
+ mkFastStringByteString,
+ fastZStringToByteString,
+ unsafeMkByteString,
+
+ -- * FastZString
+ FastZString,
+ hPutFZS,
+ zString,
+ lengthFZS,
+
+ -- * FastStrings
+ FastString(..), -- not abstract, for now.
+
+ -- ** Construction
+ fsLit,
+ mkFastString,
+ mkFastStringBytes,
+ mkFastStringByteList,
+ mkFastStringForeignPtr,
+ mkFastString#,
+
+ -- ** Deconstruction
+ unpackFS, -- :: FastString -> String
+
+ -- ** Encoding
+ zEncodeFS,
+
+ -- ** Operations
+ uniqueOfFS,
+ lengthFS,
+ nullFS,
+ appendFS,
+ headFS,
+ tailFS,
+ concatFS,
+ consFS,
+ nilFS,
+ isUnderscoreFS,
+
+ -- ** Outputting
+ hPutFS,
+
+ -- ** Internal
+ getFastStringTable,
+ getFastStringZEncCounter,
+
+ -- * PtrStrings
+ PtrString (..),
+
+ -- ** Construction
+ sLit,
+ mkPtrString#,
+ mkPtrString,
+
+ -- ** Deconstruction
+ unpackPtrString,
+
+ -- ** Operations
+ lengthPS
+ ) where
+
+#include "HsVersions.h"
+
+import GHC.Prelude as Prelude
+
+import GHC.Utils.Encoding
+import GHC.Utils.IO.Unsafe
+import GHC.Utils.Panic.Plain
+import GHC.Utils.Misc
+
+import Control.Concurrent.MVar
+import Control.DeepSeq
+import Control.Monad
+import Data.ByteString (ByteString)
+import qualified Data.ByteString as BS
+import qualified Data.ByteString.Char8 as BSC
+import qualified Data.ByteString.Internal as BS
+import qualified Data.ByteString.Unsafe as BS
+import Foreign.C
+import GHC.Exts
+import System.IO
+import Data.Data
+import Data.IORef
+import Data.Char
+import Data.Semigroup as Semi
+
+import GHC.IO
+
+import Foreign
+
+#if GHC_STAGE >= 2
+import GHC.Conc.Sync (sharedCAF)
+#endif
+
+import GHC.Base ( unpackCString#, unpackNBytes# )
+
+
+-- | Gives the UTF-8 encoded bytes corresponding to a 'FastString'
+bytesFS :: FastString -> ByteString
+bytesFS f = fs_bs f
+
+{-# DEPRECATED fastStringToByteString "Use `bytesFS` instead" #-}
+fastStringToByteString :: FastString -> ByteString
+fastStringToByteString = bytesFS
+
+fastZStringToByteString :: FastZString -> ByteString
+fastZStringToByteString (FastZString bs) = bs
+
+-- This will drop information if any character > '\xFF'
+unsafeMkByteString :: String -> ByteString
+unsafeMkByteString = BSC.pack
+
+hashFastString :: FastString -> Int
+hashFastString (FastString _ _ bs _)
+ = inlinePerformIO $ BS.unsafeUseAsCStringLen bs $ \(ptr, len) ->
+ return $ hashStr (castPtr ptr) len
+
+-- -----------------------------------------------------------------------------
+
+newtype FastZString = FastZString ByteString
+ deriving NFData
+
+hPutFZS :: Handle -> FastZString -> IO ()
+hPutFZS handle (FastZString bs) = BS.hPut handle bs
+
+zString :: FastZString -> String
+zString (FastZString bs) =
+ inlinePerformIO $ BS.unsafeUseAsCStringLen bs peekCAStringLen
+
+lengthFZS :: FastZString -> Int
+lengthFZS (FastZString bs) = BS.length bs
+
+mkFastZStringString :: String -> FastZString
+mkFastZStringString str = FastZString (BSC.pack str)
+
+-- -----------------------------------------------------------------------------
+
+{-| A 'FastString' is a UTF-8 encoded string together with a unique ID. All
+'FastString's are stored in a global hashtable to support fast O(1)
+comparison.
+
+It is also associated with a lazy reference to the Z-encoding
+of this string which is used by the compiler internally.
+-}
+data FastString = FastString {
+ uniq :: {-# UNPACK #-} !Int, -- unique id
+ n_chars :: {-# UNPACK #-} !Int, -- number of chars
+ fs_bs :: {-# UNPACK #-} !ByteString,
+ fs_zenc :: FastZString
+ -- ^ Lazily computed z-encoding of this string.
+ --
+ -- Since 'FastString's are globally memoized this is computed at most
+ -- once for any given string.
+ }
+
+instance Eq FastString where
+ f1 == f2 = uniq f1 == uniq f2
+
+instance Ord FastString where
+ -- Compares lexicographically, not by unique
+ a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False }
+ a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False }
+ a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True }
+ a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True }
+ max x y | x >= y = x
+ | otherwise = y
+ min x y | x <= y = x
+ | otherwise = y
+ compare a b = cmpFS a b
+
+instance IsString FastString where
+ fromString = fsLit
+
+instance Semi.Semigroup FastString where
+ (<>) = appendFS
+
+instance Monoid FastString where
+ mempty = nilFS
+ mappend = (Semi.<>)
+ mconcat = concatFS
+
+instance Show FastString where
+ show fs = show (unpackFS fs)
+
+instance Data FastString where
+ -- don't traverse?
+ toConstr _ = abstractConstr "FastString"
+ gunfold _ _ = error "gunfold"
+ dataTypeOf _ = mkNoRepType "FastString"
+
+instance NFData FastString where
+ rnf fs = seq fs ()
+
+cmpFS :: FastString -> FastString -> Ordering
+cmpFS f1@(FastString u1 _ _ _) f2@(FastString u2 _ _ _) =
+ if u1 == u2 then EQ else
+ compare (bytesFS f1) (bytesFS f2)
+
+foreign import ccall unsafe "memcmp"
+ memcmp :: Ptr a -> Ptr b -> Int -> IO Int
+
+-- -----------------------------------------------------------------------------
+-- Construction
+
+{-
+Internally, the compiler will maintain a fast string symbol table, providing
+sharing and fast comparison. Creation of new @FastString@s then covertly does a
+lookup, re-using the @FastString@ if there was a hit.
+
+The design of the FastString hash table allows for lockless concurrent reads
+and updates to multiple buckets with low synchronization overhead.
+
+See Note [Updating the FastString table] on how it's updated.
+-}
+data FastStringTable = FastStringTable
+ {-# UNPACK #-} !(IORef Int) -- the unique ID counter shared with all buckets
+ {-# UNPACK #-} !(IORef Int) -- number of computed z-encodings for all buckets
+ (Array# (IORef FastStringTableSegment)) -- concurrent segments
+
+data FastStringTableSegment = FastStringTableSegment
+ {-# UNPACK #-} !(MVar ()) -- the lock for write in each segment
+ {-# UNPACK #-} !(IORef Int) -- the number of elements
+ (MutableArray# RealWorld [FastString]) -- buckets in this segment
+
+{-
+Following parameters are determined based on:
+
+* Benchmark based on testsuite/tests/utils/should_run/T14854.hs
+* Stats of @echo :browse | ghc --interactive -dfaststring-stats >/dev/null@:
+ on 2018-10-24, we have 13920 entries.
+-}
+segmentBits, numSegments, segmentMask, initialNumBuckets :: Int
+segmentBits = 8
+numSegments = 256 -- bit segmentBits
+segmentMask = 0xff -- bit segmentBits - 1
+initialNumBuckets = 64
+
+hashToSegment# :: Int# -> Int#
+hashToSegment# hash# = hash# `andI#` segmentMask#
+ where
+ !(I# segmentMask#) = segmentMask
+
+hashToIndex# :: MutableArray# RealWorld [FastString] -> Int# -> Int#
+hashToIndex# buckets# hash# =
+ (hash# `uncheckedIShiftRL#` segmentBits#) `remInt#` size#
+ where
+ !(I# segmentBits#) = segmentBits
+ size# = sizeofMutableArray# buckets#
+
+maybeResizeSegment :: IORef FastStringTableSegment -> IO FastStringTableSegment
+maybeResizeSegment segmentRef = do
+ segment@(FastStringTableSegment lock counter old#) <- readIORef segmentRef
+ let oldSize# = sizeofMutableArray# old#
+ newSize# = oldSize# *# 2#
+ (I# n#) <- readIORef counter
+ if isTrue# (n# <# newSize#) -- maximum load of 1
+ then return segment
+ else do
+ resizedSegment@(FastStringTableSegment _ _ new#) <- IO $ \s1# ->
+ case newArray# newSize# [] s1# of
+ (# s2#, arr# #) -> (# s2#, FastStringTableSegment lock counter arr# #)
+ forM_ [0 .. (I# oldSize#) - 1] $ \(I# i#) -> do
+ fsList <- IO $ readArray# old# i#
+ forM_ fsList $ \fs -> do
+ let -- Shall we store in hash value in FastString instead?
+ !(I# hash#) = hashFastString fs
+ idx# = hashToIndex# new# hash#
+ IO $ \s1# ->
+ case readArray# new# idx# s1# of
+ (# s2#, bucket #) -> case writeArray# new# idx# (fs: bucket) s2# of
+ s3# -> (# s3#, () #)
+ writeIORef segmentRef resizedSegment
+ return resizedSegment
+
+{-# NOINLINE stringTable #-}
+stringTable :: FastStringTable
+stringTable = unsafePerformIO $ do
+ let !(I# numSegments#) = numSegments
+ !(I# initialNumBuckets#) = initialNumBuckets
+ loop a# i# s1#
+ | isTrue# (i# ==# numSegments#) = s1#
+ | otherwise = case newMVar () `unIO` s1# of
+ (# s2#, lock #) -> case newIORef 0 `unIO` s2# of
+ (# s3#, counter #) -> case newArray# initialNumBuckets# [] s3# of
+ (# s4#, buckets# #) -> case newIORef
+ (FastStringTableSegment lock counter buckets#) `unIO` s4# of
+ (# s5#, segment #) -> case writeArray# a# i# segment s5# of
+ s6# -> loop a# (i# +# 1#) s6#
+ uid <- newIORef 603979776 -- ord '$' * 0x01000000
+ n_zencs <- newIORef 0
+ tab <- IO $ \s1# ->
+ case newArray# numSegments# (panic "string_table") s1# of
+ (# s2#, arr# #) -> case loop arr# 0# s2# of
+ s3# -> case unsafeFreezeArray# arr# s3# of
+ (# s4#, segments# #) ->
+ (# s4#, FastStringTable uid n_zencs segments# #)
+
+ -- use the support wired into the RTS to share this CAF among all images of
+ -- libHSghc
+#if GHC_STAGE < 2
+ return tab
+#else
+ sharedCAF tab getOrSetLibHSghcFastStringTable
+
+-- from the RTS; thus we cannot use this mechanism when GHC_STAGE<2; the previous
+-- RTS might not have this symbol
+foreign import ccall unsafe "getOrSetLibHSghcFastStringTable"
+ getOrSetLibHSghcFastStringTable :: Ptr a -> IO (Ptr a)
+#endif
+
+{-
+
+We include the FastString table in the `sharedCAF` mechanism because we'd like
+FastStrings created by a Core plugin to have the same uniques as corresponding
+strings created by the host compiler itself. For example, this allows plugins
+to lookup known names (eg `mkTcOcc "MySpecialType"`) in the GlobalRdrEnv or
+even re-invoke the parser.
+
+In particular, the following little sanity test was failing in a plugin
+prototyping safe newtype-coercions: GHC.NT.Type.NT was imported, but could not
+be looked up /by the plugin/.
+
+ let rdrName = mkModuleName "GHC.NT.Type" `mkRdrQual` mkTcOcc "NT"
+ putMsgS $ showSDoc dflags $ ppr $ lookupGRE_RdrName rdrName $ mg_rdr_env guts
+
+`mkTcOcc` involves the lookup (or creation) of a FastString. Since the
+plugin's FastString.string_table is empty, constructing the RdrName also
+allocates new uniques for the FastStrings "GHC.NT.Type" and "NT". These
+uniques are almost certainly unequal to the ones that the host compiler
+originally assigned to those FastStrings. Thus the lookup fails since the
+domain of the GlobalRdrEnv is affected by the RdrName's OccName's FastString's
+unique.
+
+Maintaining synchronization of the two instances of this global is rather
+difficult because of the uses of `unsafePerformIO` in this module. Not
+synchronizing them risks breaking the rather major invariant that two
+FastStrings with the same unique have the same string. Thus we use the
+lower-level `sharedCAF` mechanism that relies on Globals.c.
+
+-}
+
+mkFastString# :: Addr# -> FastString
+mkFastString# a# = mkFastStringBytes ptr (ptrStrLength ptr)
+ where ptr = Ptr a#
+
+{- Note [Updating the FastString table]
+
+We use a concurrent hashtable which contains multiple segments, each hash value
+always maps to the same segment. Read is lock-free, write to the a segment
+should acquire a lock for that segment to avoid race condition, writes to
+different segments are independent.
+
+The procedure goes like this:
+
+1. Find out which segment to operate on based on the hash value
+2. Read the relevant bucket and perform a look up of the string.
+3. If it exists, return it.
+4. Otherwise grab a unique ID, create a new FastString and atomically attempt
+ to update the relevant segment with this FastString:
+
+ * Resize the segment by doubling the number of buckets when the number of
+ FastStrings in this segment grows beyond the threshold.
+ * Double check that the string is not in the bucket. Another thread may have
+ inserted it while we were creating our string.
+ * Return the existing FastString if it exists. The one we preemptively
+ created will get GCed.
+ * Otherwise, insert and return the string we created.
+-}
+
+mkFastStringWith
+ :: (Int -> IORef Int-> IO FastString) -> Ptr Word8 -> Int -> IO FastString
+mkFastStringWith mk_fs !ptr !len = do
+ FastStringTableSegment lock _ buckets# <- readIORef segmentRef
+ let idx# = hashToIndex# buckets# hash#
+ bucket <- IO $ readArray# buckets# idx#
+ res <- bucket_match bucket len ptr
+ case res of
+ Just found -> return found
+ Nothing -> do
+ -- The withMVar below is not dupable. It can lead to deadlock if it is
+ -- only run partially and putMVar is not called after takeMVar.
+ noDuplicate
+ n <- get_uid
+ new_fs <- mk_fs n n_zencs
+ withMVar lock $ \_ -> insert new_fs
+ where
+ !(FastStringTable uid n_zencs segments#) = stringTable
+ get_uid = atomicModifyIORef' uid $ \n -> (n+1,n)
+
+ !(I# hash#) = hashStr ptr len
+ (# segmentRef #) = indexArray# segments# (hashToSegment# hash#)
+ insert fs = do
+ FastStringTableSegment _ counter buckets# <- maybeResizeSegment segmentRef
+ let idx# = hashToIndex# buckets# hash#
+ bucket <- IO $ readArray# buckets# idx#
+ res <- bucket_match bucket len ptr
+ case res of
+ -- The FastString was added by another thread after previous read and
+ -- before we acquired the write lock.
+ Just found -> return found
+ Nothing -> do
+ IO $ \s1# ->
+ case writeArray# buckets# idx# (fs: bucket) s1# of
+ s2# -> (# s2#, () #)
+ modifyIORef' counter succ
+ return fs
+
+bucket_match :: [FastString] -> Int -> Ptr Word8 -> IO (Maybe FastString)
+bucket_match [] _ _ = return Nothing
+bucket_match (v@(FastString _ _ bs _):ls) len ptr
+ | len == BS.length bs = do
+ b <- BS.unsafeUseAsCString bs $ \buf ->
+ cmpStringPrefix ptr (castPtr buf) len
+ if b then return (Just v)
+ else bucket_match ls len ptr
+ | otherwise =
+ bucket_match ls len ptr
+
+mkFastStringBytes :: Ptr Word8 -> Int -> FastString
+mkFastStringBytes !ptr !len =
+ -- NB: Might as well use unsafeDupablePerformIO, since mkFastStringWith is
+ -- idempotent.
+ unsafeDupablePerformIO $
+ mkFastStringWith (copyNewFastString ptr len) ptr len
+
+-- | Create a 'FastString' from an existing 'ForeignPtr'; the difference
+-- between this and 'mkFastStringBytes' is that we don't have to copy
+-- the bytes if the string is new to the table.
+mkFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
+mkFastStringForeignPtr ptr !fp len
+ = mkFastStringWith (mkNewFastString fp ptr len) ptr len
+
+-- | Create a 'FastString' from an existing 'ForeignPtr'; the difference
+-- between this and 'mkFastStringBytes' is that we don't have to copy
+-- the bytes if the string is new to the table.
+mkFastStringByteString :: ByteString -> FastString
+mkFastStringByteString bs =
+ inlinePerformIO $
+ BS.unsafeUseAsCStringLen bs $ \(ptr, len) -> do
+ let ptr' = castPtr ptr
+ mkFastStringWith (mkNewFastStringByteString bs ptr' len) ptr' len
+
+-- | Creates a UTF-8 encoded 'FastString' from a 'String'
+mkFastString :: String -> FastString
+mkFastString str =
+ inlinePerformIO $ do
+ let l = utf8EncodedLength str
+ buf <- mallocForeignPtrBytes l
+ withForeignPtr buf $ \ptr -> do
+ utf8EncodeString ptr str
+ mkFastStringForeignPtr ptr buf l
+
+-- | Creates a 'FastString' from a UTF-8 encoded @[Word8]@
+mkFastStringByteList :: [Word8] -> FastString
+mkFastStringByteList str = mkFastStringByteString (BS.pack str)
+
+-- | Creates a (lazy) Z-encoded 'FastString' from a 'String' and account
+-- the number of forced z-strings into the passed 'IORef'.
+mkZFastString :: IORef Int -> ByteString -> FastZString
+mkZFastString n_zencs bs = unsafePerformIO $ do
+ atomicModifyIORef' n_zencs $ \n -> (n+1, ())
+ return $ mkFastZStringString (zEncodeString (utf8DecodeByteString bs))
+
+mkNewFastString :: ForeignPtr Word8 -> Ptr Word8 -> Int -> Int
+ -> IORef Int -> IO FastString
+mkNewFastString fp ptr len uid n_zencs = do
+ let bs = BS.fromForeignPtr fp 0 len
+ zstr = mkZFastString n_zencs bs
+ n_chars <- countUTF8Chars ptr len
+ return (FastString uid n_chars bs zstr)
+
+mkNewFastStringByteString :: ByteString -> Ptr Word8 -> Int -> Int
+ -> IORef Int -> IO FastString
+mkNewFastStringByteString bs ptr len uid n_zencs = do
+ let zstr = mkZFastString n_zencs bs
+ n_chars <- countUTF8Chars ptr len
+ return (FastString uid n_chars bs zstr)
+
+copyNewFastString :: Ptr Word8 -> Int -> Int -> IORef Int -> IO FastString
+copyNewFastString ptr len uid n_zencs = do
+ fp <- copyBytesToForeignPtr ptr len
+ let bs = BS.fromForeignPtr fp 0 len
+ zstr = mkZFastString n_zencs bs
+ n_chars <- countUTF8Chars ptr len
+ return (FastString uid n_chars bs zstr)
+
+copyBytesToForeignPtr :: Ptr Word8 -> Int -> IO (ForeignPtr Word8)
+copyBytesToForeignPtr ptr len = do
+ fp <- mallocForeignPtrBytes len
+ withForeignPtr fp $ \ptr' -> copyBytes ptr' ptr len
+ return fp
+
+cmpStringPrefix :: Ptr Word8 -> Ptr Word8 -> Int -> IO Bool
+cmpStringPrefix ptr1 ptr2 len =
+ do r <- memcmp ptr1 ptr2 len
+ return (r == 0)
+
+hashStr :: Ptr Word8 -> Int -> Int
+ -- use the Addr to produce a hash value between 0 & m (inclusive)
+hashStr (Ptr a#) (I# len#) = loop 0# 0#
+ where
+ loop h n =
+ if isTrue# (n ==# len#) then
+ I# h
+ else
+ let
+ -- DO NOT move this let binding! indexCharOffAddr# reads from the
+ -- pointer so we need to evaluate this based on the length check
+ -- above. Not doing this right caused #17909.
+ !c = ord# (indexCharOffAddr# a# n)
+ !h2 = (h *# 16777619#) `xorI#` c
+ in
+ loop h2 (n +# 1#)
+
+-- -----------------------------------------------------------------------------
+-- Operations
+
+-- | Returns the length of the 'FastString' in characters
+lengthFS :: FastString -> Int
+lengthFS f = n_chars f
+
+-- | Returns @True@ if the 'FastString' is empty
+nullFS :: FastString -> Bool
+nullFS f = BS.null (fs_bs f)
+
+-- | Unpacks and decodes the FastString
+unpackFS :: FastString -> String
+unpackFS (FastString _ _ bs _) = utf8DecodeByteString bs
+
+-- | Returns a Z-encoded version of a 'FastString'. This might be the
+-- original, if it was already Z-encoded. The first time this
+-- function is applied to a particular 'FastString', the results are
+-- memoized.
+--
+zEncodeFS :: FastString -> FastZString
+zEncodeFS (FastString _ _ _ ref) = ref
+
+appendFS :: FastString -> FastString -> FastString
+appendFS fs1 fs2 = mkFastStringByteString
+ $ BS.append (bytesFS fs1) (bytesFS fs2)
+
+concatFS :: [FastString] -> FastString
+concatFS = mkFastStringByteString . BS.concat . map fs_bs
+
+headFS :: FastString -> Char
+headFS (FastString _ 0 _ _) = panic "headFS: Empty FastString"
+headFS (FastString _ _ bs _) =
+ inlinePerformIO $ BS.unsafeUseAsCString bs $ \ptr ->
+ return (fst (utf8DecodeChar (castPtr ptr)))
+
+tailFS :: FastString -> FastString
+tailFS (FastString _ 0 _ _) = panic "tailFS: Empty FastString"
+tailFS (FastString _ _ bs _) =
+ inlinePerformIO $ BS.unsafeUseAsCString bs $ \ptr ->
+ do let (_, n) = utf8DecodeChar (castPtr ptr)
+ return $! mkFastStringByteString (BS.drop n bs)
+
+consFS :: Char -> FastString -> FastString
+consFS c fs = mkFastString (c : unpackFS fs)
+
+uniqueOfFS :: FastString -> Int
+uniqueOfFS (FastString u _ _ _) = u
+
+nilFS :: FastString
+nilFS = mkFastString ""
+
+isUnderscoreFS :: FastString -> Bool
+isUnderscoreFS fs = fs == fsLit "_"
+
+-- -----------------------------------------------------------------------------
+-- Stats
+
+getFastStringTable :: IO [[[FastString]]]
+getFastStringTable =
+ forM [0 .. numSegments - 1] $ \(I# i#) -> do
+ let (# segmentRef #) = indexArray# segments# i#
+ FastStringTableSegment _ _ buckets# <- readIORef segmentRef
+ let bucketSize = I# (sizeofMutableArray# buckets#)
+ forM [0 .. bucketSize - 1] $ \(I# j#) ->
+ IO $ readArray# buckets# j#
+ where
+ !(FastStringTable _ _ segments#) = stringTable
+
+getFastStringZEncCounter :: IO Int
+getFastStringZEncCounter = readIORef n_zencs
+ where
+ !(FastStringTable _ n_zencs _) = stringTable
+
+-- -----------------------------------------------------------------------------
+-- Outputting 'FastString's
+
+-- |Outputs a 'FastString' with /no decoding at all/, that is, you
+-- get the actual bytes in the 'FastString' written to the 'Handle'.
+hPutFS :: Handle -> FastString -> IO ()
+hPutFS handle fs = BS.hPut handle $ bytesFS fs
+
+-- ToDo: we'll probably want an hPutFSLocal, or something, to output
+-- in the current locale's encoding (for error messages and suchlike).
+
+-- -----------------------------------------------------------------------------
+-- PtrStrings, here for convenience only.
+
+-- | A 'PtrString' is a pointer to some array of Latin-1 encoded chars.
+data PtrString = PtrString !(Ptr Word8) !Int
+
+-- | Wrap an unboxed address into a 'PtrString'.
+mkPtrString# :: Addr# -> PtrString
+mkPtrString# a# = PtrString (Ptr a#) (ptrStrLength (Ptr a#))
+
+-- | Encode a 'String' into a newly allocated 'PtrString' using Latin-1
+-- encoding. The original string must not contain non-Latin-1 characters
+-- (above codepoint @0xff@).
+{-# INLINE mkPtrString #-}
+mkPtrString :: String -> PtrString
+mkPtrString s =
+ -- we don't use `unsafeDupablePerformIO` here to avoid potential memory leaks
+ -- and because someone might be using `eqAddr#` to check for string equality.
+ unsafePerformIO (do
+ let len = length s
+ p <- mallocBytes len
+ let
+ loop :: Int -> String -> IO ()
+ loop !_ [] = return ()
+ loop n (c:cs) = do
+ pokeByteOff p n (fromIntegral (ord c) :: Word8)
+ loop (1+n) cs
+ loop 0 s
+ return (PtrString p len)
+ )
+
+-- | Decode a 'PtrString' back into a 'String' using Latin-1 encoding.
+-- This does not free the memory associated with 'PtrString'.
+unpackPtrString :: PtrString -> String
+unpackPtrString (PtrString (Ptr p#) (I# n#)) = unpackNBytes# p# n#
+
+-- | Return the length of a 'PtrString'
+lengthPS :: PtrString -> Int
+lengthPS (PtrString _ n) = n
+
+-- -----------------------------------------------------------------------------
+-- under the carpet
+
+foreign import ccall unsafe "strlen"
+ ptrStrLength :: Ptr Word8 -> Int
+
+{-# NOINLINE sLit #-}
+sLit :: String -> PtrString
+sLit x = mkPtrString x
+
+{-# NOINLINE fsLit #-}
+fsLit :: String -> FastString
+fsLit x = mkFastString x
+
+{-# RULES "slit"
+ forall x . sLit (unpackCString# x) = mkPtrString# x #-}
+{-# RULES "fslit"
+ forall x . fsLit (unpackCString# x) = mkFastString# x #-}