Modules: Utils and Data (#13009)

Update Haddock submodule Metric Increase: haddock.compiler
author: Sylvain Henry <sylvain@haskus.fr> 2020-04-20 16:54:38 +0200
committer: Marge Bot <ben+marge-bot@smart-cactus.org> 2020-04-26 13:55:14 -0400
commit: af332442123878c1b61d236dce46418efcbe8750 (patch)
tree: ec4b332843cdd4fedb4aa60b11b7b8dba82a0764 /compiler/GHC/Data/FastString.hs
parent: b0fbfc7582fb81314dc28a056536737fb5eeaa6e (diff)
download: haskell-af332442123878c1b61d236dce46418efcbe8750.tar.gz
1 files changed, 693 insertions, 0 deletions
diff --git a/compiler/GHC/Data/FastString.hs b/compiler/GHC/Data/FastString.hs
new file mode 100644
index 0000000000..82f38601f5
--- /dev/null
+++ b/compiler/GHC/Data/FastString.hs
@@ -0,0 +1,693 @@
+-- (c) The University of Glasgow, 1997-2006
+
+{-# LANGUAGE BangPatterns, CPP, MagicHash, UnboxedTuples,
+    GeneralizedNewtypeDeriving #-}
+{-# OPTIONS_GHC -O2 -funbox-strict-fields #-}
+-- We always optimise this, otherwise performance of a non-optimised
+-- compiler is severely affected
+
+-- |
+-- There are two principal string types used internally by GHC:
+--
+-- ['FastString']
+--
+--   * A compact, hash-consed, representation of character strings.
+--   * Comparison is O(1), and you can get a 'Unique.Unique' from them.
+--   * Generated by 'fsLit'.
+--   * Turn into 'Outputable.SDoc' with 'Outputable.ftext'.
+--
+-- ['PtrString']
+--
+--   * Pointer and size of a Latin-1 encoded string.
+--   * Practically no operations.
+--   * Outputting them is fast.
+--   * Generated by 'sLit'.
+--   * Turn into 'Outputable.SDoc' with 'Outputable.ptext'
+--   * Requires manual memory management.
+--     Improper use may lead to memory leaks or dangling pointers.
+--   * It assumes Latin-1 as the encoding, therefore it cannot represent
+--     arbitrary Unicode strings.
+--
+-- Use 'PtrString' unless you want the facilities of 'FastString'.
+module GHC.Data.FastString
+       (
+        -- * ByteString
+        bytesFS,            -- :: FastString -> ByteString
+        fastStringToByteString, -- = bytesFS (kept for haddock)
+        mkFastStringByteString,
+        fastZStringToByteString,
+        unsafeMkByteString,
+
+        -- * FastZString
+        FastZString,
+        hPutFZS,
+        zString,
+        lengthFZS,
+
+        -- * FastStrings
+        FastString(..),     -- not abstract, for now.
+
+        -- ** Construction
+        fsLit,
+        mkFastString,
+        mkFastStringBytes,
+        mkFastStringByteList,
+        mkFastStringForeignPtr,
+        mkFastString#,
+
+        -- ** Deconstruction
+        unpackFS,           -- :: FastString -> String
+
+        -- ** Encoding
+        zEncodeFS,
+
+        -- ** Operations
+        uniqueOfFS,
+        lengthFS,
+        nullFS,
+        appendFS,
+        headFS,
+        tailFS,
+        concatFS,
+        consFS,
+        nilFS,
+        isUnderscoreFS,
+
+        -- ** Outputting
+        hPutFS,
+
+        -- ** Internal
+        getFastStringTable,
+        getFastStringZEncCounter,
+
+        -- * PtrStrings
+        PtrString (..),
+
+        -- ** Construction
+        sLit,
+        mkPtrString#,
+        mkPtrString,
+
+        -- ** Deconstruction
+        unpackPtrString,
+
+        -- ** Operations
+        lengthPS
+       ) where
+
+#include "HsVersions.h"
+
+import GHC.Prelude as Prelude
+
+import GHC.Utils.Encoding
+import GHC.Utils.IO.Unsafe
+import GHC.Utils.Panic.Plain
+import GHC.Utils.Misc
+
+import Control.Concurrent.MVar
+import Control.DeepSeq
+import Control.Monad
+import Data.ByteString (ByteString)
+import qualified Data.ByteString          as BS
+import qualified Data.ByteString.Char8    as BSC
+import qualified Data.ByteString.Internal as BS
+import qualified Data.ByteString.Unsafe   as BS
+import Foreign.C
+import GHC.Exts
+import System.IO
+import Data.Data
+import Data.IORef
+import Data.Char
+import Data.Semigroup as Semi
+
+import GHC.IO
+
+import Foreign
+
+#if GHC_STAGE >= 2
+import GHC.Conc.Sync    (sharedCAF)
+#endif
+
+import GHC.Base         ( unpackCString#, unpackNBytes# )
+
+
+-- | Gives the UTF-8 encoded bytes corresponding to a 'FastString'
+bytesFS :: FastString -> ByteString
+bytesFS f = fs_bs f
+
+{-# DEPRECATED fastStringToByteString "Use `bytesFS` instead" #-}
+fastStringToByteString :: FastString -> ByteString
+fastStringToByteString = bytesFS
+
+fastZStringToByteString :: FastZString -> ByteString
+fastZStringToByteString (FastZString bs) = bs
+
+-- This will drop information if any character > '\xFF'
+unsafeMkByteString :: String -> ByteString
+unsafeMkByteString = BSC.pack
+
+hashFastString :: FastString -> Int
+hashFastString (FastString _ _ bs _)
+    = inlinePerformIO $ BS.unsafeUseAsCStringLen bs $ \(ptr, len) ->
+      return $ hashStr (castPtr ptr) len
+
+-- -----------------------------------------------------------------------------
+
+newtype FastZString = FastZString ByteString
+  deriving NFData
+
+hPutFZS :: Handle -> FastZString -> IO ()
+hPutFZS handle (FastZString bs) = BS.hPut handle bs
+
+zString :: FastZString -> String
+zString (FastZString bs) =
+    inlinePerformIO $ BS.unsafeUseAsCStringLen bs peekCAStringLen
+
+lengthFZS :: FastZString -> Int
+lengthFZS (FastZString bs) = BS.length bs
+
+mkFastZStringString :: String -> FastZString
+mkFastZStringString str = FastZString (BSC.pack str)
+
+-- -----------------------------------------------------------------------------
+
+{-| A 'FastString' is a UTF-8 encoded string together with a unique ID. All
+'FastString's are stored in a global hashtable to support fast O(1)
+comparison.
+
+It is also associated with a lazy reference to the Z-encoding
+of this string which is used by the compiler internally.
+-}
+data FastString = FastString {
+      uniq    :: {-# UNPACK #-} !Int, -- unique id
+      n_chars :: {-# UNPACK #-} !Int, -- number of chars
+      fs_bs   :: {-# UNPACK #-} !ByteString,
+      fs_zenc :: FastZString
+      -- ^ Lazily computed z-encoding of this string.
+      --
+      -- Since 'FastString's are globally memoized this is computed at most
+      -- once for any given string.
+  }
+
+instance Eq FastString where
+  f1 == f2  =  uniq f1 == uniq f2
+
+instance Ord FastString where
+    -- Compares lexicographically, not by unique
+    a <= b = case cmpFS a b of { LT -> True;  EQ -> True;  GT -> False }
+    a <  b = case cmpFS a b of { LT -> True;  EQ -> False; GT -> False }
+    a >= b = case cmpFS a b of { LT -> False; EQ -> True;  GT -> True  }
+    a >  b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True  }
+    max x y | x >= y    =  x
+            | otherwise =  y
+    min x y | x <= y    =  x
+            | otherwise =  y
+    compare a b = cmpFS a b
+
+instance IsString FastString where
+    fromString = fsLit
+
+instance Semi.Semigroup FastString where
+    (<>) = appendFS
+
+instance Monoid FastString where
+    mempty = nilFS
+    mappend = (Semi.<>)
+    mconcat = concatFS
+
+instance Show FastString where
+   show fs = show (unpackFS fs)
+
+instance Data FastString where
+  -- don't traverse?
+  toConstr _   = abstractConstr "FastString"
+  gunfold _ _  = error "gunfold"
+  dataTypeOf _ = mkNoRepType "FastString"
+
+instance NFData FastString where
+  rnf fs = seq fs ()
+
+cmpFS :: FastString -> FastString -> Ordering
+cmpFS f1@(FastString u1 _ _ _) f2@(FastString u2 _ _ _) =
+  if u1 == u2 then EQ else
+  compare (bytesFS f1) (bytesFS f2)
+
+foreign import ccall unsafe "memcmp"
+  memcmp :: Ptr a -> Ptr b -> Int -> IO Int
+
+-- -----------------------------------------------------------------------------
+-- Construction
+
+{-
+Internally, the compiler will maintain a fast string symbol table, providing
+sharing and fast comparison. Creation of new @FastString@s then covertly does a
+lookup, re-using the @FastString@ if there was a hit.
+
+The design of the FastString hash table allows for lockless concurrent reads
+and updates to multiple buckets with low synchronization overhead.
+
+See Note [Updating the FastString table] on how it's updated.
+-}
+data FastStringTable = FastStringTable
+  {-# UNPACK #-} !(IORef Int) -- the unique ID counter shared with all buckets
+  {-# UNPACK #-} !(IORef Int) -- number of computed z-encodings for all buckets
+  (Array# (IORef FastStringTableSegment)) -- concurrent segments
+
+data FastStringTableSegment = FastStringTableSegment
+  {-# UNPACK #-} !(MVar ()) -- the lock for write in each segment
+  {-# UNPACK #-} !(IORef Int) -- the number of elements
+  (MutableArray# RealWorld [FastString]) -- buckets in this segment
+
+{-
+Following parameters are determined based on:
+
+* Benchmark based on testsuite/tests/utils/should_run/T14854.hs
+* Stats of @echo :browse | ghc --interactive -dfaststring-stats >/dev/null@:
+  on 2018-10-24, we have 13920 entries.
+-}
+segmentBits, numSegments, segmentMask, initialNumBuckets :: Int
+segmentBits = 8
+numSegments = 256   -- bit segmentBits
+segmentMask = 0xff  -- bit segmentBits - 1
+initialNumBuckets = 64
+
+hashToSegment# :: Int# -> Int#
+hashToSegment# hash# = hash# `andI#` segmentMask#
+  where
+    !(I# segmentMask#) = segmentMask
+
+hashToIndex# :: MutableArray# RealWorld [FastString] -> Int# -> Int#
+hashToIndex# buckets# hash# =
+  (hash# `uncheckedIShiftRL#` segmentBits#) `remInt#` size#
+  where
+    !(I# segmentBits#) = segmentBits
+    size# = sizeofMutableArray# buckets#
+
+maybeResizeSegment :: IORef FastStringTableSegment -> IO FastStringTableSegment
+maybeResizeSegment segmentRef = do
+  segment@(FastStringTableSegment lock counter old#) <- readIORef segmentRef
+  let oldSize# = sizeofMutableArray# old#
+      newSize# = oldSize# *# 2#
+  (I# n#) <- readIORef counter
+  if isTrue# (n# <# newSize#) -- maximum load of 1
+  then return segment
+  else do
+    resizedSegment@(FastStringTableSegment _ _ new#) <- IO $ \s1# ->
+      case newArray# newSize# [] s1# of
+        (# s2#, arr# #) -> (# s2#, FastStringTableSegment lock counter arr# #)
+    forM_ [0 .. (I# oldSize#) - 1] $ \(I# i#) -> do
+      fsList <- IO $ readArray# old# i#
+      forM_ fsList $ \fs -> do
+        let -- Shall we store in hash value in FastString instead?
+            !(I# hash#) = hashFastString fs
+            idx# = hashToIndex# new# hash#
+        IO $ \s1# ->
+          case readArray# new# idx# s1# of
+            (# s2#, bucket #) -> case writeArray# new# idx# (fs: bucket) s2# of
+              s3# -> (# s3#, () #)
+    writeIORef segmentRef resizedSegment
+    return resizedSegment
+
+{-# NOINLINE stringTable #-}
+stringTable :: FastStringTable
+stringTable = unsafePerformIO $ do
+  let !(I# numSegments#) = numSegments
+      !(I# initialNumBuckets#) = initialNumBuckets
+      loop a# i# s1#
+        | isTrue# (i# ==# numSegments#) = s1#
+        | otherwise = case newMVar () `unIO` s1# of
+            (# s2#, lock #) -> case newIORef 0 `unIO` s2# of
+              (# s3#, counter #) -> case newArray# initialNumBuckets# [] s3# of
+                (# s4#, buckets# #) -> case newIORef
+                    (FastStringTableSegment lock counter buckets#) `unIO` s4# of
+                  (# s5#, segment #) -> case writeArray# a# i# segment s5# of
+                    s6# -> loop a# (i# +# 1#) s6#
+  uid <- newIORef 603979776 -- ord '$' * 0x01000000
+  n_zencs <- newIORef 0
+  tab <- IO $ \s1# ->
+    case newArray# numSegments# (panic "string_table") s1# of
+      (# s2#, arr# #) -> case loop arr# 0# s2# of
+        s3# -> case unsafeFreezeArray# arr# s3# of
+          (# s4#, segments# #) ->
+            (# s4#, FastStringTable uid n_zencs segments# #)
+
+  -- use the support wired into the RTS to share this CAF among all images of
+  -- libHSghc
+#if GHC_STAGE < 2
+  return tab
+#else
+  sharedCAF tab getOrSetLibHSghcFastStringTable
+
+-- from the RTS; thus we cannot use this mechanism when GHC_STAGE<2; the previous
+-- RTS might not have this symbol
+foreign import ccall unsafe "getOrSetLibHSghcFastStringTable"
+  getOrSetLibHSghcFastStringTable :: Ptr a -> IO (Ptr a)
+#endif
+
+{-
+
+We include the FastString table in the `sharedCAF` mechanism because we'd like
+FastStrings created by a Core plugin to have the same uniques as corresponding
+strings created by the host compiler itself.  For example, this allows plugins
+to lookup known names (eg `mkTcOcc "MySpecialType"`) in the GlobalRdrEnv or
+even re-invoke the parser.
+
+In particular, the following little sanity test was failing in a plugin
+prototyping safe newtype-coercions: GHC.NT.Type.NT was imported, but could not
+be looked up /by the plugin/.
+
+   let rdrName = mkModuleName "GHC.NT.Type" `mkRdrQual` mkTcOcc "NT"
+   putMsgS $ showSDoc dflags $ ppr $ lookupGRE_RdrName rdrName $ mg_rdr_env guts
+
+`mkTcOcc` involves the lookup (or creation) of a FastString.  Since the
+plugin's FastString.string_table is empty, constructing the RdrName also
+allocates new uniques for the FastStrings "GHC.NT.Type" and "NT".  These
+uniques are almost certainly unequal to the ones that the host compiler
+originally assigned to those FastStrings.  Thus the lookup fails since the
+domain of the GlobalRdrEnv is affected by the RdrName's OccName's FastString's
+unique.
+
+Maintaining synchronization of the two instances of this global is rather
+difficult because of the uses of `unsafePerformIO` in this module.  Not
+synchronizing them risks breaking the rather major invariant that two
+FastStrings with the same unique have the same string. Thus we use the
+lower-level `sharedCAF` mechanism that relies on Globals.c.
+
+-}
+
+mkFastString# :: Addr# -> FastString
+mkFastString# a# = mkFastStringBytes ptr (ptrStrLength ptr)
+  where ptr = Ptr a#
+
+{- Note [Updating the FastString table]
+
+We use a concurrent hashtable which contains multiple segments, each hash value
+always maps to the same segment. Read is lock-free, write to the a segment
+should acquire a lock for that segment to avoid race condition, writes to
+different segments are independent.
+
+The procedure goes like this:
+
+1. Find out which segment to operate on based on the hash value
+2. Read the relevant bucket and perform a look up of the string.
+3. If it exists, return it.
+4. Otherwise grab a unique ID, create a new FastString and atomically attempt
+   to update the relevant segment with this FastString:
+
+   * Resize the segment by doubling the number of buckets when the number of
+     FastStrings in this segment grows beyond the threshold.
+   * Double check that the string is not in the bucket. Another thread may have
+     inserted it while we were creating our string.
+   * Return the existing FastString if it exists. The one we preemptively
+     created will get GCed.
+   * Otherwise, insert and return the string we created.
+-}
+
+mkFastStringWith
+    :: (Int -> IORef Int-> IO FastString) -> Ptr Word8 -> Int -> IO FastString
+mkFastStringWith mk_fs !ptr !len = do
+  FastStringTableSegment lock _ buckets# <- readIORef segmentRef
+  let idx# = hashToIndex# buckets# hash#
+  bucket <- IO $ readArray# buckets# idx#
+  res <- bucket_match bucket len ptr
+  case res of
+    Just found -> return found
+    Nothing -> do
+      -- The withMVar below is not dupable. It can lead to deadlock if it is
+      -- only run partially and putMVar is not called after takeMVar.
+      noDuplicate
+      n <- get_uid
+      new_fs <- mk_fs n n_zencs
+      withMVar lock $ \_ -> insert new_fs
+  where
+    !(FastStringTable uid n_zencs segments#) = stringTable
+    get_uid = atomicModifyIORef' uid $ \n -> (n+1,n)
+
+    !(I# hash#) = hashStr ptr len
+    (# segmentRef #) = indexArray# segments# (hashToSegment# hash#)
+    insert fs = do
+      FastStringTableSegment _ counter buckets# <- maybeResizeSegment segmentRef
+      let idx# = hashToIndex# buckets# hash#
+      bucket <- IO $ readArray# buckets# idx#
+      res <- bucket_match bucket len ptr
+      case res of
+        -- The FastString was added by another thread after previous read and
+        -- before we acquired the write lock.
+        Just found -> return found
+        Nothing -> do
+          IO $ \s1# ->
+            case writeArray# buckets# idx# (fs: bucket) s1# of
+              s2# -> (# s2#, () #)
+          modifyIORef' counter succ
+          return fs
+
+bucket_match :: [FastString] -> Int -> Ptr Word8 -> IO (Maybe FastString)
+bucket_match [] _ _ = return Nothing
+bucket_match (v@(FastString _ _ bs _):ls) len ptr
+      | len == BS.length bs = do
+         b <- BS.unsafeUseAsCString bs $ \buf ->
+             cmpStringPrefix ptr (castPtr buf) len
+         if b then return (Just v)
+              else bucket_match ls len ptr
+      | otherwise =
+         bucket_match ls len ptr
+
+mkFastStringBytes :: Ptr Word8 -> Int -> FastString
+mkFastStringBytes !ptr !len =
+    -- NB: Might as well use unsafeDupablePerformIO, since mkFastStringWith is
+    -- idempotent.
+    unsafeDupablePerformIO $
+        mkFastStringWith (copyNewFastString ptr len) ptr len
+
+-- | Create a 'FastString' from an existing 'ForeignPtr'; the difference
+-- between this and 'mkFastStringBytes' is that we don't have to copy
+-- the bytes if the string is new to the table.
+mkFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
+mkFastStringForeignPtr ptr !fp len
+    = mkFastStringWith (mkNewFastString fp ptr len) ptr len
+
+-- | Create a 'FastString' from an existing 'ForeignPtr'; the difference
+-- between this and 'mkFastStringBytes' is that we don't have to copy
+-- the bytes if the string is new to the table.
+mkFastStringByteString :: ByteString -> FastString
+mkFastStringByteString bs =
+    inlinePerformIO $
+      BS.unsafeUseAsCStringLen bs $ \(ptr, len) -> do
+        let ptr' = castPtr ptr
+        mkFastStringWith (mkNewFastStringByteString bs ptr' len) ptr' len
+
+-- | Creates a UTF-8 encoded 'FastString' from a 'String'
+mkFastString :: String -> FastString
+mkFastString str =
+  inlinePerformIO $ do
+    let l = utf8EncodedLength str
+    buf <- mallocForeignPtrBytes l
+    withForeignPtr buf $ \ptr -> do
+      utf8EncodeString ptr str
+      mkFastStringForeignPtr ptr buf l
+
+-- | Creates a 'FastString' from a UTF-8 encoded @[Word8]@
+mkFastStringByteList :: [Word8] -> FastString
+mkFastStringByteList str = mkFastStringByteString (BS.pack str)
+
+-- | Creates a (lazy) Z-encoded 'FastString' from a 'String' and account
+-- the number of forced z-strings into the passed 'IORef'.
+mkZFastString :: IORef Int -> ByteString -> FastZString
+mkZFastString n_zencs bs = unsafePerformIO $ do
+  atomicModifyIORef' n_zencs $ \n -> (n+1, ())
+  return $ mkFastZStringString (zEncodeString (utf8DecodeByteString bs))
+
+mkNewFastString :: ForeignPtr Word8 -> Ptr Word8 -> Int -> Int
+                -> IORef Int -> IO FastString
+mkNewFastString fp ptr len uid n_zencs = do
+  let bs = BS.fromForeignPtr fp 0 len
+      zstr = mkZFastString n_zencs bs
+  n_chars <- countUTF8Chars ptr len
+  return (FastString uid n_chars bs zstr)
+
+mkNewFastStringByteString :: ByteString -> Ptr Word8 -> Int -> Int
+                          -> IORef Int -> IO FastString
+mkNewFastStringByteString bs ptr len uid n_zencs = do
+  let zstr = mkZFastString n_zencs bs
+  n_chars <- countUTF8Chars ptr len
+  return (FastString uid n_chars bs zstr)
+
+copyNewFastString :: Ptr Word8 -> Int -> Int -> IORef Int -> IO FastString
+copyNewFastString ptr len uid n_zencs = do
+  fp <- copyBytesToForeignPtr ptr len
+  let bs = BS.fromForeignPtr fp 0 len
+      zstr = mkZFastString n_zencs bs
+  n_chars <- countUTF8Chars ptr len
+  return (FastString uid n_chars bs zstr)
+
+copyBytesToForeignPtr :: Ptr Word8 -> Int -> IO (ForeignPtr Word8)
+copyBytesToForeignPtr ptr len = do
+  fp <- mallocForeignPtrBytes len
+  withForeignPtr fp $ \ptr' -> copyBytes ptr' ptr len
+  return fp
+
+cmpStringPrefix :: Ptr Word8 -> Ptr Word8 -> Int -> IO Bool
+cmpStringPrefix ptr1 ptr2 len =
+ do r <- memcmp ptr1 ptr2 len
+    return (r == 0)
+
+hashStr  :: Ptr Word8 -> Int -> Int
+ -- use the Addr to produce a hash value between 0 & m (inclusive)
+hashStr (Ptr a#) (I# len#) = loop 0# 0#
+  where
+    loop h n =
+      if isTrue# (n ==# len#) then
+        I# h
+      else
+        let
+          -- DO NOT move this let binding! indexCharOffAddr# reads from the
+          -- pointer so we need to evaluate this based on the length check
+          -- above. Not doing this right caused #17909.
+          !c = ord# (indexCharOffAddr# a# n)
+          !h2 = (h *# 16777619#) `xorI#` c
+        in
+          loop h2 (n +# 1#)
+
+-- -----------------------------------------------------------------------------
+-- Operations
+
+-- | Returns the length of the 'FastString' in characters
+lengthFS :: FastString -> Int
+lengthFS f = n_chars f
+
+-- | Returns @True@ if the 'FastString' is empty
+nullFS :: FastString -> Bool
+nullFS f = BS.null (fs_bs f)
+
+-- | Unpacks and decodes the FastString
+unpackFS :: FastString -> String
+unpackFS (FastString _ _ bs _) = utf8DecodeByteString bs
+
+-- | Returns a Z-encoded version of a 'FastString'.  This might be the
+-- original, if it was already Z-encoded.  The first time this
+-- function is applied to a particular 'FastString', the results are
+-- memoized.
+--
+zEncodeFS :: FastString -> FastZString
+zEncodeFS (FastString _ _ _ ref) = ref
+
+appendFS :: FastString -> FastString -> FastString
+appendFS fs1 fs2 = mkFastStringByteString
+                 $ BS.append (bytesFS fs1) (bytesFS fs2)
+
+concatFS :: [FastString] -> FastString
+concatFS = mkFastStringByteString . BS.concat . map fs_bs
+
+headFS :: FastString -> Char
+headFS (FastString _ 0 _ _) = panic "headFS: Empty FastString"
+headFS (FastString _ _ bs _) =
+  inlinePerformIO $ BS.unsafeUseAsCString bs $ \ptr ->
+         return (fst (utf8DecodeChar (castPtr ptr)))
+
+tailFS :: FastString -> FastString
+tailFS (FastString _ 0 _ _) = panic "tailFS: Empty FastString"
+tailFS (FastString _ _ bs _) =
+    inlinePerformIO $ BS.unsafeUseAsCString bs $ \ptr ->
+    do let (_, n) = utf8DecodeChar (castPtr ptr)
+       return $! mkFastStringByteString (BS.drop n bs)
+
+consFS :: Char -> FastString -> FastString
+consFS c fs = mkFastString (c : unpackFS fs)
+
+uniqueOfFS :: FastString -> Int
+uniqueOfFS (FastString u _ _ _) = u
+
+nilFS :: FastString
+nilFS = mkFastString ""
+
+isUnderscoreFS :: FastString -> Bool
+isUnderscoreFS fs = fs == fsLit "_"
+
+-- -----------------------------------------------------------------------------
+-- Stats
+
+getFastStringTable :: IO [[[FastString]]]
+getFastStringTable =
+  forM [0 .. numSegments - 1] $ \(I# i#) -> do
+    let (# segmentRef #) = indexArray# segments# i#
+    FastStringTableSegment _ _ buckets# <- readIORef segmentRef
+    let bucketSize = I# (sizeofMutableArray# buckets#)
+    forM [0 .. bucketSize - 1] $ \(I# j#) ->
+      IO $ readArray# buckets# j#
+  where
+    !(FastStringTable _ _ segments#) = stringTable
+
+getFastStringZEncCounter :: IO Int
+getFastStringZEncCounter = readIORef n_zencs
+  where
+    !(FastStringTable _ n_zencs _) = stringTable
+
+-- -----------------------------------------------------------------------------
+-- Outputting 'FastString's
+
+-- |Outputs a 'FastString' with /no decoding at all/, that is, you
+-- get the actual bytes in the 'FastString' written to the 'Handle'.
+hPutFS :: Handle -> FastString -> IO ()
+hPutFS handle fs = BS.hPut handle $ bytesFS fs
+
+-- ToDo: we'll probably want an hPutFSLocal, or something, to output
+-- in the current locale's encoding (for error messages and suchlike).
+
+-- -----------------------------------------------------------------------------
+-- PtrStrings, here for convenience only.
+
+-- | A 'PtrString' is a pointer to some array of Latin-1 encoded chars.
+data PtrString = PtrString !(Ptr Word8) !Int
+
+-- | Wrap an unboxed address into a 'PtrString'.
+mkPtrString# :: Addr# -> PtrString
+mkPtrString# a# = PtrString (Ptr a#) (ptrStrLength (Ptr a#))
+
+-- | Encode a 'String' into a newly allocated 'PtrString' using Latin-1
+-- encoding.  The original string must not contain non-Latin-1 characters
+-- (above codepoint @0xff@).
+{-# INLINE mkPtrString #-}
+mkPtrString :: String -> PtrString
+mkPtrString s =
+ -- we don't use `unsafeDupablePerformIO` here to avoid potential memory leaks
+ -- and because someone might be using `eqAddr#` to check for string equality.
+ unsafePerformIO (do
+   let len = length s
+   p <- mallocBytes len
+   let
+     loop :: Int -> String -> IO ()
+     loop !_ []    = return ()
+     loop n (c:cs) = do
+        pokeByteOff p n (fromIntegral (ord c) :: Word8)
+        loop (1+n) cs
+   loop 0 s
+   return (PtrString p len)
+ )
+
+-- | Decode a 'PtrString' back into a 'String' using Latin-1 encoding.
+-- This does not free the memory associated with 'PtrString'.
+unpackPtrString :: PtrString -> String
+unpackPtrString (PtrString (Ptr p#) (I# n#)) = unpackNBytes# p# n#
+
+-- | Return the length of a 'PtrString'
+lengthPS :: PtrString -> Int
+lengthPS (PtrString _ n) = n
+
+-- -----------------------------------------------------------------------------
+-- under the carpet
+
+foreign import ccall unsafe "strlen"
+  ptrStrLength :: Ptr Word8 -> Int
+
+{-# NOINLINE sLit #-}
+sLit :: String -> PtrString
+sLit x  = mkPtrString x
+
+{-# NOINLINE fsLit #-}
+fsLit :: String -> FastString
+fsLit x = mkFastString x
+
+{-# RULES "slit"
+    forall x . sLit  (unpackCString# x) = mkPtrString#  x #-}
+{-# RULES "fslit"
+    forall x . fsLit (unpackCString# x) = mkFastString# x #-}
author	Sylvain Henry <sylvain@haskus.fr>	2020-04-20 16:54:38 +0200
committer	Marge Bot <ben+marge-bot@smart-cactus.org>	2020-04-26 13:55:14 -0400
commit	af332442123878c1b61d236dce46418efcbe8750 (patch)
tree	ec4b332843cdd4fedb4aa60b11b7b8dba82a0764 /compiler/GHC/Data/FastString.hs
parent	b0fbfc7582fb81314dc28a056536737fb5eeaa6e (diff)
download	haskell-af332442123878c1b61d236dce46418efcbe8750.tar.gz