summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSylvain Henry <sylvain@haskus.fr>2021-09-29 12:40:31 +0200
committerMarge Bot <ben+marge-bot@smart-cactus.org>2021-10-02 05:21:07 -0400
commit8b3d98ff376355317c64763cf619b1c41281b0d9 (patch)
treee461bdaa81a7d1037abc239887876704bb30c30b
parent4bdafb48b40afd1eb185c1312302e5759f796472 (diff)
downloadhaskell-8b3d98ff376355317c64763cf619b1c41281b0d9.tar.gz
Don't use FastString for UTF-8 encoding only
-rw-r--r--compiler/GHC/CmmToAsm/Dwarf/Types.hs2
-rw-r--r--compiler/GHC/Data/FastString.hs12
-rw-r--r--compiler/GHC/Types/Literal.hs4
3 files changed, 14 insertions, 4 deletions
diff --git a/compiler/GHC/CmmToAsm/Dwarf/Types.hs b/compiler/GHC/CmmToAsm/Dwarf/Types.hs
index 9b9015fab9..f8f0ae5c44 100644
--- a/compiler/GHC/CmmToAsm/Dwarf/Types.hs
+++ b/compiler/GHC/CmmToAsm/Dwarf/Types.hs
@@ -600,7 +600,7 @@ pprString str
= pprString' $ hcat $ map escapeChar $
if str `lengthIs` utf8EncodedLength str
then str
- else map (chr . fromIntegral) $ BS.unpack $ bytesFS $ mkFastString str
+ else map (chr . fromIntegral) $ BS.unpack $ utf8EncodeString str
-- | Escape a single non-unicode character
escapeChar :: Char -> SDoc
diff --git a/compiler/GHC/Data/FastString.hs b/compiler/GHC/Data/FastString.hs
index 9ed0a38df3..262ddd3ada 100644
--- a/compiler/GHC/Data/FastString.hs
+++ b/compiler/GHC/Data/FastString.hs
@@ -155,10 +155,11 @@ import GHC.IO
-- | Gives the Modified UTF-8 encoded bytes corresponding to a 'FastString'
bytesFS, fastStringToByteString :: FastString -> ByteString
-bytesFS = fastStringToByteString
+{-# INLINE[1] bytesFS #-}
+bytesFS f = SBS.fromShort $ fs_sbs f
{-# DEPRECATED fastStringToByteString "Use `bytesFS` instead" #-}
-fastStringToByteString f = SBS.fromShort $ fs_sbs f
+fastStringToByteString = bytesFS
fastStringToShortByteString :: FastString -> ShortByteString
fastStringToShortByteString = fs_sbs
@@ -529,11 +530,18 @@ mkFastStringShortByteString sbs =
-- | Creates a UTF-8 encoded 'FastString' from a 'String'
mkFastString :: String -> FastString
+{-# NOINLINE[1] mkFastString #-}
mkFastString str =
inlinePerformIO $ do
sbs <- utf8EncodeShortByteString str
mkFastStringWith (mkNewFastStringShortByteString sbs) sbs
+-- The following rule is used to avoid polluting the non-reclaimable FastString
+-- table with transient strings when we only want their encoding.
+{-# RULES
+"bytesFS/mkFastString" forall x. bytesFS (mkFastString x) = utf8EncodeString x
+#-}
+
-- | Creates a 'FastString' from a UTF-8 encoded @[Word8]@
mkFastStringByteList :: [Word8] -> FastString
mkFastStringByteList str = mkFastStringShortByteString (SBS.pack str)
diff --git a/compiler/GHC/Types/Literal.hs b/compiler/GHC/Types/Literal.hs
index 4b6775cc58..7572b5a660 100644
--- a/compiler/GHC/Types/Literal.hs
+++ b/compiler/GHC/Types/Literal.hs
@@ -75,6 +75,7 @@ import GHC.Utils.Binary
import GHC.Settings.Constants
import GHC.Platform
import GHC.Utils.Panic
+import GHC.Utils.Encoding
import Data.ByteString (ByteString)
import Data.Int
@@ -576,7 +577,8 @@ mkLitChar = LitChar
-- e.g. some of the \"error\" functions in GHC.Err such as @GHC.Err.runtimeError@
mkLitString :: String -> Literal
-- stored UTF-8 encoded
-mkLitString s = LitString (bytesFS $ mkFastString s)
+mkLitString [] = LitString mempty
+mkLitString s = LitString (utf8EncodeString s)
mkLitBigNat :: Integer -> Literal
mkLitBigNat x = assertPpr (x >= 0) (integer x)