diff options
author | Ben Gamari <ben@smart-cactus.org> | 2020-10-23 14:16:58 -0400 |
---|---|---|
committer | Ben Gamari <ben@smart-cactus.org> | 2020-11-29 18:26:15 -0500 |
commit | b3027eb0a1c1b3cedbb54b0fc8d0924283a7cd25 (patch) | |
tree | 428241e11a6a5d15592964d049344b85d8160cf4 /libraries | |
parent | f40b662b9ea555bab6e9729f4165eaca7021d322 (diff) | |
download | haskell-wip/stringbuffer.tar.gz |
StringBuffer: Rid it of ForeignPtrswip/stringbuffer
Bumps haddock submodule.
Diffstat (limited to 'libraries')
-rw-r--r-- | libraries/ghc-boot/GHC/Utils/Encoding.hs | 38 |
1 files changed, 28 insertions, 10 deletions
diff --git a/libraries/ghc-boot/GHC/Utils/Encoding.hs b/libraries/ghc-boot/GHC/Utils/Encoding.hs index 0f84be189b..38dfc69fbb 100644 --- a/libraries/ghc-boot/GHC/Utils/Encoding.hs +++ b/libraries/ghc-boot/GHC/Utils/Encoding.hs @@ -17,12 +17,15 @@ module GHC.Utils.Encoding ( -- * UTF-8 utf8DecodeCharAddr#, + utf8DecodeCharByteArray#, utf8PrevChar, utf8CharStart, utf8DecodeChar, utf8DecodeByteString, + utf8DecodeByteArray, utf8DecodeShortByteString, utf8CompareShortByteString, + utf8DecodeByteArrayLazy#, utf8DecodeStringLazy, utf8EncodeChar, utf8EncodeString, @@ -53,6 +56,7 @@ import Data.ByteString (ByteString) import qualified Data.ByteString.Internal as BS import Data.ByteString.Short.Internal (ShortByteString(..)) +import GHC.Word import GHC.Exts -- ----------------------------------------------------------------------------- @@ -131,15 +135,20 @@ utf8DecodeChar !(Ptr a#) = -- the start of the current character is, given any position in a -- stream. This function finds the start of the previous character, -- assuming there *is* a previous character. -utf8PrevChar :: Ptr Word8 -> IO (Ptr Word8) -utf8PrevChar p = utf8CharStart (p `plusPtr` (-1)) +utf8PrevChar :: ByteArray# -> Int -> Int +utf8PrevChar arr ofs = utf8CharStart arr (ofs - 1) -utf8CharStart :: Ptr Word8 -> IO (Ptr Word8) -utf8CharStart p = go p - where go p = do w <- peek p - if w >= 0x80 && w < 0xC0 - then go (p `plusPtr` (-1)) - else return p +utf8CharStart :: ByteArray# -> Int -> Int +utf8CharStart = go + where + go arr ofs@(I# ofs#) + | True + , ofs < 0 || ofs > I# (sizeofByteArray# arr) + = error "utf8CharStart: overflow" + | w >= 0x80 && w < 0xC0 = go arr (ofs - 1) + | otherwise = ofs + where + w = W8# (indexWord8Array# arr ofs#) {-# INLINE utf8DecodeLazy# #-} utf8DecodeLazy# :: (IO ()) -> (Int# -> (# Char#, Int# #)) -> Int# -> IO [Char] @@ -158,6 +167,12 @@ utf8DecodeByteString :: ByteString -> [Char] utf8DecodeByteString (BS.PS fptr offset len) = utf8DecodeStringLazy fptr offset len +utf8DecodeByteArrayLazy# :: ByteArray# -> Int# -> Int# -> [Char] +utf8DecodeByteArrayLazy# a# offset# len# + = unsafeDupablePerformIO $ + let decodeChar i = utf8DecodeCharByteArray# a# (i +# offset#) + in utf8DecodeLazy# (return ()) decodeChar len# + utf8DecodeStringLazy :: ForeignPtr Word8 -> Int -> Int -> [Char] utf8DecodeStringLazy fp offset (I# len#) = unsafeDupablePerformIO $ do @@ -200,12 +215,15 @@ utf8CompareShortByteString (SBS a1) (SBS a2) = go 0# 0# | isTrue# (b1_1 `ltWord#` b2_1) -> LT | otherwise -> go (off1 +# 1#) (off2 +# 1#) -utf8DecodeShortByteString :: ShortByteString -> [Char] -utf8DecodeShortByteString (SBS ba#) +utf8DecodeByteArray :: ByteArray# -> [Char] +utf8DecodeByteArray ba# = unsafeDupablePerformIO $ let len# = sizeofByteArray# ba# in utf8DecodeLazy# (return ()) (utf8DecodeCharByteArray# ba#) len# +utf8DecodeShortByteString :: ShortByteString -> [Char] +utf8DecodeShortByteString (SBS ba#) = utf8DecodeByteArray ba# + countUTF8Chars :: ShortByteString -> IO Int countUTF8Chars (SBS ba) = go 0# 0# where |