summaryrefslogtreecommitdiff
path: root/compiler/utils/StringBuffer.hs
diff options
context:
space:
mode:
authorPhil Ruffwind <rf@rufflewind.com>2016-12-17 18:09:06 -0500
committerBen Gamari <ben@smart-cactus.org>2016-12-17 18:09:41 -0500
commit21dde8126d615a082648c916a3e20d9878f22517 (patch)
tree5ed1653b1d26c1fda64851f33ea800d511f65a66 /compiler/utils/StringBuffer.hs
parent90cfa84981ee1f9fb756a3af1bd707674c18c034 (diff)
downloadhaskell-21dde8126d615a082648c916a3e20d9878f22517.tar.gz
Improve StringBuffer and FastString docs
This area of code contains a lot of unsafe functionality, so it might be worth documenting to reduce the risk of misuse. Test Plan: inspection Reviewers: austin, bgamari Reviewed By: bgamari Subscribers: thomie Differential Revision: https://phabricator.haskell.org/D2872
Diffstat (limited to 'compiler/utils/StringBuffer.hs')
-rw-r--r--compiler/utils/StringBuffer.hs42
1 files changed, 38 insertions, 4 deletions
diff --git a/compiler/utils/StringBuffer.hs b/compiler/utils/StringBuffer.hs
index 7da9f6c22d..bac752ac28 100644
--- a/compiler/utils/StringBuffer.hs
+++ b/compiler/utils/StringBuffer.hs
@@ -89,6 +89,8 @@ instance Show StringBuffer where
-- -----------------------------------------------------------------------------
-- Creation / Destruction
+-- | Read a file into a 'StringBuffer'. The resulting buffer is automatically
+-- managed by the garbage collector.
hGetStringBuffer :: FilePath -> IO StringBuffer
hGetStringBuffer fname = do
h <- openBinaryFile fname ReadMode
@@ -161,6 +163,8 @@ appendStringBuffers sb1 sb2
calcLen sb = len sb - cur sb
size = sb1_len + sb2_len
+-- | Encode a 'String' into a 'StringBuffer' as UTF-8. The resulting buffer
+-- is automatically managed by the garbage collector.
stringToStringBuffer :: String -> StringBuffer
stringToStringBuffer str =
unsafePerformIO $ do
@@ -175,10 +179,15 @@ stringToStringBuffer str =
-- -----------------------------------------------------------------------------
-- Grab a character
--- Getting our fingers dirty a little here, but this is performance-critical
+-- | Return the first UTF-8 character of a nonempty 'StringBuffer' and as well
+-- the remaining portion (analogous to 'Data.List.uncons'). __Warning:__ The
+-- behavior is undefined if the 'StringBuffer' is empty. The result shares
+-- the same buffer as the original. Similar to 'utf8DecodeChar', if the
+-- character cannot be decoded as UTF-8, '\0' is returned.
{-# INLINE nextChar #-}
nextChar :: StringBuffer -> (Char,StringBuffer)
nextChar (StringBuffer buf len (I# cur#)) =
+ -- Getting our fingers dirty a little here, but this is performance-critical
inlinePerformIO $ do
withForeignPtr buf $ \(Ptr a#) -> do
case utf8DecodeChar# (a# `plusAddr#` cur#) of
@@ -186,6 +195,10 @@ nextChar (StringBuffer buf len (I# cur#)) =
let cur' = I# (cur# +# nBytes#) in
return (C# c#, StringBuffer buf len cur')
+-- | Return the first UTF-8 character of a nonempty 'StringBuffer' (analogous
+-- to 'Data.List.head'). __Warning:__ The behavior is undefined if the
+-- 'StringBuffer' is empty. Similar to 'utf8DecodeChar', if the character
+-- cannot be decoded as UTF-8, '\0' is returned.
currentChar :: StringBuffer -> Char
currentChar = fst . nextChar
@@ -200,29 +213,50 @@ prevChar (StringBuffer buf _ cur) _ =
-- -----------------------------------------------------------------------------
-- Moving
+-- | Return a 'StringBuffer' with the first UTF-8 character removed (analogous
+-- to 'Data.List.tail'). __Warning:__ The behavior is undefined if the
+-- 'StringBuffer' is empty. The result shares the same buffer as the
+-- original.
stepOn :: StringBuffer -> StringBuffer
stepOn s = snd (nextChar s)
-offsetBytes :: Int -> StringBuffer -> StringBuffer
+-- | Return a 'StringBuffer' with the first @n@ bytes removed. __Warning:__
+-- If there aren't enough characters, the returned 'StringBuffer' will be
+-- invalid and any use of it may lead to undefined behavior. The result
+-- shares the same buffer as the original.
+offsetBytes :: Int -- ^ @n@, the number of bytes
+ -> StringBuffer
+ -> StringBuffer
offsetBytes i s = s { cur = cur s + i }
+-- | Compute the difference in offset between two 'StringBuffer's that share
+-- the same buffer. __Warning:__ The behavior is undefined if the
+-- 'StringBuffer's use separate buffers.
byteDiff :: StringBuffer -> StringBuffer -> Int
byteDiff s1 s2 = cur s2 - cur s1
+-- | Check whether a 'StringBuffer' is empty (analogous to 'Data.List.null').
atEnd :: StringBuffer -> Bool
atEnd (StringBuffer _ l c) = l == c
-- -----------------------------------------------------------------------------
-- Conversion
-lexemeToString :: StringBuffer -> Int {-bytes-} -> String
+-- | Decode the first @n@ bytes of a 'StringBuffer' as UTF-8 into a 'String'.
+-- Similar to 'utf8DecodeChar', if the character cannot be decoded as UTF-8,
+-- they will be replaced with '\0'.
+lexemeToString :: StringBuffer
+ -> Int -- ^ @n@, the number of bytes
+ -> String
lexemeToString _ 0 = ""
lexemeToString (StringBuffer buf _ cur) bytes =
inlinePerformIO $
withForeignPtr buf $ \ptr ->
utf8DecodeString (ptr `plusPtr` cur) bytes
-lexemeToFastString :: StringBuffer -> Int {-bytes-} -> FastString
+lexemeToFastString :: StringBuffer
+ -> Int -- ^ @n@, the number of bytes
+ -> FastString
lexemeToFastString _ 0 = nilFS
lexemeToFastString (StringBuffer buf _ cur) len =
inlinePerformIO $