diff options
Diffstat (limited to 'compiler')
| -rw-r--r-- | compiler/utils/FastString.hs | 13 | ||||
| -rw-r--r-- | compiler/utils/StringBuffer.hs | 42 | 
2 files changed, 51 insertions, 4 deletions
| diff --git a/compiler/utils/FastString.hs b/compiler/utils/FastString.hs index 8f76584875..8d1bbb5c67 100644 --- a/compiler/utils/FastString.hs +++ b/compiler/utils/FastString.hs @@ -23,6 +23,10 @@  --   * Outputing them is fast.  --   * Generated by 'sLit'.  --   * Turn into 'Outputable.SDoc' with 'Outputable.ptext' +--   * Requires manual memory management. +--     Improper use may lead to memory leaks or dangling pointers. +--   * It assumes Latin-1 as the encoding, therefore it cannot represent +--     arbitrary Unicode strings.  --  -- Use 'LitString' unless you want the facilities of 'FastString'.  module FastString @@ -560,14 +564,19 @@ hPutFS handle fs = BS.hPut handle $ fastStringToByteString fs  -- -----------------------------------------------------------------------------  -- LitStrings, here for convenience only. +-- | A 'LitString' is a pointer to some null-terminated array of bytes.  type LitString = Ptr Word8  --Why do we recalculate length every time it's requested?  --If it's commonly needed, we should perhaps have  --data LitString = LitString {-#UNPACK#-}!Addr# {-#UNPACK#-}!Int# +-- | Wrap an unboxed address into a 'LitString'.  mkLitString# :: Addr# -> LitString  mkLitString# a# = Ptr a# +-- | Encode a 'String' into a newly allocated 'LitString' using Latin-1 +-- encoding.  The original string must not contain non-Latin-1 characters +-- (above codepoint @0xff@).  {-# INLINE mkLitString #-}  mkLitString :: String -> LitString  mkLitString s = @@ -583,9 +592,13 @@ mkLitString s =     return p   ) +-- | Decode a 'LitString' back into a 'String' using Latin-1 encoding. +-- This does not free the memory associated with 'LitString'.  unpackLitString :: LitString -> String  unpackLitString (Ptr p) = unpackCString# p +-- | Compute the length of a 'LitString', which must necessarily be +-- null-terminated.  lengthLS :: LitString -> Int  lengthLS = ptrStrLength diff --git a/compiler/utils/StringBuffer.hs b/compiler/utils/StringBuffer.hs index 7da9f6c22d..bac752ac28 100644 --- a/compiler/utils/StringBuffer.hs +++ b/compiler/utils/StringBuffer.hs @@ -89,6 +89,8 @@ instance Show StringBuffer where  -- -----------------------------------------------------------------------------  -- Creation / Destruction +-- | Read a file into a 'StringBuffer'.  The resulting buffer is automatically +-- managed by the garbage collector.  hGetStringBuffer :: FilePath -> IO StringBuffer  hGetStringBuffer fname = do     h <- openBinaryFile fname ReadMode @@ -161,6 +163,8 @@ appendStringBuffers sb1 sb2            calcLen sb = len sb - cur sb            size =  sb1_len + sb2_len +-- | Encode a 'String' into a 'StringBuffer' as UTF-8.  The resulting buffer +-- is automatically managed by the garbage collector.  stringToStringBuffer :: String -> StringBuffer  stringToStringBuffer str =   unsafePerformIO $ do @@ -175,10 +179,15 @@ stringToStringBuffer str =  -- -----------------------------------------------------------------------------  -- Grab a character --- Getting our fingers dirty a little here, but this is performance-critical +-- | Return the first UTF-8 character of a nonempty 'StringBuffer' and as well +-- the remaining portion (analogous to 'Data.List.uncons').  __Warning:__ The +-- behavior is undefined if the 'StringBuffer' is empty.  The result shares +-- the same buffer as the original.  Similar to 'utf8DecodeChar', if the +-- character cannot be decoded as UTF-8, '\0' is returned.  {-# INLINE nextChar #-}  nextChar :: StringBuffer -> (Char,StringBuffer)  nextChar (StringBuffer buf len (I# cur#)) = +  -- Getting our fingers dirty a little here, but this is performance-critical    inlinePerformIO $ do      withForeignPtr buf $ \(Ptr a#) -> do          case utf8DecodeChar# (a# `plusAddr#` cur#) of @@ -186,6 +195,10 @@ nextChar (StringBuffer buf len (I# cur#)) =               let cur' = I# (cur# +# nBytes#) in               return (C# c#, StringBuffer buf len cur') +-- | Return the first UTF-8 character of a nonempty 'StringBuffer' (analogous +-- to 'Data.List.head').  __Warning:__ The behavior is undefined if the +-- 'StringBuffer' is empty.  Similar to 'utf8DecodeChar', if the character +-- cannot be decoded as UTF-8, '\0' is returned.  currentChar :: StringBuffer -> Char  currentChar = fst . nextChar @@ -200,29 +213,50 @@ prevChar (StringBuffer buf _   cur) _     =  -- -----------------------------------------------------------------------------  -- Moving +-- | Return a 'StringBuffer' with the first UTF-8 character removed (analogous +-- to 'Data.List.tail').  __Warning:__ The behavior is undefined if the +-- 'StringBuffer' is empty.  The result shares the same buffer as the +-- original.  stepOn :: StringBuffer -> StringBuffer  stepOn s = snd (nextChar s) -offsetBytes :: Int -> StringBuffer -> StringBuffer +-- | Return a 'StringBuffer' with the first @n@ bytes removed.  __Warning:__ +-- If there aren't enough characters, the returned 'StringBuffer' will be +-- invalid and any use of it may lead to undefined behavior.  The result +-- shares the same buffer as the original. +offsetBytes :: Int                      -- ^ @n@, the number of bytes +            -> StringBuffer +            -> StringBuffer  offsetBytes i s = s { cur = cur s + i } +-- | Compute the difference in offset between two 'StringBuffer's that share +-- the same buffer.  __Warning:__ The behavior is undefined if the +-- 'StringBuffer's use separate buffers.  byteDiff :: StringBuffer -> StringBuffer -> Int  byteDiff s1 s2 = cur s2 - cur s1 +-- | Check whether a 'StringBuffer' is empty (analogous to 'Data.List.null').  atEnd :: StringBuffer -> Bool  atEnd (StringBuffer _ l c) = l == c  -- -----------------------------------------------------------------------------  -- Conversion -lexemeToString :: StringBuffer -> Int {-bytes-} -> String +-- | Decode the first @n@ bytes of a 'StringBuffer' as UTF-8 into a 'String'. +-- Similar to 'utf8DecodeChar', if the character cannot be decoded as UTF-8, +-- they will be replaced with '\0'. +lexemeToString :: StringBuffer +               -> Int                   -- ^ @n@, the number of bytes +               -> String  lexemeToString _ 0 = ""  lexemeToString (StringBuffer buf _ cur) bytes =    inlinePerformIO $      withForeignPtr buf $ \ptr ->        utf8DecodeString (ptr `plusPtr` cur) bytes -lexemeToFastString :: StringBuffer -> Int {-bytes-} -> FastString +lexemeToFastString :: StringBuffer +                   -> Int               -- ^ @n@, the number of bytes +                   -> FastString  lexemeToFastString _ 0 = nilFS  lexemeToFastString (StringBuffer buf _ cur) len =     inlinePerformIO $ | 
