diff options
Diffstat (limited to 'compiler/utils/StringBuffer.lhs')
-rw-r--r-- | compiler/utils/StringBuffer.lhs | 259 |
1 files changed, 0 insertions, 259 deletions
diff --git a/compiler/utils/StringBuffer.lhs b/compiler/utils/StringBuffer.lhs deleted file mode 100644 index 9e6e6c1824..0000000000 --- a/compiler/utils/StringBuffer.lhs +++ /dev/null @@ -1,259 +0,0 @@ -% -% (c) The University of Glasgow 2006 -% (c) The University of Glasgow, 1997-2006 -% - -Buffers for scanning string input stored in external arrays. - -\begin{code} -{-# LANGUAGE BangPatterns, CPP, MagicHash, UnboxedTuples #-} -{-# OPTIONS_GHC -O -funbox-strict-fields #-} --- We always optimise this, otherwise performance of a non-optimised --- compiler is severely affected - -module StringBuffer - ( - StringBuffer(..), - -- non-abstract for vs\/HaskellService - - -- * Creation\/destruction - hGetStringBuffer, - hGetStringBufferBlock, - appendStringBuffers, - stringToStringBuffer, - - -- * Inspection - nextChar, - currentChar, - prevChar, - atEnd, - - -- * Moving and comparison - stepOn, - offsetBytes, - byteDiff, - - -- * Conversion - lexemeToString, - lexemeToFastString, - - -- * Parsing integers - parseUnsignedInteger, - ) where - -#include "HsVersions.h" - -import Encoding -import FastString -import FastTypes -import FastFunctions -import Outputable -import Util - -import Data.Maybe -import Control.Exception -import System.IO -import System.IO.Unsafe ( unsafePerformIO ) - -import GHC.Exts - -#if __GLASGOW_HASKELL__ >= 709 -import Foreign -#else -import Foreign.Safe -#endif - --- ----------------------------------------------------------------------------- --- The StringBuffer type - --- |A StringBuffer is an internal pointer to a sized chunk of bytes. --- The bytes are intended to be *immutable*. There are pure --- operations to read the contents of a StringBuffer. --- --- A StringBuffer may have a finalizer, depending on how it was --- obtained. --- -data StringBuffer - = StringBuffer { - buf :: {-# UNPACK #-} !(ForeignPtr Word8), - len :: {-# UNPACK #-} !Int, -- length - cur :: {-# UNPACK #-} !Int -- current pos - } - -- The buffer is assumed to be UTF-8 encoded, and furthermore - -- we add three '\0' bytes to the end as sentinels so that the - -- decoder doesn't have to check for overflow at every single byte - -- of a multibyte sequence. - -instance Show StringBuffer where - showsPrec _ s = showString "<stringbuffer(" - . shows (len s) . showString "," . shows (cur s) - . showString ")>" - --- ----------------------------------------------------------------------------- --- Creation / Destruction - -hGetStringBuffer :: FilePath -> IO StringBuffer -hGetStringBuffer fname = do - h <- openBinaryFile fname ReadMode - size_i <- hFileSize h - offset_i <- skipBOM h size_i 0 -- offset is 0 initially - let size = fromIntegral $ size_i - offset_i - buf <- mallocForeignPtrArray (size+3) - withForeignPtr buf $ \ptr -> do - r <- if size == 0 then return 0 else hGetBuf h ptr size - hClose h - if (r /= size) - then ioError (userError "short read of file") - else newUTF8StringBuffer buf ptr size - -hGetStringBufferBlock :: Handle -> Int -> IO StringBuffer -hGetStringBufferBlock handle wanted - = do size_i <- hFileSize handle - offset_i <- hTell handle >>= skipBOM handle size_i - let size = min wanted (fromIntegral $ size_i-offset_i) - buf <- mallocForeignPtrArray (size+3) - withForeignPtr buf $ \ptr -> - do r <- if size == 0 then return 0 else hGetBuf handle ptr size - if r /= size - then ioError (userError $ "short read of file: "++show(r,size,size_i,handle)) - else newUTF8StringBuffer buf ptr size - --- | Skip the byte-order mark if there is one (see #1744 and #6016), --- and return the new position of the handle in bytes. --- --- This is better than treating #FEFF as whitespace, --- because that would mess up layout. We don't have a concept --- of zero-width whitespace in Haskell: all whitespace codepoints --- have a width of one column. -skipBOM :: Handle -> Integer -> Integer -> IO Integer -skipBOM h size offset = - -- Only skip BOM at the beginning of a file. - if size > 0 && offset == 0 - then do - -- Validate assumption that handle is in binary mode. - ASSERTM( hGetEncoding h >>= return . isNothing ) - -- Temporarily select text mode to make `hLookAhead` and - -- `hGetChar` return full Unicode characters. - bracket_ (hSetBinaryMode h False) (hSetBinaryMode h True) $ do - c <- hLookAhead h - if c == '\xfeff' - then hGetChar h >> hTell h - else return offset - else return offset - -newUTF8StringBuffer :: ForeignPtr Word8 -> Ptr Word8 -> Int -> IO StringBuffer -newUTF8StringBuffer buf ptr size = do - pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0] - -- sentinels for UTF-8 decoding - return $ StringBuffer buf size 0 - -appendStringBuffers :: StringBuffer -> StringBuffer -> IO StringBuffer -appendStringBuffers sb1 sb2 - = do newBuf <- mallocForeignPtrArray (size+3) - withForeignPtr newBuf $ \ptr -> - withForeignPtr (buf sb1) $ \sb1Ptr -> - withForeignPtr (buf sb2) $ \sb2Ptr -> - do copyArray ptr (sb1Ptr `advancePtr` cur sb1) sb1_len - copyArray (ptr `advancePtr` sb1_len) (sb2Ptr `advancePtr` cur sb2) sb2_len - pokeArray (ptr `advancePtr` size) [0,0,0] - return (StringBuffer newBuf size 0) - where sb1_len = calcLen sb1 - sb2_len = calcLen sb2 - calcLen sb = len sb - cur sb - size = sb1_len + sb2_len - -stringToStringBuffer :: String -> StringBuffer -stringToStringBuffer str = - unsafePerformIO $ do - let size = utf8EncodedLength str - buf <- mallocForeignPtrArray (size+3) - withForeignPtr buf $ \ptr -> do - utf8EncodeString ptr str - pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0] - -- sentinels for UTF-8 decoding - return (StringBuffer buf size 0) - --- ----------------------------------------------------------------------------- --- Grab a character - --- Getting our fingers dirty a little here, but this is performance-critical -{-# INLINE nextChar #-} -nextChar :: StringBuffer -> (Char,StringBuffer) -nextChar (StringBuffer buf len (I# cur#)) = - inlinePerformIO $ do - withForeignPtr buf $ \(Ptr a#) -> do - case utf8DecodeChar# (a# `plusAddr#` cur#) of - (# c#, nBytes# #) -> - let cur' = I# (cur# +# nBytes#) in - return (C# c#, StringBuffer buf len cur') - -currentChar :: StringBuffer -> Char -currentChar = fst . nextChar - -prevChar :: StringBuffer -> Char -> Char -prevChar (StringBuffer _ _ 0) deflt = deflt -prevChar (StringBuffer buf _ cur) _ = - inlinePerformIO $ do - withForeignPtr buf $ \p -> do - p' <- utf8PrevChar (p `plusPtr` cur) - return (fst (utf8DecodeChar p')) - --- ----------------------------------------------------------------------------- --- Moving - -stepOn :: StringBuffer -> StringBuffer -stepOn s = snd (nextChar s) - -offsetBytes :: Int -> StringBuffer -> StringBuffer -offsetBytes i s = s { cur = cur s + i } - -byteDiff :: StringBuffer -> StringBuffer -> Int -byteDiff s1 s2 = cur s2 - cur s1 - -atEnd :: StringBuffer -> Bool -atEnd (StringBuffer _ l c) = l == c - --- ----------------------------------------------------------------------------- --- Conversion - -lexemeToString :: StringBuffer -> Int {-bytes-} -> String -lexemeToString _ 0 = "" -lexemeToString (StringBuffer buf _ cur) bytes = - inlinePerformIO $ - withForeignPtr buf $ \ptr -> - utf8DecodeString (ptr `plusPtr` cur) bytes - -lexemeToFastString :: StringBuffer -> Int {-bytes-} -> FastString -lexemeToFastString _ 0 = nilFS -lexemeToFastString (StringBuffer buf _ cur) len = - inlinePerformIO $ - withForeignPtr buf $ \ptr -> - return $! mkFastStringBytes (ptr `plusPtr` cur) len - --- ----------------------------------------------------------------------------- --- Parsing integer strings in various bases -{- -byteOff :: StringBuffer -> Int -> Char -byteOff (StringBuffer buf _ cur) i = - inlinePerformIO $ withForeignPtr buf $ \ptr -> do --- return $! cBox (indexWord8OffFastPtrAsFastChar --- (pUnbox ptr) (iUnbox (cur+i))) ---or --- w <- peek (ptr `plusPtr` (cur+i)) --- return (unsafeChr (fromIntegral (w::Word8))) --} --- | XXX assumes ASCII digits only (by using byteOff) -parseUnsignedInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer -parseUnsignedInteger (StringBuffer buf _ cur) len radix char_to_int - = inlinePerformIO $ withForeignPtr buf $ \ptr -> return $! let - --LOL, in implementations where the indexing needs slow unsafePerformIO, - --this is less (not more) efficient than using the IO monad explicitly - --here. - !ptr' = pUnbox ptr - byteOff i = cBox (indexWord8OffFastPtrAsFastChar ptr' (iUnbox (cur + i))) - go i x | i == len = x - | otherwise = case byteOff i of - char -> go (i + 1) (x * radix + toInteger (char_to_int char)) - in go 0 0 - -\end{code} |