%
% (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
%
\section{String buffers}

Buffers for scanning string input stored in external arrays.

\begin{code}

module StringBuffer
       (
        StringBuffer,

	 -- creation/destruction
        hGetStringBuffer,     -- :: FilePath     -> IO StringBuffer
	stringToStringBuffer, -- :: String       -> IO StringBuffer
	freeStringBuffer,     -- :: StringBuffer -> IO ()

         -- Lookup
	currentChar,      -- :: StringBuffer -> Char
	currentChar#,     -- :: StringBuffer -> Char#
	indexSBuffer,     -- :: StringBuffer -> Int -> Char
	indexSBuffer#,    -- :: StringBuffer -> Int# -> Char#
         -- relative lookup, i.e, currentChar = lookAhead 0
	lookAhead,        -- :: StringBuffer -> Int  -> Char
	lookAhead#,       -- :: StringBuffer -> Int# -> Char#
        
	-- offsets
	currentIndex#,    -- :: StringBuffer -> Int#
	lexemeIndex,	  -- :: StringBuffer -> Int#

	 -- moving the end point of the current lexeme.
        setCurrentPos#,   -- :: StringBuffer -> Int# -> StringBuffer
	incLexeme,	  -- :: StringBuffer -> StringBuffer
	decLexeme,	  -- :: StringBuffer -> StringBuffer

         -- move the start and end lexeme pointer on by x units.        
        stepOn,           -- :: StringBuffer -> StringBuffer
        stepOnBy#,        -- :: StringBuffer -> Int# -> StringBuffer
        stepOnTo#,        -- :: StringBuffer -> Int# -> StringBuffer
        stepOnUntil,      -- :: (Char -> Bool) -> StringBuffer -> StringBuffer
	stepOnUntilChar#, -- :: StringBuffer -> Char# -> StringBuffer
        stepOverLexeme,   -- :: StringBuffer   -> StringBuffer
	scanNumLit,       -- :: Int -> StringBuffer -> (Int, StringBuffer)
	squeezeLexeme,	  -- :: StringBuffer -> Int# -> StringBuffer
	mergeLexemes,	  -- :: StringBuffer -> StringBuffer -> StringBuffer
        expandWhile,      -- :: (Char  -> Bool) -> StringBuffer -> StringBuffer
        expandWhile#,     -- :: (Char# -> Bool) -> StringBuffer -> StringBuffer
        expandUntilMatch, -- :: StrinBuffer -> String -> StringBuffer
         -- at or beyond end of buffer?
        bufferExhausted,  -- :: StringBuffer -> Bool
        emptyLexeme,      -- :: StringBuffer -> Bool

	 -- matching
        prefixMatch,       -- :: StringBuffer -> String -> Bool
	untilEndOfString#, -- :: StringBuffer -> Int#

         -- conversion
        lexemeToString,     -- :: StringBuffer -> String
        lexemeToByteArray,  -- :: StringBuffer -> _ByteArray Int
        lexemeToFastString, -- :: StringBuffer -> FastString
        lexemeToBuffer,     -- :: StringBuffer -> StringBuffer

        FastString,
	ByteArray
       ) where

#include "HsVersions.h"

import GlaExts
#if __GLASGOW_HASKELL__ < 411
import PrelAddr 	( Addr(..) )
#else
import Addr		( Addr(..) )
#endif
import Foreign
import Char		( chr )
import Panic		( panic )

-- urk!
#include "../lib/std/cbits/stgerror.h"

#if __GLASGOW_HASKELL__ >= 303
import IO		( openFile
#if __GLASGOW_HASKELL__ < 407
                        , slurpFile   -- comes from PrelHandle or IOExts now
#endif
                        )
import PrelIOBase
import PrelHandle
import Addr
#else
import IO		( openFile, hFileSize, hClose, IOMode(..) )
import Addr
#endif
#if __GLASGOW_HASKELL__ >= 411
import Ptr		( Ptr(..) )
#endif

#if __GLASGOW_HASKELL__ < 301
import IOBase		( Handle, IOError(..), IOErrorType(..),
			  constructErrorAndFail )
import IOHandle		( readHandle, writeHandle, filePtr )
import PackBase 	( unpackCStringBA )
#else
# if __GLASGOW_HASKELL__ <= 302
import PrelIOBase	( Handle, IOError(..), IOErrorType(..), 
			  constructErrorAndFail )
import PrelHandle	( readHandle, writeHandle, filePtr )
# endif
import PrelPack		( unpackCStringBA )
#endif

#if __GLASGOW_HASKELL__ < 402
import Util 		( bracket )
#else
import Exception	( bracket )
#endif

import PrimPacked
import FastString
import Char 		(isDigit)
\end{code} 

\begin{code}
data StringBuffer
 = StringBuffer
     Addr#
     Int#         -- length
     Int#         -- lexeme start
     Int#         -- current pos
\end{code}

\begin{code}
instance Show StringBuffer where
	showsPrec _ s = showString ""
\end{code}

\begin{code}
hGetStringBuffer :: Bool -> FilePath -> IO StringBuffer
hGetStringBuffer expand_tabs fname = do
   (a, read) <- if expand_tabs 
				then slurpFileExpandTabs fname 
#if __GLASGOW_HASKELL__ < 411
				else slurpFile fname
#else
				else do
				    (Ptr a#, read) <- slurpFile fname
				    return (A# a#, read)
#endif

   let (A# a#) = a;  (I# read#) = read

         -- add sentinel '\NUL'
   _casm_ `` ((char *)%0)[(int)%1]=(char)0; '' (A# a#) (I# (read# -# 1#))
   return (StringBuffer a# read# 0# 0#)

#if __GLASGOW_HASKELL__ < 303
slurpFile fname =
    openFile fname ReadMode >>= \ hndl ->
    hFileSize hndl          >>= \ len ->
    let len_i = fromInteger len in
      -- Allocate an array for system call to store its bytes into.
      -- ToDo: make it robust
--    trace (show ((len_i::Int)+1)) $
    _casm_ `` %r=(char *)malloc(sizeof(char)*(int)%0); '' (len_i::Int)  >>= \ arr@(A# a#) ->
    if addr2Int# a# ==# 0# then
       fail (userError ("hGetStringBuffer: Could not allocate "++show len_i ++ " bytes"))
    else
    readHandle hndl        >>= \ hndl_ ->
    writeHandle hndl hndl_ >>
     let ptr = filePtr hndl_ in
#if __GLASGOW_HASKELL__ <= 302
     _ccall_ fread arr (1::Int) len_i (ptr::ForeignObj)               >>= \  (I# read#) ->
#else
     _ccall_ fread arr (1::Int) len_i (ptr::Addr)                     >>= \  (I# read#) ->
#endif
     hClose hndl		     >>
     if read# ==# 0# then -- EOF or some other error
        fail (userError ("hGetStringBuffer: failed to slurp in interface file "++fname))
     else
	return (arr, I# read#)
#endif

unsafeWriteBuffer :: StringBuffer -> Int# -> Char# -> StringBuffer
unsafeWriteBuffer s@(StringBuffer a _ _ _) i# ch# =
 unsafePerformIO (
   _casm_ `` ((char *)%0)[(int)%1]=(char)%2; '' (A# a) (I# i#) (C# ch#) >>= \ () ->
   return s
 )
\end{code}

-----------------------------------------------------------------------------
-- Turn a String into a StringBuffer

\begin{code}
stringToStringBuffer :: String -> IO StringBuffer
freeStringBuffer :: StringBuffer -> IO ()

#if __GLASGOW_HASKELL__ >= 411
stringToStringBuffer str =
  do let sz@(I# sz#) = length str + 1
     (Ptr a#) <- mallocBytes sz
     fill_in str (A# a#)
     writeCharOffAddr (A# a#) (sz-1) '\0'		-- sentinel
     return (StringBuffer a# sz# 0# 0#)
 where
  fill_in [] _ = return ()
  fill_in (c:cs) a = do
    writeCharOffAddr a 0 c 
    fill_in cs (a `plusAddr` 1)

freeStringBuffer (StringBuffer a# _ _ _) = Foreign.free (Ptr a#)
#else
stringToStringBuffer = panic "stringToStringBuffer: not implemented"
freeStringBuffer sb  = return ()
#endif

\end{code}

-----------------------------------------------------------------------------
This very disturbing bit of code is used for expanding the tabs in a
file before we start parsing it.  Expanding the tabs early makes the
lexer a lot simpler: we only have to record the beginning of the line
in order to be able to calculate the column offset of the current
token.

We guess the size of the buffer required as 20% extra for
expanded tabs, and enlarge it if necessary.

\begin{code}
#if __GLASGOW_HASKELL__ < 303
mayBlock fo thing = thing

writeCharOffAddr :: Addr -> Int -> Char -> IO ()
writeCharOffAddr addr off c
  = _casm_ ``*((char *)%0+(int)%1)=(char)%2;'' addr off c
#endif

getErrType :: IO Int
#if __GLASGOW_HASKELL__ < 303
getErrType = _casm_ ``%r = ghc_errtype;''
#else
getErrType =  _ccall_ getErrType__
#endif

slurpFileExpandTabs :: FilePath -> IO (Addr,Int)
slurpFileExpandTabs fname = do
  bracket (openFile fname ReadMode) (hClose) 
   (\ handle ->
     do sz <- hFileSize handle
        if sz > toInteger (maxBound::Int) 
	  then IOERROR (userError "slurpFile: file too big")
          else do
      	    let sz_i = fromInteger sz
      	        sz_i' = (sz_i * 12) `div` 10		-- add 20% for tabs
      	    chunk <- allocMem sz_i'
      	    trySlurp handle sz_i' chunk
   )

trySlurp :: Handle -> Int -> Addr -> IO (Addr, Int)
trySlurp handle sz_i chunk =
#if __GLASGOW_HASKELL__ == 303
  wantReadableHandle "hGetChar" handle >>= \ handle_ ->
  let fo = haFO__ handle_ in
#elif __GLASGOW_HASKELL__ > 303
  wantReadableHandle "hGetChar" handle $ \ handle_ ->
  let fo = haFO__ handle_ in
#else
  readHandle handle        >>= \ handle_ ->
  let fo = filePtr handle_ in
#endif
  let
	(I# chunk_sz) = sz_i

	tAB_SIZE = 8#

	slurpFile :: Int# -> Int# -> Addr -> Int# -> Int# -> IO (Addr, Int)
	slurpFile c off chunk chunk_sz max_off = slurp c off
	 where

	  slurp :: Int# -> Int# -> IO (Addr, Int)
	  slurp c off | off >=# max_off = do
		let new_sz = chunk_sz *# 2#
	     	chunk' <- reAllocMem chunk (I# new_sz)
	     	slurpFile c off chunk' new_sz (new_sz -# (tAB_SIZE +# 1#))
    	  slurp c off = do
    		intc <- mayBlock fo (_ccall_ fileGetc fo)
    		if intc == ((-1)::Int)
     		  then do errtype <- getErrType
			  if errtype == (ERR_EOF :: Int)
			    then return (chunk, I# off)
			    else constructErrorAndFail "slurpFile"
     		  else case chr intc of
			 '\t' -> tabIt c off
			 ch   -> do  writeCharOffAddr chunk (I# off) ch
				     let c' | ch == '\n' = 0#
					    | otherwise  = c +# 1#
				     slurp c' (off +# 1#)

	  tabIt :: Int# -> Int# -> IO (Addr, Int)
	  -- can't run out of buffer in here, because we reserved an
	  -- extra tAB_SIZE bytes at the end earlier.
	  tabIt c off = do
		writeCharOffAddr chunk (I# off) ' '
		let c' = c +# 1#
		    off' = off +# 1#
		if c' `remInt#` tAB_SIZE ==# 0#
			then slurp c' off'
		    	else tabIt c' off'
  in do

	-- allow space for a full tab at the end of the buffer
	-- (that's what the max_off thing is for),
	-- and add 1 to allow room for the final sentinel \NUL at
	-- the end of the file.
  (chunk', rc) <- slurpFile 0# 0# chunk chunk_sz (chunk_sz -# (tAB_SIZE +# 1#))
#if __GLASGOW_HASKELL__ < 404
  writeHandle handle handle_
#endif
  if rc < (0::Int)
	then constructErrorAndFail "slurpFile"
	else return (chunk', rc+1 {-room for sentinel-})


reAllocMem :: Addr -> Int -> IO Addr
reAllocMem ptr sz = do
   chunk <- _ccall_ realloc ptr sz
   if chunk == nullAddr 
#if __GLASGOW_HASKELL__ >= 400
      then fail "reAllocMem"
#else
      then fail (userError "reAllocMem")
#endif
      else return chunk

allocMem :: Int -> IO Addr
allocMem sz = do
   chunk <- _ccall_ malloc sz
#if __GLASGOW_HASKELL__ < 303
   if chunk == nullAddr 
      then fail (userError "allocMem")
      else return chunk
#else
   if chunk == nullAddr 
      then constructErrorAndFail "allocMem"
      else return chunk
#endif
\end{code}

Lookup

\begin{code}
currentChar  :: StringBuffer -> Char
currentChar sb = case currentChar# sb of c -> C# c

lookAhead :: StringBuffer -> Int  -> Char
lookAhead sb (I# i#) = case lookAhead# sb i# of c -> C# c

indexSBuffer :: StringBuffer -> Int -> Char
indexSBuffer sb (I# i#) = case indexSBuffer# sb i# of c -> C# c

currentChar# :: StringBuffer -> Char#
indexSBuffer# :: StringBuffer -> Int# -> Char#
lookAhead# :: StringBuffer -> Int# -> Char#
currentChar# (StringBuffer fo# _ _ current#) = indexCharOffAddr# fo# current#
indexSBuffer# (StringBuffer fo# _ _ _) i# = indexCharOffAddr# fo# i#

 -- relative lookup, i.e, currentChar = lookAhead 0
lookAhead# (StringBuffer fo# _ _ c#) i# = indexCharOffAddr# fo# (c# +# i#)

currentIndex# :: StringBuffer -> Int#
currentIndex# (StringBuffer fo# _ _ c#) = c#

lexemeIndex :: StringBuffer -> Int#
lexemeIndex (StringBuffer fo# _ c# _) = c#
\end{code}

 moving the start point of the current lexeme.

\begin{code}
 -- moving the end point of the current lexeme.
setCurrentPos# :: StringBuffer -> Int# -> StringBuffer
setCurrentPos# (StringBuffer fo l# s# c#) i# =
 StringBuffer fo l# s# (c# +# i#)

-- augmenting the current lexeme by one.
incLexeme :: StringBuffer -> StringBuffer
incLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# +# 1#)

decLexeme :: StringBuffer -> StringBuffer
decLexeme (StringBuffer fo l# s# c#) = StringBuffer fo l# s# (c# -# 1#)

\end{code}

-- move the start and end point of the buffer on by
-- x units.        

\begin{code}
stepOn :: StringBuffer -> StringBuffer
stepOn (StringBuffer fo l# s# c#) = StringBuffer fo l# (s# +# 1#) (s# +# 1#) -- assume they're the same.

stepOnBy# :: StringBuffer -> Int# -> StringBuffer
stepOnBy# (StringBuffer fo# l# s# c#) i# = 
 case s# +# i# of
  new_s# -> StringBuffer fo# l# new_s# new_s#

-- jump to pos.
stepOnTo# :: StringBuffer -> Int# -> StringBuffer
stepOnTo# (StringBuffer fo l _ _) s# = StringBuffer fo l s# s#

squeezeLexeme :: StringBuffer -> Int# -> StringBuffer
squeezeLexeme (StringBuffer fo l s# c#) i# = StringBuffer fo l (s# +# i#) c#

mergeLexemes :: StringBuffer -> StringBuffer -> StringBuffer
mergeLexemes (StringBuffer fo l s# _) (StringBuffer _ _ _ c#)
   = StringBuffer fo l s# c#

stepOnUntil :: (Char -> Bool) -> StringBuffer -> StringBuffer

stepOnUntil pred (StringBuffer fo l# s# c#) =
 loop c#
  where
   loop c# = 
    case indexCharOffAddr# fo c# of
     ch# | pred (C# ch#) -> StringBuffer fo l# c# c#
	 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
         | otherwise     -> loop (c# +# 1#)

stepOverLexeme :: StringBuffer -> StringBuffer
stepOverLexeme (StringBuffer fo l s# c#) = StringBuffer fo l c# c#

expandWhile :: (Char -> Bool) -> StringBuffer -> StringBuffer
expandWhile pred (StringBuffer fo l# s# c#) =
 loop c#
  where
   loop c# = 
    case indexCharOffAddr# fo c# of
     ch# | pred (C# ch#) -> loop (c# +# 1#)
	 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# l# l# -- EOB, return immediately.
         | otherwise     -> StringBuffer fo l# s# c#

expandWhile# :: (Char# -> Bool) -> StringBuffer -> StringBuffer
expandWhile# pred (StringBuffer fo l# s# c#) =
 loop c#
  where
   loop c# = 
    case indexCharOffAddr# fo c# of
     ch# | pred ch# -> loop (c# +# 1#)
	 | ch# `eqChar#` '\NUL'# && c# >=# l# -> StringBuffer fo l# s# c# -- EOB, return immediately.
         | otherwise     -> StringBuffer fo l# s# c#

scanNumLit :: Integer -> StringBuffer -> (Integer,StringBuffer)
scanNumLit acc (StringBuffer fo l# s# c#) =
 loop acc c#
  where
   loop acc c# = 
    case indexCharOffAddr# fo c# of
     ch# | isDigit (C# ch#) -> loop (acc*10 + (toInteger (I# (ord# ch# -# ord# '0'#)))) (c# +# 1#)
	 | ch# `eqChar#` '\NUL'# && c# >=# l# -> (acc, StringBuffer fo l# s# c#) -- EOB, return immediately.
         | otherwise        -> (acc,StringBuffer fo l# s# c#)


expandUntilMatch :: StringBuffer -> String -> Maybe StringBuffer
expandUntilMatch (StringBuffer fo l# s# c#) str =
  loop c# str
  where
   loop c# [] = Just (StringBuffer fo l# s# c#)
   loop c# ((C# x#):xs) =
    case indexCharOffAddr# fo c# of
      ch# | ch# `eqChar#` '\NUL'# && c# >=# l# -> Nothing
	  | ch# `eqChar#` x# -> loop (c# +# 1#) xs
          | otherwise        -> loop (c# +# 1#) str
	
\end{code}

\begin{code}
   -- at or beyond end of buffer?
bufferExhausted :: StringBuffer -> Bool
bufferExhausted (StringBuffer fo l# _ c#) = c# >=# l#

emptyLexeme :: StringBuffer -> Bool
emptyLexeme (StringBuffer fo l# s# c#) = s# ==# c#

 -- matching
prefixMatch :: StringBuffer -> String -> Maybe StringBuffer
prefixMatch (StringBuffer fo l# s# c#) str =
  loop c# str
  where
   loop c# [] = Just (StringBuffer fo l# s# c#)
   loop c# ((C# x#):xs)
     | indexCharOffAddr# fo c# `eqChar#` x#
     = loop (c# +# 1#) xs
     | otherwise
     = Nothing

untilEndOfString# :: StringBuffer -> StringBuffer
untilEndOfString# (StringBuffer fo l# s# c#) = 
 loop c# 
 where
  getch# i# = indexCharOffAddr# fo i#

  loop c# =
   case getch# c# of
    '\"'# ->
      case getch# (c# -# 1#) of
	'\\'# ->       
                  -- looks like an escaped something or other to me,
		  -- better count the number of "\\"s that are immediately
		  -- preceeding to decide if the " is escaped.
	      let
	       odd_slashes flg i# =
	        case getch# i# of
	         '\\'# -> odd_slashes (not flg) (i# -# 1#)
	         _     -> flg
              in
	      if odd_slashes True (c# -# 2#) then
	          -- odd number, " is ecaped.
		  loop (c# +# 1#)
	      else  -- a real end of string delimiter after all.
		  StringBuffer fo l# s# c#
        _ -> StringBuffer fo l# s# c#
    '\NUL'# ->
	if c# >=# l# then -- hit sentinel, this doesn't look too good..
	   StringBuffer fo l# l# l#
	else
	   loop (c# +# 1#)
    _ -> loop (c# +# 1#)


stepOnUntilChar# :: StringBuffer -> Char# -> StringBuffer
stepOnUntilChar# (StringBuffer fo l# s# c#) x# = 
 loop c# 
 where
  loop c#
   | c# >=# l# || indexCharOffAddr# fo c# `eqChar#` x#
   = StringBuffer fo l# c# c#
   | otherwise
   = loop (c# +# 1#)

         -- conversion
lexemeToString :: StringBuffer -> String
lexemeToString (StringBuffer fo _ start_pos# current#) = 
 if start_pos# ==# current# then
    ""
 else
    unpackCStringBA (copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#)))
    
lexemeToByteArray :: StringBuffer -> ByteArray Int
lexemeToByteArray (StringBuffer fo _ start_pos# current#) = 
 if start_pos# ==# current# then
    error "lexemeToByteArray" 
 else
    copySubStr (A# fo) (I# start_pos#) (I# (current# -# start_pos#))

lexemeToFastString :: StringBuffer -> FastString
lexemeToFastString (StringBuffer fo l# start_pos# current#) =
 if start_pos# ==# current# then
    mkFastCharString2 (A# fo) (I# 0#)
 else
    mkFastSubString (A# fo) (I# start_pos#) (I# (current# -# start_pos#))

{-
 Create a StringBuffer from the current lexeme, and add a sentinel
 at the end. Know What You're Doing before taking this function
 into use..
-}
lexemeToBuffer :: StringBuffer -> StringBuffer
lexemeToBuffer (StringBuffer fo l# start_pos# current#) =
 if start_pos# ==# current# then
    StringBuffer fo 0# start_pos# current# -- an error, really. 
 else
    unsafeWriteBuffer (StringBuffer fo (current# -# start_pos#) start_pos# start_pos#)
		      (current# -# 1#)
		      '\NUL'#

\end{code}