summaryrefslogtreecommitdiff
path: root/ghc
diff options
context:
space:
mode:
authorsimonmar <unknown>2005-10-20 14:00:36 +0000
committersimonmar <unknown>2005-10-20 14:00:36 +0000
commit63e8af080a7e779a48e812e6caa9ea519b046260 (patch)
treeb52886fa39836d676004d4ca9e092927f431f480 /ghc
parent4f0f4342c0268e239fd8bb6bd98ad2583b3485dd (diff)
downloadhaskell-63e8af080a7e779a48e812e6caa9ea519b046260.tar.gz
[project @ 2005-10-20 14:00:36 by simonmar]
Column numbers in SrcLocs are now counted as the number of characters, rather than columns. i.e. a tab always counts as 1. This was necessary for communication with Visual Studio interfaces which expect character indices, but also it seems the majority of other compilers also do things this way. From: Krasimir Angelov <kr.angelov@gmail.com>
Diffstat (limited to 'ghc')
-rw-r--r--ghc/compiler/parser/Lexer.x73
1 files changed, 42 insertions, 31 deletions
diff --git a/ghc/compiler/parser/Lexer.x b/ghc/compiler/parser/Lexer.x
index c7ffc592e0..5351af140c 100644
--- a/ghc/compiler/parser/Lexer.x
+++ b/ghc/compiler/parser/Lexer.x
@@ -587,12 +587,12 @@ pop _span _buf _len = do popLexState; lexToken
pop_and :: Action -> Action
pop_and act span buf len = do popLexState; act span buf len
-notFollowedBy char _ _ _ (_,buf) = atEnd buf || currentChar buf /= char
+notFollowedBy char _ _ _ (AI _ _ buf) = atEnd buf || currentChar buf /= char
-notFollowedBySymbol _ _ _ (_,buf)
+notFollowedBySymbol _ _ _ (AI _ _ buf)
= atEnd buf || currentChar buf `notElem` "!#$%&*+./<=>?@\\^|-~"
-atEOL _ _ _ (_,buf) = atEnd buf || currentChar buf == '\n'
+atEOL _ _ _ (AI _ _ buf) = atEnd buf || currentChar buf == '\n'
ifExtension pred bits _ _ _ = pred bits
@@ -622,8 +622,7 @@ nested_comment span _str _len = do
Just (c,input) -> go n input
c -> go n input
- err input = do failLocMsgP (srcSpanStart span) (fst input)
- "unterminated `{-'"
+ err (AI end _ _) = failLocMsgP (srcSpanStart span) end "unterminated `{-'"
open_brace, close_brace :: Action
open_brace span _str _len = do
@@ -653,7 +652,8 @@ check_qvarid span buf len = do
token = L span (ITqvarid (mod,var))
try_again = do
- setInput (srcSpanStart span,buf)
+ (AI _ offs _) <- getInput
+ setInput (AI (srcSpanStart span) (offs-len) buf)
pushLexState bad_qvarid
lexToken
@@ -744,7 +744,7 @@ prim_double str = ITprimdouble $! readRational str
-- we're at the first token on a line, insert layout tokens if necessary
do_bol :: Action
do_bol span _str _len = do
- pos <- getOffside (srcSpanEnd span)
+ pos <- getOffside
case pos of
LT -> do
--trace "layout: inserting '}'" $ do
@@ -780,7 +780,7 @@ maybe_layout _ = return ()
--
new_layout_context strict span _buf _len = do
popLexState
- let offset = srcSpanStartCol span
+ (AI _ offset _) <- getInput
ctx <- getContext
case ctx of
Layout prev_off : _ |
@@ -887,21 +887,21 @@ lex_char_tok span buf len = do -- We've seen '
case alexGetChar i1 of
Nothing -> lit_error
- Just ('\'', i2@(end2,_)) -> do -- We've seen ''
+ Just ('\'', i2@(AI end2 _ _)) -> do -- We've seen ''
th_exts <- extension thEnabled
if th_exts then do
setInput i2
return (L (mkSrcSpan loc end2) ITtyQuote)
else lit_error
- Just ('\\', i2@(end2,_)) -> do -- We've seen 'backslash
+ Just ('\\', i2@(AI end2 _ _)) -> do -- We've seen 'backslash
setInput i2
lit_ch <- lex_escape
mc <- getCharOrFail -- Trailing quote
if mc == '\'' then finish_char_tok loc lit_ch
else lit_error
- Just (c, i2@(end2,_)) | not (is_any c) -> lit_error
+ Just (c, i2@(AI end2 _ _)) | not (is_any c) -> lit_error
| otherwise ->
-- We've seen 'x, where x is a valid character
@@ -914,17 +914,18 @@ lex_char_tok span buf len = do -- We've seen '
_other -> do -- We've seen 'x not followed by quote
-- If TH is on, just parse the quote only
th_exts <- extension thEnabled
- if th_exts then return (L (mkSrcSpan loc (fst i1)) ITvarQuote)
+ let (AI end _ _) = i1
+ if th_exts then return (L (mkSrcSpan loc end) ITvarQuote)
else lit_error
finish_char_tok :: SrcLoc -> Char -> P (Located Token)
finish_char_tok loc ch -- We've already seen the closing quote
-- Just need to check for trailing #
= do glaexts <- extension glaExtsEnabled
- i@(end,_) <- getInput
+ i@(AI end _ _) <- getInput
if glaexts then do
case alexGetChar i of
- Just ('#',i@(end,_)) -> do
+ Just ('#',i@(AI end _ _)) -> do
setInput i
return (L (mkSrcSpan loc end) (ITprimchar ch))
_other ->
@@ -1066,6 +1067,9 @@ data ParseResult a
data PState = PState {
buffer :: StringBuffer,
last_loc :: SrcSpan, -- pos of previous token
+ last_offs :: !Int, -- offset of the previous token from the
+ -- beginning of the current line.
+ -- \t is equal to 8 spaces.
last_len :: !Int, -- len of previous token
loc :: SrcLoc, -- current loc (end of prev token + 1)
extsBitmap :: !Int, -- bitmap that determines permitted extensions
@@ -1121,24 +1125,30 @@ getSrcLoc = P $ \s@(PState{ loc=loc }) -> POk s loc
setLastToken :: SrcSpan -> Int -> P ()
setLastToken loc len = P $ \s -> POk s{ last_loc=loc, last_len=len } ()
-type AlexInput = (SrcLoc,StringBuffer)
+data AlexInput = AI SrcLoc {-#UNPACK#-}!Int StringBuffer
alexInputPrevChar :: AlexInput -> Char
-alexInputPrevChar (_,s) = prevChar s '\n'
+alexInputPrevChar (AI _ _ s) = prevChar s '\n'
alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
-alexGetChar (loc,s)
+alexGetChar (AI loc ofs s)
| atEnd s = Nothing
- | otherwise = c `seq` loc' `seq` s' `seq` Just (c, (loc', s'))
- where c = currentChar s
+ | otherwise = c `seq` loc' `seq` ofs' `seq` s' `seq` Just (c, (AI loc' ofs' s'))
+ where c = currentChar s
loc' = advanceSrcLoc loc c
+ ofs' = advanceOffs c ofs
s' = stepOn s
+ advanceOffs :: Char -> Int -> Int
+ advanceOffs '\n' offs = 0
+ advanceOffs '\t' offs = (offs `quot` 8 + 1) * 8
+ advanceOffs _ offs = offs + 1
+
getInput :: P AlexInput
-getInput = P $ \s@PState{ loc=l, buffer=b } -> POk s (l,b)
+getInput = P $ \s@PState{ loc=l, last_offs=o, buffer=b } -> POk s (AI l o b)
setInput :: AlexInput -> P ()
-setInput (l,b) = P $ \s -> POk s{ loc=l, buffer=b } ()
+setInput (AI l o b) = P $ \s -> POk s{ loc=l, last_offs=o, buffer=b } ()
pushLexState :: Int -> P ()
pushLexState ls = P $ \s@PState{ lex_state=l } -> POk s{lex_state=ls:l} ()
@@ -1178,6 +1188,7 @@ mkPState buf loc flags =
PState {
buffer = buf,
last_loc = mkSrcSpan loc loc,
+ last_offs = 0,
last_len = 0,
loc = loc,
extsBitmap = fromIntegral bitmap,
@@ -1215,13 +1226,13 @@ popContext = P $ \ s@(PState{ buffer = buf, context = ctx,
-- This is only used at the outer level of a module when the 'module'
-- keyword is missing.
pushCurrentContext :: P ()
-pushCurrentContext = P $ \ s@PState{ last_loc=loc, context=ctx } ->
- POk s{ context = Layout (srcSpanStartCol loc) : ctx} ()
+pushCurrentContext = P $ \ s@PState{ last_offs=offs, last_len=len, context=ctx } ->
+ POk s{context = Layout (offs-len) : ctx} ()
-getOffside :: SrcLoc -> P Ordering
-getOffside loc = P $ \s@PState{context=stk} ->
+getOffside :: P Ordering
+getOffside = P $ \s@PState{last_offs=offs, context=stk} ->
let ord = case stk of
- (Layout n:_) -> compare (srcLocCol loc) n
+ (Layout n:_) -> compare offs n
_ -> GT
in POk s ord
@@ -1255,7 +1266,7 @@ srcParseFail = P $ \PState{ buffer = buf, last_len = len,
lexError :: String -> P a
lexError str = do
loc <- getSrcLoc
- i@(end,_) <- getInput
+ i@(AI end _ _) <- getInput
failLocMsgP loc end str
-- -----------------------------------------------------------------------------
@@ -1265,23 +1276,23 @@ lexError str = do
lexer :: (Located Token -> P a) -> P a
lexer cont = do
tok@(L _ tok__) <- lexToken
- --trace ("token: " ++ show tok__) $ do
+ -- trace ("token: " ++ show tok__) $ do
cont tok
lexToken :: P (Located Token)
lexToken = do
- inp@(loc1,buf) <- getInput
+ inp@(AI loc1 _ buf) <- getInput
sc <- getLexState
exts <- getExts
case alexScanUser exts inp sc of
AlexEOF -> do let span = mkSrcSpan loc1 loc1
setLastToken span 0
return (L span ITeof)
- AlexError (loc2,_) -> do failLocMsgP loc1 loc2 "lexical error"
+ AlexError (AI loc2 _ _) -> do failLocMsgP loc1 loc2 "lexical error"
AlexSkip inp2 _ -> do
setInput inp2
lexToken
- AlexToken inp2@(end,buf2) len t -> do
+ AlexToken inp2@(AI end _ buf2) len t -> do
setInput inp2
let span = mkSrcSpan loc1 end
span `seq` setLastToken span len