summaryrefslogtreecommitdiff
path: root/compiler/parser
diff options
context:
space:
mode:
authorSimon Marlow <simonmar@microsoft.com>2006-04-07 02:05:11 +0000
committerSimon Marlow <simonmar@microsoft.com>2006-04-07 02:05:11 +0000
commit0065d5ab628975892cea1ec7303f968c3338cbe1 (patch)
tree8e2afe0ab48ee33cf95009809d67c9649573ef92 /compiler/parser
parent28a464a75e14cece5db40f2765a29348273ff2d2 (diff)
downloadhaskell-0065d5ab628975892cea1ec7303f968c3338cbe1.tar.gz
Reorganisation of the source tree
Most of the other users of the fptools build system have migrated to Cabal, and with the move to darcs we can now flatten the source tree without losing history, so here goes. The main change is that the ghc/ subdir is gone, and most of what it contained is now at the top level. The build system now makes no pretense at being multi-project, it is just the GHC build system. No doubt this will break many things, and there will be a period of instability while we fix the dependencies. A straightforward build should work, but I haven't yet fixed binary/source distributions. Changes to the Building Guide will follow, too.
Diffstat (limited to 'compiler/parser')
-rw-r--r--compiler/parser/Ctype.lhs341
-rw-r--r--compiler/parser/LexCore.hs130
-rw-r--r--compiler/parser/Lexer.x1457
-rw-r--r--compiler/parser/Parser.y.pp1607
-rw-r--r--compiler/parser/ParserCore.y339
-rw-r--r--compiler/parser/ParserCoreUtils.hs72
-rw-r--r--compiler/parser/RdrHsSyn.lhs869
-rw-r--r--compiler/parser/cutils.c70
-rw-r--r--compiler/parser/cutils.h16
-rw-r--r--compiler/parser/hschooks.c55
-rw-r--r--compiler/parser/hschooks.h9
11 files changed, 4965 insertions, 0 deletions
diff --git a/compiler/parser/Ctype.lhs b/compiler/parser/Ctype.lhs
new file mode 100644
index 0000000000..dbe4e9f1b0
--- /dev/null
+++ b/compiler/parser/Ctype.lhs
@@ -0,0 +1,341 @@
+Character classification
+
+\begin{code}
+module Ctype
+ ( is_ident -- Char# -> Bool
+ , is_symbol -- Char# -> Bool
+ , is_any -- Char# -> Bool
+ , is_space -- Char# -> Bool
+ , is_lower -- Char# -> Bool
+ , is_upper -- Char# -> Bool
+ , is_digit -- Char# -> Bool
+ , is_alphanum -- Char# -> Bool
+
+ , is_hexdigit, is_octdigit
+ , hexDigit, octDecDigit
+ ) where
+
+#include "HsVersions.h"
+
+import DATA_INT ( Int32 )
+import DATA_BITS ( Bits((.&.)) )
+import Char ( ord, chr )
+\end{code}
+
+Bit masks
+
+\begin{code}
+cIdent, cSymbol, cAny, cSpace, cLower, cUpper, cDigit :: Int
+cIdent = 1
+cSymbol = 2
+cAny = 4
+cSpace = 8
+cLower = 16
+cUpper = 32
+cDigit = 64
+\end{code}
+
+The predicates below look costly, but aren't, GHC+GCC do a great job
+at the big case below.
+
+\begin{code}
+{-# INLINE is_ctype #-}
+is_ctype :: Int -> Char -> Bool
+is_ctype mask c = (fromIntegral (charType c) .&. fromIntegral mask) /= (0::Int32)
+
+is_ident, is_symbol, is_any, is_space, is_lower, is_upper, is_digit :: Char -> Bool
+is_ident = is_ctype cIdent
+is_symbol = is_ctype cSymbol
+is_any = is_ctype cAny
+is_space = is_ctype cSpace
+is_lower = is_ctype cLower
+is_upper = is_ctype cUpper
+is_digit = is_ctype cDigit
+is_alphanum = is_ctype (cLower+cUpper+cDigit)
+\end{code}
+
+Utils
+
+\begin{code}
+hexDigit :: Char -> Int
+hexDigit c | is_digit c = ord c - ord '0'
+ | otherwise = ord (to_lower c) - ord 'a' + 10
+
+octDecDigit :: Char -> Int
+octDecDigit c = ord c - ord '0'
+
+is_hexdigit c
+ = is_digit c
+ || (c >= 'a' && c <= 'f')
+ || (c >= 'A' && c <= 'F')
+
+is_octdigit c = c >= '0' && c <= '7'
+
+to_lower c
+ | c >= 'A' && c <= 'Z' = chr (ord c - (ord 'A' - ord 'a'))
+ | otherwise = c
+\end{code}
+
+We really mean .|. instead of + below, but GHC currently doesn't do
+any constant folding with bitops. *sigh*
+
+\begin{code}
+charType :: Char -> Int
+charType c = case c of
+ '\0' -> 0 -- \000
+ '\1' -> 0 -- \001
+ '\2' -> 0 -- \002
+ '\3' -> 0 -- \003
+ '\4' -> 0 -- \004
+ '\5' -> 0 -- \005
+ '\6' -> 0 -- \006
+ '\7' -> 0 -- \007
+ '\8' -> 0 -- \010
+ '\9' -> cAny + cSpace -- \t
+ '\10' -> cSpace -- \n (not allowed in strings, so !cAny)
+ '\11' -> cAny + cSpace -- \v
+ '\12' -> cAny + cSpace -- \f
+ '\13' -> cAny + cSpace -- ^M
+ '\14' -> 0 -- \016
+ '\15' -> 0 -- \017
+ '\16' -> 0 -- \020
+ '\17' -> 0 -- \021
+ '\18' -> 0 -- \022
+ '\19' -> 0 -- \023
+ '\20' -> 0 -- \024
+ '\21' -> 0 -- \025
+ '\22' -> 0 -- \026
+ '\23' -> 0 -- \027
+ '\24' -> 0 -- \030
+ '\25' -> 0 -- \031
+ '\26' -> 0 -- \032
+ '\27' -> 0 -- \033
+ '\28' -> 0 -- \034
+ '\29' -> 0 -- \035
+ '\30' -> 0 -- \036
+ '\31' -> 0 -- \037
+ '\32' -> cAny + cSpace --
+ '\33' -> cAny + cSymbol -- !
+ '\34' -> cAny -- "
+ '\35' -> cAny + cSymbol -- #
+ '\36' -> cAny + cSymbol -- $
+ '\37' -> cAny + cSymbol -- %
+ '\38' -> cAny + cSymbol -- &
+ '\39' -> cAny + cIdent -- '
+ '\40' -> cAny -- (
+ '\41' -> cAny -- )
+ '\42' -> cAny + cSymbol -- *
+ '\43' -> cAny + cSymbol -- +
+ '\44' -> cAny -- ,
+ '\45' -> cAny + cSymbol -- -
+ '\46' -> cAny + cSymbol -- .
+ '\47' -> cAny + cSymbol -- /
+ '\48' -> cAny + cIdent + cDigit -- 0
+ '\49' -> cAny + cIdent + cDigit -- 1
+ '\50' -> cAny + cIdent + cDigit -- 2
+ '\51' -> cAny + cIdent + cDigit -- 3
+ '\52' -> cAny + cIdent + cDigit -- 4
+ '\53' -> cAny + cIdent + cDigit -- 5
+ '\54' -> cAny + cIdent + cDigit -- 6
+ '\55' -> cAny + cIdent + cDigit -- 7
+ '\56' -> cAny + cIdent + cDigit -- 8
+ '\57' -> cAny + cIdent + cDigit -- 9
+ '\58' -> cAny + cSymbol -- :
+ '\59' -> cAny -- ;
+ '\60' -> cAny + cSymbol -- <
+ '\61' -> cAny + cSymbol -- =
+ '\62' -> cAny + cSymbol -- >
+ '\63' -> cAny + cSymbol -- ?
+ '\64' -> cAny + cSymbol -- @
+ '\65' -> cAny + cIdent + cUpper -- A
+ '\66' -> cAny + cIdent + cUpper -- B
+ '\67' -> cAny + cIdent + cUpper -- C
+ '\68' -> cAny + cIdent + cUpper -- D
+ '\69' -> cAny + cIdent + cUpper -- E
+ '\70' -> cAny + cIdent + cUpper -- F
+ '\71' -> cAny + cIdent + cUpper -- G
+ '\72' -> cAny + cIdent + cUpper -- H
+ '\73' -> cAny + cIdent + cUpper -- I
+ '\74' -> cAny + cIdent + cUpper -- J
+ '\75' -> cAny + cIdent + cUpper -- K
+ '\76' -> cAny + cIdent + cUpper -- L
+ '\77' -> cAny + cIdent + cUpper -- M
+ '\78' -> cAny + cIdent + cUpper -- N
+ '\79' -> cAny + cIdent + cUpper -- O
+ '\80' -> cAny + cIdent + cUpper -- P
+ '\81' -> cAny + cIdent + cUpper -- Q
+ '\82' -> cAny + cIdent + cUpper -- R
+ '\83' -> cAny + cIdent + cUpper -- S
+ '\84' -> cAny + cIdent + cUpper -- T
+ '\85' -> cAny + cIdent + cUpper -- U
+ '\86' -> cAny + cIdent + cUpper -- V
+ '\87' -> cAny + cIdent + cUpper -- W
+ '\88' -> cAny + cIdent + cUpper -- X
+ '\89' -> cAny + cIdent + cUpper -- Y
+ '\90' -> cAny + cIdent + cUpper -- Z
+ '\91' -> cAny -- [
+ '\92' -> cAny + cSymbol -- backslash
+ '\93' -> cAny -- ]
+ '\94' -> cAny + cSymbol -- ^
+ '\95' -> cAny + cIdent + cLower -- _
+ '\96' -> cAny -- `
+ '\97' -> cAny + cIdent + cLower -- a
+ '\98' -> cAny + cIdent + cLower -- b
+ '\99' -> cAny + cIdent + cLower -- c
+ '\100' -> cAny + cIdent + cLower -- d
+ '\101' -> cAny + cIdent + cLower -- e
+ '\102' -> cAny + cIdent + cLower -- f
+ '\103' -> cAny + cIdent + cLower -- g
+ '\104' -> cAny + cIdent + cLower -- h
+ '\105' -> cAny + cIdent + cLower -- i
+ '\106' -> cAny + cIdent + cLower -- j
+ '\107' -> cAny + cIdent + cLower -- k
+ '\108' -> cAny + cIdent + cLower -- l
+ '\109' -> cAny + cIdent + cLower -- m
+ '\110' -> cAny + cIdent + cLower -- n
+ '\111' -> cAny + cIdent + cLower -- o
+ '\112' -> cAny + cIdent + cLower -- p
+ '\113' -> cAny + cIdent + cLower -- q
+ '\114' -> cAny + cIdent + cLower -- r
+ '\115' -> cAny + cIdent + cLower -- s
+ '\116' -> cAny + cIdent + cLower -- t
+ '\117' -> cAny + cIdent + cLower -- u
+ '\118' -> cAny + cIdent + cLower -- v
+ '\119' -> cAny + cIdent + cLower -- w
+ '\120' -> cAny + cIdent + cLower -- x
+ '\121' -> cAny + cIdent + cLower -- y
+ '\122' -> cAny + cIdent + cLower -- z
+ '\123' -> cAny -- {
+ '\124' -> cAny + cSymbol -- |
+ '\125' -> cAny -- }
+ '\126' -> cAny + cSymbol -- ~
+ '\127' -> 0 -- \177
+ '\128' -> 0 -- \200
+ '\129' -> 0 -- \201
+ '\130' -> 0 -- \202
+ '\131' -> 0 -- \203
+ '\132' -> 0 -- \204
+ '\133' -> 0 -- \205
+ '\134' -> 0 -- \206
+ '\135' -> 0 -- \207
+ '\136' -> 0 -- \210
+ '\137' -> 0 -- \211
+ '\138' -> 0 -- \212
+ '\139' -> 0 -- \213
+ '\140' -> 0 -- \214
+ '\141' -> 0 -- \215
+ '\142' -> 0 -- \216
+ '\143' -> 0 -- \217
+ '\144' -> 0 -- \220
+ '\145' -> 0 -- \221
+ '\146' -> 0 -- \222
+ '\147' -> 0 -- \223
+ '\148' -> 0 -- \224
+ '\149' -> 0 -- \225
+ '\150' -> 0 -- \226
+ '\151' -> 0 -- \227
+ '\152' -> 0 -- \230
+ '\153' -> 0 -- \231
+ '\154' -> 0 -- \232
+ '\155' -> 0 -- \233
+ '\156' -> 0 -- \234
+ '\157' -> 0 -- \235
+ '\158' -> 0 -- \236
+ '\159' -> 0 -- \237
+ '\160' -> cSpace --
+ '\161' -> cAny + cSymbol -- ¡
+ '\162' -> cAny + cSymbol -- ¢
+ '\163' -> cAny + cSymbol -- £
+ '\164' -> cAny + cSymbol -- ¤
+ '\165' -> cAny + cSymbol -- ¥
+ '\166' -> cAny + cSymbol -- ¦
+ '\167' -> cAny + cSymbol -- §
+ '\168' -> cAny + cSymbol -- ¨
+ '\169' -> cAny + cSymbol -- ©
+ '\170' -> cAny + cSymbol -- ª
+ '\171' -> cAny + cSymbol -- «
+ '\172' -> cAny + cSymbol -- ¬
+ '\173' -> cAny + cSymbol -- ­
+ '\174' -> cAny + cSymbol -- ®
+ '\175' -> cAny + cSymbol -- ¯
+ '\176' -> cAny + cSymbol -- °
+ '\177' -> cAny + cSymbol -- ±
+ '\178' -> cAny + cSymbol -- ²
+ '\179' -> cAny + cSymbol -- ³
+ '\180' -> cAny + cSymbol -- ´
+ '\181' -> cAny + cSymbol -- µ
+ '\182' -> cAny + cSymbol -- ¶
+ '\183' -> cAny + cSymbol -- ·
+ '\184' -> cAny + cSymbol -- ¸
+ '\185' -> cAny + cSymbol -- ¹
+ '\186' -> cAny + cSymbol -- º
+ '\187' -> cAny + cSymbol -- »
+ '\188' -> cAny + cSymbol -- ¼
+ '\189' -> cAny + cSymbol -- ½
+ '\190' -> cAny + cSymbol -- ¾
+ '\191' -> cAny + cSymbol -- ¿
+ '\192' -> cAny + cIdent + cUpper -- À
+ '\193' -> cAny + cIdent + cUpper -- Á
+ '\194' -> cAny + cIdent + cUpper -- Â
+ '\195' -> cAny + cIdent + cUpper -- Ã
+ '\196' -> cAny + cIdent + cUpper -- Ä
+ '\197' -> cAny + cIdent + cUpper -- Å
+ '\198' -> cAny + cIdent + cUpper -- Æ
+ '\199' -> cAny + cIdent + cUpper -- Ç
+ '\200' -> cAny + cIdent + cUpper -- È
+ '\201' -> cAny + cIdent + cUpper -- É
+ '\202' -> cAny + cIdent + cUpper -- Ê
+ '\203' -> cAny + cIdent + cUpper -- Ë
+ '\204' -> cAny + cIdent + cUpper -- Ì
+ '\205' -> cAny + cIdent + cUpper -- Í
+ '\206' -> cAny + cIdent + cUpper -- Î
+ '\207' -> cAny + cIdent + cUpper -- Ï
+ '\208' -> cAny + cIdent + cUpper -- Ð
+ '\209' -> cAny + cIdent + cUpper -- Ñ
+ '\210' -> cAny + cIdent + cUpper -- Ò
+ '\211' -> cAny + cIdent + cUpper -- Ó
+ '\212' -> cAny + cIdent + cUpper -- Ô
+ '\213' -> cAny + cIdent + cUpper -- Õ
+ '\214' -> cAny + cIdent + cUpper -- Ö
+ '\215' -> cAny + cSymbol + cLower -- ×
+ '\216' -> cAny + cIdent + cUpper -- Ø
+ '\217' -> cAny + cIdent + cUpper -- Ù
+ '\218' -> cAny + cIdent + cUpper -- Ú
+ '\219' -> cAny + cIdent + cUpper -- Û
+ '\220' -> cAny + cIdent + cUpper -- Ü
+ '\221' -> cAny + cIdent + cUpper -- Ý
+ '\222' -> cAny + cIdent + cUpper -- Þ
+ '\223' -> cAny + cIdent -- ß
+ '\224' -> cAny + cIdent + cLower -- à
+ '\225' -> cAny + cIdent + cLower -- á
+ '\226' -> cAny + cIdent + cLower -- â
+ '\227' -> cAny + cIdent + cLower -- ã
+ '\228' -> cAny + cIdent + cLower -- ä
+ '\229' -> cAny + cIdent + cLower -- å
+ '\230' -> cAny + cIdent + cLower -- æ
+ '\231' -> cAny + cIdent + cLower -- ç
+ '\232' -> cAny + cIdent + cLower -- è
+ '\233' -> cAny + cIdent + cLower -- é
+ '\234' -> cAny + cIdent + cLower -- ê
+ '\235' -> cAny + cIdent + cLower -- ë
+ '\236' -> cAny + cIdent + cLower -- ì
+ '\237' -> cAny + cIdent + cLower -- í
+ '\238' -> cAny + cIdent + cLower -- î
+ '\239' -> cAny + cIdent + cLower -- ï
+ '\240' -> cAny + cIdent + cLower -- ð
+ '\241' -> cAny + cIdent + cLower -- ñ
+ '\242' -> cAny + cIdent + cLower -- ò
+ '\243' -> cAny + cIdent + cLower -- ó
+ '\244' -> cAny + cIdent + cLower -- ô
+ '\245' -> cAny + cIdent + cLower -- õ
+ '\246' -> cAny + cIdent + cLower -- ö
+ '\247' -> cAny + cSymbol -- ÷
+ '\248' -> cAny + cIdent -- ø
+ '\249' -> cAny + cIdent + cLower -- ù
+ '\250' -> cAny + cIdent + cLower -- ú
+ '\251' -> cAny + cIdent + cLower -- û
+ '\252' -> cAny + cIdent + cLower -- ü
+ '\253' -> cAny + cIdent + cLower -- ý
+ '\254' -> cAny + cIdent + cLower -- þ
+ '\255' -> cAny + cIdent + cLower -- ÿ
+\end{code}
diff --git a/compiler/parser/LexCore.hs b/compiler/parser/LexCore.hs
new file mode 100644
index 0000000000..1a545a3e43
--- /dev/null
+++ b/compiler/parser/LexCore.hs
@@ -0,0 +1,130 @@
+module LexCore where
+
+import ParserCoreUtils
+import Ratio
+import Char
+import qualified Numeric( readFloat, readDec )
+
+isNameChar c = isAlpha c || isDigit c || (c == '_') || (c == '\'')
+isKeywordChar c = isAlpha c || (c == '_')
+
+lexer :: (Token -> P a) -> P a
+lexer cont [] = cont TKEOF []
+lexer cont ('\n':cs) = \line -> lexer cont cs (line+1)
+lexer cont ('-':'>':cs) = cont TKrarrow cs
+
+lexer cont (c:cs)
+ | isSpace c = lexer cont cs
+ | isLower c || (c == '_') = lexName cont TKname (c:cs)
+ | isUpper c = lexName cont TKcname (c:cs)
+ | isDigit c || (c == '-') = lexNum cont (c:cs)
+
+lexer cont ('%':cs) = lexKeyword cont cs
+lexer cont ('\'':cs) = lexChar cont cs
+lexer cont ('\"':cs) = lexString [] cont cs
+lexer cont ('#':cs) = cont TKhash cs
+lexer cont ('(':cs) = cont TKoparen cs
+lexer cont (')':cs) = cont TKcparen cs
+lexer cont ('{':cs) = cont TKobrace cs
+lexer cont ('}':cs) = cont TKcbrace cs
+lexer cont ('=':cs) = cont TKeq cs
+lexer cont (':':':':cs) = cont TKcoloncolon cs
+lexer cont ('*':cs) = cont TKstar cs
+lexer cont ('.':cs) = cont TKdot cs
+lexer cont ('\\':cs) = cont TKlambda cs
+lexer cont ('@':cs) = cont TKat cs
+lexer cont ('?':cs) = cont TKquestion cs
+lexer cont (';':cs) = cont TKsemicolon cs
+lexer cont (c:cs) = failP "invalid character" [c]
+
+
+
+lexChar cont ('\\':'x':h1:h0:'\'':cs)
+ | isHexEscape [h1,h0] = cont (TKchar (hexToChar h1 h0)) cs
+lexChar cont ('\\':cs) = failP "invalid char character" ('\\':(take 10 cs))
+lexChar cont ('\'':cs) = failP "invalid char character" ['\'']
+lexChar cont ('\"':cs) = failP "invalid char character" ['\"']
+lexChar cont (c:'\'':cs) = cont (TKchar c) cs
+
+
+lexString s cont ('\\':'x':h1:h0:cs)
+ | isHexEscape [h1,h0] = lexString (s++[hexToChar h1 h0]) cont cs
+lexString s cont ('\\':cs) = failP "invalid string character" ['\\']
+lexString s cont ('\'':cs) = failP "invalid string character" ['\'']
+lexString s cont ('\"':cs) = cont (TKstring s) cs
+lexString s cont (c:cs) = lexString (s++[c]) cont cs
+
+isHexEscape = all (\c -> isHexDigit c && (isDigit c || isLower c))
+
+hexToChar h1 h0 = chr (digitToInt h1 * 16 + digitToInt h0)
+
+
+lexNum cont cs =
+ case cs of
+ ('-':cs) -> f (-1) cs
+ _ -> f 1 cs
+ where f sgn cs =
+ case span isDigit cs of
+ (digits,'.':c:rest)
+ | isDigit c -> cont (TKrational (fromInteger sgn * r)) rest'
+ where ((r,rest'):_) = readFloat (digits ++ ('.':c:rest))
+ -- When reading a floating-point number, which is
+ -- a bit complicated, use the Haskell 98 library function
+ (digits,rest) -> cont (TKinteger (sgn * (read digits))) rest
+
+lexName cont cstr cs = cont (cstr name) rest
+ where (name,rest) = span isNameChar cs
+
+lexKeyword cont cs =
+ case span isKeywordChar cs of
+ ("module",rest) -> cont TKmodule rest
+ ("data",rest) -> cont TKdata rest
+ ("newtype",rest) -> cont TKnewtype rest
+ ("forall",rest) -> cont TKforall rest
+ ("rec",rest) -> cont TKrec rest
+ ("let",rest) -> cont TKlet rest
+ ("in",rest) -> cont TKin rest
+ ("case",rest) -> cont TKcase rest
+ ("of",rest) -> cont TKof rest
+ ("coerce",rest) -> cont TKcoerce rest
+ ("note",rest) -> cont TKnote rest
+ ("external",rest) -> cont TKexternal rest
+ ("_",rest) -> cont TKwild rest
+ _ -> failP "invalid keyword" ('%':cs)
+
+
+#if __GLASGOW_HASKELL__ >= 504
+-- The readFloat in the Numeric library will do the job
+
+readFloat :: (RealFrac a) => ReadS a
+readFloat = Numeric.readFloat
+
+#else
+-- Haskell 98's Numeric.readFloat used to have a bogusly restricted signature
+-- so it was incapable of reading a rational.
+-- So for GHCs that have that old bogus library, here is the code, written out longhand.
+
+readFloat r = [(fromRational ((n%1)*10^^(k-d)),t) | (n,d,s) <- readFix r,
+ (k,t) <- readExp s] ++
+ [ (0/0, t) | ("NaN",t) <- lex r] ++
+ [ (1/0, t) | ("Infinity",t) <- lex r]
+ where
+ readFix r = [(read (ds++ds'), length ds', t)
+ | (ds,d) <- lexDigits r,
+ (ds',t) <- lexFrac d ]
+
+ lexFrac ('.':ds) = lexDigits ds
+ lexFrac s = [("",s)]
+
+ readExp (e:s) | e `elem` "eE" = readExp' s
+ readExp s = [(0,s)]
+
+ readExp' ('-':s) = [(-k,t) | (k,t) <- Numeric.readDec s]
+ readExp' ('+':s) = Numeric.readDec s
+ readExp' s = Numeric.readDec s
+
+lexDigits :: ReadS String
+lexDigits s = case span isDigit s of
+ (cs,s') | not (null cs) -> [(cs,s')]
+ otherwise -> []
+#endif
diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x
new file mode 100644
index 0000000000..4c1b48efc0
--- /dev/null
+++ b/compiler/parser/Lexer.x
@@ -0,0 +1,1457 @@
+-----------------------------------------------------------------------------
+-- (c) The University of Glasgow, 2006
+--
+-- GHC's lexer.
+--
+-- This is a combination of an Alex-generated lexer from a regex
+-- definition, with some hand-coded bits.
+--
+-- Completely accurate information about token-spans within the source
+-- file is maintained. Every token has a start and end SrcLoc attached to it.
+--
+-----------------------------------------------------------------------------
+
+-- ToDo / known bugs:
+-- - Unicode
+-- - parsing integers is a bit slow
+-- - readRational is a bit slow
+--
+-- Known bugs, that were also in the previous version:
+-- - M... should be 3 tokens, not 1.
+-- - pragma-end should be only valid in a pragma
+
+{
+module Lexer (
+ Token(..), lexer, pragState, mkPState, PState(..),
+ P(..), ParseResult(..), getSrcLoc,
+ failLocMsgP, failSpanMsgP, srcParseFail,
+ popContext, pushCurrentContext, setLastToken, setSrcLoc,
+ getLexState, popLexState, pushLexState,
+ extension, bangPatEnabled
+ ) where
+
+#include "HsVersions.h"
+
+import ErrUtils ( Message )
+import Outputable
+import StringBuffer
+import FastString
+import FastTypes
+import SrcLoc
+import UniqFM
+import DynFlags
+import Ctype
+import Util ( maybePrefixMatch, readRational )
+
+import DATA_BITS
+import Data.Char ( chr )
+import Ratio
+--import TRACE
+
+#if __GLASGOW_HASKELL__ >= 605
+import Data.Char ( GeneralCategory(..), generalCategory, isPrint, isUpper )
+#else
+import Compat.Unicode ( GeneralCategory(..), generalCategory, isPrint, isUpper )
+#endif
+}
+
+$unispace = \x05
+$whitechar = [\ \t\n\r\f\v\xa0 $unispace]
+$white_no_nl = $whitechar # \n
+
+$ascdigit = 0-9
+$unidigit = \x03
+$decdigit = $ascdigit -- for now, should really be $digit (ToDo)
+$digit = [$ascdigit $unidigit]
+
+$special = [\(\)\,\;\[\]\`\{\}]
+$ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~]
+$unisymbol = \x04
+$symbol = [$ascsymbol $unisymbol] # [$special \_\:\"\']
+
+$unilarge = \x01
+$asclarge = [A-Z \xc0-\xd6 \xd8-\xde]
+$large = [$asclarge $unilarge]
+
+$unismall = \x02
+$ascsmall = [a-z \xdf-\xf6 \xf8-\xff]
+$small = [$ascsmall $unismall \_]
+
+$unigraphic = \x06
+$graphic = [$small $large $symbol $digit $special $unigraphic \:\"\']
+
+$octit = 0-7
+$hexit = [$decdigit A-F a-f]
+$symchar = [$symbol \:]
+$nl = [\n\r]
+$idchar = [$small $large $digit \']
+
+@varid = $small $idchar*
+@conid = $large $idchar*
+
+@varsym = $symbol $symchar*
+@consym = \: $symchar*
+
+@decimal = $decdigit+
+@octal = $octit+
+@hexadecimal = $hexit+
+@exponent = [eE] [\-\+]? @decimal
+
+-- we support the hierarchical module name extension:
+@qual = (@conid \.)+
+
+@floating_point = @decimal \. @decimal @exponent? | @decimal @exponent
+
+haskell :-
+
+-- everywhere: skip whitespace and comments
+$white_no_nl+ ;
+
+-- Everywhere: deal with nested comments. We explicitly rule out
+-- pragmas, "{-#", so that we don't accidentally treat them as comments.
+-- (this can happen even though pragmas will normally take precedence due to
+-- longest-match, because pragmas aren't valid in every state, but comments
+-- are).
+"{-" / { notFollowedBy '#' } { nested_comment }
+
+-- Single-line comments are a bit tricky. Haskell 98 says that two or
+-- more dashes followed by a symbol should be parsed as a varsym, so we
+-- have to exclude those.
+-- The regex says: "munch all the characters after the dashes, as long as
+-- the first one is not a symbol".
+"--"\-* [^$symbol :] .* ;
+"--"\-* / { atEOL } ;
+
+-- 'bol' state: beginning of a line. Slurp up all the whitespace (including
+-- blank lines) until we find a non-whitespace character, then do layout
+-- processing.
+--
+-- One slight wibble here: what if the line begins with {-#? In
+-- theory, we have to lex the pragma to see if it's one we recognise,
+-- and if it is, then we backtrack and do_bol, otherwise we treat it
+-- as a nested comment. We don't bother with this: if the line begins
+-- with {-#, then we'll assume it's a pragma we know about and go for do_bol.
+<bol> {
+ \n ;
+ ^\# (line)? { begin line_prag1 }
+ ^\# pragma .* \n ; -- GCC 3.3 CPP generated, apparently
+ ^\# \! .* \n ; -- #!, for scripts
+ () { do_bol }
+}
+
+-- after a layout keyword (let, where, do, of), we begin a new layout
+-- context if the curly brace is missing.
+-- Careful! This stuff is quite delicate.
+<layout, layout_do> {
+ \{ / { notFollowedBy '-' } { pop_and open_brace }
+ -- we might encounter {-# here, but {- has been handled already
+ \n ;
+ ^\# (line)? { begin line_prag1 }
+}
+
+-- do is treated in a subtly different way, see new_layout_context
+<layout> () { new_layout_context True }
+<layout_do> () { new_layout_context False }
+
+-- after a new layout context which was found to be to the left of the
+-- previous context, we have generated a '{' token, and we now need to
+-- generate a matching '}' token.
+<layout_left> () { do_layout_left }
+
+<0,option_prags,glaexts> \n { begin bol }
+
+"{-#" $whitechar* (line|LINE) { begin line_prag2 }
+
+-- single-line line pragmas, of the form
+-- # <line> "<file>" <extra-stuff> \n
+<line_prag1> $decdigit+ { setLine line_prag1a }
+<line_prag1a> \" [$graphic \ ]* \" { setFile line_prag1b }
+<line_prag1b> .* { pop }
+
+-- Haskell-style line pragmas, of the form
+-- {-# LINE <line> "<file>" #-}
+<line_prag2> $decdigit+ { setLine line_prag2a }
+<line_prag2a> \" [$graphic \ ]* \" { setFile line_prag2b }
+<line_prag2b> "#-}"|"-}" { pop }
+ -- NOTE: accept -} at the end of a LINE pragma, for compatibility
+ -- with older versions of GHC which generated these.
+
+-- We only want RULES pragmas to be picked up when -fglasgow-exts
+-- is on, because the contents of the pragma is always written using
+-- glasgow-exts syntax (using forall etc.), so if glasgow exts are not
+-- enabled, we're sure to get a parse error.
+-- (ToDo: we should really emit a warning when ignoring pragmas)
+<glaexts>
+ "{-#" $whitechar* (RULES|rules) { token ITrules_prag }
+
+<0,option_prags,glaexts> {
+ "{-#" $whitechar* (INLINE|inline) { token (ITinline_prag True) }
+ "{-#" $whitechar* (NO(T?)INLINE|no(t?)inline)
+ { token (ITinline_prag False) }
+ "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e)
+ { token ITspec_prag }
+ "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e)
+ $whitechar* (INLINE|inline) { token (ITspec_inline_prag True) }
+ "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e)
+ $whitechar* (NO(T?)INLINE|no(t?)inline)
+ { token (ITspec_inline_prag False) }
+ "{-#" $whitechar* (SOURCE|source) { token ITsource_prag }
+ "{-#" $whitechar* (DEPRECATED|deprecated)
+ { token ITdeprecated_prag }
+ "{-#" $whitechar* (SCC|scc) { token ITscc_prag }
+ "{-#" $whitechar* (CORE|core) { token ITcore_prag }
+ "{-#" $whitechar* (UNPACK|unpack) { token ITunpack_prag }
+
+ "{-#" { nested_comment }
+
+ -- ToDo: should only be valid inside a pragma:
+ "#-}" { token ITclose_prag}
+}
+
+<option_prags> {
+ "{-#" $whitechar* (OPTIONS|options) { lex_string_prag IToptions_prag }
+ "{-#" $whitechar* (OPTIONS_GHC|options_ghc)
+ { lex_string_prag IToptions_prag }
+ "{-#" $whitechar* (LANGUAGE|language) { token ITlanguage_prag }
+ "{-#" $whitechar* (INCLUDE|include) { lex_string_prag ITinclude_prag }
+}
+
+-- '0' state: ordinary lexemes
+-- 'glaexts' state: glasgow extensions (postfix '#', etc.)
+
+-- "special" symbols
+
+<0,glaexts> {
+ "[:" / { ifExtension parrEnabled } { token ITopabrack }
+ ":]" / { ifExtension parrEnabled } { token ITcpabrack }
+}
+
+<0,glaexts> {
+ "[|" / { ifExtension thEnabled } { token ITopenExpQuote }
+ "[e|" / { ifExtension thEnabled } { token ITopenExpQuote }
+ "[p|" / { ifExtension thEnabled } { token ITopenPatQuote }
+ "[d|" / { ifExtension thEnabled } { layout_token ITopenDecQuote }
+ "[t|" / { ifExtension thEnabled } { token ITopenTypQuote }
+ "|]" / { ifExtension thEnabled } { token ITcloseQuote }
+ \$ @varid / { ifExtension thEnabled } { skip_one_varid ITidEscape }
+ "$(" / { ifExtension thEnabled } { token ITparenEscape }
+}
+
+<0,glaexts> {
+ "(|" / { ifExtension arrowsEnabled `alexAndPred` notFollowedBySymbol }
+ { special IToparenbar }
+ "|)" / { ifExtension arrowsEnabled } { special ITcparenbar }
+}
+
+<0,glaexts> {
+ \? @varid / { ifExtension ipEnabled } { skip_one_varid ITdupipvarid }
+ \% @varid / { ifExtension ipEnabled } { skip_one_varid ITsplitipvarid }
+}
+
+<glaexts> {
+ "(#" / { notFollowedBySymbol } { token IToubxparen }
+ "#)" { token ITcubxparen }
+ "{|" { token ITocurlybar }
+ "|}" { token ITccurlybar }
+}
+
+<0,option_prags,glaexts> {
+ \( { special IToparen }
+ \) { special ITcparen }
+ \[ { special ITobrack }
+ \] { special ITcbrack }
+ \, { special ITcomma }
+ \; { special ITsemi }
+ \` { special ITbackquote }
+
+ \{ { open_brace }
+ \} { close_brace }
+}
+
+<0,option_prags,glaexts> {
+ @qual @varid { check_qvarid }
+ @qual @conid { idtoken qconid }
+ @varid { varid }
+ @conid { idtoken conid }
+}
+
+-- after an illegal qvarid, such as 'M.let',
+-- we back up and try again in the bad_qvarid state:
+<bad_qvarid> {
+ @conid { pop_and (idtoken conid) }
+ @qual @conid { pop_and (idtoken qconid) }
+}
+
+<glaexts> {
+ @qual @varid "#"+ { idtoken qvarid }
+ @qual @conid "#"+ { idtoken qconid }
+ @varid "#"+ { varid }
+ @conid "#"+ { idtoken conid }
+}
+
+-- ToDo: M.(,,,)
+
+<0,glaexts> {
+ @qual @varsym { idtoken qvarsym }
+ @qual @consym { idtoken qconsym }
+ @varsym { varsym }
+ @consym { consym }
+}
+
+<0,glaexts> {
+ @decimal { tok_decimal }
+ 0[oO] @octal { tok_octal }
+ 0[xX] @hexadecimal { tok_hexadecimal }
+}
+
+<glaexts> {
+ @decimal \# { prim_decimal }
+ 0[oO] @octal \# { prim_octal }
+ 0[xX] @hexadecimal \# { prim_hexadecimal }
+}
+
+<0,glaexts> @floating_point { strtoken tok_float }
+<glaexts> @floating_point \# { init_strtoken 1 prim_float }
+<glaexts> @floating_point \# \# { init_strtoken 2 prim_double }
+
+-- Strings and chars are lexed by hand-written code. The reason is
+-- that even if we recognise the string or char here in the regex
+-- lexer, we would still have to parse the string afterward in order
+-- to convert it to a String.
+<0,glaexts> {
+ \' { lex_char_tok }
+ \" { lex_string_tok }
+}
+
+{
+-- work around bug in Alex 2.0
+#if __GLASGOW_HASKELL__ < 503
+unsafeAt arr i = arr ! i
+#endif
+
+-- -----------------------------------------------------------------------------
+-- The token type
+
+data Token
+ = ITas -- Haskell keywords
+ | ITcase
+ | ITclass
+ | ITdata
+ | ITdefault
+ | ITderiving
+ | ITdo
+ | ITelse
+ | IThiding
+ | ITif
+ | ITimport
+ | ITin
+ | ITinfix
+ | ITinfixl
+ | ITinfixr
+ | ITinstance
+ | ITlet
+ | ITmodule
+ | ITnewtype
+ | ITof
+ | ITqualified
+ | ITthen
+ | ITtype
+ | ITwhere
+ | ITscc -- ToDo: remove (we use {-# SCC "..." #-} now)
+
+ | ITforall -- GHC extension keywords
+ | ITforeign
+ | ITexport
+ | ITlabel
+ | ITdynamic
+ | ITsafe
+ | ITthreadsafe
+ | ITunsafe
+ | ITstdcallconv
+ | ITccallconv
+ | ITdotnet
+ | ITmdo
+
+ -- Pragmas
+ | ITinline_prag Bool -- True <=> INLINE, False <=> NOINLINE
+ | ITspec_prag -- SPECIALISE
+ | ITspec_inline_prag Bool -- SPECIALISE INLINE (or NOINLINE)
+ | ITsource_prag
+ | ITrules_prag
+ | ITdeprecated_prag
+ | ITline_prag
+ | ITscc_prag
+ | ITcore_prag -- hdaume: core annotations
+ | ITunpack_prag
+ | ITclose_prag
+ | IToptions_prag String
+ | ITinclude_prag String
+ | ITlanguage_prag
+
+ | ITdotdot -- reserved symbols
+ | ITcolon
+ | ITdcolon
+ | ITequal
+ | ITlam
+ | ITvbar
+ | ITlarrow
+ | ITrarrow
+ | ITat
+ | ITtilde
+ | ITdarrow
+ | ITminus
+ | ITbang
+ | ITstar
+ | ITdot
+
+ | ITbiglam -- GHC-extension symbols
+
+ | ITocurly -- special symbols
+ | ITccurly
+ | ITocurlybar -- {|, for type applications
+ | ITccurlybar -- |}, for type applications
+ | ITvocurly
+ | ITvccurly
+ | ITobrack
+ | ITopabrack -- [:, for parallel arrays with -fparr
+ | ITcpabrack -- :], for parallel arrays with -fparr
+ | ITcbrack
+ | IToparen
+ | ITcparen
+ | IToubxparen
+ | ITcubxparen
+ | ITsemi
+ | ITcomma
+ | ITunderscore
+ | ITbackquote
+
+ | ITvarid FastString -- identifiers
+ | ITconid FastString
+ | ITvarsym FastString
+ | ITconsym FastString
+ | ITqvarid (FastString,FastString)
+ | ITqconid (FastString,FastString)
+ | ITqvarsym (FastString,FastString)
+ | ITqconsym (FastString,FastString)
+
+ | ITdupipvarid FastString -- GHC extension: implicit param: ?x
+ | ITsplitipvarid FastString -- GHC extension: implicit param: %x
+
+ | ITpragma StringBuffer
+
+ | ITchar Char
+ | ITstring FastString
+ | ITinteger Integer
+ | ITrational Rational
+
+ | ITprimchar Char
+ | ITprimstring FastString
+ | ITprimint Integer
+ | ITprimfloat Rational
+ | ITprimdouble Rational
+
+ -- MetaHaskell extension tokens
+ | ITopenExpQuote -- [| or [e|
+ | ITopenPatQuote -- [p|
+ | ITopenDecQuote -- [d|
+ | ITopenTypQuote -- [t|
+ | ITcloseQuote -- |]
+ | ITidEscape FastString -- $x
+ | ITparenEscape -- $(
+ | ITvarQuote -- '
+ | ITtyQuote -- ''
+
+ -- Arrow notation extension
+ | ITproc
+ | ITrec
+ | IToparenbar -- (|
+ | ITcparenbar -- |)
+ | ITlarrowtail -- -<
+ | ITrarrowtail -- >-
+ | ITLarrowtail -- -<<
+ | ITRarrowtail -- >>-
+
+ | ITunknown String -- Used when the lexer can't make sense of it
+ | ITeof -- end of file token
+#ifdef DEBUG
+ deriving Show -- debugging
+#endif
+
+isSpecial :: Token -> Bool
+-- If we see M.x, where x is a keyword, but
+-- is special, we treat is as just plain M.x,
+-- not as a keyword.
+isSpecial ITas = True
+isSpecial IThiding = True
+isSpecial ITqualified = True
+isSpecial ITforall = True
+isSpecial ITexport = True
+isSpecial ITlabel = True
+isSpecial ITdynamic = True
+isSpecial ITsafe = True
+isSpecial ITthreadsafe = True
+isSpecial ITunsafe = True
+isSpecial ITccallconv = True
+isSpecial ITstdcallconv = True
+isSpecial ITmdo = True
+isSpecial _ = False
+
+-- the bitmap provided as the third component indicates whether the
+-- corresponding extension keyword is valid under the extension options
+-- provided to the compiler; if the extension corresponding to *any* of the
+-- bits set in the bitmap is enabled, the keyword is valid (this setup
+-- facilitates using a keyword in two different extensions that can be
+-- activated independently)
+--
+reservedWordsFM = listToUFM $
+ map (\(x, y, z) -> (mkFastString x, (y, z)))
+ [( "_", ITunderscore, 0 ),
+ ( "as", ITas, 0 ),
+ ( "case", ITcase, 0 ),
+ ( "class", ITclass, 0 ),
+ ( "data", ITdata, 0 ),
+ ( "default", ITdefault, 0 ),
+ ( "deriving", ITderiving, 0 ),
+ ( "do", ITdo, 0 ),
+ ( "else", ITelse, 0 ),
+ ( "hiding", IThiding, 0 ),
+ ( "if", ITif, 0 ),
+ ( "import", ITimport, 0 ),
+ ( "in", ITin, 0 ),
+ ( "infix", ITinfix, 0 ),
+ ( "infixl", ITinfixl, 0 ),
+ ( "infixr", ITinfixr, 0 ),
+ ( "instance", ITinstance, 0 ),
+ ( "let", ITlet, 0 ),
+ ( "module", ITmodule, 0 ),
+ ( "newtype", ITnewtype, 0 ),
+ ( "of", ITof, 0 ),
+ ( "qualified", ITqualified, 0 ),
+ ( "then", ITthen, 0 ),
+ ( "type", ITtype, 0 ),
+ ( "where", ITwhere, 0 ),
+ ( "_scc_", ITscc, 0 ), -- ToDo: remove
+
+ ( "forall", ITforall, bit tvBit),
+ ( "mdo", ITmdo, bit glaExtsBit),
+
+ ( "foreign", ITforeign, bit ffiBit),
+ ( "export", ITexport, bit ffiBit),
+ ( "label", ITlabel, bit ffiBit),
+ ( "dynamic", ITdynamic, bit ffiBit),
+ ( "safe", ITsafe, bit ffiBit),
+ ( "threadsafe", ITthreadsafe, bit ffiBit),
+ ( "unsafe", ITunsafe, bit ffiBit),
+ ( "stdcall", ITstdcallconv, bit ffiBit),
+ ( "ccall", ITccallconv, bit ffiBit),
+ ( "dotnet", ITdotnet, bit ffiBit),
+
+ ( "rec", ITrec, bit arrowsBit),
+ ( "proc", ITproc, bit arrowsBit)
+ ]
+
+reservedSymsFM = listToUFM $
+ map (\ (x,y,z) -> (mkFastString x,(y,z)))
+ [ ("..", ITdotdot, 0)
+ ,(":", ITcolon, 0) -- (:) is a reserved op,
+ -- meaning only list cons
+ ,("::", ITdcolon, 0)
+ ,("=", ITequal, 0)
+ ,("\\", ITlam, 0)
+ ,("|", ITvbar, 0)
+ ,("<-", ITlarrow, 0)
+ ,("->", ITrarrow, 0)
+ ,("@", ITat, 0)
+ ,("~", ITtilde, 0)
+ ,("=>", ITdarrow, 0)
+ ,("-", ITminus, 0)
+ ,("!", ITbang, 0)
+
+ ,("*", ITstar, bit glaExtsBit) -- For data T (a::*) = MkT
+ ,(".", ITdot, bit tvBit) -- For 'forall a . t'
+
+ ,("-<", ITlarrowtail, bit arrowsBit)
+ ,(">-", ITrarrowtail, bit arrowsBit)
+ ,("-<<", ITLarrowtail, bit arrowsBit)
+ ,(">>-", ITRarrowtail, bit arrowsBit)
+
+#if __GLASGOW_HASKELL__ >= 605
+ ,("λ", ITlam, bit glaExtsBit)
+ ,("∷", ITdcolon, bit glaExtsBit)
+ ,("⇒", ITdarrow, bit glaExtsBit)
+ ,("∀", ITforall, bit glaExtsBit)
+ ,("→", ITrarrow, bit glaExtsBit)
+ ,("←", ITlarrow, bit glaExtsBit)
+ ,("⋯", ITdotdot, bit glaExtsBit)
+#endif
+ ]
+
+-- -----------------------------------------------------------------------------
+-- Lexer actions
+
+type Action = SrcSpan -> StringBuffer -> Int -> P (Located Token)
+
+special :: Token -> Action
+special tok span _buf len = return (L span tok)
+
+token, layout_token :: Token -> Action
+token t span buf len = return (L span t)
+layout_token t span buf len = pushLexState layout >> return (L span t)
+
+idtoken :: (StringBuffer -> Int -> Token) -> Action
+idtoken f span buf len = return (L span $! (f buf len))
+
+skip_one_varid :: (FastString -> Token) -> Action
+skip_one_varid f span buf len
+ = return (L span $! f (lexemeToFastString (stepOn buf) (len-1)))
+
+strtoken :: (String -> Token) -> Action
+strtoken f span buf len =
+ return (L span $! (f $! lexemeToString buf len))
+
+init_strtoken :: Int -> (String -> Token) -> Action
+-- like strtoken, but drops the last N character(s)
+init_strtoken drop f span buf len =
+ return (L span $! (f $! lexemeToString buf (len-drop)))
+
+begin :: Int -> Action
+begin code _span _str _len = do pushLexState code; lexToken
+
+pop :: Action
+pop _span _buf _len = do popLexState; lexToken
+
+pop_and :: Action -> Action
+pop_and act span buf len = do popLexState; act span buf len
+
+notFollowedBy char _ _ _ (AI _ _ buf) = atEnd buf || currentChar buf /= char
+
+notFollowedBySymbol _ _ _ (AI _ _ buf)
+ = atEnd buf || currentChar buf `notElem` "!#$%&*+./<=>?@\\^|-~"
+
+atEOL _ _ _ (AI _ _ buf) = atEnd buf || currentChar buf == '\n'
+
+ifExtension pred bits _ _ _ = pred bits
+
+{-
+ nested comments require traversing by hand, they can't be parsed
+ using regular expressions.
+-}
+nested_comment :: Action
+nested_comment span _str _len = do
+ input <- getInput
+ go 1 input
+ where go 0 input = do setInput input; lexToken
+ go n input = do
+ case alexGetChar input of
+ Nothing -> err input
+ Just (c,input) -> do
+ case c of
+ '-' -> do
+ case alexGetChar input of
+ Nothing -> err input
+ Just ('\125',input) -> go (n-1) input
+ Just (c,_) -> go n input
+ '\123' -> do
+ case alexGetChar input of
+ Nothing -> err input
+ Just ('-',input') -> go (n+1) input'
+ Just (c,input) -> go n input
+ c -> go n input
+
+ err (AI end _ _) = failLocMsgP (srcSpanStart span) end "unterminated `{-'"
+
+open_brace, close_brace :: Action
+open_brace span _str _len = do
+ ctx <- getContext
+ setContext (NoLayout:ctx)
+ return (L span ITocurly)
+close_brace span _str _len = do
+ popContext
+ return (L span ITccurly)
+
+-- We have to be careful not to count M.<varid> as a qualified name
+-- when <varid> is a keyword. We hack around this by catching
+-- the offending tokens afterward, and re-lexing in a different state.
+check_qvarid span buf len = do
+ case lookupUFM reservedWordsFM var of
+ Just (keyword,exts)
+ | not (isSpecial keyword) ->
+ if exts == 0
+ then try_again
+ else do
+ b <- extension (\i -> exts .&. i /= 0)
+ if b then try_again
+ else return token
+ _other -> return token
+ where
+ (mod,var) = splitQualName buf len
+ token = L span (ITqvarid (mod,var))
+
+ try_again = do
+ (AI _ offs _) <- getInput
+ setInput (AI (srcSpanStart span) (offs-len) buf)
+ pushLexState bad_qvarid
+ lexToken
+
+qvarid buf len = ITqvarid $! splitQualName buf len
+qconid buf len = ITqconid $! splitQualName buf len
+
+splitQualName :: StringBuffer -> Int -> (FastString,FastString)
+-- takes a StringBuffer and a length, and returns the module name
+-- and identifier parts of a qualified name. Splits at the *last* dot,
+-- because of hierarchical module names.
+splitQualName orig_buf len = split orig_buf orig_buf
+ where
+ split buf dot_buf
+ | orig_buf `byteDiff` buf >= len = done dot_buf
+ | c == '.' = found_dot buf'
+ | otherwise = split buf' dot_buf
+ where
+ (c,buf') = nextChar buf
+
+ -- careful, we might get names like M....
+ -- so, if the character after the dot is not upper-case, this is
+ -- the end of the qualifier part.
+ found_dot buf -- buf points after the '.'
+ | isUpper c = split buf' buf
+ | otherwise = done buf
+ where
+ (c,buf') = nextChar buf
+
+ done dot_buf =
+ (lexemeToFastString orig_buf (qual_size - 1),
+ lexemeToFastString dot_buf (len - qual_size))
+ where
+ qual_size = orig_buf `byteDiff` dot_buf
+
+varid span buf len =
+ case lookupUFM reservedWordsFM fs of
+ Just (keyword,0) -> do
+ maybe_layout keyword
+ return (L span keyword)
+ Just (keyword,exts) -> do
+ b <- extension (\i -> exts .&. i /= 0)
+ if b then do maybe_layout keyword
+ return (L span keyword)
+ else return (L span (ITvarid fs))
+ _other -> return (L span (ITvarid fs))
+ where
+ fs = lexemeToFastString buf len
+
+conid buf len = ITconid fs
+ where fs = lexemeToFastString buf len
+
+qvarsym buf len = ITqvarsym $! splitQualName buf len
+qconsym buf len = ITqconsym $! splitQualName buf len
+
+varsym = sym ITvarsym
+consym = sym ITconsym
+
+sym con span buf len =
+ case lookupUFM reservedSymsFM fs of
+ Just (keyword,0) -> return (L span keyword)
+ Just (keyword,exts) -> do
+ b <- extension (\i -> exts .&. i /= 0)
+ if b then return (L span keyword)
+ else return (L span $! con fs)
+ _other -> return (L span $! con fs)
+ where
+ fs = lexemeToFastString buf len
+
+tok_decimal span buf len
+ = return (L span (ITinteger $! parseInteger buf len 10 octDecDigit))
+
+tok_octal span buf len
+ = return (L span (ITinteger $! parseInteger (offsetBytes 2 buf) (len-2) 8 octDecDigit))
+
+tok_hexadecimal span buf len
+ = return (L span (ITinteger $! parseInteger (offsetBytes 2 buf) (len-2) 16 hexDigit))
+
+prim_decimal span buf len
+ = return (L span (ITprimint $! parseInteger buf (len-1) 10 octDecDigit))
+
+prim_octal span buf len
+ = return (L span (ITprimint $! parseInteger (offsetBytes 2 buf) (len-3) 8 octDecDigit))
+
+prim_hexadecimal span buf len
+ = return (L span (ITprimint $! parseInteger (offsetBytes 2 buf) (len-3) 16 hexDigit))
+
+tok_float str = ITrational $! readRational str
+prim_float str = ITprimfloat $! readRational str
+prim_double str = ITprimdouble $! readRational str
+
+-- -----------------------------------------------------------------------------
+-- Layout processing
+
+-- we're at the first token on a line, insert layout tokens if necessary
+do_bol :: Action
+do_bol span _str _len = do
+ pos <- getOffside
+ case pos of
+ LT -> do
+ --trace "layout: inserting '}'" $ do
+ popContext
+ -- do NOT pop the lex state, we might have a ';' to insert
+ return (L span ITvccurly)
+ EQ -> do
+ --trace "layout: inserting ';'" $ do
+ popLexState
+ return (L span ITsemi)
+ GT -> do
+ popLexState
+ lexToken
+
+-- certain keywords put us in the "layout" state, where we might
+-- add an opening curly brace.
+maybe_layout ITdo = pushLexState layout_do
+maybe_layout ITmdo = pushLexState layout_do
+maybe_layout ITof = pushLexState layout
+maybe_layout ITlet = pushLexState layout
+maybe_layout ITwhere = pushLexState layout
+maybe_layout ITrec = pushLexState layout
+maybe_layout _ = return ()
+
+-- Pushing a new implicit layout context. If the indentation of the
+-- next token is not greater than the previous layout context, then
+-- Haskell 98 says that the new layout context should be empty; that is
+-- the lexer must generate {}.
+--
+-- We are slightly more lenient than this: when the new context is started
+-- by a 'do', then we allow the new context to be at the same indentation as
+-- the previous context. This is what the 'strict' argument is for.
+--
+new_layout_context strict span _buf _len = do
+ popLexState
+ (AI _ offset _) <- getInput
+ ctx <- getContext
+ case ctx of
+ Layout prev_off : _ |
+ (strict && prev_off >= offset ||
+ not strict && prev_off > offset) -> do
+ -- token is indented to the left of the previous context.
+ -- we must generate a {} sequence now.
+ pushLexState layout_left
+ return (L span ITvocurly)
+ other -> do
+ setContext (Layout offset : ctx)
+ return (L span ITvocurly)
+
+do_layout_left span _buf _len = do
+ popLexState
+ pushLexState bol -- we must be at the start of a line
+ return (L span ITvccurly)
+
+-- -----------------------------------------------------------------------------
+-- LINE pragmas
+
+setLine :: Int -> Action
+setLine code span buf len = do
+ let line = parseInteger buf len 10 octDecDigit
+ setSrcLoc (mkSrcLoc (srcSpanFile span) (fromIntegral line - 1) 0)
+ -- subtract one: the line number refers to the *following* line
+ popLexState
+ pushLexState code
+ lexToken
+
+setFile :: Int -> Action
+setFile code span buf len = do
+ let file = lexemeToFastString (stepOn buf) (len-2)
+ setSrcLoc (mkSrcLoc file (srcSpanEndLine span) (srcSpanEndCol span))
+ popLexState
+ pushLexState code
+ lexToken
+
+
+-- -----------------------------------------------------------------------------
+-- Options, includes and language pragmas.
+
+lex_string_prag :: (String -> Token) -> Action
+lex_string_prag mkTok span buf len
+ = do input <- getInput
+ start <- getSrcLoc
+ tok <- go [] input
+ end <- getSrcLoc
+ return (L (mkSrcSpan start end) tok)
+ where go acc input
+ = if isString input "#-}"
+ then do setInput input
+ return (mkTok (reverse acc))
+ else case alexGetChar input of
+ Just (c,i) -> go (c:acc) i
+ Nothing -> err input
+ isString i [] = True
+ isString i (x:xs)
+ = case alexGetChar i of
+ Just (c,i') | c == x -> isString i' xs
+ _other -> False
+ err (AI end _ _) = failLocMsgP (srcSpanStart span) end "unterminated options pragma"
+
+
+-- -----------------------------------------------------------------------------
+-- Strings & Chars
+
+-- This stuff is horrible. I hates it.
+
+lex_string_tok :: Action
+lex_string_tok span buf len = do
+ tok <- lex_string ""
+ end <- getSrcLoc
+ return (L (mkSrcSpan (srcSpanStart span) end) tok)
+
+lex_string :: String -> P Token
+lex_string s = do
+ i <- getInput
+ case alexGetChar' i of
+ Nothing -> lit_error
+
+ Just ('"',i) -> do
+ setInput i
+ glaexts <- extension glaExtsEnabled
+ if glaexts
+ then do
+ i <- getInput
+ case alexGetChar' i of
+ Just ('#',i) -> do
+ setInput i
+ if any (> '\xFF') s
+ then failMsgP "primitive string literal must contain only characters <= \'\\xFF\'"
+ else let s' = mkZFastString (reverse s) in
+ return (ITprimstring s')
+ -- mkZFastString is a hack to avoid encoding the
+ -- string in UTF-8. We just want the exact bytes.
+ _other ->
+ return (ITstring (mkFastString (reverse s)))
+ else
+ return (ITstring (mkFastString (reverse s)))
+
+ Just ('\\',i)
+ | Just ('&',i) <- next -> do
+ setInput i; lex_string s
+ | Just (c,i) <- next, is_space c -> do
+ setInput i; lex_stringgap s
+ where next = alexGetChar' i
+
+ Just (c, i) -> do
+ c' <- lex_char c i
+ lex_string (c':s)
+
+lex_stringgap s = do
+ c <- getCharOrFail
+ case c of
+ '\\' -> lex_string s
+ c | is_space c -> lex_stringgap s
+ _other -> lit_error
+
+
+lex_char_tok :: Action
+-- Here we are basically parsing character literals, such as 'x' or '\n'
+-- but, when Template Haskell is on, we additionally spot
+-- 'x and ''T, returning ITvarQuote and ITtyQuote respectively,
+-- but WIHTOUT CONSUMING the x or T part (the parser does that).
+-- So we have to do two characters of lookahead: when we see 'x we need to
+-- see if there's a trailing quote
+lex_char_tok span buf len = do -- We've seen '
+ i1 <- getInput -- Look ahead to first character
+ let loc = srcSpanStart span
+ case alexGetChar' i1 of
+ Nothing -> lit_error
+
+ Just ('\'', i2@(AI end2 _ _)) -> do -- We've seen ''
+ th_exts <- extension thEnabled
+ if th_exts then do
+ setInput i2
+ return (L (mkSrcSpan loc end2) ITtyQuote)
+ else lit_error
+
+ Just ('\\', i2@(AI end2 _ _)) -> do -- We've seen 'backslash
+ setInput i2
+ lit_ch <- lex_escape
+ mc <- getCharOrFail -- Trailing quote
+ if mc == '\'' then finish_char_tok loc lit_ch
+ else do setInput i2; lit_error
+
+ Just (c, i2@(AI end2 _ _))
+ | not (isAny c) -> lit_error
+ | otherwise ->
+
+ -- We've seen 'x, where x is a valid character
+ -- (i.e. not newline etc) but not a quote or backslash
+ case alexGetChar' i2 of -- Look ahead one more character
+ Nothing -> lit_error
+ Just ('\'', i3) -> do -- We've seen 'x'
+ setInput i3
+ finish_char_tok loc c
+ _other -> do -- We've seen 'x not followed by quote
+ -- If TH is on, just parse the quote only
+ th_exts <- extension thEnabled
+ let (AI end _ _) = i1
+ if th_exts then return (L (mkSrcSpan loc end) ITvarQuote)
+ else do setInput i2; lit_error
+
+finish_char_tok :: SrcLoc -> Char -> P (Located Token)
+finish_char_tok loc ch -- We've already seen the closing quote
+ -- Just need to check for trailing #
+ = do glaexts <- extension glaExtsEnabled
+ i@(AI end _ _) <- getInput
+ if glaexts then do
+ case alexGetChar' i of
+ Just ('#',i@(AI end _ _)) -> do
+ setInput i
+ return (L (mkSrcSpan loc end) (ITprimchar ch))
+ _other ->
+ return (L (mkSrcSpan loc end) (ITchar ch))
+ else do
+ return (L (mkSrcSpan loc end) (ITchar ch))
+
+lex_char :: Char -> AlexInput -> P Char
+lex_char c inp = do
+ case c of
+ '\\' -> do setInput inp; lex_escape
+ c | isAny c -> do setInput inp; return c
+ _other -> lit_error
+
+isAny c | c > '\xff' = isPrint c
+ | otherwise = is_any c
+
+lex_escape :: P Char
+lex_escape = do
+ c <- getCharOrFail
+ case c of
+ 'a' -> return '\a'
+ 'b' -> return '\b'
+ 'f' -> return '\f'
+ 'n' -> return '\n'
+ 'r' -> return '\r'
+ 't' -> return '\t'
+ 'v' -> return '\v'
+ '\\' -> return '\\'
+ '"' -> return '\"'
+ '\'' -> return '\''
+ '^' -> do c <- getCharOrFail
+ if c >= '@' && c <= '_'
+ then return (chr (ord c - ord '@'))
+ else lit_error
+
+ 'x' -> readNum is_hexdigit 16 hexDigit
+ 'o' -> readNum is_octdigit 8 octDecDigit
+ x | is_digit x -> readNum2 is_digit 10 octDecDigit (octDecDigit x)
+
+ c1 -> do
+ i <- getInput
+ case alexGetChar' i of
+ Nothing -> lit_error
+ Just (c2,i2) ->
+ case alexGetChar' i2 of
+ Nothing -> do setInput i2; lit_error
+ Just (c3,i3) ->
+ let str = [c1,c2,c3] in
+ case [ (c,rest) | (p,c) <- silly_escape_chars,
+ Just rest <- [maybePrefixMatch p str] ] of
+ (escape_char,[]):_ -> do
+ setInput i3
+ return escape_char
+ (escape_char,_:_):_ -> do
+ setInput i2
+ return escape_char
+ [] -> lit_error
+
+readNum :: (Char -> Bool) -> Int -> (Char -> Int) -> P Char
+readNum is_digit base conv = do
+ i <- getInput
+ c <- getCharOrFail
+ if is_digit c
+ then readNum2 is_digit base conv (conv c)
+ else do setInput i; lit_error
+
+readNum2 is_digit base conv i = do
+ input <- getInput
+ read i input
+ where read i input = do
+ case alexGetChar' input of
+ Just (c,input') | is_digit c -> do
+ read (i*base + conv c) input'
+ _other -> do
+ if i >= 0 && i <= 0x10FFFF
+ then do setInput input; return (chr i)
+ else lit_error
+
+silly_escape_chars = [
+ ("NUL", '\NUL'),
+ ("SOH", '\SOH'),
+ ("STX", '\STX'),
+ ("ETX", '\ETX'),
+ ("EOT", '\EOT'),
+ ("ENQ", '\ENQ'),
+ ("ACK", '\ACK'),
+ ("BEL", '\BEL'),
+ ("BS", '\BS'),
+ ("HT", '\HT'),
+ ("LF", '\LF'),
+ ("VT", '\VT'),
+ ("FF", '\FF'),
+ ("CR", '\CR'),
+ ("SO", '\SO'),
+ ("SI", '\SI'),
+ ("DLE", '\DLE'),
+ ("DC1", '\DC1'),
+ ("DC2", '\DC2'),
+ ("DC3", '\DC3'),
+ ("DC4", '\DC4'),
+ ("NAK", '\NAK'),
+ ("SYN", '\SYN'),
+ ("ETB", '\ETB'),
+ ("CAN", '\CAN'),
+ ("EM", '\EM'),
+ ("SUB", '\SUB'),
+ ("ESC", '\ESC'),
+ ("FS", '\FS'),
+ ("GS", '\GS'),
+ ("RS", '\RS'),
+ ("US", '\US'),
+ ("SP", '\SP'),
+ ("DEL", '\DEL')
+ ]
+
+-- before calling lit_error, ensure that the current input is pointing to
+-- the position of the error in the buffer. This is so that we can report
+-- a correct location to the user, but also so we can detect UTF-8 decoding
+-- errors if they occur.
+lit_error = lexError "lexical error in string/character literal"
+
+getCharOrFail :: P Char
+getCharOrFail = do
+ i <- getInput
+ case alexGetChar' i of
+ Nothing -> lexError "unexpected end-of-file in string/character literal"
+ Just (c,i) -> do setInput i; return c
+
+-- -----------------------------------------------------------------------------
+-- The Parse Monad
+
+data LayoutContext
+ = NoLayout
+ | Layout !Int
+
+data ParseResult a
+ = POk PState a
+ | PFailed
+ SrcSpan -- The start and end of the text span related to
+ -- the error. Might be used in environments which can
+ -- show this span, e.g. by highlighting it.
+ Message -- The error message
+
+data PState = PState {
+ buffer :: StringBuffer,
+ last_loc :: SrcSpan, -- pos of previous token
+ last_offs :: !Int, -- offset of the previous token from the
+ -- beginning of the current line.
+ -- \t is equal to 8 spaces.
+ last_len :: !Int, -- len of previous token
+ loc :: SrcLoc, -- current loc (end of prev token + 1)
+ extsBitmap :: !Int, -- bitmap that determines permitted extensions
+ context :: [LayoutContext],
+ lex_state :: [Int]
+ }
+ -- last_loc and last_len are used when generating error messages,
+ -- and in pushCurrentContext only. Sigh, if only Happy passed the
+ -- current token to happyError, we could at least get rid of last_len.
+ -- Getting rid of last_loc would require finding another way to
+ -- implement pushCurrentContext (which is only called from one place).
+
+newtype P a = P { unP :: PState -> ParseResult a }
+
+instance Monad P where
+ return = returnP
+ (>>=) = thenP
+ fail = failP
+
+returnP :: a -> P a
+returnP a = P $ \s -> POk s a
+
+thenP :: P a -> (a -> P b) -> P b
+(P m) `thenP` k = P $ \ s ->
+ case m s of
+ POk s1 a -> (unP (k a)) s1
+ PFailed span err -> PFailed span err
+
+failP :: String -> P a
+failP msg = P $ \s -> PFailed (last_loc s) (text msg)
+
+failMsgP :: String -> P a
+failMsgP msg = P $ \s -> PFailed (last_loc s) (text msg)
+
+failLocMsgP :: SrcLoc -> SrcLoc -> String -> P a
+failLocMsgP loc1 loc2 str = P $ \s -> PFailed (mkSrcSpan loc1 loc2) (text str)
+
+failSpanMsgP :: SrcSpan -> String -> P a
+failSpanMsgP span msg = P $ \s -> PFailed span (text msg)
+
+extension :: (Int -> Bool) -> P Bool
+extension p = P $ \s -> POk s (p $! extsBitmap s)
+
+getExts :: P Int
+getExts = P $ \s -> POk s (extsBitmap s)
+
+setSrcLoc :: SrcLoc -> P ()
+setSrcLoc new_loc = P $ \s -> POk s{loc=new_loc} ()
+
+getSrcLoc :: P SrcLoc
+getSrcLoc = P $ \s@(PState{ loc=loc }) -> POk s loc
+
+setLastToken :: SrcSpan -> Int -> P ()
+setLastToken loc len = P $ \s -> POk s{ last_loc=loc, last_len=len } ()
+
+data AlexInput = AI SrcLoc {-#UNPACK#-}!Int StringBuffer
+
+alexInputPrevChar :: AlexInput -> Char
+alexInputPrevChar (AI _ _ buf) = prevChar buf '\n'
+
+alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
+alexGetChar (AI loc ofs s)
+ | atEnd s = Nothing
+ | otherwise = adj_c `seq` loc' `seq` ofs' `seq` s' `seq`
+ Just (adj_c, (AI loc' ofs' s'))
+ where (c,s') = nextChar s
+ loc' = advanceSrcLoc loc c
+ ofs' = advanceOffs c ofs
+
+ non_graphic = '\x0'
+ upper = '\x1'
+ lower = '\x2'
+ digit = '\x3'
+ symbol = '\x4'
+ space = '\x5'
+ other_graphic = '\x6'
+
+ adj_c
+ | c <= '\x06' = non_graphic
+ | c <= '\xff' = c
+ | otherwise =
+ case generalCategory c of
+ UppercaseLetter -> upper
+ LowercaseLetter -> lower
+ TitlecaseLetter -> upper
+ ModifierLetter -> other_graphic
+ OtherLetter -> other_graphic
+ NonSpacingMark -> other_graphic
+ SpacingCombiningMark -> other_graphic
+ EnclosingMark -> other_graphic
+ DecimalNumber -> digit
+ LetterNumber -> other_graphic
+ OtherNumber -> other_graphic
+ ConnectorPunctuation -> other_graphic
+ DashPunctuation -> other_graphic
+ OpenPunctuation -> other_graphic
+ ClosePunctuation -> other_graphic
+ InitialQuote -> other_graphic
+ FinalQuote -> other_graphic
+ OtherPunctuation -> other_graphic
+ MathSymbol -> symbol
+ CurrencySymbol -> symbol
+ ModifierSymbol -> symbol
+ OtherSymbol -> symbol
+ Space -> space
+ _other -> non_graphic
+
+-- This version does not squash unicode characters, it is used when
+-- lexing strings.
+alexGetChar' :: AlexInput -> Maybe (Char,AlexInput)
+alexGetChar' (AI loc ofs s)
+ | atEnd s = Nothing
+ | otherwise = c `seq` loc' `seq` ofs' `seq` s' `seq`
+ Just (c, (AI loc' ofs' s'))
+ where (c,s') = nextChar s
+ loc' = advanceSrcLoc loc c
+ ofs' = advanceOffs c ofs
+
+advanceOffs :: Char -> Int -> Int
+advanceOffs '\n' offs = 0
+advanceOffs '\t' offs = (offs `quot` 8 + 1) * 8
+advanceOffs _ offs = offs + 1
+
+getInput :: P AlexInput
+getInput = P $ \s@PState{ loc=l, last_offs=o, buffer=b } -> POk s (AI l o b)
+
+setInput :: AlexInput -> P ()
+setInput (AI l o b) = P $ \s -> POk s{ loc=l, last_offs=o, buffer=b } ()
+
+pushLexState :: Int -> P ()
+pushLexState ls = P $ \s@PState{ lex_state=l } -> POk s{lex_state=ls:l} ()
+
+popLexState :: P Int
+popLexState = P $ \s@PState{ lex_state=ls:l } -> POk s{ lex_state=l } ls
+
+getLexState :: P Int
+getLexState = P $ \s@PState{ lex_state=ls:l } -> POk s ls
+
+-- for reasons of efficiency, flags indicating language extensions (eg,
+-- -fglasgow-exts or -fparr) are represented by a bitmap stored in an unboxed
+-- integer
+
+glaExtsBit, ffiBit, parrBit :: Int
+glaExtsBit = 0
+ffiBit = 1
+parrBit = 2
+arrowsBit = 4
+thBit = 5
+ipBit = 6
+tvBit = 7 -- Scoped type variables enables 'forall' keyword
+bangPatBit = 8 -- Tells the parser to understand bang-patterns
+ -- (doesn't affect the lexer)
+
+glaExtsEnabled, ffiEnabled, parrEnabled :: Int -> Bool
+glaExtsEnabled flags = testBit flags glaExtsBit
+ffiEnabled flags = testBit flags ffiBit
+parrEnabled flags = testBit flags parrBit
+arrowsEnabled flags = testBit flags arrowsBit
+thEnabled flags = testBit flags thBit
+ipEnabled flags = testBit flags ipBit
+tvEnabled flags = testBit flags tvBit
+bangPatEnabled flags = testBit flags bangPatBit
+
+-- PState for parsing options pragmas
+--
+pragState :: StringBuffer -> SrcLoc -> PState
+pragState buf loc =
+ PState {
+ buffer = buf,
+ last_loc = mkSrcSpan loc loc,
+ last_offs = 0,
+ last_len = 0,
+ loc = loc,
+ extsBitmap = 0,
+ context = [],
+ lex_state = [bol, option_prags, 0]
+ }
+
+
+-- create a parse state
+--
+mkPState :: StringBuffer -> SrcLoc -> DynFlags -> PState
+mkPState buf loc flags =
+ PState {
+ buffer = buf,
+ last_loc = mkSrcSpan loc loc,
+ last_offs = 0,
+ last_len = 0,
+ loc = loc,
+ extsBitmap = fromIntegral bitmap,
+ context = [],
+ lex_state = [bol, if glaExtsEnabled bitmap then glaexts else 0]
+ -- we begin in the layout state if toplev_layout is set
+ }
+ where
+ bitmap = glaExtsBit `setBitIf` dopt Opt_GlasgowExts flags
+ .|. ffiBit `setBitIf` dopt Opt_FFI flags
+ .|. parrBit `setBitIf` dopt Opt_PArr flags
+ .|. arrowsBit `setBitIf` dopt Opt_Arrows flags
+ .|. thBit `setBitIf` dopt Opt_TH flags
+ .|. ipBit `setBitIf` dopt Opt_ImplicitParams flags
+ .|. tvBit `setBitIf` dopt Opt_ScopedTypeVariables flags
+ .|. bangPatBit `setBitIf` dopt Opt_BangPatterns flags
+ --
+ setBitIf :: Int -> Bool -> Int
+ b `setBitIf` cond | cond = bit b
+ | otherwise = 0
+
+getContext :: P [LayoutContext]
+getContext = P $ \s@PState{context=ctx} -> POk s ctx
+
+setContext :: [LayoutContext] -> P ()
+setContext ctx = P $ \s -> POk s{context=ctx} ()
+
+popContext :: P ()
+popContext = P $ \ s@(PState{ buffer = buf, context = ctx,
+ loc = loc, last_len = len, last_loc = last_loc }) ->
+ case ctx of
+ (_:tl) -> POk s{ context = tl } ()
+ [] -> PFailed last_loc (srcParseErr buf len)
+
+-- Push a new layout context at the indentation of the last token read.
+-- This is only used at the outer level of a module when the 'module'
+-- keyword is missing.
+pushCurrentContext :: P ()
+pushCurrentContext = P $ \ s@PState{ last_offs=offs, last_len=len, context=ctx } ->
+ POk s{context = Layout (offs-len) : ctx} ()
+
+getOffside :: P Ordering
+getOffside = P $ \s@PState{last_offs=offs, context=stk} ->
+ let ord = case stk of
+ (Layout n:_) -> compare offs n
+ _ -> GT
+ in POk s ord
+
+-- ---------------------------------------------------------------------------
+-- Construct a parse error
+
+srcParseErr
+ :: StringBuffer -- current buffer (placed just after the last token)
+ -> Int -- length of the previous token
+ -> Message
+srcParseErr buf len
+ = hcat [ if null token
+ then ptext SLIT("parse error (possibly incorrect indentation)")
+ else hcat [ptext SLIT("parse error on input "),
+ char '`', text token, char '\'']
+ ]
+ where token = lexemeToString (offsetBytes (-len) buf) len
+
+-- Report a parse failure, giving the span of the previous token as
+-- the location of the error. This is the entry point for errors
+-- detected during parsing.
+srcParseFail :: P a
+srcParseFail = P $ \PState{ buffer = buf, last_len = len,
+ last_loc = last_loc } ->
+ PFailed last_loc (srcParseErr buf len)
+
+-- A lexical error is reported at a particular position in the source file,
+-- not over a token range.
+lexError :: String -> P a
+lexError str = do
+ loc <- getSrcLoc
+ i@(AI end _ buf) <- getInput
+ reportLexError loc end buf str
+
+-- -----------------------------------------------------------------------------
+-- This is the top-level function: called from the parser each time a
+-- new token is to be read from the input.
+
+lexer :: (Located Token -> P a) -> P a
+lexer cont = do
+ tok@(L _ tok__) <- lexToken
+ --trace ("token: " ++ show tok__) $ do
+ cont tok
+
+lexToken :: P (Located Token)
+lexToken = do
+ inp@(AI loc1 _ buf) <- getInput
+ sc <- getLexState
+ exts <- getExts
+ case alexScanUser exts inp sc of
+ AlexEOF -> do let span = mkSrcSpan loc1 loc1
+ setLastToken span 0
+ return (L span ITeof)
+ AlexError (AI loc2 _ buf) -> do
+ reportLexError loc1 loc2 buf "lexical error"
+ AlexSkip inp2 _ -> do
+ setInput inp2
+ lexToken
+ AlexToken inp2@(AI end _ buf2) len t -> do
+ setInput inp2
+ let span = mkSrcSpan loc1 end
+ let bytes = byteDiff buf buf2
+ span `seq` setLastToken span bytes
+ t span buf bytes
+
+-- ToDo: Alex reports the buffer at the start of the erroneous lexeme,
+-- but it would be more informative to report the location where the
+-- error was actually discovered, especially if this is a decoding
+-- error.
+reportLexError loc1 loc2 buf str =
+ let
+ c = fst (nextChar buf)
+ in
+ if c == '\0' -- decoding errors are mapped to '\0', see utf8DecodeChar#
+ then failLocMsgP loc2 loc2 "UTF-8 decoding error"
+ else failLocMsgP loc1 loc2 (str ++ " at character " ++ show c)
+}
diff --git a/compiler/parser/Parser.y.pp b/compiler/parser/Parser.y.pp
new file mode 100644
index 0000000000..3066a0f876
--- /dev/null
+++ b/compiler/parser/Parser.y.pp
@@ -0,0 +1,1607 @@
+-- -*-haskell-*-
+-- ---------------------------------------------------------------------------
+-- (c) The University of Glasgow 1997-2003
+---
+-- The GHC grammar.
+--
+-- Author(s): Simon Marlow, Sven Panne 1997, 1998, 1999
+-- ---------------------------------------------------------------------------
+
+{
+module Parser ( parseModule, parseStmt, parseIdentifier, parseType,
+ parseHeader ) where
+
+#define INCLUDE #include
+INCLUDE "HsVersions.h"
+
+import HsSyn
+import RdrHsSyn
+import HscTypes ( IsBootInterface, DeprecTxt )
+import Lexer
+import RdrName
+import TysWiredIn ( unitTyCon, unitDataCon, tupleTyCon, tupleCon, nilDataCon,
+ listTyCon_RDR, parrTyCon_RDR, consDataCon_RDR )
+import Type ( funTyCon )
+import ForeignCall ( Safety(..), CExportSpec(..), CLabelString,
+ CCallConv(..), CCallTarget(..), defaultCCallConv
+ )
+import OccName ( varName, dataName, tcClsName, tvName )
+import DataCon ( DataCon, dataConName )
+import SrcLoc ( Located(..), unLoc, getLoc, noLoc, combineSrcSpans,
+ SrcSpan, combineLocs, srcLocFile,
+ mkSrcLoc, mkSrcSpan )
+import Module
+import StaticFlags ( opt_SccProfilingOn )
+import Type ( Kind, mkArrowKind, liftedTypeKind )
+import BasicTypes ( Boxity(..), Fixity(..), FixityDirection(..), IPName(..),
+ Activation(..), defaultInlineSpec )
+import OrdList
+
+import FastString
+import Maybes ( orElse )
+import Outputable
+import GLAEXTS
+}
+
+{-
+-----------------------------------------------------------------------------
+Conflicts: 36 shift/reduce (1.25)
+
+10 for abiguity in 'if x then y else z + 1' [State 178]
+ (shift parses as 'if x then y else (z + 1)', as per longest-parse rule)
+ 10 because op might be: : - ! * . `x` VARSYM CONSYM QVARSYM QCONSYM
+
+1 for ambiguity in 'if x then y else z :: T' [State 178]
+ (shift parses as 'if x then y else (z :: T)', as per longest-parse rule)
+
+4 for ambiguity in 'if x then y else z -< e' [State 178]
+ (shift parses as 'if x then y else (z -< T)', as per longest-parse rule)
+ There are four such operators: -<, >-, -<<, >>-
+
+
+2 for ambiguity in 'case v of { x :: T -> T ... } ' [States 11, 253]
+ Which of these two is intended?
+ case v of
+ (x::T) -> T -- Rhs is T
+ or
+ case v of
+ (x::T -> T) -> .. -- Rhs is ...
+
+10 for ambiguity in 'e :: a `b` c'. Does this mean [States 11, 253]
+ (e::a) `b` c, or
+ (e :: (a `b` c))
+ As well as `b` we can have !, VARSYM, QCONSYM, and CONSYM, hence 5 cases
+ Same duplication between states 11 and 253 as the previous case
+
+1 for ambiguity in 'let ?x ...' [State 329]
+ the parser can't tell whether the ?x is the lhs of a normal binding or
+ an implicit binding. Fortunately resolving as shift gives it the only
+ sensible meaning, namely the lhs of an implicit binding.
+
+1 for ambiguity in '{-# RULES "name" [ ... #-} [State 382]
+ we don't know whether the '[' starts the activation or not: it
+ might be the start of the declaration with the activation being
+ empty. --SDM 1/4/2002
+
+6 for conflicts between `fdecl' and `fdeclDEPRECATED', [States 393,394]
+ which are resolved correctly, and moreover,
+ should go away when `fdeclDEPRECATED' is removed.
+
+1 for ambiguity in '{-# RULES "name" forall = ... #-}' [State 474]
+ since 'forall' is a valid variable name, we don't know whether
+ to treat a forall on the input as the beginning of a quantifier
+ or the beginning of the rule itself. Resolving to shift means
+ it's always treated as a quantifier, hence the above is disallowed.
+ This saves explicitly defining a grammar for the rule lhs that
+ doesn't include 'forall'.
+
+-- ---------------------------------------------------------------------------
+-- Adding location info
+
+This is done in a stylised way using the three macros below, L0, L1
+and LL. Each of these macros can be thought of as having type
+
+ L0, L1, LL :: a -> Located a
+
+They each add a SrcSpan to their argument.
+
+ L0 adds 'noSrcSpan', used for empty productions
+
+ L1 for a production with a single token on the lhs. Grabs the SrcSpan
+ from that token.
+
+ LL for a production with >1 token on the lhs. Makes up a SrcSpan from
+ the first and last tokens.
+
+These suffice for the majority of cases. However, we must be
+especially careful with empty productions: LL won't work if the first
+or last token on the lhs can represent an empty span. In these cases,
+we have to calculate the span using more of the tokens from the lhs, eg.
+
+ | 'newtype' tycl_hdr '=' newconstr deriving
+ { L (comb3 $1 $4 $5)
+ (mkTyData NewType (unLoc $2) [$4] (unLoc $5)) }
+
+We provide comb3 and comb4 functions which are useful in such cases.
+
+Be careful: there's no checking that you actually got this right, the
+only symptom will be that the SrcSpans of your syntax will be
+incorrect.
+
+/*
+ * We must expand these macros *before* running Happy, which is why this file is
+ * Parser.y.pp rather than just Parser.y - we run the C pre-processor first.
+ */
+#define L0 L noSrcSpan
+#define L1 sL (getLoc $1)
+#define LL sL (comb2 $1 $>)
+
+-- -----------------------------------------------------------------------------
+
+-}
+
+%token
+ '_' { L _ ITunderscore } -- Haskell keywords
+ 'as' { L _ ITas }
+ 'case' { L _ ITcase }
+ 'class' { L _ ITclass }
+ 'data' { L _ ITdata }
+ 'default' { L _ ITdefault }
+ 'deriving' { L _ ITderiving }
+ 'do' { L _ ITdo }
+ 'else' { L _ ITelse }
+ 'hiding' { L _ IThiding }
+ 'if' { L _ ITif }
+ 'import' { L _ ITimport }
+ 'in' { L _ ITin }
+ 'infix' { L _ ITinfix }
+ 'infixl' { L _ ITinfixl }
+ 'infixr' { L _ ITinfixr }
+ 'instance' { L _ ITinstance }
+ 'let' { L _ ITlet }
+ 'module' { L _ ITmodule }
+ 'newtype' { L _ ITnewtype }
+ 'of' { L _ ITof }
+ 'qualified' { L _ ITqualified }
+ 'then' { L _ ITthen }
+ 'type' { L _ ITtype }
+ 'where' { L _ ITwhere }
+ '_scc_' { L _ ITscc } -- ToDo: remove
+
+ 'forall' { L _ ITforall } -- GHC extension keywords
+ 'foreign' { L _ ITforeign }
+ 'export' { L _ ITexport }
+ 'label' { L _ ITlabel }
+ 'dynamic' { L _ ITdynamic }
+ 'safe' { L _ ITsafe }
+ 'threadsafe' { L _ ITthreadsafe }
+ 'unsafe' { L _ ITunsafe }
+ 'mdo' { L _ ITmdo }
+ 'stdcall' { L _ ITstdcallconv }
+ 'ccall' { L _ ITccallconv }
+ 'dotnet' { L _ ITdotnet }
+ 'proc' { L _ ITproc } -- for arrow notation extension
+ 'rec' { L _ ITrec } -- for arrow notation extension
+
+ '{-# INLINE' { L _ (ITinline_prag _) }
+ '{-# SPECIALISE' { L _ ITspec_prag }
+ '{-# SPECIALISE_INLINE' { L _ (ITspec_inline_prag _) }
+ '{-# SOURCE' { L _ ITsource_prag }
+ '{-# RULES' { L _ ITrules_prag }
+ '{-# CORE' { L _ ITcore_prag } -- hdaume: annotated core
+ '{-# SCC' { L _ ITscc_prag }
+ '{-# DEPRECATED' { L _ ITdeprecated_prag }
+ '{-# UNPACK' { L _ ITunpack_prag }
+ '#-}' { L _ ITclose_prag }
+
+ '..' { L _ ITdotdot } -- reserved symbols
+ ':' { L _ ITcolon }
+ '::' { L _ ITdcolon }
+ '=' { L _ ITequal }
+ '\\' { L _ ITlam }
+ '|' { L _ ITvbar }
+ '<-' { L _ ITlarrow }
+ '->' { L _ ITrarrow }
+ '@' { L _ ITat }
+ '~' { L _ ITtilde }
+ '=>' { L _ ITdarrow }
+ '-' { L _ ITminus }
+ '!' { L _ ITbang }
+ '*' { L _ ITstar }
+ '-<' { L _ ITlarrowtail } -- for arrow notation
+ '>-' { L _ ITrarrowtail } -- for arrow notation
+ '-<<' { L _ ITLarrowtail } -- for arrow notation
+ '>>-' { L _ ITRarrowtail } -- for arrow notation
+ '.' { L _ ITdot }
+
+ '{' { L _ ITocurly } -- special symbols
+ '}' { L _ ITccurly }
+ '{|' { L _ ITocurlybar }
+ '|}' { L _ ITccurlybar }
+ vocurly { L _ ITvocurly } -- virtual open curly (from layout)
+ vccurly { L _ ITvccurly } -- virtual close curly (from layout)
+ '[' { L _ ITobrack }
+ ']' { L _ ITcbrack }
+ '[:' { L _ ITopabrack }
+ ':]' { L _ ITcpabrack }
+ '(' { L _ IToparen }
+ ')' { L _ ITcparen }
+ '(#' { L _ IToubxparen }
+ '#)' { L _ ITcubxparen }
+ '(|' { L _ IToparenbar }
+ '|)' { L _ ITcparenbar }
+ ';' { L _ ITsemi }
+ ',' { L _ ITcomma }
+ '`' { L _ ITbackquote }
+
+ VARID { L _ (ITvarid _) } -- identifiers
+ CONID { L _ (ITconid _) }
+ VARSYM { L _ (ITvarsym _) }
+ CONSYM { L _ (ITconsym _) }
+ QVARID { L _ (ITqvarid _) }
+ QCONID { L _ (ITqconid _) }
+ QVARSYM { L _ (ITqvarsym _) }
+ QCONSYM { L _ (ITqconsym _) }
+
+ IPDUPVARID { L _ (ITdupipvarid _) } -- GHC extension
+ IPSPLITVARID { L _ (ITsplitipvarid _) } -- GHC extension
+
+ CHAR { L _ (ITchar _) }
+ STRING { L _ (ITstring _) }
+ INTEGER { L _ (ITinteger _) }
+ RATIONAL { L _ (ITrational _) }
+
+ PRIMCHAR { L _ (ITprimchar _) }
+ PRIMSTRING { L _ (ITprimstring _) }
+ PRIMINTEGER { L _ (ITprimint _) }
+ PRIMFLOAT { L _ (ITprimfloat _) }
+ PRIMDOUBLE { L _ (ITprimdouble _) }
+
+-- Template Haskell
+'[|' { L _ ITopenExpQuote }
+'[p|' { L _ ITopenPatQuote }
+'[t|' { L _ ITopenTypQuote }
+'[d|' { L _ ITopenDecQuote }
+'|]' { L _ ITcloseQuote }
+TH_ID_SPLICE { L _ (ITidEscape _) } -- $x
+'$(' { L _ ITparenEscape } -- $( exp )
+TH_VAR_QUOTE { L _ ITvarQuote } -- 'x
+TH_TY_QUOTE { L _ ITtyQuote } -- ''T
+
+%monad { P } { >>= } { return }
+%lexer { lexer } { L _ ITeof }
+%name parseModule module
+%name parseStmt maybe_stmt
+%name parseIdentifier identifier
+%name parseType ctype
+%partial parseHeader header
+%tokentype { (Located Token) }
+%%
+
+-----------------------------------------------------------------------------
+-- Identifiers; one of the entry points
+identifier :: { Located RdrName }
+ : qvar { $1 }
+ | qcon { $1 }
+ | qvarop { $1 }
+ | qconop { $1 }
+
+-----------------------------------------------------------------------------
+-- Module Header
+
+-- The place for module deprecation is really too restrictive, but if it
+-- was allowed at its natural place just before 'module', we get an ugly
+-- s/r conflict with the second alternative. Another solution would be the
+-- introduction of a new pragma DEPRECATED_MODULE, but this is not very nice,
+-- either, and DEPRECATED is only expected to be used by people who really
+-- know what they are doing. :-)
+
+module :: { Located (HsModule RdrName) }
+ : 'module' modid maybemoddeprec maybeexports 'where' body
+ {% fileSrcSpan >>= \ loc ->
+ return (L loc (HsModule (Just $2) $4 (fst $6) (snd $6) $3)) }
+ | missing_module_keyword top close
+ {% fileSrcSpan >>= \ loc ->
+ return (L loc (HsModule Nothing Nothing
+ (fst $2) (snd $2) Nothing)) }
+
+missing_module_keyword :: { () }
+ : {- empty -} {% pushCurrentContext }
+
+maybemoddeprec :: { Maybe DeprecTxt }
+ : '{-# DEPRECATED' STRING '#-}' { Just (getSTRING $2) }
+ | {- empty -} { Nothing }
+
+body :: { ([LImportDecl RdrName], [LHsDecl RdrName]) }
+ : '{' top '}' { $2 }
+ | vocurly top close { $2 }
+
+top :: { ([LImportDecl RdrName], [LHsDecl RdrName]) }
+ : importdecls { (reverse $1,[]) }
+ | importdecls ';' cvtopdecls { (reverse $1,$3) }
+ | cvtopdecls { ([],$1) }
+
+cvtopdecls :: { [LHsDecl RdrName] }
+ : topdecls { cvTopDecls $1 }
+
+-----------------------------------------------------------------------------
+-- Module declaration & imports only
+
+header :: { Located (HsModule RdrName) }
+ : 'module' modid maybemoddeprec maybeexports 'where' header_body
+ {% fileSrcSpan >>= \ loc ->
+ return (L loc (HsModule (Just $2) $4 $6 [] $3)) }
+ | missing_module_keyword importdecls
+ {% fileSrcSpan >>= \ loc ->
+ return (L loc (HsModule Nothing Nothing $2 [] Nothing)) }
+
+header_body :: { [LImportDecl RdrName] }
+ : '{' importdecls { $2 }
+ | vocurly importdecls { $2 }
+
+-----------------------------------------------------------------------------
+-- The Export List
+
+maybeexports :: { Maybe [LIE RdrName] }
+ : '(' exportlist ')' { Just $2 }
+ | {- empty -} { Nothing }
+
+exportlist :: { [LIE RdrName] }
+ : exportlist ',' export { $3 : $1 }
+ | exportlist ',' { $1 }
+ | export { [$1] }
+ | {- empty -} { [] }
+
+ -- No longer allow things like [] and (,,,) to be exported
+ -- They are built in syntax, always available
+export :: { LIE RdrName }
+ : qvar { L1 (IEVar (unLoc $1)) }
+ | oqtycon { L1 (IEThingAbs (unLoc $1)) }
+ | oqtycon '(' '..' ')' { LL (IEThingAll (unLoc $1)) }
+ | oqtycon '(' ')' { LL (IEThingWith (unLoc $1) []) }
+ | oqtycon '(' qcnames ')' { LL (IEThingWith (unLoc $1) (reverse $3)) }
+ | 'module' modid { LL (IEModuleContents (unLoc $2)) }
+
+qcnames :: { [RdrName] }
+ : qcnames ',' qcname { unLoc $3 : $1 }
+ | qcname { [unLoc $1] }
+
+qcname :: { Located RdrName } -- Variable or data constructor
+ : qvar { $1 }
+ | qcon { $1 }
+
+-----------------------------------------------------------------------------
+-- Import Declarations
+
+-- import decls can be *empty*, or even just a string of semicolons
+-- whereas topdecls must contain at least one topdecl.
+
+importdecls :: { [LImportDecl RdrName] }
+ : importdecls ';' importdecl { $3 : $1 }
+ | importdecls ';' { $1 }
+ | importdecl { [ $1 ] }
+ | {- empty -} { [] }
+
+importdecl :: { LImportDecl RdrName }
+ : 'import' maybe_src optqualified modid maybeas maybeimpspec
+ { L (comb4 $1 $4 $5 $6) (ImportDecl $4 $2 $3 (unLoc $5) (unLoc $6)) }
+
+maybe_src :: { IsBootInterface }
+ : '{-# SOURCE' '#-}' { True }
+ | {- empty -} { False }
+
+optqualified :: { Bool }
+ : 'qualified' { True }
+ | {- empty -} { False }
+
+maybeas :: { Located (Maybe Module) }
+ : 'as' modid { LL (Just (unLoc $2)) }
+ | {- empty -} { noLoc Nothing }
+
+maybeimpspec :: { Located (Maybe (Bool, [LIE RdrName])) }
+ : impspec { L1 (Just (unLoc $1)) }
+ | {- empty -} { noLoc Nothing }
+
+impspec :: { Located (Bool, [LIE RdrName]) }
+ : '(' exportlist ')' { LL (False, reverse $2) }
+ | 'hiding' '(' exportlist ')' { LL (True, reverse $3) }
+
+-----------------------------------------------------------------------------
+-- Fixity Declarations
+
+prec :: { Int }
+ : {- empty -} { 9 }
+ | INTEGER {% checkPrecP (L1 (fromInteger (getINTEGER $1))) }
+
+infix :: { Located FixityDirection }
+ : 'infix' { L1 InfixN }
+ | 'infixl' { L1 InfixL }
+ | 'infixr' { L1 InfixR }
+
+ops :: { Located [Located RdrName] }
+ : ops ',' op { LL ($3 : unLoc $1) }
+ | op { L1 [$1] }
+
+-----------------------------------------------------------------------------
+-- Top-Level Declarations
+
+topdecls :: { OrdList (LHsDecl RdrName) } -- Reversed
+ : topdecls ';' topdecl { $1 `appOL` $3 }
+ | topdecls ';' { $1 }
+ | topdecl { $1 }
+
+topdecl :: { OrdList (LHsDecl RdrName) }
+ : tycl_decl { unitOL (L1 (TyClD (unLoc $1))) }
+ | 'instance' inst_type where
+ { let (binds,sigs) = cvBindsAndSigs (unLoc $3)
+ in unitOL (L (comb3 $1 $2 $3) (InstD (InstDecl $2 binds sigs))) }
+ | 'default' '(' comma_types0 ')' { unitOL (LL $ DefD (DefaultDecl $3)) }
+ | 'foreign' fdecl { unitOL (LL (unLoc $2)) }
+ | '{-# DEPRECATED' deprecations '#-}' { $2 }
+ | '{-# RULES' rules '#-}' { $2 }
+ | '$(' exp ')' { unitOL (LL $ SpliceD (SpliceDecl $2)) }
+ | decl { unLoc $1 }
+
+tycl_decl :: { LTyClDecl RdrName }
+ : 'type' type '=' ctype
+ -- Note type on the left of the '='; this allows
+ -- infix type constructors to be declared
+ --
+ -- Note ctype, not sigtype, on the right
+ -- We allow an explicit for-all but we don't insert one
+ -- in type Foo a = (b,b)
+ -- Instead we just say b is out of scope
+ {% do { (tc,tvs) <- checkSynHdr $2
+ ; return (LL (TySynonym tc tvs $4)) } }
+
+ | data_or_newtype tycl_hdr constrs deriving
+ { L (comb4 $1 $2 $3 $4) -- We need the location on tycl_hdr
+ -- in case constrs and deriving are both empty
+ (mkTyData (unLoc $1) (unLoc $2) Nothing (reverse (unLoc $3)) (unLoc $4)) }
+
+ | data_or_newtype tycl_hdr opt_kind_sig
+ 'where' gadt_constrlist
+ deriving
+ { L (comb4 $1 $2 $4 $5)
+ (mkTyData (unLoc $1) (unLoc $2) $3 (reverse (unLoc $5)) (unLoc $6)) }
+
+ | 'class' tycl_hdr fds where
+ { let
+ (binds,sigs) = cvBindsAndSigs (unLoc $4)
+ in
+ L (comb4 $1 $2 $3 $4) (mkClassDecl (unLoc $2) (unLoc $3) sigs
+ binds) }
+
+data_or_newtype :: { Located NewOrData }
+ : 'data' { L1 DataType }
+ | 'newtype' { L1 NewType }
+
+opt_kind_sig :: { Maybe Kind }
+ : { Nothing }
+ | '::' kind { Just $2 }
+
+-- tycl_hdr parses the header of a type or class decl,
+-- which takes the form
+-- T a b
+-- Eq a => T a
+-- (Eq a, Ord b) => T a b
+-- Rather a lot of inlining here, else we get reduce/reduce errors
+tycl_hdr :: { Located (LHsContext RdrName, Located RdrName, [LHsTyVarBndr RdrName]) }
+ : context '=>' type {% checkTyClHdr $1 $3 >>= return.LL }
+ | type {% checkTyClHdr (noLoc []) $1 >>= return.L1 }
+
+-----------------------------------------------------------------------------
+-- Nested declarations
+
+decls :: { Located (OrdList (LHsDecl RdrName)) } -- Reversed
+ : decls ';' decl { LL (unLoc $1 `appOL` unLoc $3) }
+ | decls ';' { LL (unLoc $1) }
+ | decl { $1 }
+ | {- empty -} { noLoc nilOL }
+
+
+decllist :: { Located (OrdList (LHsDecl RdrName)) } -- Reversed
+ : '{' decls '}' { LL (unLoc $2) }
+ | vocurly decls close { $2 }
+
+where :: { Located (OrdList (LHsDecl RdrName)) } -- Reversed
+ -- No implicit parameters
+ : 'where' decllist { LL (unLoc $2) }
+ | {- empty -} { noLoc nilOL }
+
+binds :: { Located (HsLocalBinds RdrName) } -- May have implicit parameters
+ : decllist { L1 (HsValBinds (cvBindGroup (unLoc $1))) }
+ | '{' dbinds '}' { LL (HsIPBinds (IPBinds (unLoc $2) emptyLHsBinds)) }
+ | vocurly dbinds close { L (getLoc $2) (HsIPBinds (IPBinds (unLoc $2) emptyLHsBinds)) }
+
+wherebinds :: { Located (HsLocalBinds RdrName) } -- May have implicit parameters
+ : 'where' binds { LL (unLoc $2) }
+ | {- empty -} { noLoc emptyLocalBinds }
+
+
+-----------------------------------------------------------------------------
+-- Transformation Rules
+
+rules :: { OrdList (LHsDecl RdrName) } -- Reversed
+ : rules ';' rule { $1 `snocOL` $3 }
+ | rules ';' { $1 }
+ | rule { unitOL $1 }
+ | {- empty -} { nilOL }
+
+rule :: { LHsDecl RdrName }
+ : STRING activation rule_forall infixexp '=' exp
+ { LL $ RuleD (HsRule (getSTRING $1)
+ ($2 `orElse` AlwaysActive)
+ $3 $4 placeHolderNames $6 placeHolderNames) }
+
+activation :: { Maybe Activation }
+ : {- empty -} { Nothing }
+ | explicit_activation { Just $1 }
+
+explicit_activation :: { Activation } -- In brackets
+ : '[' INTEGER ']' { ActiveAfter (fromInteger (getINTEGER $2)) }
+ | '[' '~' INTEGER ']' { ActiveBefore (fromInteger (getINTEGER $3)) }
+
+rule_forall :: { [RuleBndr RdrName] }
+ : 'forall' rule_var_list '.' { $2 }
+ | {- empty -} { [] }
+
+rule_var_list :: { [RuleBndr RdrName] }
+ : rule_var { [$1] }
+ | rule_var rule_var_list { $1 : $2 }
+
+rule_var :: { RuleBndr RdrName }
+ : varid { RuleBndr $1 }
+ | '(' varid '::' ctype ')' { RuleBndrSig $2 $4 }
+
+-----------------------------------------------------------------------------
+-- Deprecations (c.f. rules)
+
+deprecations :: { OrdList (LHsDecl RdrName) } -- Reversed
+ : deprecations ';' deprecation { $1 `appOL` $3 }
+ | deprecations ';' { $1 }
+ | deprecation { $1 }
+ | {- empty -} { nilOL }
+
+-- SUP: TEMPORARY HACK, not checking for `module Foo'
+deprecation :: { OrdList (LHsDecl RdrName) }
+ : depreclist STRING
+ { toOL [ LL $ DeprecD (Deprecation n (getSTRING $2))
+ | n <- unLoc $1 ] }
+
+
+-----------------------------------------------------------------------------
+-- Foreign import and export declarations
+
+-- for the time being, the following accepts foreign declarations conforming
+-- to the FFI Addendum, Version 1.0 as well as pre-standard declarations
+--
+-- * a flag indicates whether pre-standard declarations have been used and
+-- triggers a deprecation warning further down the road
+--
+-- NB: The first two rules could be combined into one by replacing `safety1'
+-- with `safety'. However, the combined rule conflicts with the
+-- DEPRECATED rules.
+--
+fdecl :: { LHsDecl RdrName }
+fdecl : 'import' callconv safety1 fspec
+ {% mkImport $2 $3 (unLoc $4) >>= return.LL }
+ | 'import' callconv fspec
+ {% do { d <- mkImport $2 (PlaySafe False) (unLoc $3);
+ return (LL d) } }
+ | 'export' callconv fspec
+ {% mkExport $2 (unLoc $3) >>= return.LL }
+ -- the following syntax is DEPRECATED
+ | fdecl1DEPRECATED { L1 (ForD (unLoc $1)) }
+ | fdecl2DEPRECATED { L1 (unLoc $1) }
+
+fdecl1DEPRECATED :: { LForeignDecl RdrName }
+fdecl1DEPRECATED
+ ----------- DEPRECATED label decls ------------
+ : 'label' ext_name varid '::' sigtype
+ { LL $ ForeignImport $3 $5 (CImport defaultCCallConv (PlaySafe False) nilFS nilFS
+ (CLabel ($2 `orElse` mkExtName (unLoc $3)))) True }
+
+ ----------- DEPRECATED ccall/stdcall decls ------------
+ --
+ -- NB: This business with the case expression below may seem overly
+ -- complicated, but it is necessary to avoid some conflicts.
+
+ -- DEPRECATED variant #1: lack of a calling convention specification
+ -- (import)
+ | 'import' {-no callconv-} ext_name safety varid_no_unsafe '::' sigtype
+ { let
+ target = StaticTarget ($2 `orElse` mkExtName (unLoc $4))
+ in
+ LL $ ForeignImport $4 $6 (CImport defaultCCallConv $3 nilFS nilFS
+ (CFunction target)) True }
+
+ -- DEPRECATED variant #2: external name consists of two separate strings
+ -- (module name and function name) (import)
+ | 'import' callconv STRING STRING safety varid_no_unsafe '::' sigtype
+ {% case $2 of
+ DNCall -> parseError (comb2 $1 $>) "Illegal format of .NET foreign import"
+ CCall cconv -> return $
+ let
+ imp = CFunction (StaticTarget (getSTRING $4))
+ in
+ LL $ ForeignImport $6 $8 (CImport cconv $5 nilFS nilFS imp) True }
+
+ -- DEPRECATED variant #3: `unsafe' after entity
+ | 'import' callconv STRING 'unsafe' varid_no_unsafe '::' sigtype
+ {% case $2 of
+ DNCall -> parseError (comb2 $1 $>) "Illegal format of .NET foreign import"
+ CCall cconv -> return $
+ let
+ imp = CFunction (StaticTarget (getSTRING $3))
+ in
+ LL $ ForeignImport $5 $7 (CImport cconv PlayRisky nilFS nilFS imp) True }
+
+ -- DEPRECATED variant #4: use of the special identifier `dynamic' without
+ -- an explicit calling convention (import)
+ | 'import' {-no callconv-} 'dynamic' safety varid_no_unsafe '::' sigtype
+ { LL $ ForeignImport $4 $6 (CImport defaultCCallConv $3 nilFS nilFS
+ (CFunction DynamicTarget)) True }
+
+ -- DEPRECATED variant #5: use of the special identifier `dynamic' (import)
+ | 'import' callconv 'dynamic' safety varid_no_unsafe '::' sigtype
+ {% case $2 of
+ DNCall -> parseError (comb2 $1 $>) "Illegal format of .NET foreign import"
+ CCall cconv -> return $
+ LL $ ForeignImport $5 $7 (CImport cconv $4 nilFS nilFS
+ (CFunction DynamicTarget)) True }
+
+ -- DEPRECATED variant #6: lack of a calling convention specification
+ -- (export)
+ | 'export' {-no callconv-} ext_name varid '::' sigtype
+ { LL $ ForeignExport $3 $5 (CExport (CExportStatic ($2 `orElse` mkExtName (unLoc $3))
+ defaultCCallConv)) True }
+
+ -- DEPRECATED variant #7: external name consists of two separate strings
+ -- (module name and function name) (export)
+ | 'export' callconv STRING STRING varid '::' sigtype
+ {% case $2 of
+ DNCall -> parseError (comb2 $1 $>) "Illegal format of .NET foreign import"
+ CCall cconv -> return $
+ LL $ ForeignExport $5 $7
+ (CExport (CExportStatic (getSTRING $4) cconv)) True }
+
+ -- DEPRECATED variant #8: use of the special identifier `dynamic' without
+ -- an explicit calling convention (export)
+ | 'export' {-no callconv-} 'dynamic' varid '::' sigtype
+ { LL $ ForeignImport $3 $5 (CImport defaultCCallConv (PlaySafe False) nilFS nilFS
+ CWrapper) True }
+
+ -- DEPRECATED variant #9: use of the special identifier `dynamic' (export)
+ | 'export' callconv 'dynamic' varid '::' sigtype
+ {% case $2 of
+ DNCall -> parseError (comb2 $1 $>) "Illegal format of .NET foreign import"
+ CCall cconv -> return $
+ LL $ ForeignImport $4 $6
+ (CImport cconv (PlaySafe False) nilFS nilFS CWrapper) True }
+
+ ----------- DEPRECATED .NET decls ------------
+ -- NB: removed the .NET call declaration, as it is entirely subsumed
+ -- by the new standard FFI declarations
+
+fdecl2DEPRECATED :: { LHsDecl RdrName }
+fdecl2DEPRECATED
+ : 'import' 'dotnet' 'type' ext_name tycon { LL $ TyClD (ForeignType $5 $4 DNType) }
+ -- left this one unchanged for the moment as type imports are not
+ -- covered currently by the FFI standard -=chak
+
+
+callconv :: { CallConv }
+ : 'stdcall' { CCall StdCallConv }
+ | 'ccall' { CCall CCallConv }
+ | 'dotnet' { DNCall }
+
+safety :: { Safety }
+ : 'unsafe' { PlayRisky }
+ | 'safe' { PlaySafe False }
+ | 'threadsafe' { PlaySafe True }
+ | {- empty -} { PlaySafe False }
+
+safety1 :: { Safety }
+ : 'unsafe' { PlayRisky }
+ | 'safe' { PlaySafe False }
+ | 'threadsafe' { PlaySafe True }
+ -- only needed to avoid conflicts with the DEPRECATED rules
+
+fspec :: { Located (Located FastString, Located RdrName, LHsType RdrName) }
+ : STRING var '::' sigtype { LL (L (getLoc $1) (getSTRING $1), $2, $4) }
+ | var '::' sigtype { LL (noLoc nilFS, $1, $3) }
+ -- if the entity string is missing, it defaults to the empty string;
+ -- the meaning of an empty entity string depends on the calling
+ -- convention
+
+-- DEPRECATED syntax
+ext_name :: { Maybe CLabelString }
+ : STRING { Just (getSTRING $1) }
+ | STRING STRING { Just (getSTRING $2) } -- Ignore "module name" for now
+ | {- empty -} { Nothing }
+
+
+-----------------------------------------------------------------------------
+-- Type signatures
+
+opt_sig :: { Maybe (LHsType RdrName) }
+ : {- empty -} { Nothing }
+ | '::' sigtype { Just $2 }
+
+opt_asig :: { Maybe (LHsType RdrName) }
+ : {- empty -} { Nothing }
+ | '::' atype { Just $2 }
+
+sigtypes1 :: { [LHsType RdrName] }
+ : sigtype { [ $1 ] }
+ | sigtype ',' sigtypes1 { $1 : $3 }
+
+sigtype :: { LHsType RdrName }
+ : ctype { L1 (mkImplicitHsForAllTy (noLoc []) $1) }
+ -- Wrap an Implicit forall if there isn't one there already
+
+sig_vars :: { Located [Located RdrName] }
+ : sig_vars ',' var { LL ($3 : unLoc $1) }
+ | var { L1 [$1] }
+
+-----------------------------------------------------------------------------
+-- Types
+
+strict_mark :: { Located HsBang }
+ : '!' { L1 HsStrict }
+ | '{-# UNPACK' '#-}' '!' { LL HsUnbox }
+
+-- A ctype is a for-all type
+ctype :: { LHsType RdrName }
+ : 'forall' tv_bndrs '.' ctype { LL $ mkExplicitHsForAllTy $2 (noLoc []) $4 }
+ | context '=>' type { LL $ mkImplicitHsForAllTy $1 $3 }
+ -- A type of form (context => type) is an *implicit* HsForAllTy
+ | type { $1 }
+
+-- We parse a context as a btype so that we don't get reduce/reduce
+-- errors in ctype. The basic problem is that
+-- (Eq a, Ord a)
+-- looks so much like a tuple type. We can't tell until we find the =>
+context :: { LHsContext RdrName }
+ : btype {% checkContext $1 }
+
+type :: { LHsType RdrName }
+ : ipvar '::' gentype { LL (HsPredTy (HsIParam (unLoc $1) $3)) }
+ | gentype { $1 }
+
+gentype :: { LHsType RdrName }
+ : btype { $1 }
+ | btype qtyconop gentype { LL $ HsOpTy $1 $2 $3 }
+ | btype tyvarop gentype { LL $ HsOpTy $1 $2 $3 }
+ | btype '->' ctype { LL $ HsFunTy $1 $3 }
+
+btype :: { LHsType RdrName }
+ : btype atype { LL $ HsAppTy $1 $2 }
+ | atype { $1 }
+
+atype :: { LHsType RdrName }
+ : gtycon { L1 (HsTyVar (unLoc $1)) }
+ | tyvar { L1 (HsTyVar (unLoc $1)) }
+ | strict_mark atype { LL (HsBangTy (unLoc $1) $2) }
+ | '(' ctype ',' comma_types1 ')' { LL $ HsTupleTy Boxed ($2:$4) }
+ | '(#' comma_types1 '#)' { LL $ HsTupleTy Unboxed $2 }
+ | '[' ctype ']' { LL $ HsListTy $2 }
+ | '[:' ctype ':]' { LL $ HsPArrTy $2 }
+ | '(' ctype ')' { LL $ HsParTy $2 }
+ | '(' ctype '::' kind ')' { LL $ HsKindSig $2 $4 }
+-- Generics
+ | INTEGER { L1 (HsNumTy (getINTEGER $1)) }
+
+-- An inst_type is what occurs in the head of an instance decl
+-- e.g. (Foo a, Gaz b) => Wibble a b
+-- It's kept as a single type, with a MonoDictTy at the right
+-- hand corner, for convenience.
+inst_type :: { LHsType RdrName }
+ : sigtype {% checkInstType $1 }
+
+inst_types1 :: { [LHsType RdrName] }
+ : inst_type { [$1] }
+ | inst_type ',' inst_types1 { $1 : $3 }
+
+comma_types0 :: { [LHsType RdrName] }
+ : comma_types1 { $1 }
+ | {- empty -} { [] }
+
+comma_types1 :: { [LHsType RdrName] }
+ : ctype { [$1] }
+ | ctype ',' comma_types1 { $1 : $3 }
+
+tv_bndrs :: { [LHsTyVarBndr RdrName] }
+ : tv_bndr tv_bndrs { $1 : $2 }
+ | {- empty -} { [] }
+
+tv_bndr :: { LHsTyVarBndr RdrName }
+ : tyvar { L1 (UserTyVar (unLoc $1)) }
+ | '(' tyvar '::' kind ')' { LL (KindedTyVar (unLoc $2) $4) }
+
+fds :: { Located [Located ([RdrName], [RdrName])] }
+ : {- empty -} { noLoc [] }
+ | '|' fds1 { LL (reverse (unLoc $2)) }
+
+fds1 :: { Located [Located ([RdrName], [RdrName])] }
+ : fds1 ',' fd { LL ($3 : unLoc $1) }
+ | fd { L1 [$1] }
+
+fd :: { Located ([RdrName], [RdrName]) }
+ : varids0 '->' varids0 { L (comb3 $1 $2 $3)
+ (reverse (unLoc $1), reverse (unLoc $3)) }
+
+varids0 :: { Located [RdrName] }
+ : {- empty -} { noLoc [] }
+ | varids0 tyvar { LL (unLoc $2 : unLoc $1) }
+
+-----------------------------------------------------------------------------
+-- Kinds
+
+kind :: { Kind }
+ : akind { $1 }
+ | akind '->' kind { mkArrowKind $1 $3 }
+
+akind :: { Kind }
+ : '*' { liftedTypeKind }
+ | '(' kind ')' { $2 }
+
+
+-----------------------------------------------------------------------------
+-- Datatype declarations
+
+gadt_constrlist :: { Located [LConDecl RdrName] }
+ : '{' gadt_constrs '}' { LL (unLoc $2) }
+ | vocurly gadt_constrs close { $2 }
+
+gadt_constrs :: { Located [LConDecl RdrName] }
+ : gadt_constrs ';' gadt_constr { LL ($3 : unLoc $1) }
+ | gadt_constrs ';' { $1 }
+ | gadt_constr { L1 [$1] }
+
+-- We allow the following forms:
+-- C :: Eq a => a -> T a
+-- C :: forall a. Eq a => !a -> T a
+-- D { x,y :: a } :: T a
+-- forall a. Eq a => D { x,y :: a } :: T a
+
+gadt_constr :: { LConDecl RdrName }
+ : con '::' sigtype
+ { LL (mkGadtDecl $1 $3) }
+ -- Syntax: Maybe merge the record stuff with the single-case above?
+ -- (to kill the mostly harmless reduce/reduce error)
+ -- XXX revisit autrijus
+ | constr_stuff_record '::' sigtype
+ { let (con,details) = unLoc $1 in
+ LL (ConDecl con Implicit [] (noLoc []) details (ResTyGADT $3)) }
+{-
+ | forall context '=>' constr_stuff_record '::' sigtype
+ { let (con,details) = unLoc $4 in
+ LL (ConDecl con Implicit (unLoc $1) $2 details (ResTyGADT $6)) }
+ | forall constr_stuff_record '::' sigtype
+ { let (con,details) = unLoc $2 in
+ LL (ConDecl con Implicit (unLoc $1) (noLoc []) details (ResTyGADT $4)) }
+-}
+
+
+constrs :: { Located [LConDecl RdrName] }
+ : {- empty; a GHC extension -} { noLoc [] }
+ | '=' constrs1 { LL (unLoc $2) }
+
+constrs1 :: { Located [LConDecl RdrName] }
+ : constrs1 '|' constr { LL ($3 : unLoc $1) }
+ | constr { L1 [$1] }
+
+constr :: { LConDecl RdrName }
+ : forall context '=>' constr_stuff
+ { let (con,details) = unLoc $4 in
+ LL (ConDecl con Explicit (unLoc $1) $2 details ResTyH98) }
+ | forall constr_stuff
+ { let (con,details) = unLoc $2 in
+ LL (ConDecl con Explicit (unLoc $1) (noLoc []) details ResTyH98) }
+
+forall :: { Located [LHsTyVarBndr RdrName] }
+ : 'forall' tv_bndrs '.' { LL $2 }
+ | {- empty -} { noLoc [] }
+
+constr_stuff :: { Located (Located RdrName, HsConDetails RdrName (LBangType RdrName)) }
+-- We parse the constructor declaration
+-- C t1 t2
+-- as a btype (treating C as a type constructor) and then convert C to be
+-- a data constructor. Reason: it might continue like this:
+-- C t1 t2 %: D Int
+-- in which case C really would be a type constructor. We can't resolve this
+-- ambiguity till we come across the constructor oprerator :% (or not, more usually)
+ : btype {% mkPrefixCon $1 [] >>= return.LL }
+ | oqtycon '{' '}' {% mkRecCon $1 [] >>= return.LL }
+ | oqtycon '{' fielddecls '}' {% mkRecCon $1 $3 >>= return.LL }
+ | btype conop btype { LL ($2, InfixCon $1 $3) }
+
+constr_stuff_record :: { Located (Located RdrName, HsConDetails RdrName (LBangType RdrName)) }
+ : oqtycon '{' '}' {% mkRecCon $1 [] >>= return.sL (comb2 $1 $>) }
+ | oqtycon '{' fielddecls '}' {% mkRecCon $1 $3 >>= return.sL (comb2 $1 $>) }
+
+fielddecls :: { [([Located RdrName], LBangType RdrName)] }
+ : fielddecl ',' fielddecls { unLoc $1 : $3 }
+ | fielddecl { [unLoc $1] }
+
+fielddecl :: { Located ([Located RdrName], LBangType RdrName) }
+ : sig_vars '::' ctype { LL (reverse (unLoc $1), $3) }
+
+-- We allow the odd-looking 'inst_type' in a deriving clause, so that
+-- we can do deriving( forall a. C [a] ) in a newtype (GHC extension).
+-- The 'C [a]' part is converted to an HsPredTy by checkInstType
+-- We don't allow a context, but that's sorted out by the type checker.
+deriving :: { Located (Maybe [LHsType RdrName]) }
+ : {- empty -} { noLoc Nothing }
+ | 'deriving' qtycon {% do { let { L loc tv = $2 }
+ ; p <- checkInstType (L loc (HsTyVar tv))
+ ; return (LL (Just [p])) } }
+ | 'deriving' '(' ')' { LL (Just []) }
+ | 'deriving' '(' inst_types1 ')' { LL (Just $3) }
+ -- Glasgow extension: allow partial
+ -- applications in derivings
+
+-----------------------------------------------------------------------------
+-- Value definitions
+
+{- There's an awkward overlap with a type signature. Consider
+ f :: Int -> Int = ...rhs...
+ Then we can't tell whether it's a type signature or a value
+ definition with a result signature until we see the '='.
+ So we have to inline enough to postpone reductions until we know.
+-}
+
+{-
+ ATTENTION: Dirty Hackery Ahead! If the second alternative of vars is var
+ instead of qvar, we get another shift/reduce-conflict. Consider the
+ following programs:
+
+ { (^^) :: Int->Int ; } Type signature; only var allowed
+
+ { (^^) :: Int->Int = ... ; } Value defn with result signature;
+ qvar allowed (because of instance decls)
+
+ We can't tell whether to reduce var to qvar until after we've read the signatures.
+-}
+
+decl :: { Located (OrdList (LHsDecl RdrName)) }
+ : sigdecl { $1 }
+ | '!' infixexp rhs {% do { pat <- checkPattern $2;
+ return (LL $ unitOL $ LL $ ValD $
+ PatBind (LL $ BangPat pat) (unLoc $3)
+ placeHolderType placeHolderNames) } }
+ | infixexp opt_sig rhs {% do { r <- checkValDef $1 $2 $3;
+ return (LL $ unitOL (LL $ ValD r)) } }
+
+rhs :: { Located (GRHSs RdrName) }
+ : '=' exp wherebinds { L (comb3 $1 $2 $3) $ GRHSs (unguardedRHS $2) (unLoc $3) }
+ | gdrhs wherebinds { LL $ GRHSs (reverse (unLoc $1)) (unLoc $2) }
+
+gdrhs :: { Located [LGRHS RdrName] }
+ : gdrhs gdrh { LL ($2 : unLoc $1) }
+ | gdrh { L1 [$1] }
+
+gdrh :: { LGRHS RdrName }
+ : '|' quals '=' exp { sL (comb2 $1 $>) $ GRHS (reverse (unLoc $2)) $4 }
+
+sigdecl :: { Located (OrdList (LHsDecl RdrName)) }
+ : infixexp '::' sigtype
+ {% do s <- checkValSig $1 $3;
+ return (LL $ unitOL (LL $ SigD s)) }
+ -- See the above notes for why we need infixexp here
+ | var ',' sig_vars '::' sigtype
+ { LL $ toOL [ LL $ SigD (TypeSig n $5) | n <- $1 : unLoc $3 ] }
+ | infix prec ops { LL $ toOL [ LL $ SigD (FixSig (FixitySig n (Fixity $2 (unLoc $1))))
+ | n <- unLoc $3 ] }
+ | '{-# INLINE' activation qvar '#-}'
+ { LL $ unitOL (LL $ SigD (InlineSig $3 (mkInlineSpec $2 (getINLINE $1)))) }
+ | '{-# SPECIALISE' qvar '::' sigtypes1 '#-}'
+ { LL $ toOL [ LL $ SigD (SpecSig $2 t defaultInlineSpec)
+ | t <- $4] }
+ | '{-# SPECIALISE_INLINE' activation qvar '::' sigtypes1 '#-}'
+ { LL $ toOL [ LL $ SigD (SpecSig $3 t (mkInlineSpec $2 (getSPEC_INLINE $1)))
+ | t <- $5] }
+ | '{-# SPECIALISE' 'instance' inst_type '#-}'
+ { LL $ unitOL (LL $ SigD (SpecInstSig $3)) }
+
+-----------------------------------------------------------------------------
+-- Expressions
+
+exp :: { LHsExpr RdrName }
+ : infixexp '::' sigtype { LL $ ExprWithTySig $1 $3 }
+ | infixexp '-<' exp { LL $ HsArrApp $1 $3 placeHolderType HsFirstOrderApp True }
+ | infixexp '>-' exp { LL $ HsArrApp $3 $1 placeHolderType HsFirstOrderApp False }
+ | infixexp '-<<' exp { LL $ HsArrApp $1 $3 placeHolderType HsHigherOrderApp True }
+ | infixexp '>>-' exp { LL $ HsArrApp $3 $1 placeHolderType HsHigherOrderApp False}
+ | infixexp { $1 }
+
+infixexp :: { LHsExpr RdrName }
+ : exp10 { $1 }
+ | infixexp qop exp10 { LL (OpApp $1 $2 (panic "fixity") $3) }
+
+exp10 :: { LHsExpr RdrName }
+ : '\\' aexp aexps opt_asig '->' exp
+ {% checkPatterns ($2 : reverse $3) >>= \ ps ->
+ return (LL $ HsLam (mkMatchGroup [LL $ Match ps $4
+ (GRHSs (unguardedRHS $6) emptyLocalBinds
+ )])) }
+ | 'let' binds 'in' exp { LL $ HsLet (unLoc $2) $4 }
+ | 'if' exp 'then' exp 'else' exp { LL $ HsIf $2 $4 $6 }
+ | 'case' exp 'of' altslist { LL $ HsCase $2 (mkMatchGroup (unLoc $4)) }
+ | '-' fexp { LL $ mkHsNegApp $2 }
+
+ | 'do' stmtlist {% let loc = comb2 $1 $2 in
+ checkDo loc (unLoc $2) >>= \ (stmts,body) ->
+ return (L loc (mkHsDo DoExpr stmts body)) }
+ | 'mdo' stmtlist {% let loc = comb2 $1 $2 in
+ checkDo loc (unLoc $2) >>= \ (stmts,body) ->
+ return (L loc (mkHsDo (MDoExpr noPostTcTable) stmts body)) }
+ | scc_annot exp { LL $ if opt_SccProfilingOn
+ then HsSCC (unLoc $1) $2
+ else HsPar $2 }
+
+ | 'proc' aexp '->' exp
+ {% checkPattern $2 >>= \ p ->
+ return (LL $ HsProc p (LL $ HsCmdTop $4 []
+ placeHolderType undefined)) }
+ -- TODO: is LL right here?
+
+ | '{-# CORE' STRING '#-}' exp { LL $ HsCoreAnn (getSTRING $2) $4 }
+ -- hdaume: core annotation
+ | fexp { $1 }
+
+scc_annot :: { Located FastString }
+ : '_scc_' STRING { LL $ getSTRING $2 }
+ | '{-# SCC' STRING '#-}' { LL $ getSTRING $2 }
+
+fexp :: { LHsExpr RdrName }
+ : fexp aexp { LL $ HsApp $1 $2 }
+ | aexp { $1 }
+
+aexps :: { [LHsExpr RdrName] }
+ : aexps aexp { $2 : $1 }
+ | {- empty -} { [] }
+
+aexp :: { LHsExpr RdrName }
+ : qvar '@' aexp { LL $ EAsPat $1 $3 }
+ | '~' aexp { LL $ ELazyPat $2 }
+-- | '!' aexp { LL $ EBangPat $2 }
+ | aexp1 { $1 }
+
+aexp1 :: { LHsExpr RdrName }
+ : aexp1 '{' fbinds '}' {% do { r <- mkRecConstrOrUpdate $1 (comb2 $2 $4)
+ (reverse $3);
+ return (LL r) }}
+ | aexp2 { $1 }
+
+-- Here was the syntax for type applications that I was planning
+-- but there are difficulties (e.g. what order for type args)
+-- so it's not enabled yet.
+-- But this case *is* used for the left hand side of a generic definition,
+-- which is parsed as an expression before being munged into a pattern
+ | qcname '{|' gentype '|}' { LL $ HsApp (sL (getLoc $1) (HsVar (unLoc $1)))
+ (sL (getLoc $3) (HsType $3)) }
+
+aexp2 :: { LHsExpr RdrName }
+ : ipvar { L1 (HsIPVar $! unLoc $1) }
+ | qcname { L1 (HsVar $! unLoc $1) }
+ | literal { L1 (HsLit $! unLoc $1) }
+ | INTEGER { L1 (HsOverLit $! mkHsIntegral (getINTEGER $1)) }
+ | RATIONAL { L1 (HsOverLit $! mkHsFractional (getRATIONAL $1)) }
+ | '(' exp ')' { LL (HsPar $2) }
+ | '(' texp ',' texps ')' { LL $ ExplicitTuple ($2 : reverse $4) Boxed }
+ | '(#' texps '#)' { LL $ ExplicitTuple (reverse $2) Unboxed }
+ | '[' list ']' { LL (unLoc $2) }
+ | '[:' parr ':]' { LL (unLoc $2) }
+ | '(' infixexp qop ')' { LL $ SectionL $2 $3 }
+ | '(' qopm infixexp ')' { LL $ SectionR $2 $3 }
+ | '_' { L1 EWildPat }
+
+ -- MetaHaskell Extension
+ | TH_ID_SPLICE { L1 $ HsSpliceE (mkHsSplice
+ (L1 $ HsVar (mkUnqual varName
+ (getTH_ID_SPLICE $1)))) } -- $x
+ | '$(' exp ')' { LL $ HsSpliceE (mkHsSplice $2) } -- $( exp )
+
+ | TH_VAR_QUOTE qvar { LL $ HsBracket (VarBr (unLoc $2)) }
+ | TH_VAR_QUOTE qcon { LL $ HsBracket (VarBr (unLoc $2)) }
+ | TH_TY_QUOTE tyvar { LL $ HsBracket (VarBr (unLoc $2)) }
+ | TH_TY_QUOTE gtycon { LL $ HsBracket (VarBr (unLoc $2)) }
+ | '[|' exp '|]' { LL $ HsBracket (ExpBr $2) }
+ | '[t|' ctype '|]' { LL $ HsBracket (TypBr $2) }
+ | '[p|' infixexp '|]' {% checkPattern $2 >>= \p ->
+ return (LL $ HsBracket (PatBr p)) }
+ | '[d|' cvtopbody '|]' { LL $ HsBracket (DecBr (mkGroup $2)) }
+
+ -- arrow notation extension
+ | '(|' aexp2 cmdargs '|)' { LL $ HsArrForm $2 Nothing (reverse $3) }
+
+cmdargs :: { [LHsCmdTop RdrName] }
+ : cmdargs acmd { $2 : $1 }
+ | {- empty -} { [] }
+
+acmd :: { LHsCmdTop RdrName }
+ : aexp2 { L1 $ HsCmdTop $1 [] placeHolderType undefined }
+
+cvtopbody :: { [LHsDecl RdrName] }
+ : '{' cvtopdecls0 '}' { $2 }
+ | vocurly cvtopdecls0 close { $2 }
+
+cvtopdecls0 :: { [LHsDecl RdrName] }
+ : {- empty -} { [] }
+ | cvtopdecls { $1 }
+
+texp :: { LHsExpr RdrName }
+ : exp { $1 }
+ | qopm infixexp { LL $ SectionR $1 $2 }
+ -- The second production is really here only for bang patterns
+ -- but
+
+texps :: { [LHsExpr RdrName] }
+ : texps ',' texp { $3 : $1 }
+ | texp { [$1] }
+
+
+-----------------------------------------------------------------------------
+-- List expressions
+
+-- The rules below are little bit contorted to keep lexps left-recursive while
+-- avoiding another shift/reduce-conflict.
+
+list :: { LHsExpr RdrName }
+ : texp { L1 $ ExplicitList placeHolderType [$1] }
+ | lexps { L1 $ ExplicitList placeHolderType (reverse (unLoc $1)) }
+ | texp '..' { LL $ ArithSeq noPostTcExpr (From $1) }
+ | texp ',' exp '..' { LL $ ArithSeq noPostTcExpr (FromThen $1 $3) }
+ | texp '..' exp { LL $ ArithSeq noPostTcExpr (FromTo $1 $3) }
+ | texp ',' exp '..' exp { LL $ ArithSeq noPostTcExpr (FromThenTo $1 $3 $5) }
+ | texp pquals { sL (comb2 $1 $>) $ mkHsDo ListComp (reverse (unLoc $2)) $1 }
+
+lexps :: { Located [LHsExpr RdrName] }
+ : lexps ',' texp { LL ($3 : unLoc $1) }
+ | texp ',' texp { LL [$3,$1] }
+
+-----------------------------------------------------------------------------
+-- List Comprehensions
+
+pquals :: { Located [LStmt RdrName] } -- Either a singleton ParStmt,
+ -- or a reversed list of Stmts
+ : pquals1 { case unLoc $1 of
+ [qs] -> L1 qs
+ qss -> L1 [L1 (ParStmt stmtss)]
+ where
+ stmtss = [ (reverse qs, undefined)
+ | qs <- qss ]
+ }
+
+pquals1 :: { Located [[LStmt RdrName]] }
+ : pquals1 '|' quals { LL (unLoc $3 : unLoc $1) }
+ | '|' quals { L (getLoc $2) [unLoc $2] }
+
+quals :: { Located [LStmt RdrName] }
+ : quals ',' qual { LL ($3 : unLoc $1) }
+ | qual { L1 [$1] }
+
+-----------------------------------------------------------------------------
+-- Parallel array expressions
+
+-- The rules below are little bit contorted; see the list case for details.
+-- Note that, in contrast to lists, we only have finite arithmetic sequences.
+-- Moreover, we allow explicit arrays with no element (represented by the nil
+-- constructor in the list case).
+
+parr :: { LHsExpr RdrName }
+ : { noLoc (ExplicitPArr placeHolderType []) }
+ | exp { L1 $ ExplicitPArr placeHolderType [$1] }
+ | lexps { L1 $ ExplicitPArr placeHolderType
+ (reverse (unLoc $1)) }
+ | exp '..' exp { LL $ PArrSeq noPostTcExpr (FromTo $1 $3) }
+ | exp ',' exp '..' exp { LL $ PArrSeq noPostTcExpr (FromThenTo $1 $3 $5) }
+ | exp pquals { sL (comb2 $1 $>) $ mkHsDo PArrComp (reverse (unLoc $2)) $1 }
+
+-- We are reusing `lexps' and `pquals' from the list case.
+
+-----------------------------------------------------------------------------
+-- Case alternatives
+
+altslist :: { Located [LMatch RdrName] }
+ : '{' alts '}' { LL (reverse (unLoc $2)) }
+ | vocurly alts close { L (getLoc $2) (reverse (unLoc $2)) }
+
+alts :: { Located [LMatch RdrName] }
+ : alts1 { L1 (unLoc $1) }
+ | ';' alts { LL (unLoc $2) }
+
+alts1 :: { Located [LMatch RdrName] }
+ : alts1 ';' alt { LL ($3 : unLoc $1) }
+ | alts1 ';' { LL (unLoc $1) }
+ | alt { L1 [$1] }
+
+alt :: { LMatch RdrName }
+ : infixexp opt_sig alt_rhs {% checkPattern $1 >>= \p ->
+ return (LL (Match [p] $2 (unLoc $3))) }
+
+alt_rhs :: { Located (GRHSs RdrName) }
+ : ralt wherebinds { LL (GRHSs (unLoc $1) (unLoc $2)) }
+
+ralt :: { Located [LGRHS RdrName] }
+ : '->' exp { LL (unguardedRHS $2) }
+ | gdpats { L1 (reverse (unLoc $1)) }
+
+gdpats :: { Located [LGRHS RdrName] }
+ : gdpats gdpat { LL ($2 : unLoc $1) }
+ | gdpat { L1 [$1] }
+
+gdpat :: { LGRHS RdrName }
+ : '|' quals '->' exp { sL (comb2 $1 $>) $ GRHS (reverse (unLoc $2)) $4 }
+
+-----------------------------------------------------------------------------
+-- Statement sequences
+
+stmtlist :: { Located [LStmt RdrName] }
+ : '{' stmts '}' { LL (unLoc $2) }
+ | vocurly stmts close { $2 }
+
+-- do { ;; s ; s ; ; s ;; }
+-- The last Stmt should be an expression, but that's hard to enforce
+-- here, because we need too much lookahead if we see do { e ; }
+-- So we use ExprStmts throughout, and switch the last one over
+-- in ParseUtils.checkDo instead
+stmts :: { Located [LStmt RdrName] }
+ : stmt stmts_help { LL ($1 : unLoc $2) }
+ | ';' stmts { LL (unLoc $2) }
+ | {- empty -} { noLoc [] }
+
+stmts_help :: { Located [LStmt RdrName] } -- might be empty
+ : ';' stmts { LL (unLoc $2) }
+ | {- empty -} { noLoc [] }
+
+-- For typing stmts at the GHCi prompt, where
+-- the input may consist of just comments.
+maybe_stmt :: { Maybe (LStmt RdrName) }
+ : stmt { Just $1 }
+ | {- nothing -} { Nothing }
+
+stmt :: { LStmt RdrName }
+ : qual { $1 }
+ | infixexp '->' exp {% checkPattern $3 >>= \p ->
+ return (LL $ mkBindStmt p $1) }
+ | 'rec' stmtlist { LL $ mkRecStmt (unLoc $2) }
+
+qual :: { LStmt RdrName }
+ : exp '<-' exp {% checkPattern $1 >>= \p ->
+ return (LL $ mkBindStmt p $3) }
+ | exp { L1 $ mkExprStmt $1 }
+ | 'let' binds { LL $ LetStmt (unLoc $2) }
+
+-----------------------------------------------------------------------------
+-- Record Field Update/Construction
+
+fbinds :: { HsRecordBinds RdrName }
+ : fbinds1 { $1 }
+ | {- empty -} { [] }
+
+fbinds1 :: { HsRecordBinds RdrName }
+ : fbinds1 ',' fbind { $3 : $1 }
+ | fbind { [$1] }
+
+fbind :: { (Located RdrName, LHsExpr RdrName) }
+ : qvar '=' exp { ($1,$3) }
+
+-----------------------------------------------------------------------------
+-- Implicit Parameter Bindings
+
+dbinds :: { Located [LIPBind RdrName] }
+ : dbinds ';' dbind { LL ($3 : unLoc $1) }
+ | dbinds ';' { LL (unLoc $1) }
+ | dbind { L1 [$1] }
+-- | {- empty -} { [] }
+
+dbind :: { LIPBind RdrName }
+dbind : ipvar '=' exp { LL (IPBind (unLoc $1) $3) }
+
+ipvar :: { Located (IPName RdrName) }
+ : IPDUPVARID { L1 (Dupable (mkUnqual varName (getIPDUPVARID $1))) }
+ | IPSPLITVARID { L1 (Linear (mkUnqual varName (getIPSPLITVARID $1))) }
+
+-----------------------------------------------------------------------------
+-- Deprecations
+
+depreclist :: { Located [RdrName] }
+depreclist : deprec_var { L1 [unLoc $1] }
+ | deprec_var ',' depreclist { LL (unLoc $1 : unLoc $3) }
+
+deprec_var :: { Located RdrName }
+deprec_var : var { $1 }
+ | con { $1 }
+
+-----------------------------------------
+-- Data constructors
+qcon :: { Located RdrName }
+ : qconid { $1 }
+ | '(' qconsym ')' { LL (unLoc $2) }
+ | sysdcon { L1 $ nameRdrName (dataConName (unLoc $1)) }
+-- The case of '[:' ':]' is part of the production `parr'
+
+con :: { Located RdrName }
+ : conid { $1 }
+ | '(' consym ')' { LL (unLoc $2) }
+ | sysdcon { L1 $ nameRdrName (dataConName (unLoc $1)) }
+
+sysdcon :: { Located DataCon } -- Wired in data constructors
+ : '(' ')' { LL unitDataCon }
+ | '(' commas ')' { LL $ tupleCon Boxed $2 }
+ | '[' ']' { LL nilDataCon }
+
+conop :: { Located RdrName }
+ : consym { $1 }
+ | '`' conid '`' { LL (unLoc $2) }
+
+qconop :: { Located RdrName }
+ : qconsym { $1 }
+ | '`' qconid '`' { LL (unLoc $2) }
+
+-----------------------------------------------------------------------------
+-- Type constructors
+
+gtycon :: { Located RdrName } -- A "general" qualified tycon
+ : oqtycon { $1 }
+ | '(' ')' { LL $ getRdrName unitTyCon }
+ | '(' commas ')' { LL $ getRdrName (tupleTyCon Boxed $2) }
+ | '(' '->' ')' { LL $ getRdrName funTyCon }
+ | '[' ']' { LL $ listTyCon_RDR }
+ | '[:' ':]' { LL $ parrTyCon_RDR }
+
+oqtycon :: { Located RdrName } -- An "ordinary" qualified tycon
+ : qtycon { $1 }
+ | '(' qtyconsym ')' { LL (unLoc $2) }
+
+qtyconop :: { Located RdrName } -- Qualified or unqualified
+ : qtyconsym { $1 }
+ | '`' qtycon '`' { LL (unLoc $2) }
+
+qtycon :: { Located RdrName } -- Qualified or unqualified
+ : QCONID { L1 $! mkQual tcClsName (getQCONID $1) }
+ | tycon { $1 }
+
+tycon :: { Located RdrName } -- Unqualified
+ : CONID { L1 $! mkUnqual tcClsName (getCONID $1) }
+
+qtyconsym :: { Located RdrName }
+ : QCONSYM { L1 $! mkQual tcClsName (getQCONSYM $1) }
+ | tyconsym { $1 }
+
+tyconsym :: { Located RdrName }
+ : CONSYM { L1 $! mkUnqual tcClsName (getCONSYM $1) }
+
+-----------------------------------------------------------------------------
+-- Operators
+
+op :: { Located RdrName } -- used in infix decls
+ : varop { $1 }
+ | conop { $1 }
+
+varop :: { Located RdrName }
+ : varsym { $1 }
+ | '`' varid '`' { LL (unLoc $2) }
+
+qop :: { LHsExpr RdrName } -- used in sections
+ : qvarop { L1 $ HsVar (unLoc $1) }
+ | qconop { L1 $ HsVar (unLoc $1) }
+
+qopm :: { LHsExpr RdrName } -- used in sections
+ : qvaropm { L1 $ HsVar (unLoc $1) }
+ | qconop { L1 $ HsVar (unLoc $1) }
+
+qvarop :: { Located RdrName }
+ : qvarsym { $1 }
+ | '`' qvarid '`' { LL (unLoc $2) }
+
+qvaropm :: { Located RdrName }
+ : qvarsym_no_minus { $1 }
+ | '`' qvarid '`' { LL (unLoc $2) }
+
+-----------------------------------------------------------------------------
+-- Type variables
+
+tyvar :: { Located RdrName }
+tyvar : tyvarid { $1 }
+ | '(' tyvarsym ')' { LL (unLoc $2) }
+
+tyvarop :: { Located RdrName }
+tyvarop : '`' tyvarid '`' { LL (unLoc $2) }
+ | tyvarsym { $1 }
+
+tyvarid :: { Located RdrName }
+ : VARID { L1 $! mkUnqual tvName (getVARID $1) }
+ | special_id { L1 $! mkUnqual tvName (unLoc $1) }
+ | 'unsafe' { L1 $! mkUnqual tvName FSLIT("unsafe") }
+ | 'safe' { L1 $! mkUnqual tvName FSLIT("safe") }
+ | 'threadsafe' { L1 $! mkUnqual tvName FSLIT("threadsafe") }
+
+tyvarsym :: { Located RdrName }
+-- Does not include "!", because that is used for strictness marks
+-- or ".", because that separates the quantified type vars from the rest
+-- or "*", because that's used for kinds
+tyvarsym : VARSYM { L1 $! mkUnqual tvName (getVARSYM $1) }
+
+-----------------------------------------------------------------------------
+-- Variables
+
+var :: { Located RdrName }
+ : varid { $1 }
+ | '(' varsym ')' { LL (unLoc $2) }
+
+qvar :: { Located RdrName }
+ : qvarid { $1 }
+ | '(' varsym ')' { LL (unLoc $2) }
+ | '(' qvarsym1 ')' { LL (unLoc $2) }
+-- We've inlined qvarsym here so that the decision about
+-- whether it's a qvar or a var can be postponed until
+-- *after* we see the close paren.
+
+qvarid :: { Located RdrName }
+ : varid { $1 }
+ | QVARID { L1 $ mkQual varName (getQVARID $1) }
+
+varid :: { Located RdrName }
+ : varid_no_unsafe { $1 }
+ | 'unsafe' { L1 $! mkUnqual varName FSLIT("unsafe") }
+ | 'safe' { L1 $! mkUnqual varName FSLIT("safe") }
+ | 'threadsafe' { L1 $! mkUnqual varName FSLIT("threadsafe") }
+
+varid_no_unsafe :: { Located RdrName }
+ : VARID { L1 $! mkUnqual varName (getVARID $1) }
+ | special_id { L1 $! mkUnqual varName (unLoc $1) }
+ | 'forall' { L1 $! mkUnqual varName FSLIT("forall") }
+
+qvarsym :: { Located RdrName }
+ : varsym { $1 }
+ | qvarsym1 { $1 }
+
+qvarsym_no_minus :: { Located RdrName }
+ : varsym_no_minus { $1 }
+ | qvarsym1 { $1 }
+
+qvarsym1 :: { Located RdrName }
+qvarsym1 : QVARSYM { L1 $ mkQual varName (getQVARSYM $1) }
+
+varsym :: { Located RdrName }
+ : varsym_no_minus { $1 }
+ | '-' { L1 $ mkUnqual varName FSLIT("-") }
+
+varsym_no_minus :: { Located RdrName } -- varsym not including '-'
+ : VARSYM { L1 $ mkUnqual varName (getVARSYM $1) }
+ | special_sym { L1 $ mkUnqual varName (unLoc $1) }
+
+
+-- These special_ids are treated as keywords in various places,
+-- but as ordinary ids elsewhere. 'special_id' collects all these
+-- except 'unsafe' and 'forall' whose treatment differs depending on context
+special_id :: { Located FastString }
+special_id
+ : 'as' { L1 FSLIT("as") }
+ | 'qualified' { L1 FSLIT("qualified") }
+ | 'hiding' { L1 FSLIT("hiding") }
+ | 'export' { L1 FSLIT("export") }
+ | 'label' { L1 FSLIT("label") }
+ | 'dynamic' { L1 FSLIT("dynamic") }
+ | 'stdcall' { L1 FSLIT("stdcall") }
+ | 'ccall' { L1 FSLIT("ccall") }
+
+special_sym :: { Located FastString }
+special_sym : '!' { L1 FSLIT("!") }
+ | '.' { L1 FSLIT(".") }
+ | '*' { L1 FSLIT("*") }
+
+-----------------------------------------------------------------------------
+-- Data constructors
+
+qconid :: { Located RdrName } -- Qualified or unqualified
+ : conid { $1 }
+ | QCONID { L1 $ mkQual dataName (getQCONID $1) }
+
+conid :: { Located RdrName }
+ : CONID { L1 $ mkUnqual dataName (getCONID $1) }
+
+qconsym :: { Located RdrName } -- Qualified or unqualified
+ : consym { $1 }
+ | QCONSYM { L1 $ mkQual dataName (getQCONSYM $1) }
+
+consym :: { Located RdrName }
+ : CONSYM { L1 $ mkUnqual dataName (getCONSYM $1) }
+
+ -- ':' means only list cons
+ | ':' { L1 $ consDataCon_RDR }
+
+
+-----------------------------------------------------------------------------
+-- Literals
+
+literal :: { Located HsLit }
+ : CHAR { L1 $ HsChar $ getCHAR $1 }
+ | STRING { L1 $ HsString $ getSTRING $1 }
+ | PRIMINTEGER { L1 $ HsIntPrim $ getPRIMINTEGER $1 }
+ | PRIMCHAR { L1 $ HsCharPrim $ getPRIMCHAR $1 }
+ | PRIMSTRING { L1 $ HsStringPrim $ getPRIMSTRING $1 }
+ | PRIMFLOAT { L1 $ HsFloatPrim $ getPRIMFLOAT $1 }
+ | PRIMDOUBLE { L1 $ HsDoublePrim $ getPRIMDOUBLE $1 }
+
+-----------------------------------------------------------------------------
+-- Layout
+
+close :: { () }
+ : vccurly { () } -- context popped in lexer.
+ | error {% popContext }
+
+-----------------------------------------------------------------------------
+-- Miscellaneous (mostly renamings)
+
+modid :: { Located Module }
+ : CONID { L1 $ mkModuleFS (getCONID $1) }
+ | QCONID { L1 $ let (mod,c) = getQCONID $1 in
+ mkModuleFS
+ (mkFastString
+ (unpackFS mod ++ '.':unpackFS c))
+ }
+
+commas :: { Int }
+ : commas ',' { $1 + 1 }
+ | ',' { 2 }
+
+-----------------------------------------------------------------------------
+
+{
+happyError :: P a
+happyError = srcParseFail
+
+getVARID (L _ (ITvarid x)) = x
+getCONID (L _ (ITconid x)) = x
+getVARSYM (L _ (ITvarsym x)) = x
+getCONSYM (L _ (ITconsym x)) = x
+getQVARID (L _ (ITqvarid x)) = x
+getQCONID (L _ (ITqconid x)) = x
+getQVARSYM (L _ (ITqvarsym x)) = x
+getQCONSYM (L _ (ITqconsym x)) = x
+getIPDUPVARID (L _ (ITdupipvarid x)) = x
+getIPSPLITVARID (L _ (ITsplitipvarid x)) = x
+getCHAR (L _ (ITchar x)) = x
+getSTRING (L _ (ITstring x)) = x
+getINTEGER (L _ (ITinteger x)) = x
+getRATIONAL (L _ (ITrational x)) = x
+getPRIMCHAR (L _ (ITprimchar x)) = x
+getPRIMSTRING (L _ (ITprimstring x)) = x
+getPRIMINTEGER (L _ (ITprimint x)) = x
+getPRIMFLOAT (L _ (ITprimfloat x)) = x
+getPRIMDOUBLE (L _ (ITprimdouble x)) = x
+getTH_ID_SPLICE (L _ (ITidEscape x)) = x
+getINLINE (L _ (ITinline_prag b)) = b
+getSPEC_INLINE (L _ (ITspec_inline_prag b)) = b
+
+-- Utilities for combining source spans
+comb2 :: Located a -> Located b -> SrcSpan
+comb2 = combineLocs
+
+comb3 :: Located a -> Located b -> Located c -> SrcSpan
+comb3 a b c = combineSrcSpans (getLoc a) (combineSrcSpans (getLoc b) (getLoc c))
+
+comb4 :: Located a -> Located b -> Located c -> Located d -> SrcSpan
+comb4 a b c d = combineSrcSpans (getLoc a) $ combineSrcSpans (getLoc b) $
+ combineSrcSpans (getLoc c) (getLoc d)
+
+-- strict constructor version:
+{-# INLINE sL #-}
+sL :: SrcSpan -> a -> Located a
+sL span a = span `seq` L span a
+
+-- Make a source location for the file. We're a bit lazy here and just
+-- make a point SrcSpan at line 1, column 0. Strictly speaking we should
+-- try to find the span of the whole file (ToDo).
+fileSrcSpan :: P SrcSpan
+fileSrcSpan = do
+ l <- getSrcLoc;
+ let loc = mkSrcLoc (srcLocFile l) 1 0;
+ return (mkSrcSpan loc loc)
+}
diff --git a/compiler/parser/ParserCore.y b/compiler/parser/ParserCore.y
new file mode 100644
index 0000000000..3210583f96
--- /dev/null
+++ b/compiler/parser/ParserCore.y
@@ -0,0 +1,339 @@
+{
+module ParserCore ( parseCore ) where
+
+import IfaceSyn
+import ForeignCall
+import RdrHsSyn
+import HsSyn
+import RdrName
+import OccName
+import Kind( Kind(..) )
+import Name( nameOccName, nameModule )
+import Module
+import ParserCoreUtils
+import LexCore
+import Literal
+import SrcLoc
+import TysPrim( wordPrimTyCon, intPrimTyCon, charPrimTyCon,
+ floatPrimTyCon, doublePrimTyCon, addrPrimTyCon )
+import TyCon ( TyCon, tyConName )
+import FastString
+import Outputable
+import Char
+
+#include "../HsVersions.h"
+
+}
+
+%name parseCore
+%tokentype { Token }
+
+%token
+ '%module' { TKmodule }
+ '%data' { TKdata }
+ '%newtype' { TKnewtype }
+ '%forall' { TKforall }
+ '%rec' { TKrec }
+ '%let' { TKlet }
+ '%in' { TKin }
+ '%case' { TKcase }
+ '%of' { TKof }
+ '%coerce' { TKcoerce }
+ '%note' { TKnote }
+ '%external' { TKexternal }
+ '%_' { TKwild }
+ '(' { TKoparen }
+ ')' { TKcparen }
+ '{' { TKobrace }
+ '}' { TKcbrace }
+ '#' { TKhash}
+ '=' { TKeq }
+ '::' { TKcoloncolon }
+ '*' { TKstar }
+ '->' { TKrarrow }
+ '\\' { TKlambda}
+ '@' { TKat }
+ '.' { TKdot }
+ '?' { TKquestion}
+ ';' { TKsemicolon }
+ NAME { TKname $$ }
+ CNAME { TKcname $$ }
+ INTEGER { TKinteger $$ }
+ RATIONAL { TKrational $$ }
+ STRING { TKstring $$ }
+ CHAR { TKchar $$ }
+
+%monad { P } { thenP } { returnP }
+%lexer { lexer } { TKEOF }
+
+%%
+
+module :: { HsExtCore RdrName }
+ : '%module' modid tdefs vdefgs { HsExtCore $2 $3 $4 }
+
+modid :: { Module }
+ : CNAME { mkModuleFS (mkFastString $1) }
+
+-------------------------------------------------------------
+-- Type and newtype declarations are in HsSyn syntax
+
+tdefs :: { [TyClDecl RdrName] }
+ : {- empty -} {[]}
+ | tdef ';' tdefs {$1:$3}
+
+tdef :: { TyClDecl RdrName }
+ : '%data' q_tc_name tv_bndrs '=' '{' cons1 '}'
+ { mkTyData DataType (noLoc [], noLoc (ifaceExtRdrName $2), map toHsTvBndr $3) Nothing $6 Nothing }
+ | '%newtype' q_tc_name tv_bndrs trep
+ { let tc_rdr = ifaceExtRdrName $2 in
+ mkTyData NewType (noLoc [], noLoc tc_rdr, map toHsTvBndr $3) Nothing ($4 (rdrNameOcc tc_rdr)) Nothing }
+
+-- For a newtype we have to invent a fake data constructor name
+-- It doesn't matter what it is, because it won't be used
+trep :: { OccName -> [LConDecl RdrName] }
+ : {- empty -} { (\ tc_occ -> []) }
+ | '=' ty { (\ tc_occ -> let { dc_name = mkRdrUnqual (setOccNameSpace dataName tc_occ) ;
+ con_info = PrefixCon [toHsType $2] }
+ in [noLoc $ ConDecl (noLoc dc_name) Explicit []
+ (noLoc []) con_info ResTyH98]) }
+
+cons1 :: { [LConDecl RdrName] }
+ : con { [$1] }
+ | con ';' cons1 { $1:$3 }
+
+con :: { LConDecl RdrName }
+ : d_pat_occ attv_bndrs hs_atys
+ { noLoc $ ConDecl (noLoc (mkRdrUnqual $1)) Explicit $2 (noLoc []) (PrefixCon $3) ResTyH98}
+ | d_pat_occ '::' ty
+ -- XXX - autrijus - $3 needs to be split into argument and return types!
+ -- also not sure whether the [] below (quantified vars) appears.
+ -- also the "PrefixCon []" is wrong.
+ -- also we want to munge $3 somehow.
+ -- extractWhatEver to unpack ty into the parts to ConDecl
+ -- XXX - define it somewhere in RdrHsSyn
+ { noLoc $ ConDecl (noLoc (mkRdrUnqual $1)) Explicit [] (noLoc []) (PrefixCon []) (undefined $3) }
+
+attv_bndrs :: { [LHsTyVarBndr RdrName] }
+ : {- empty -} { [] }
+ | '@' tv_bndr attv_bndrs { toHsTvBndr $2 : $3 }
+
+hs_atys :: { [LHsType RdrName] }
+ : atys { map toHsType $1 }
+
+
+---------------------------------------
+-- Types
+---------------------------------------
+
+atys :: { [IfaceType] }
+ : {- empty -} { [] }
+ | aty atys { $1:$2 }
+
+aty :: { IfaceType }
+ : tv_occ { IfaceTyVar $1 }
+ | q_tc_name { IfaceTyConApp (IfaceTc $1) [] }
+ | '(' ty ')' { $2 }
+
+bty :: { IfaceType }
+ : tv_occ atys { foldl IfaceAppTy (IfaceTyVar $1) $2 }
+ | q_tc_name atys { IfaceTyConApp (IfaceTc $1) $2 }
+ | '(' ty ')' { $2 }
+
+ty :: { IfaceType }
+ : bty { $1 }
+ | bty '->' ty { IfaceFunTy $1 $3 }
+ | '%forall' tv_bndrs '.' ty { foldr IfaceForAllTy $4 $2 }
+
+----------------------------------------------
+-- Bindings are in Iface syntax
+
+vdefgs :: { [IfaceBinding] }
+ : {- empty -} { [] }
+ | let_bind ';' vdefgs { $1 : $3 }
+
+let_bind :: { IfaceBinding }
+ : '%rec' '{' vdefs1 '}' { IfaceRec $3 }
+ | vdef { let (b,r) = $1
+ in IfaceNonRec b r }
+
+vdefs1 :: { [(IfaceIdBndr, IfaceExpr)] }
+ : vdef { [$1] }
+ | vdef ';' vdefs1 { $1:$3 }
+
+vdef :: { (IfaceIdBndr, IfaceExpr) }
+ : qd_occ '::' ty '=' exp { (($1, $3), $5) }
+ -- NB: qd_occ includes data constructors, because
+ -- we allow data-constructor wrappers at top level
+ -- But we discard the module name, because it must be the
+ -- same as the module being compiled, and Iface syntax only
+ -- has OccNames in binding positions
+
+qd_occ :: { OccName }
+ : var_occ { $1 }
+ | d_occ { $1 }
+
+---------------------------------------
+-- Binders
+bndr :: { IfaceBndr }
+ : '@' tv_bndr { IfaceTvBndr $2 }
+ | id_bndr { IfaceIdBndr $1 }
+
+bndrs :: { [IfaceBndr] }
+ : bndr { [$1] }
+ | bndr bndrs { $1:$2 }
+
+id_bndr :: { IfaceIdBndr }
+ : '(' var_occ '::' ty ')' { ($2,$4) }
+
+id_bndrs :: { [IfaceIdBndr] }
+ : {-empty -} { [] }
+ | id_bndr id_bndrs { $1:$2 }
+
+tv_bndr :: { IfaceTvBndr }
+ : tv_occ { ($1, LiftedTypeKind) }
+ | '(' tv_occ '::' akind ')' { ($2, $4) }
+
+tv_bndrs :: { [IfaceTvBndr] }
+ : {- empty -} { [] }
+ | tv_bndr tv_bndrs { $1:$2 }
+
+akind :: { IfaceKind }
+ : '*' { LiftedTypeKind }
+ | '#' { UnliftedTypeKind }
+ | '?' { OpenTypeKind }
+ | '(' kind ')' { $2 }
+
+kind :: { IfaceKind }
+ : akind { $1 }
+ | akind '->' kind { FunKind $1 $3 }
+
+-----------------------------------------
+-- Expressions
+
+aexp :: { IfaceExpr }
+ : var_occ { IfaceLcl $1 }
+ | modid '.' qd_occ { IfaceExt (ExtPkg $1 $3) }
+ | lit { IfaceLit $1 }
+ | '(' exp ')' { $2 }
+
+fexp :: { IfaceExpr }
+ : fexp aexp { IfaceApp $1 $2 }
+ | fexp '@' aty { IfaceApp $1 (IfaceType $3) }
+ | aexp { $1 }
+
+exp :: { IfaceExpr }
+ : fexp { $1 }
+ | '\\' bndrs '->' exp { foldr IfaceLam $4 $2 }
+ | '%let' let_bind '%in' exp { IfaceLet $2 $4 }
+-- gaw 2004
+ | '%case' '(' ty ')' aexp '%of' id_bndr
+ '{' alts1 '}' { IfaceCase $5 (fst $7) $3 $9 }
+ | '%coerce' aty exp { IfaceNote (IfaceCoerce $2) $3 }
+ | '%note' STRING exp
+ { case $2 of
+ --"SCC" -> IfaceNote (IfaceSCC "scc") $3
+ "InlineCall" -> IfaceNote IfaceInlineCall $3
+ "InlineMe" -> IfaceNote IfaceInlineMe $3
+ }
+ | '%external' STRING aty { IfaceFCall (ForeignCall.CCall
+ (CCallSpec (StaticTarget (mkFastString $2))
+ CCallConv (PlaySafe False)))
+ $3 }
+
+alts1 :: { [IfaceAlt] }
+ : alt { [$1] }
+ | alt ';' alts1 { $1:$3 }
+
+alt :: { IfaceAlt }
+ : modid '.' d_pat_occ bndrs '->' exp
+ { (IfaceDataAlt $3, map ifaceBndrName $4, $6) }
+ -- The external syntax currently includes the types of the
+ -- the args, but they aren't needed internally
+ -- Nor is the module qualifier
+ | lit '->' exp
+ { (IfaceLitAlt $1, [], $3) }
+ | '%_' '->' exp
+ { (IfaceDefault, [], $3) }
+
+lit :: { Literal }
+ : '(' INTEGER '::' aty ')' { convIntLit $2 $4 }
+ | '(' RATIONAL '::' aty ')' { convRatLit $2 $4 }
+ | '(' CHAR '::' aty ')' { MachChar $2 }
+ | '(' STRING '::' aty ')' { MachStr (mkFastString $2) }
+
+tv_occ :: { OccName }
+ : NAME { mkOccName tvName $1 }
+
+var_occ :: { OccName }
+ : NAME { mkVarOcc $1 }
+
+
+-- Type constructor
+q_tc_name :: { IfaceExtName }
+ : modid '.' CNAME { ExtPkg $1 (mkOccName tcName $3) }
+
+-- Data constructor in a pattern or data type declaration; use the dataName,
+-- because that's what we expect in Core case patterns
+d_pat_occ :: { OccName }
+ : CNAME { mkOccName dataName $1 }
+
+-- Data constructor occurrence in an expression;
+-- use the varName because that's the worker Id
+d_occ :: { OccName }
+ : CNAME { mkVarOcc $1 }
+
+{
+
+ifaceBndrName (IfaceIdBndr (n,_)) = n
+ifaceBndrName (IfaceTvBndr (n,_)) = n
+
+convIntLit :: Integer -> IfaceType -> Literal
+convIntLit i (IfaceTyConApp tc [])
+ | tc `eqTc` intPrimTyCon = MachInt i
+ | tc `eqTc` wordPrimTyCon = MachWord i
+ | tc `eqTc` charPrimTyCon = MachChar (chr (fromInteger i))
+ | tc `eqTc` addrPrimTyCon && i == 0 = MachNullAddr
+convIntLit i aty
+ = pprPanic "Unknown integer literal type" (ppr aty)
+
+convRatLit :: Rational -> IfaceType -> Literal
+convRatLit r (IfaceTyConApp tc [])
+ | tc `eqTc` floatPrimTyCon = MachFloat r
+ | tc `eqTc` doublePrimTyCon = MachDouble r
+convRatLit i aty
+ = pprPanic "Unknown rational literal type" (ppr aty)
+
+eqTc :: IfaceTyCon -> TyCon -> Bool -- Ugh!
+eqTc (IfaceTc (ExtPkg mod occ)) tycon
+ = mod == nameModule nm && occ == nameOccName nm
+ where
+ nm = tyConName tycon
+
+-- Tiresomely, we have to generate both HsTypes (in type/class decls)
+-- and IfaceTypes (in Core expressions). So we parse them as IfaceTypes,
+-- and convert to HsTypes here. But the IfaceTypes we can see here
+-- are very limited (see the productions for 'ty', so the translation
+-- isn't hard
+toHsType :: IfaceType -> LHsType RdrName
+toHsType (IfaceTyVar v) = noLoc $ HsTyVar (mkRdrUnqual v)
+toHsType (IfaceAppTy t1 t2) = noLoc $ HsAppTy (toHsType t1) (toHsType t2)
+toHsType (IfaceFunTy t1 t2) = noLoc $ HsFunTy (toHsType t1) (toHsType t2)
+toHsType (IfaceTyConApp (IfaceTc tc) ts) = foldl mkHsAppTy (noLoc $ HsTyVar (ifaceExtRdrName tc)) (map toHsType ts)
+toHsType (IfaceForAllTy tv t) = add_forall (toHsTvBndr tv) (toHsType t)
+
+toHsTvBndr :: IfaceTvBndr -> LHsTyVarBndr RdrName
+toHsTvBndr (tv,k) = noLoc $ KindedTyVar (mkRdrUnqual tv) k
+
+ifaceExtRdrName :: IfaceExtName -> RdrName
+ifaceExtRdrName (ExtPkg mod occ) = mkOrig mod occ
+ifaceExtRdrName other = pprPanic "ParserCore.ifaceExtRdrName" (ppr other)
+
+add_forall tv (L _ (HsForAllTy exp tvs cxt t))
+ = noLoc $ HsForAllTy exp (tv:tvs) cxt t
+add_forall tv t
+ = noLoc $ HsForAllTy Explicit [tv] (noLoc []) t
+
+happyError :: P a
+happyError s l = failP (show l ++ ": Parse error\n") (take 100 s) l
+}
+
diff --git a/compiler/parser/ParserCoreUtils.hs b/compiler/parser/ParserCoreUtils.hs
new file mode 100644
index 0000000000..a590fb5c93
--- /dev/null
+++ b/compiler/parser/ParserCoreUtils.hs
@@ -0,0 +1,72 @@
+module ParserCoreUtils where
+
+import IO
+
+data ParseResult a = OkP a | FailP String
+type P a = String -> Int -> ParseResult a
+
+thenP :: P a -> (a -> P b) -> P b
+m `thenP` k = \ s l ->
+ case m s l of
+ OkP a -> k a s l
+ FailP s -> FailP s
+
+returnP :: a -> P a
+returnP m _ _ = OkP m
+
+failP :: String -> P a
+failP s s' _ = FailP (s ++ ":" ++ s')
+
+getCoreModuleName :: FilePath -> IO String
+getCoreModuleName fpath =
+ catch (do
+ h <- openFile fpath ReadMode
+ ls <- hGetContents h
+ let mo = findMod (words ls)
+ -- make sure we close up the file right away.
+ (length mo) `seq` return ()
+ hClose h
+ return mo)
+ (\ _ -> return "Main")
+ where
+ findMod [] = "Main"
+ findMod ("%module":m:_) = m
+ findMod (_:xs) = findMod xs
+
+
+data Token =
+ TKmodule
+ | TKdata
+ | TKnewtype
+ | TKforall
+ | TKrec
+ | TKlet
+ | TKin
+ | TKcase
+ | TKof
+ | TKcoerce
+ | TKnote
+ | TKexternal
+ | TKwild
+ | TKoparen
+ | TKcparen
+ | TKobrace
+ | TKcbrace
+ | TKhash
+ | TKeq
+ | TKcoloncolon
+ | TKstar
+ | TKrarrow
+ | TKlambda
+ | TKat
+ | TKdot
+ | TKquestion
+ | TKsemicolon
+ | TKname String
+ | TKcname String
+ | TKinteger Integer
+ | TKrational Rational
+ | TKstring String
+ | TKchar Char
+ | TKEOF
+
diff --git a/compiler/parser/RdrHsSyn.lhs b/compiler/parser/RdrHsSyn.lhs
new file mode 100644
index 0000000000..8d59e2b22c
--- /dev/null
+++ b/compiler/parser/RdrHsSyn.lhs
@@ -0,0 +1,869 @@
+%
+% (c) The University of Glasgow, 1996-2003
+
+Functions over HsSyn specialised to RdrName.
+
+\begin{code}
+module RdrHsSyn (
+ extractHsTyRdrTyVars,
+ extractHsRhoRdrTyVars, extractGenericPatTyVars,
+
+ mkHsOpApp, mkClassDecl,
+ mkHsNegApp, mkHsIntegral, mkHsFractional,
+ mkHsDo, mkHsSplice,
+ mkTyData, mkPrefixCon, mkRecCon, mkInlineSpec,
+ mkRecConstrOrUpdate, -- HsExp -> [HsFieldUpdate] -> P HsExp
+
+ cvBindGroup,
+ cvBindsAndSigs,
+ cvTopDecls,
+ findSplice, mkGroup,
+
+ -- Stuff to do with Foreign declarations
+ CallConv(..),
+ mkImport, -- CallConv -> Safety
+ -- -> (FastString, RdrName, RdrNameHsType)
+ -- -> P RdrNameHsDecl
+ mkExport, -- CallConv
+ -- -> (FastString, RdrName, RdrNameHsType)
+ -- -> P RdrNameHsDecl
+ mkExtName, -- RdrName -> CLabelString
+ mkGadtDecl, -- Located RdrName -> LHsType RdrName -> ConDecl RdrName
+
+ -- Bunch of functions in the parser monad for
+ -- checking and constructing values
+ checkPrecP, -- Int -> P Int
+ checkContext, -- HsType -> P HsContext
+ checkPred, -- HsType -> P HsPred
+ checkTyClHdr, -- LHsContext RdrName -> LHsType RdrName -> P (LHsContext RdrName, Located RdrName, [LHsTyVarBndr RdrName])
+ checkSynHdr, -- LHsType RdrName -> P (Located RdrName, [LHsTyVarBndr RdrName])
+ checkInstType, -- HsType -> P HsType
+ checkPattern, -- HsExp -> P HsPat
+ checkPatterns, -- SrcLoc -> [HsExp] -> P [HsPat]
+ checkDo, -- [Stmt] -> P [Stmt]
+ checkMDo, -- [Stmt] -> P [Stmt]
+ checkValDef, -- (SrcLoc, HsExp, HsRhs, [HsDecl]) -> P HsDecl
+ checkValSig, -- (SrcLoc, HsExp, HsRhs, [HsDecl]) -> P HsDecl
+ parseError, -- String -> Pa
+ ) where
+
+#include "HsVersions.h"
+
+import HsSyn -- Lots of it
+import RdrName ( RdrName, isRdrTyVar, mkUnqual, rdrNameOcc,
+ isRdrDataCon, isUnqual, getRdrName, isQual,
+ setRdrNameSpace )
+import BasicTypes ( maxPrecedence, Activation, InlineSpec(..), alwaysInlineSpec, neverInlineSpec )
+import Lexer ( P, failSpanMsgP, extension, bangPatEnabled )
+import TysWiredIn ( unitTyCon )
+import ForeignCall ( CCallConv, Safety, CCallTarget(..), CExportSpec(..),
+ DNCallSpec(..), DNKind(..), CLabelString )
+import OccName ( srcDataName, varName, isDataOcc, isTcOcc,
+ occNameString )
+import SrcLoc
+import OrdList ( OrdList, fromOL )
+import Bag ( Bag, emptyBag, snocBag, consBag, foldrBag )
+import Outputable
+import FastString
+import Panic
+
+import List ( isSuffixOf, nubBy )
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection{A few functions over HsSyn at RdrName}
+%* *
+%************************************************************************
+
+extractHsTyRdrNames finds the free variables of a HsType
+It's used when making the for-alls explicit.
+
+\begin{code}
+extractHsTyRdrTyVars :: LHsType RdrName -> [Located RdrName]
+extractHsTyRdrTyVars ty = nubBy eqLocated (extract_lty ty [])
+
+extractHsRhoRdrTyVars :: LHsContext RdrName -> LHsType RdrName -> [Located RdrName]
+-- This one takes the context and tau-part of a
+-- sigma type and returns their free type variables
+extractHsRhoRdrTyVars ctxt ty
+ = nubBy eqLocated $ extract_lctxt ctxt (extract_lty ty [])
+
+extract_lctxt ctxt acc = foldr (extract_pred . unLoc) acc (unLoc ctxt)
+
+extract_pred (HsClassP cls tys) acc = foldr extract_lty acc tys
+extract_pred (HsIParam n ty) acc = extract_lty ty acc
+
+extract_lty (L loc ty) acc
+ = case ty of
+ HsTyVar tv -> extract_tv loc tv acc
+ HsBangTy _ ty -> extract_lty ty acc
+ HsAppTy ty1 ty2 -> extract_lty ty1 (extract_lty ty2 acc)
+ HsListTy ty -> extract_lty ty acc
+ HsPArrTy ty -> extract_lty ty acc
+ HsTupleTy _ tys -> foldr extract_lty acc tys
+ HsFunTy ty1 ty2 -> extract_lty ty1 (extract_lty ty2 acc)
+ HsPredTy p -> extract_pred p acc
+ HsOpTy ty1 (L loc tv) ty2 -> extract_tv loc tv (extract_lty ty1 (extract_lty ty2 acc))
+ HsParTy ty -> extract_lty ty acc
+ HsNumTy num -> acc
+ HsSpliceTy _ -> acc -- Type splices mention no type variables
+ HsKindSig ty k -> extract_lty ty acc
+ HsForAllTy exp [] cx ty -> extract_lctxt cx (extract_lty ty acc)
+ HsForAllTy exp tvs cx ty -> acc ++ (filter ((`notElem` locals) . unLoc) $
+ extract_lctxt cx (extract_lty ty []))
+ where
+ locals = hsLTyVarNames tvs
+
+extract_tv :: SrcSpan -> RdrName -> [Located RdrName] -> [Located RdrName]
+extract_tv loc tv acc | isRdrTyVar tv = L loc tv : acc
+ | otherwise = acc
+
+extractGenericPatTyVars :: LHsBinds RdrName -> [Located RdrName]
+-- Get the type variables out of the type patterns in a bunch of
+-- possibly-generic bindings in a class declaration
+extractGenericPatTyVars binds
+ = nubBy eqLocated (foldrBag get [] binds)
+ where
+ get (L _ (FunBind { fun_matches = MatchGroup ms _ })) acc = foldr (get_m.unLoc) acc ms
+ get other acc = acc
+
+ get_m (Match (L _ (TypePat ty) : _) _ _) acc = extract_lty ty acc
+ get_m other acc = acc
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection{Construction functions for Rdr stuff}
+%* *
+%************************************************************************
+
+mkClassDecl builds a RdrClassDecl, filling in the names for tycon and datacon
+by deriving them from the name of the class. We fill in the names for the
+tycon and datacon corresponding to the class, by deriving them from the
+name of the class itself. This saves recording the names in the interface
+file (which would be equally good).
+
+Similarly for mkConDecl, mkClassOpSig and default-method names.
+
+ *** See "THE NAMING STORY" in HsDecls ****
+
+\begin{code}
+mkClassDecl (cxt, cname, tyvars) fds sigs mbinds
+ = ClassDecl { tcdCtxt = cxt, tcdLName = cname, tcdTyVars = tyvars,
+ tcdFDs = fds,
+ tcdSigs = sigs,
+ tcdMeths = mbinds
+ }
+
+mkTyData new_or_data (context, tname, tyvars) ksig data_cons maybe_deriv
+ = TyData { tcdND = new_or_data, tcdCtxt = context, tcdLName = tname,
+ tcdTyVars = tyvars, tcdCons = data_cons,
+ tcdKindSig = ksig, tcdDerivs = maybe_deriv }
+\end{code}
+
+\begin{code}
+mkHsNegApp :: LHsExpr RdrName -> HsExpr RdrName
+-- RdrName If the type checker sees (negate 3#) it will barf, because negate
+-- can't take an unboxed arg. But that is exactly what it will see when
+-- we write "-3#". So we have to do the negation right now!
+mkHsNegApp (L loc e) = f e
+ where f (HsLit (HsIntPrim i)) = HsLit (HsIntPrim (-i))
+ f (HsLit (HsFloatPrim i)) = HsLit (HsFloatPrim (-i))
+ f (HsLit (HsDoublePrim i)) = HsLit (HsDoublePrim (-i))
+ f expr = NegApp (L loc e) noSyntaxExpr
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[cvBinds-etc]{Converting to @HsBinds@, etc.}
+%* *
+%************************************************************************
+
+Function definitions are restructured here. Each is assumed to be recursive
+initially, and non recursive definitions are discovered by the dependency
+analyser.
+
+
+\begin{code}
+-- | Groups together bindings for a single function
+cvTopDecls :: OrdList (LHsDecl RdrName) -> [LHsDecl RdrName]
+cvTopDecls decls = go (fromOL decls)
+ where
+ go :: [LHsDecl RdrName] -> [LHsDecl RdrName]
+ go [] = []
+ go (L l (ValD b) : ds) = L l' (ValD b') : go ds'
+ where (L l' b', ds') = getMonoBind (L l b) ds
+ go (d : ds) = d : go ds
+
+cvBindGroup :: OrdList (LHsDecl RdrName) -> HsValBinds RdrName
+cvBindGroup binding
+ = case (cvBindsAndSigs binding) of { (mbs, sigs) ->
+ ValBindsIn mbs sigs
+ }
+
+cvBindsAndSigs :: OrdList (LHsDecl RdrName)
+ -> (Bag (LHsBind RdrName), [LSig RdrName])
+-- Input decls contain just value bindings and signatures
+cvBindsAndSigs fb = go (fromOL fb)
+ where
+ go [] = (emptyBag, [])
+ go (L l (SigD s) : ds) = (bs, L l s : ss)
+ where (bs,ss) = go ds
+ go (L l (ValD b) : ds) = (b' `consBag` bs, ss)
+ where (b',ds') = getMonoBind (L l b) ds
+ (bs,ss) = go ds'
+
+-----------------------------------------------------------------------------
+-- Group function bindings into equation groups
+
+getMonoBind :: LHsBind RdrName -> [LHsDecl RdrName]
+ -> (LHsBind RdrName, [LHsDecl RdrName])
+-- Suppose (b',ds') = getMonoBind b ds
+-- ds is a *reversed* list of parsed bindings
+-- b is a MonoBinds that has just been read off the front
+
+-- Then b' is the result of grouping more equations from ds that
+-- belong with b into a single MonoBinds, and ds' is the depleted
+-- list of parsed bindings.
+--
+-- No AndMonoBinds or EmptyMonoBinds here; just single equations
+
+getMonoBind (L loc bind@(FunBind { fun_id = L _ f, fun_matches = MatchGroup mtchs _ })) binds
+ | has_args mtchs
+ = go mtchs loc binds
+ where
+ go mtchs1 loc1 (L loc2 (ValD (FunBind { fun_id = L _ f2, fun_matches = MatchGroup mtchs2 _ })) : binds)
+ | f == f2 = go (mtchs2++mtchs1) loc binds
+ where loc = combineSrcSpans loc1 loc2
+ go mtchs1 loc binds
+ = (L loc (bind { fun_matches = mkMatchGroup (reverse mtchs1) }), binds)
+ -- Reverse the final matches, to get it back in the right order
+
+getMonoBind bind binds = (bind, binds)
+
+has_args ((L _ (Match args _ _)) : _) = not (null args)
+ -- Don't group together FunBinds if they have
+ -- no arguments. This is necessary now that variable bindings
+ -- with no arguments are now treated as FunBinds rather
+ -- than pattern bindings (tests/rename/should_fail/rnfail002).
+\end{code}
+
+\begin{code}
+findSplice :: [LHsDecl a] -> (HsGroup a, Maybe (SpliceDecl a, [LHsDecl a]))
+findSplice ds = addl emptyRdrGroup ds
+
+mkGroup :: [LHsDecl a] -> HsGroup a
+mkGroup ds = addImpDecls emptyRdrGroup ds
+
+addImpDecls :: HsGroup a -> [LHsDecl a] -> HsGroup a
+-- The decls are imported, and should not have a splice
+addImpDecls group decls = case addl group decls of
+ (group', Nothing) -> group'
+ other -> panic "addImpDecls"
+
+addl :: HsGroup a -> [LHsDecl a] -> (HsGroup a, Maybe (SpliceDecl a, [LHsDecl a]))
+ -- This stuff reverses the declarations (again) but it doesn't matter
+
+-- Base cases
+addl gp [] = (gp, Nothing)
+addl gp (L l d : ds) = add gp l d ds
+
+
+add :: HsGroup a -> SrcSpan -> HsDecl a -> [LHsDecl a]
+ -> (HsGroup a, Maybe (SpliceDecl a, [LHsDecl a]))
+
+add gp l (SpliceD e) ds = (gp, Just (e, ds))
+
+-- Class declarations: pull out the fixity signatures to the top
+add gp@(HsGroup {hs_tyclds = ts, hs_fixds = fs}) l (TyClD d) ds
+ | isClassDecl d =
+ let fsigs = [ L l f | L l (FixSig f) <- tcdSigs d ] in
+ addl (gp { hs_tyclds = L l d : ts, hs_fixds = fsigs ++ fs }) ds
+ | otherwise =
+ addl (gp { hs_tyclds = L l d : ts }) ds
+
+-- Signatures: fixity sigs go a different place than all others
+add gp@(HsGroup {hs_fixds = ts}) l (SigD (FixSig f)) ds
+ = addl (gp {hs_fixds = L l f : ts}) ds
+add gp@(HsGroup {hs_valds = ts}) l (SigD d) ds
+ = addl (gp {hs_valds = add_sig (L l d) ts}) ds
+
+-- Value declarations: use add_bind
+add gp@(HsGroup {hs_valds = ts}) l (ValD d) ds
+ = addl (gp { hs_valds = add_bind (L l d) ts }) ds
+
+-- The rest are routine
+add gp@(HsGroup {hs_instds = ts}) l (InstD d) ds
+ = addl (gp { hs_instds = L l d : ts }) ds
+add gp@(HsGroup {hs_defds = ts}) l (DefD d) ds
+ = addl (gp { hs_defds = L l d : ts }) ds
+add gp@(HsGroup {hs_fords = ts}) l (ForD d) ds
+ = addl (gp { hs_fords = L l d : ts }) ds
+add gp@(HsGroup {hs_depds = ts}) l (DeprecD d) ds
+ = addl (gp { hs_depds = L l d : ts }) ds
+add gp@(HsGroup {hs_ruleds = ts}) l (RuleD d) ds
+ = addl (gp { hs_ruleds = L l d : ts }) ds
+
+add_bind b (ValBindsIn bs sigs) = ValBindsIn (bs `snocBag` b) sigs
+add_sig s (ValBindsIn bs sigs) = ValBindsIn bs (s:sigs)
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[PrefixToHS-utils]{Utilities for conversion}
+%* *
+%************************************************************************
+
+
+\begin{code}
+-----------------------------------------------------------------------------
+-- mkPrefixCon
+
+-- When parsing data declarations, we sometimes inadvertently parse
+-- a constructor application as a type (eg. in data T a b = C a b `D` E a b)
+-- This function splits up the type application, adds any pending
+-- arguments, and converts the type constructor back into a data constructor.
+
+mkPrefixCon :: LHsType RdrName -> [LBangType RdrName]
+ -> P (Located RdrName, HsConDetails RdrName (LBangType RdrName))
+mkPrefixCon ty tys
+ = split ty tys
+ where
+ split (L _ (HsAppTy t u)) ts = split t (u : ts)
+ split (L l (HsTyVar tc)) ts = do data_con <- tyConToDataCon l tc
+ return (data_con, PrefixCon ts)
+ split (L l _) _ = parseError l "parse error in data/newtype declaration"
+
+mkRecCon :: Located RdrName -> [([Located RdrName], LBangType RdrName)]
+ -> P (Located RdrName, HsConDetails RdrName (LBangType RdrName))
+mkRecCon (L loc con) fields
+ = do data_con <- tyConToDataCon loc con
+ return (data_con, RecCon [ (l,t) | (ls,t) <- fields, l <- ls ])
+
+tyConToDataCon :: SrcSpan -> RdrName -> P (Located RdrName)
+tyConToDataCon loc tc
+ | isTcOcc (rdrNameOcc tc)
+ = return (L loc (setRdrNameSpace tc srcDataName))
+ | otherwise
+ = parseError loc (showSDoc (text "Not a constructor:" <+> quotes (ppr tc)))
+
+----------------------------------------------------------------------------
+-- Various Syntactic Checks
+
+checkInstType :: LHsType RdrName -> P (LHsType RdrName)
+checkInstType (L l t)
+ = case t of
+ HsForAllTy exp tvs ctxt ty -> do
+ dict_ty <- checkDictTy ty
+ return (L l (HsForAllTy exp tvs ctxt dict_ty))
+
+ HsParTy ty -> checkInstType ty
+
+ ty -> do dict_ty <- checkDictTy (L l ty)
+ return (L l (HsForAllTy Implicit [] (noLoc []) dict_ty))
+
+checkTyVars :: [LHsType RdrName] -> P [LHsTyVarBndr RdrName]
+checkTyVars tvs
+ = mapM chk tvs
+ where
+ -- Check that the name space is correct!
+ chk (L l (HsKindSig (L _ (HsTyVar tv)) k))
+ | isRdrTyVar tv = return (L l (KindedTyVar tv k))
+ chk (L l (HsTyVar tv))
+ | isRdrTyVar tv = return (L l (UserTyVar tv))
+ chk (L l other)
+ = parseError l "Type found where type variable expected"
+
+checkSynHdr :: LHsType RdrName -> P (Located RdrName, [LHsTyVarBndr RdrName])
+checkSynHdr ty = do { (_, tc, tvs) <- checkTyClHdr (noLoc []) ty
+ ; return (tc, tvs) }
+
+checkTyClHdr :: LHsContext RdrName -> LHsType RdrName
+ -> P (LHsContext RdrName, Located RdrName, [LHsTyVarBndr RdrName])
+-- The header of a type or class decl should look like
+-- (C a, D b) => T a b
+-- or T a b
+-- or a + b
+-- etc
+checkTyClHdr (L l cxt) ty
+ = do (tc, tvs) <- gol ty []
+ mapM_ chk_pred cxt
+ return (L l cxt, tc, tvs)
+ where
+ gol (L l ty) acc = go l ty acc
+
+ go l (HsTyVar tc) acc
+ | not (isRdrTyVar tc) = checkTyVars acc >>= \ tvs ->
+ return (L l tc, tvs)
+ go l (HsOpTy t1 tc t2) acc = checkTyVars (t1:t2:acc) >>= \ tvs ->
+ return (tc, tvs)
+ go l (HsParTy ty) acc = gol ty acc
+ go l (HsAppTy t1 t2) acc = gol t1 (t2:acc)
+ go l other acc = parseError l "Malformed LHS to type of class declaration"
+
+ -- The predicates in a type or class decl must all
+ -- be HsClassPs. They need not all be type variables,
+ -- even in Haskell 98. E.g. class (Monad m, Monad (t m)) => MonadT t m
+ chk_pred (L l (HsClassP _ args)) = return ()
+ chk_pred (L l _)
+ = parseError l "Malformed context in type or class declaration"
+
+
+checkContext :: LHsType RdrName -> P (LHsContext RdrName)
+checkContext (L l t)
+ = check t
+ where
+ check (HsTupleTy _ ts) -- (Eq a, Ord b) shows up as a tuple type
+ = do ctx <- mapM checkPred ts
+ return (L l ctx)
+
+ check (HsParTy ty) -- to be sure HsParTy doesn't get into the way
+ = check (unLoc ty)
+
+ check (HsTyVar t) -- Empty context shows up as a unit type ()
+ | t == getRdrName unitTyCon = return (L l [])
+
+ check t
+ = do p <- checkPred (L l t)
+ return (L l [p])
+
+
+checkPred :: LHsType RdrName -> P (LHsPred RdrName)
+-- Watch out.. in ...deriving( Show )... we use checkPred on
+-- the list of partially applied predicates in the deriving,
+-- so there can be zero args.
+checkPred (L spn (HsPredTy (HsIParam n ty)))
+ = return (L spn (HsIParam n ty))
+checkPred (L spn ty)
+ = check spn ty []
+ where
+ checkl (L l ty) args = check l ty args
+
+ check _loc (HsTyVar t) args | not (isRdrTyVar t)
+ = return (L spn (HsClassP t args))
+ check _loc (HsAppTy l r) args = checkl l (r:args)
+ check _loc (HsOpTy l (L loc tc) r) args = check loc (HsTyVar tc) (l:r:args)
+ check _loc (HsParTy t) args = checkl t args
+ check loc _ _ = parseError loc "malformed class assertion"
+
+checkDictTy :: LHsType RdrName -> P (LHsType RdrName)
+checkDictTy (L spn ty) = check ty []
+ where
+ check (HsTyVar t) args | not (isRdrTyVar t)
+ = return (L spn (HsPredTy (HsClassP t args)))
+ check (HsAppTy l r) args = check (unLoc l) (r:args)
+ check (HsParTy t) args = check (unLoc t) args
+ check _ _ = parseError spn "Malformed context in instance header"
+
+---------------------------------------------------------------------------
+-- Checking statements in a do-expression
+-- We parse do { e1 ; e2 ; }
+-- as [ExprStmt e1, ExprStmt e2]
+-- checkDo (a) checks that the last thing is an ExprStmt
+-- (b) returns it separately
+-- same comments apply for mdo as well
+
+checkDo = checkDoMDo "a " "'do'"
+checkMDo = checkDoMDo "an " "'mdo'"
+
+checkDoMDo :: String -> String -> SrcSpan -> [LStmt RdrName] -> P ([LStmt RdrName], LHsExpr RdrName)
+checkDoMDo pre nm loc [] = parseError loc ("Empty " ++ nm ++ " construct")
+checkDoMDo pre nm loc ss = do
+ check ss
+ where
+ check [L l (ExprStmt e _ _)] = return ([], e)
+ check [L l _] = parseError l ("The last statement in " ++ pre ++ nm ++
+ " construct must be an expression")
+ check (s:ss) = do
+ (ss',e') <- check ss
+ return ((s:ss'),e')
+
+-- -------------------------------------------------------------------------
+-- Checking Patterns.
+
+-- We parse patterns as expressions and check for valid patterns below,
+-- converting the expression into a pattern at the same time.
+
+checkPattern :: LHsExpr RdrName -> P (LPat RdrName)
+checkPattern e = checkLPat e
+
+checkPatterns :: [LHsExpr RdrName] -> P [LPat RdrName]
+checkPatterns es = mapM checkPattern es
+
+checkLPat :: LHsExpr RdrName -> P (LPat RdrName)
+checkLPat e@(L l _) = checkPat l e []
+
+checkPat :: SrcSpan -> LHsExpr RdrName -> [LPat RdrName] -> P (LPat RdrName)
+checkPat loc (L l (HsVar c)) args
+ | isRdrDataCon c = return (L loc (ConPatIn (L l c) (PrefixCon args)))
+checkPat loc e args -- OK to let this happen even if bang-patterns
+ -- are not enabled, because there is no valid
+ -- non-bang-pattern parse of (C ! e)
+ | Just (e', args') <- splitBang e
+ = do { args'' <- checkPatterns args'
+ ; checkPat loc e' (args'' ++ args) }
+checkPat loc (L _ (HsApp f x)) args
+ = do { x <- checkLPat x; checkPat loc f (x:args) }
+checkPat loc (L _ e) []
+ = do { p <- checkAPat loc e; return (L loc p) }
+checkPat loc pat _some_args
+ = patFail loc
+
+checkAPat loc e = case e of
+ EWildPat -> return (WildPat placeHolderType)
+ HsVar x | isQual x -> parseError loc ("Qualified variable in pattern: "
+ ++ showRdrName x)
+ | otherwise -> return (VarPat x)
+ HsLit l -> return (LitPat l)
+
+ -- Overloaded numeric patterns (e.g. f 0 x = x)
+ -- Negation is recorded separately, so that the literal is zero or +ve
+ -- NB. Negative *primitive* literals are already handled by
+ -- RdrHsSyn.mkHsNegApp
+ HsOverLit pos_lit -> return (mkNPat pos_lit Nothing)
+ NegApp (L _ (HsOverLit pos_lit)) _
+ -> return (mkNPat pos_lit (Just noSyntaxExpr))
+
+ SectionR (L _ (HsVar bang)) e
+ | bang == bang_RDR -> checkLPat e >>= (return . BangPat)
+ ELazyPat e -> checkLPat e >>= (return . LazyPat)
+ EAsPat n e -> checkLPat e >>= (return . AsPat n)
+ ExprWithTySig e t -> checkLPat e >>= \e ->
+ -- Pattern signatures are parsed as sigtypes,
+ -- but they aren't explicit forall points. Hence
+ -- we have to remove the implicit forall here.
+ let t' = case t of
+ L _ (HsForAllTy Implicit _ (L _ []) ty) -> ty
+ other -> other
+ in
+ return (SigPatIn e t')
+
+ -- n+k patterns
+ OpApp (L nloc (HsVar n)) (L _ (HsVar plus)) _
+ (L _ (HsOverLit lit@(HsIntegral _ _)))
+ | plus == plus_RDR
+ -> return (mkNPlusKPat (L nloc n) lit)
+
+ OpApp l op fix r -> checkLPat l >>= \l ->
+ checkLPat r >>= \r ->
+ case op of
+ L cl (HsVar c) | isDataOcc (rdrNameOcc c)
+ -> return (ConPatIn (L cl c) (InfixCon l r))
+ _ -> patFail loc
+
+ HsPar e -> checkLPat e >>= (return . ParPat)
+ ExplicitList _ es -> mapM (\e -> checkLPat e) es >>= \ps ->
+ return (ListPat ps placeHolderType)
+ ExplicitPArr _ es -> mapM (\e -> checkLPat e) es >>= \ps ->
+ return (PArrPat ps placeHolderType)
+
+ ExplicitTuple es b -> mapM (\e -> checkLPat e) es >>= \ps ->
+ return (TuplePat ps b placeHolderType)
+
+ RecordCon c _ fs -> mapM checkPatField fs >>= \fs ->
+ return (ConPatIn c (RecCon fs))
+-- Generics
+ HsType ty -> return (TypePat ty)
+ _ -> patFail loc
+
+plus_RDR, bang_RDR :: RdrName
+plus_RDR = mkUnqual varName FSLIT("+") -- Hack
+bang_RDR = mkUnqual varName FSLIT("!") -- Hack
+
+checkPatField :: (Located RdrName, LHsExpr RdrName) -> P (Located RdrName, LPat RdrName)
+checkPatField (n,e) = do
+ p <- checkLPat e
+ return (n,p)
+
+patFail loc = parseError loc "Parse error in pattern"
+
+
+---------------------------------------------------------------------------
+-- Check Equation Syntax
+
+checkValDef :: LHsExpr RdrName
+ -> Maybe (LHsType RdrName)
+ -> Located (GRHSs RdrName)
+ -> P (HsBind RdrName)
+
+checkValDef lhs opt_sig grhss
+ = do { mb_fun <- isFunLhs lhs
+ ; case mb_fun of
+ Just (fun, is_infix, pats) -> checkFunBind (getLoc lhs)
+ fun is_infix pats opt_sig grhss
+ Nothing -> checkPatBind lhs grhss }
+
+checkFunBind lhs_loc fun is_infix pats opt_sig (L rhs_span grhss)
+ | isQual (unLoc fun)
+ = parseError (getLoc fun) ("Qualified name in function definition: " ++
+ showRdrName (unLoc fun))
+ | otherwise
+ = do ps <- checkPatterns pats
+ let match_span = combineSrcSpans lhs_loc rhs_span
+ matches = mkMatchGroup [L match_span (Match ps opt_sig grhss)]
+ return (FunBind { fun_id = fun, fun_infix = is_infix, fun_matches = matches,
+ fun_co_fn = idCoercion, bind_fvs = placeHolderNames })
+ -- The span of the match covers the entire equation.
+ -- That isn't quite right, but it'll do for now.
+
+checkPatBind lhs (L _ grhss)
+ = do { lhs <- checkPattern lhs
+ ; return (PatBind lhs grhss placeHolderType placeHolderNames) }
+
+checkValSig
+ :: LHsExpr RdrName
+ -> LHsType RdrName
+ -> P (Sig RdrName)
+checkValSig (L l (HsVar v)) ty
+ | isUnqual v && not (isDataOcc (rdrNameOcc v))
+ = return (TypeSig (L l v) ty)
+checkValSig (L l other) ty
+ = parseError l "Invalid type signature"
+
+mkGadtDecl
+ :: Located RdrName
+ -> LHsType RdrName -- assuming HsType
+ -> ConDecl RdrName
+mkGadtDecl name (L _ (HsForAllTy _ qvars cxt ty)) = ConDecl
+ { con_name = name
+ , con_explicit = Implicit
+ , con_qvars = qvars
+ , con_cxt = cxt
+ , con_details = PrefixCon args
+ , con_res = ResTyGADT res
+ }
+ where
+ (args, res) = splitHsFunType ty
+mkGadtDecl name ty = ConDecl
+ { con_name = name
+ , con_explicit = Implicit
+ , con_qvars = []
+ , con_cxt = noLoc []
+ , con_details = PrefixCon args
+ , con_res = ResTyGADT res
+ }
+ where
+ (args, res) = splitHsFunType ty
+
+-- A variable binding is parsed as a FunBind.
+
+
+ -- The parser left-associates, so there should
+ -- not be any OpApps inside the e's
+splitBang :: LHsExpr RdrName -> Maybe (LHsExpr RdrName, [LHsExpr RdrName])
+-- Splits (f ! g a b) into (f, [(! g), a, g])
+splitBang (L loc (OpApp l_arg bang@(L loc' (HsVar op)) _ r_arg))
+ | op == bang_RDR = Just (l_arg, L loc (SectionR bang arg1) : argns)
+ where
+ (arg1,argns) = split_bang r_arg []
+ split_bang (L _ (HsApp f e)) es = split_bang f (e:es)
+ split_bang e es = (e,es)
+splitBang other = Nothing
+
+isFunLhs :: LHsExpr RdrName
+ -> P (Maybe (Located RdrName, Bool, [LHsExpr RdrName]))
+-- Just (fun, is_infix, arg_pats) if e is a function LHS
+isFunLhs e = go e []
+ where
+ go (L loc (HsVar f)) es
+ | not (isRdrDataCon f) = return (Just (L loc f, False, es))
+ go (L _ (HsApp f e)) es = go f (e:es)
+ go (L _ (HsPar e)) es@(_:_) = go e es
+ go e@(L loc (OpApp l (L loc' (HsVar op)) fix r)) es
+ | Just (e',es') <- splitBang e
+ = do { bang_on <- extension bangPatEnabled
+ ; if bang_on then go e' (es' ++ es)
+ else return (Just (L loc' op, True, (l:r:es))) }
+ -- No bangs; behave just like the next case
+ | not (isRdrDataCon op)
+ = return (Just (L loc' op, True, (l:r:es)))
+ | otherwise
+ = do { mb_l <- go l es
+ ; case mb_l of
+ Just (op', True, j : k : es')
+ -> return (Just (op', True, j : op_app : es'))
+ where
+ op_app = L loc (OpApp k (L loc' (HsVar op)) fix r)
+ _ -> return Nothing }
+ go _ _ = return Nothing
+
+---------------------------------------------------------------------------
+-- Miscellaneous utilities
+
+checkPrecP :: Located Int -> P Int
+checkPrecP (L l i)
+ | 0 <= i && i <= maxPrecedence = return i
+ | otherwise = parseError l "Precedence out of range"
+
+mkRecConstrOrUpdate
+ :: LHsExpr RdrName
+ -> SrcSpan
+ -> HsRecordBinds RdrName
+ -> P (HsExpr RdrName)
+
+mkRecConstrOrUpdate (L l (HsVar c)) loc fs | isRdrDataCon c
+ = return (RecordCon (L l c) noPostTcExpr fs)
+mkRecConstrOrUpdate exp loc fs@(_:_)
+ = return (RecordUpd exp fs placeHolderType placeHolderType)
+mkRecConstrOrUpdate _ loc []
+ = parseError loc "Empty record update"
+
+mkInlineSpec :: Maybe Activation -> Bool -> InlineSpec
+-- The Maybe is becuase the user can omit the activation spec (and usually does)
+mkInlineSpec Nothing True = alwaysInlineSpec -- INLINE
+mkInlineSpec Nothing False = neverInlineSpec -- NOINLINE
+mkInlineSpec (Just act) inl = Inline act inl
+
+
+-----------------------------------------------------------------------------
+-- utilities for foreign declarations
+
+-- supported calling conventions
+--
+data CallConv = CCall CCallConv -- ccall or stdcall
+ | DNCall -- .NET
+
+-- construct a foreign import declaration
+--
+mkImport :: CallConv
+ -> Safety
+ -> (Located FastString, Located RdrName, LHsType RdrName)
+ -> P (HsDecl RdrName)
+mkImport (CCall cconv) safety (entity, v, ty) = do
+ importSpec <- parseCImport entity cconv safety v
+ return (ForD (ForeignImport v ty importSpec False))
+mkImport (DNCall ) _ (entity, v, ty) = do
+ spec <- parseDImport entity
+ return $ ForD (ForeignImport v ty (DNImport spec) False)
+
+-- parse the entity string of a foreign import declaration for the `ccall' or
+-- `stdcall' calling convention'
+--
+parseCImport :: Located FastString
+ -> CCallConv
+ -> Safety
+ -> Located RdrName
+ -> P ForeignImport
+parseCImport (L loc entity) cconv safety v
+ -- FIXME: we should allow white space around `dynamic' and `wrapper' -=chak
+ | entity == FSLIT ("dynamic") =
+ return $ CImport cconv safety nilFS nilFS (CFunction DynamicTarget)
+ | entity == FSLIT ("wrapper") =
+ return $ CImport cconv safety nilFS nilFS CWrapper
+ | otherwise = parse0 (unpackFS entity)
+ where
+ -- using the static keyword?
+ parse0 (' ': rest) = parse0 rest
+ parse0 ('s':'t':'a':'t':'i':'c':rest) = parse1 rest
+ parse0 rest = parse1 rest
+ -- check for header file name
+ parse1 "" = parse4 "" nilFS False nilFS
+ parse1 (' ':rest) = parse1 rest
+ parse1 str@('&':_ ) = parse2 str nilFS
+ parse1 str@('[':_ ) = parse3 str nilFS False
+ parse1 str
+ | ".h" `isSuffixOf` first = parse2 rest (mkFastString first)
+ | otherwise = parse4 str nilFS False nilFS
+ where
+ (first, rest) = break (\c -> c == ' ' || c == '&' || c == '[') str
+ -- check for address operator (indicating a label import)
+ parse2 "" header = parse4 "" header False nilFS
+ parse2 (' ':rest) header = parse2 rest header
+ parse2 ('&':rest) header = parse3 rest header True
+ parse2 str@('[':_ ) header = parse3 str header False
+ parse2 str header = parse4 str header False nilFS
+ -- check for library object name
+ parse3 (' ':rest) header isLbl = parse3 rest header isLbl
+ parse3 ('[':rest) header isLbl =
+ case break (== ']') rest of
+ (lib, ']':rest) -> parse4 rest header isLbl (mkFastString lib)
+ _ -> parseError loc "Missing ']' in entity"
+ parse3 str header isLbl = parse4 str header isLbl nilFS
+ -- check for name of C function
+ parse4 "" header isLbl lib = build (mkExtName (unLoc v)) header isLbl lib
+ parse4 (' ':rest) header isLbl lib = parse4 rest header isLbl lib
+ parse4 str header isLbl lib
+ | all (== ' ') rest = build (mkFastString first) header isLbl lib
+ | otherwise = parseError loc "Malformed entity string"
+ where
+ (first, rest) = break (== ' ') str
+ --
+ build cid header False lib = return $
+ CImport cconv safety header lib (CFunction (StaticTarget cid))
+ build cid header True lib = return $
+ CImport cconv safety header lib (CLabel cid )
+
+--
+-- Unravel a dotnet spec string.
+--
+parseDImport :: Located FastString -> P DNCallSpec
+parseDImport (L loc entity) = parse0 comps
+ where
+ comps = words (unpackFS entity)
+
+ parse0 [] = d'oh
+ parse0 (x : xs)
+ | x == "static" = parse1 True xs
+ | otherwise = parse1 False (x:xs)
+
+ parse1 _ [] = d'oh
+ parse1 isStatic (x:xs)
+ | x == "method" = parse2 isStatic DNMethod xs
+ | x == "field" = parse2 isStatic DNField xs
+ | x == "ctor" = parse2 isStatic DNConstructor xs
+ parse1 isStatic xs = parse2 isStatic DNMethod xs
+
+ parse2 _ _ [] = d'oh
+ parse2 isStatic kind (('[':x):xs) =
+ case x of
+ [] -> d'oh
+ vs | last vs == ']' -> parse3 isStatic kind (init vs) xs
+ parse2 isStatic kind xs = parse3 isStatic kind "" xs
+
+ parse3 isStatic kind assem [x] =
+ return (DNCallSpec isStatic kind assem x
+ -- these will be filled in once known.
+ (error "FFI-dotnet-args")
+ (error "FFI-dotnet-result"))
+ parse3 _ _ _ _ = d'oh
+
+ d'oh = parseError loc "Malformed entity string"
+
+-- construct a foreign export declaration
+--
+mkExport :: CallConv
+ -> (Located FastString, Located RdrName, LHsType RdrName)
+ -> P (HsDecl RdrName)
+mkExport (CCall cconv) (L loc entity, v, ty) = return $
+ ForD (ForeignExport v ty (CExport (CExportStatic entity' cconv)) False)
+ where
+ entity' | nullFS entity = mkExtName (unLoc v)
+ | otherwise = entity
+mkExport DNCall (L loc entity, v, ty) =
+ parseError (getLoc v){-TODO: not quite right-}
+ "Foreign export is not yet supported for .NET"
+
+-- Supplying the ext_name in a foreign decl is optional; if it
+-- isn't there, the Haskell name is assumed. Note that no transformation
+-- of the Haskell name is then performed, so if you foreign export (++),
+-- it's external name will be "++". Too bad; it's important because we don't
+-- want z-encoding (e.g. names with z's in them shouldn't be doubled)
+--
+mkExtName :: RdrName -> CLabelString
+mkExtName rdrNm = mkFastString (occNameString (rdrNameOcc rdrNm))
+\end{code}
+
+
+-----------------------------------------------------------------------------
+-- Misc utils
+
+\begin{code}
+showRdrName :: RdrName -> String
+showRdrName r = showSDoc (ppr r)
+
+parseError :: SrcSpan -> String -> P a
+parseError span s = failSpanMsgP span s
+\end{code}
diff --git a/compiler/parser/cutils.c b/compiler/parser/cutils.c
new file mode 100644
index 0000000000..08832f298d
--- /dev/null
+++ b/compiler/parser/cutils.c
@@ -0,0 +1,70 @@
+/*
+These utility routines are used various
+places in the GHC library.
+*/
+
+/* For GHC 4.08, we are relying on the fact that RtsFlags has
+ * compatible layout with the current version, because we're
+ * #including the current version of RtsFlags.h below. 4.08 didn't
+ * ship with its own RtsFlags.h, unfortunately. For later GHC
+ * versions, we #include the correct RtsFlags.h.
+ */
+#if __GLASGOW_HASKELL__ < 502
+#include "../includes/Rts.h"
+#include "../includes/RtsFlags.h"
+#else
+#include "Rts.h"
+#include "RtsFlags.h"
+#endif
+
+#include "HsFFI.h"
+
+#include <string.h>
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+/*
+Calling 'strlen' and 'memcpy' directly gives problems with GCC's inliner,
+and causes gcc to require too many registers on x84
+*/
+
+HsInt
+ghc_strlen( HsAddr a )
+{
+ return (strlen((char *)a));
+}
+
+HsInt
+ghc_memcmp( HsAddr a1, HsAddr a2, HsInt len )
+{
+ return (memcmp((char *)a1, a2, len));
+}
+
+HsInt
+ghc_memcmp_off( HsAddr a1, HsInt i, HsAddr a2, HsInt len )
+{
+ return (memcmp((char *)a1 + i, a2, len));
+}
+
+void
+enableTimingStats( void ) /* called from the driver */
+{
+#if __GLASGOW_HASKELL__ >= 411
+ RtsFlags.GcFlags.giveStats = ONELINE_GC_STATS;
+#endif
+ /* ignored when bootstrapping with an older GHC */
+}
+
+void
+setHeapSize( HsInt size )
+{
+ RtsFlags.GcFlags.heapSizeSuggestion = size / BLOCK_SIZE;
+ if (RtsFlags.GcFlags.maxHeapSize != 0 &&
+ RtsFlags.GcFlags.heapSizeSuggestion > RtsFlags.GcFlags.maxHeapSize) {
+ RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
+ }
+}
+
+
diff --git a/compiler/parser/cutils.h b/compiler/parser/cutils.h
new file mode 100644
index 0000000000..c7c1867ded
--- /dev/null
+++ b/compiler/parser/cutils.h
@@ -0,0 +1,16 @@
+/* -----------------------------------------------------------------------------
+ *
+ * Utility C functions.
+ *
+ * -------------------------------------------------------------------------- */
+
+#include "HsFFI.h"
+
+// Out-of-line string functions, see PrimPacked.lhs
+HsInt ghc_strlen( HsAddr a );
+HsInt ghc_memcmp( HsAddr a1, HsAddr a2, HsInt len );
+HsInt ghc_memcmp_off( HsAddr a1, HsInt i, HsAddr a2, HsInt len );
+
+
+void enableTimingStats( void );
+void setHeapSize( HsInt size );
diff --git a/compiler/parser/hschooks.c b/compiler/parser/hschooks.c
new file mode 100644
index 0000000000..f3e7447a49
--- /dev/null
+++ b/compiler/parser/hschooks.c
@@ -0,0 +1,55 @@
+/*
+These routines customise the error messages
+for various bits of the RTS. They are linked
+in instead of the defaults.
+*/
+
+/* For GHC 4.08, we are relying on the fact that RtsFlags has
+ * compatible layout with the current version, because we're
+ * #including the current version of RtsFlags.h below. 4.08 didn't
+ * ship with its own RtsFlags.h, unfortunately. For later GHC
+ * versions, we #include the correct RtsFlags.h.
+ */
+#if __GLASGOW_HASKELL__ < 502
+#include "../includes/Rts.h"
+#include "../includes/RtsFlags.h"
+#else
+#include "Rts.h"
+#include "RtsFlags.h"
+#endif
+
+#include "HsFFI.h"
+
+#include <string.h>
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+void
+defaultsHook (void)
+{
+ RtsFlags.GcFlags.heapSizeSuggestion = 6*1024*1024 / BLOCK_SIZE;
+ RtsFlags.GcFlags.maxStkSize = 8*1024*1024 / sizeof(W_);
+#if __GLASGOW_HASKELL__ >= 411
+ /* GHC < 4.11 didn't have these */
+ RtsFlags.GcFlags.giveStats = COLLECT_GC_STATS;
+ RtsFlags.GcFlags.statsFile = stderr;
+#endif
+}
+
+void
+OutOfHeapHook (unsigned long request_size/* always zero these days */,
+ unsigned long heap_size)
+ /* both in bytes */
+{
+ fprintf(stderr, "GHC's heap exhausted: current limit is %lu bytes;\nUse the `-M<size>' option to increase the total heap size.\n",
+ heap_size);
+}
+
+void
+StackOverflowHook (unsigned long stack_size) /* in bytes */
+{
+ fprintf(stderr, "GHC stack-space overflow: current limit is %ld bytes.\nUse the `-K<size>' option to increase it.\n", stack_size);
+}
+
diff --git a/compiler/parser/hschooks.h b/compiler/parser/hschooks.h
new file mode 100644
index 0000000000..4ce1c0f93d
--- /dev/null
+++ b/compiler/parser/hschooks.h
@@ -0,0 +1,9 @@
+/* -----------------------------------------------------------------------------
+ * $Id: hschooks.h,v 1.4 2002/04/22 14:54:10 simonmar Exp $
+ *
+ * Hooks into the RTS from the compiler.
+ *
+ * -------------------------------------------------------------------------- */
+
+#include "HsFFI.h"
+