diff options
| author | simonpj <unknown> | 1996-12-19 18:36:20 +0000 |
|---|---|---|
| committer | simonpj <unknown> | 1996-12-19 18:36:20 +0000 |
| commit | bb521c6bba76f19474f12195b990b29eda66a4e8 (patch) | |
| tree | fecb11771c7d9f25634e6bd5857c991686707b8d /ghc/compiler/reader | |
| parent | c3e7e772db4fbc7171de7b7e98d578ab9cff167c (diff) | |
| download | haskell-bb521c6bba76f19474f12195b990b29eda66a4e8.tar.gz | |
[project @ 1996-12-19 18:35:23 by simonpj]
Adding and removing files
Diffstat (limited to 'ghc/compiler/reader')
| -rw-r--r-- | ghc/compiler/reader/Lex.lhs | 372 |
1 files changed, 372 insertions, 0 deletions
diff --git a/ghc/compiler/reader/Lex.lhs b/ghc/compiler/reader/Lex.lhs new file mode 100644 index 0000000000..a353f79eca --- /dev/null +++ b/ghc/compiler/reader/Lex.lhs @@ -0,0 +1,372 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1996 +% +\section[Lexical analysis]{Lexical analysis} + +\begin{code} +#include "HsVersions.h" + +module Lex ( + + isLexCon, isLexVar, isLexId, isLexSym, + isLexConId, isLexConSym, isLexVarId, isLexVarSym, + mkTupNameStr, + + -- Monad for parser + IfaceToken(..), lexIface, SYN_IE(IfM), thenIf, returnIf, happyError + + ) where + + +IMPORT_1_3(Char(isDigit, isAlpha, isAlphanum, isUpper)) + +import Demand ( Demand {- instance Read -} ) +import FiniteMap ( FiniteMap, listToFM, lookupFM ) +import Maybes ( Maybe(..), MaybeErr(..) ) +import Pretty +import CharSeq ( CSeq ) +import ErrUtils ( Error(..) ) +import Outputable ( Outputable(..) ) +import PprStyle ( PprStyle(..) ) +import Util ( nOfThem, panic ) + +\end{code} + +%************************************************************************ +%* * +\subsection{Lexical categories} +%* * +%************************************************************************ + +These functions test strings to see if they fit the lexical categories +defined in the Haskell report. Normally applied as in e.g. @isCon +(getLocalName foo)@. + +\begin{code} +isLexCon, isLexVar, isLexId, isLexSym, isLexConId, isLexConSym, + isLexVarId, isLexVarSym :: FAST_STRING -> Bool + +isLexCon cs = isLexConId cs || isLexConSym cs +isLexVar cs = isLexVarId cs || isLexVarSym cs + +isLexId cs = isLexConId cs || isLexVarId cs +isLexSym cs = isLexConSym cs || isLexVarSym cs + +------------- + +isLexConId cs + | _NULL_ cs = False + | cs == SLIT("[]") = True + | c == '(' = True -- (), (,), (,,), ... + | otherwise = isUpper c || isUpperISO c + where + c = _HEAD_ cs + +isLexVarId cs + | _NULL_ cs = False + | otherwise = isLower c || isLowerISO c + where + c = _HEAD_ cs + +isLexConSym cs + | _NULL_ cs = False + | otherwise = c == ':' + || cs == SLIT("->") + where + c = _HEAD_ cs + +isLexVarSym cs + | _NULL_ cs = False + | otherwise = isSymbolASCII c + || isSymbolISO c + where + c = _HEAD_ cs + +------------- +isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-" +isSymbolISO c = ord c `elem` (0xd7 : 0xf7 : [0xa1 .. 0xbf]) +isUpperISO c = 0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c +isLowerISO c = 0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c +\end{code} + + +%************************************************************************ +%* * +\subsection{Tuple strings -- ugh!} +%* * +%************************************************************************ + +\begin{code} +mkTupNameStr 0 = SLIT("()") +mkTupNameStr 1 = panic "Name.mkTupNameStr: 1 ???" +mkTupNameStr 2 = _PK_ "(,)" -- not strictly necessary +mkTupNameStr 3 = _PK_ "(,,)" -- ditto +mkTupNameStr 4 = _PK_ "(,,,)" -- ditto +mkTupNameStr n = _PK_ ("(" ++ nOfThem (n-1) ',' ++ ")") +\end{code} + + + +%************************************************************************ +%* * +\subsection{Data types} +%* * +%************************************************************************ + +\begin{code} +data IfaceToken + = ITinterface -- keywords + | ITusages + | ITversions + | ITexports + | ITinstance_modules + | ITinstances + | ITfixities + | ITdeclarations + | ITpragmas + | ITdata + | ITtype + | ITnewtype + | ITderiving + | ITclass + | ITwhere + | ITinstance + | ITinfixl + | ITinfixr + | ITinfix + | ITforall + | ITbang -- magic symbols + | ITvbar + | ITdcolon + | ITcomma + | ITdarrow + | ITdotdot + | ITequal + | ITocurly + | ITdccurly + | ITdocurly + | ITobrack + | IToparen + | ITrarrow + | ITccurly + | ITcbrack + | ITcparen + | ITsemi + | ITinteger Integer -- numbers and names + | ITvarid FAST_STRING + | ITconid FAST_STRING + | ITvarsym FAST_STRING + | ITconsym FAST_STRING + | ITqvarid (FAST_STRING,FAST_STRING) + | ITqconid (FAST_STRING,FAST_STRING) + | ITqvarsym (FAST_STRING,FAST_STRING) + | ITqconsym (FAST_STRING,FAST_STRING) + + -- Stuff for reading unfoldings + | ITarity | ITstrict | ITunfold + | ITdemand [Demand] | ITbottom + | ITlam | ITbiglam | ITcase | ITlet | ITletrec | ITin | ITof + | ITcoerce_in | ITcoerce_out + | ITchar Char | ITstring FAST_STRING + deriving Text -- debugging +\end{code} + +%************************************************************************ +%* * +\subsection{The lexical analyser} +%* * +%************************************************************************ + +\begin{code} +lexIface :: String -> [IfaceToken] + +lexIface input + = _scc_ "Lexer" + case input of + [] -> [] + + -- whitespace and comments + ' ' : cs -> lexIface cs + '\t' : cs -> lexIface cs + '\n' : cs -> lexIface cs + '-' : '-' : cs -> lex_comment cs + +-- Leave out nested comments for now; danger of finding { and - juxtaposed by mistake? +-- '{' : '-' : cs -> lex_nested_comment 1{-one seen-} cs + + '(' : '.' : '.' : ')' : cs -> ITdotdot : lexIface cs + '{' : cs -> ITocurly : lexIface cs + '}' : cs -> ITccurly : lexIface cs + '(' : ',' : cs -> lex_tuple Nothing cs + '(' : ')' : cs -> ITconid SLIT("()") : lexIface cs + '(' : cs -> IToparen : lexIface cs + ')' : cs -> ITcparen : lexIface cs + '[' : ']' : cs -> ITconid SLIT("[]") : lexIface cs + '[' : cs -> ITobrack : lexIface cs + ']' : cs -> ITcbrack : lexIface cs + ',' : cs -> ITcomma : lexIface cs + ':' : ':' : cs -> ITdcolon : lexIface cs + ';' : cs -> ITsemi : lexIface cs + '\"' : cs -> case read input of + ((str, rest) : _) -> ITstring (_PK_ (str::String)) : lexIface rest + '\'' : cs -> case read input of + ((ch, rest) : _) -> ITchar ch : lexIface rest + + '_' : 'S' : '_' : cs -> ITstrict : lex_demand cs + '_' : cs -> lex_keyword cs + + c : cs | isDigit c -> lex_num input + | otherwise -> lex_id input + + other -> error ("lexing:"++other) + where + lex_comment str + = case (span ((/=) '\n') str) of { (junk, rest) -> + lexIface rest } + + ------------------ + lex_demand (c:cs) | isSpace c = lex_demand cs + | otherwise = case readList (c:cs) of + ((demand,rest) : _) -> ITdemand demand : lexIface rest + ----------- + lex_num str + = case (span isDigit str) of { (num, rest) -> + ITinteger (read num) : lexIface rest } + + ------------ + lex_keyword str + = case (span is_kwd_mod_char str) of { (kw, rest) -> + case (lookupFM ifaceKeywordsFM kw) of + Nothing -> panic ("lex_keyword:"++str) + Just xx -> xx : lexIface rest + } + + is_kwd_mod_char '_' = True + is_kwd_mod_char c = isAlphanum c + + ----------- + lex_tuple module_dot orig_cs = go 2 orig_cs + where + go n (',':cs) = go (n+1) cs + go n (')':cs) = end_lex_id module_dot (ITconid (mkTupNameStr n)) cs + go n other = panic ("lex_tuple" ++ orig_cs) + + -- NB: ':' isn't valid inside an identifier, only at the start. + -- otherwise we get confused by a::t! + is_id_char c = isAlphanum c || c `elem` "_'!#$%&*+./<=>?@\\^|-~" -- ToDo: add ISOgraphic + + lex_id cs = go [] cs + where + go xs (f :cs) | is_kwd_mod_char f = go (f : xs) cs + go xs ('.':cs) | not (null xs) = lex_id2 (Just (_PK_ (reverse xs))) [] cs + go xs cs = lex_id2 Nothing xs cs + + -- Dealt with the Module.part + lex_id2 module_dot [] ('[': ']': cs) = end_lex_id module_dot (ITconid SLIT("[]")) cs + lex_id2 module_dot [] ('(': ')': cs) = end_lex_id module_dot (ITconid SLIT("()")) cs + lex_id2 module_dot [] ('(': ',': cs) = lex_tuple module_dot cs + lex_id2 module_dot [] (':' : cs) = lex_id3 module_dot [':'] cs + lex_id2 module_dot xs cs = lex_id3 module_dot xs cs + + -- Dealt with [], (), : special cases + lex_id3 module_dot xs (f:cs) | is_id_char f = lex_id3 module_dot (f : xs) cs + + lex_id3 Nothing xs rest = case lookupFM haskellKeywordsFM rxs of + Just kwd_token -> kwd_token : lexIface rest + other -> (mk_var_token rxs) : lexIface rest + where + rxs = reverse xs + + lex_id3 (Just m) xs rest = end_lex_id (Just m) (mk_var_token (reverse xs)) rest + + mk_var_token xs@(f:_) | isUpper f || isUpperISO f = ITconid n + | f == ':' = ITconsym n + | isAlpha f = ITvarid n + | otherwise = ITvarsym n + where + n = _PK_ xs + + end_lex_id (Just m) (ITconid n) cs = ITqconid (m,n) : lexIface cs + end_lex_id (Just m) (ITvarid n) cs = ITqvarid (m,n) : lexIface cs + end_lex_id (Just m) (ITconsym n) cs = ITqconsym (m,n): lexIface cs + end_lex_id (Just m) (ITvarsym n) cs = ITqvarsym (m,n): lexIface cs + end_lex_id (Just m) ITbang cs = ITqvarsym (m,SLIT("!")) : lexIface cs + end_lex_id (Just m) token cs = panic ("end_lex_id:" ++ show token) + end_lex_id Nothing token cs = token : lexIface cs + + ------------ + ifaceKeywordsFM :: FiniteMap String IfaceToken + ifaceKeywordsFM = listToFM [ + ("interface_", ITinterface) + ,("usages_", ITusages) + ,("versions_", ITversions) + ,("exports_", ITexports) + ,("instance_modules_", ITinstance_modules) + ,("instances_", ITinstances) + ,("fixities_", ITfixities) + ,("declarations_", ITdeclarations) + ,("pragmas_", ITpragmas) + ,("forall_", ITforall) + ,("U_", ITunfold) + ,("A_", ITarity) + ,("coerce_in_", ITcoerce_in) + ,("coerce_out_", ITcoerce_out) + ,("A_", ITarity) + ,("A_", ITarity) + ,("!_", ITbottom) + + ] + + haskellKeywordsFM = listToFM [ + ("data", ITdata) + ,("type", ITtype) + ,("newtype", ITnewtype) + ,("class", ITclass) + ,("where", ITwhere) + ,("instance", ITinstance) + ,("infixl", ITinfixl) + ,("infixr", ITinfixr) + ,("infix", ITinfix) + ,("case", ITcase) + ,("of", ITof) + ,("in", ITin) + ,("let", ITlet) + ,("letrec", ITletrec) + ,("deriving", ITderiving) + + ,("->", ITrarrow) + ,("\\", ITlam) + ,("/\\", ITbiglam) + ,("|", ITvbar) + ,("!", ITbang) + ,("=>", ITdarrow) + ,("=", ITequal) + ] +\end{code} + + +%************************************************************************ +%* * +\subsection{Other utility functions +%* * +%************************************************************************ + +\begin{code} +type IfM a = MaybeErr a Error + +returnIf :: a -> IfM a +thenIf :: IfM a -> (a -> IfM b) -> IfM b +happyError :: Int -> [IfaceToken] -> IfM a + +returnIf a = Succeeded a + +thenIf (Succeeded a) k = k a +thenIf (Failed err) _ = Failed err + +happyError ln toks = Failed (ifaceParseErr ln toks) + +----------------------------------------------------------------- + +ifaceParseErr ln toks sty + = ppCat [ppPStr SLIT("Interface-file parse error: line"), ppInt ln, ppStr "toks=", ppStr (show (take 10 toks))] +\end{code} |
