summaryrefslogtreecommitdiff
path: root/compiler/parser
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/parser')
-rw-r--r--compiler/parser/Lexer.x20
-rw-r--r--compiler/parser/Parser.y156
-rw-r--r--compiler/parser/RdrHsSyn.hs566
3 files changed, 515 insertions, 227 deletions
diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x
index bceb48bf48..f820007bf2 100644
--- a/compiler/parser/Lexer.x
+++ b/compiler/parser/Lexer.x
@@ -72,8 +72,7 @@ module Lexer (
addWarning,
lexTokenStream,
addAnnotation,AddAnn,addAnnsAt,mkParensApiAnn,
- commentToAnnotation,
- moveAnnotations
+ commentToAnnotation
) where
import GhcPrelude
@@ -3069,23 +3068,6 @@ mkParensApiAnn s@(RealSrcSpan ss) = [mj AnnOpenP lo,mj AnnCloseP lc]
lo = mkSrcSpan (srcSpanStart s) (mkSrcLoc f sl (sc+1))
lc = mkSrcSpan (mkSrcLoc f el (ec - 1)) (srcSpanEnd s)
--- | Move the annotations and comments belonging to the @old@ span to the @new@
--- one.
-moveAnnotations :: SrcSpan -> SrcSpan -> P ()
-moveAnnotations old new = P $ \s ->
- let
- updateAnn ((l,a),v)
- | l == old = ((new,a),v)
- | otherwise = ((l,a),v)
- updateComment (l,c)
- | l == old = (new,c)
- | otherwise = (l,c)
- in
- POk s {
- annotations = map updateAnn (annotations s)
- , annotations_comments = map updateComment (annotations_comments s)
- } ()
-
queueComment :: Located Token -> P()
queueComment c = P $ \s -> POk s {
comment_q = commentToAnnotation c : comment_q s
diff --git a/compiler/parser/Parser.y b/compiler/parser/Parser.y
index dd9beadc4d..adfbf2c332 100644
--- a/compiler/parser/Parser.y
+++ b/compiler/parser/Parser.y
@@ -1772,19 +1772,6 @@ sigtypes1 :: { (OrdList (LHsSigType GhcPs)) }
-----------------------------------------------------------------------------
-- Types
-strict_mark :: { Located ([AddAnn],HsSrcBang) }
- : strictness { sL1 $1 (let (a, str) = unLoc $1 in (a, HsSrcBang NoSourceText NoSrcUnpack str)) }
- | unpackedness { sL1 $1 (let (a, prag, unpk) = unLoc $1 in (a, HsSrcBang prag unpk NoSrcStrict)) }
- | unpackedness strictness { sLL $1 $> (let { (a, prag, unpk) = unLoc $1
- ; (a', str) = unLoc $2 }
- in (a ++ a', HsSrcBang prag unpk str)) }
- -- Although UNPACK with no '!' without StrictData and UNPACK with '~' are illegal,
- -- we get a better error message if we parse them here
-
-strictness :: { Located ([AddAnn], SrcStrictness) }
- : '!' { sL1 $1 ([mj AnnBang $1], SrcStrict) }
- | '~' { sL1 $1 ([mj AnnTilde $1], SrcLazy) }
-
unpackedness :: { Located ([AddAnn], SourceText, SrcUnpackedness) }
: '{-# UNPACK' '#-}' { sLL $1 $> ([mo $1, mc $2], getUNPACK_PRAGs $1, SrcUnpack) }
| '{-# NOUNPACK' '#-}' { sLL $1 $> ([mo $1, mc $2], getNOUNPACK_PRAGs $1, SrcNoUnpack) }
@@ -1806,8 +1793,8 @@ ctype :: { LHsType GhcPs }
[mu AnnDcolon $2] }
| type { $1 }
-----------------------
--- Notes for 'ctypedoc'
+-- Note [ctype and ctypedoc]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- It would have been nice to simplify the grammar by unifying `ctype` and
-- ctypedoc` into one production, allowing comments on types everywhere (and
-- rejecting them after parsing, where necessary). This is however not possible
@@ -1840,11 +1827,6 @@ ctypedoc :: { LHsType GhcPs }
-- (Eq a, Ord a)
-- looks so much like a tuple type. We can't tell until we find the =>
--- We have the t1 ~ t2 form both in 'context' and in type,
--- to permit an individual equational constraint without parenthesis.
--- Thus for some reason we allow f :: a~b => blah
--- but not f :: ?x::Int => blah
--- See Note [Parsing ~]
context :: { LHsContext GhcPs }
: btype {% do { (anns,ctx) <- checkContext $1
; if null (unLoc ctx)
@@ -1853,14 +1835,14 @@ context :: { LHsContext GhcPs }
; ams ctx anns
} }
-context_no_ops :: { LHsContext GhcPs }
- : btype_no_ops {% do { ty <- splitTilde (reverse (unLoc $1))
- ; (anns,ctx) <- checkContext ty
- ; if null (unLoc ctx)
- then addAnnotation (gl ty) AnnUnit (gl ty)
+-- See Note [Constr variatons of non-terminals]
+constr_context :: { LHsContext GhcPs }
+ : constr_btype {% do { (anns,ctx) <- checkContext $1
+ ; if null (unLoc ctx)
+ then addAnnotation (gl $1) AnnUnit (gl $1)
else return ()
- ; ams ctx anns
- } }
+ ; ams ctx anns
+ } }
{- Note [GADT decl discards annotations]
~~~~~~~~~~~~~~~~~~~~~
@@ -1906,23 +1888,26 @@ typedoc :: { LHsType GhcPs }
$4)
[mu AnnRarrow $3] }
+-- See Note [Constr variatons of non-terminals]
+constr_btype :: { LHsType GhcPs }
+ : constr_tyapps {% mergeOps (unLoc $1) }
+-- See Note [Constr variatons of non-terminals]
+constr_tyapps :: { Located [Located TyEl] } -- NB: This list is reversed
+ : constr_tyapp { sL1 $1 [$1] }
+ | constr_tyapps constr_tyapp { sLL $1 $> $ $2 : (unLoc $1) }
--- See Note [Parsing ~]
-btype :: { LHsType GhcPs }
- : tyapps {% mergeOps (unLoc $1) }
+-- See Note [Constr variatons of non-terminals]
+constr_tyapp :: { Located TyEl }
+ : tyapp { $1 }
+ | docprev { sL1 $1 $ TyElDocPrev (unLoc $1) }
--- Used for parsing Haskell98-style data constructors,
--- in order to forbid the blasphemous
--- > data Foo = Int :+ Char :* Bool
--- See also Note [Parsing data constructors is hard] in RdrHsSyn
-btype_no_ops :: { Located [LHsType GhcPs] } -- NB: This list is reversed
- : atype_docs { sL1 $1 [$1] }
- | btype_no_ops atype_docs { sLL $1 $> $ $2 : (unLoc $1) }
+btype :: { LHsType GhcPs }
+ : tyapps {% mergeOps $1 }
-tyapps :: { Located [Located TyEl] } -- NB: This list is reversed
- : tyapp { sL1 $1 [$1] }
- | tyapps tyapp { sLL $1 $> $ $2 : (unLoc $1) }
+tyapps :: { [Located TyEl] } -- NB: This list is reversed
+ : tyapp { [$1] }
+ | tyapps tyapp { $2 : $1 }
tyapp :: { Located TyEl }
: atype { sL1 $1 $ TyElOpd (unLoc $1) }
@@ -1932,18 +1917,15 @@ tyapp :: { Located TyEl }
[mj AnnSimpleQuote $1,mj AnnVal $2] }
| SIMPLEQUOTE varop {% ams (sLL $1 $> $ TyElOpr (unLoc $2))
[mj AnnSimpleQuote $1,mj AnnVal $2] }
-
-atype_docs :: { LHsType GhcPs }
- : atype docprev { sLL $1 $> $ HsDocTy noExt $1 $2 }
- | atype { $1 }
+ | '~' { sL1 $1 TyElTilde }
+ | '!' { sL1 $1 TyElBang }
+ | unpackedness { sL1 $1 $ TyElUnpackedness (unLoc $1) }
atype :: { LHsType GhcPs }
: ntgtycon { sL1 $1 (HsTyVar noExt NotPromoted $1) } -- Not including unit tuples
| tyvar { sL1 $1 (HsTyVar noExt NotPromoted $1) } -- (See Note [Unit tuples])
| '*' {% do { warnStarIsType (getLoc $1)
; return $ sL1 $1 (HsStarTy noExt (isUnicode $1)) } }
- | strict_mark atype {% ams (sLL $1 $> (HsBangTy noExt (snd $ unLoc $1) $2))
- (fst $ unLoc $1) } -- Constructor sigs only
| '{' fielddecls '}' {% amms (checkRecordSyntax
(sLL $1 $> $ HsRecTy noExt $2))
-- Constructor sigs only
@@ -2054,23 +2036,6 @@ varids0 :: { Located [Located RdrName] }
: {- empty -} { noLoc [] }
| varids0 tyvar { sLL $1 $> ($2 : unLoc $1) }
-{-
-Note [Parsing ~]
-~~~~~~~~~~~~~~~~
-
-Due to parsing conflicts between laziness annotations in data type
-declarations (see strict_mark) and equality types ~'s are always
-parsed as laziness annotations, and turned into HsOpTy's in the
-correct places using RdrHsSyn.splitTilde.
-
-Since strict_mark is parsed as part of atype which is part of type,
-typedoc and context (where HsEqTy previously appeared) it made most
-sense and was simplest to parse ~ as part of strict_mark and later
-turn them into HsOpTy's.
-
--}
-
-
-----------------------------------------------------------------------------
-- Kinds
@@ -2167,8 +2132,60 @@ constrs1 :: { Located [LConDecl GhcPs] }
>> return (sLL $1 $> (addConDoc $5 $2 : addConDocFirst (unLoc $1) $4)) }
| constr { sL1 $1 [$1] }
+{- Note [Constr variatons of non-terminals]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+In record declarations we assume that 'ctype' used to parse the type will not
+consume the trailing docprev:
+
+ data R = R { field :: Int -- ^ comment on the field }
+
+In 'R' we expect the comment to apply to the entire field, not to 'Int'. The
+same issue is detailed in Note [ctype and ctypedoc].
+
+So, we do not want 'ctype' to consume 'docprev', therefore
+ we do not want 'btype' to consume 'docprev', therefore
+ we do not want 'tyapps' to consume 'docprev'.
+
+At the same time, when parsing a 'constr', we do want to consume 'docprev':
+
+ data T = C Int -- ^ comment on Int
+ Bool -- ^ comment on Bool
+
+So, we do want 'constr_stuff' to consume 'docprev'.
+
+The problem arises because the clauses in 'constr' have the following
+structure:
+
+ (a) context '=>' constr_stuff (e.g. data T a = Ord a => C a)
+ (b) constr_stuff (e.g. data T a = C a)
+
+and to avoid a reduce/reduce conflict, 'context' and 'constr_stuff' must be
+compatible. And for 'context' to be compatible with 'constr_stuff', it must
+consume 'docprev'.
+
+So, we want 'context' to consume 'docprev', therefore
+ we want 'btype' to consume 'docprev', therefore
+ we want 'tyapps' to consume 'docprev'.
+
+Our requirements end up conflicting: for parsing record types, we want 'tyapps'
+to leave 'docprev' alone, but for parsing constructors, we want it to consume
+'docprev'.
+
+As the result, we maintain two parallel hierarchies of non-terminals that
+either consume 'docprev' or not:
+
+ tyapps constr_tyapps
+ btype constr_btype
+ context constr_context
+ ...
+
+They must be kept identical except for their treatment of 'docprev'.
+
+-}
+
constr :: { LConDecl GhcPs }
- : maybe_docnext forall context_no_ops '=>' constr_stuff
+ : maybe_docnext forall constr_context '=>' constr_stuff
{% ams (let (con,details,doc_prev) = unLoc $5 in
addConDoc (L (comb4 $2 $3 $4 $5) (mkConDeclH98 con
(snd $ unLoc $2)
@@ -2190,17 +2207,8 @@ forall :: { Located ([AddAnn], Maybe [LHsTyVarBndr GhcPs]) }
| {- empty -} { noLoc ([], Nothing) }
constr_stuff :: { Located (Located RdrName, HsConDeclDetails GhcPs, Maybe LHsDocString) }
- -- See Note [Parsing data constructors is hard] in RdrHsSyn
- : btype_no_ops {% do { c <- splitCon (unLoc $1)
+ : constr_tyapps {% do { c <- mergeDataCon (unLoc $1)
; return $ sL1 $1 c } }
- | btype_no_ops conop maybe_docprev btype_no_ops
- {% do { lhs <- splitTilde (reverse (unLoc $1))
- ; (_, ds_l) <- checkInfixConstr lhs
- ; let rhs1 = foldl1 mkHsAppTy (reverse (unLoc $4))
- ; (rhs, ds_r) <- checkInfixConstr rhs1
- ; return $ if isJust (ds_l `mplus` $3)
- then sLL $1 $> ($2, InfixCon lhs rhs1, $3)
- else sLL $1 $> ($2, InfixCon lhs rhs, ds_r) } }
fielddecls :: { [LConDeclField GhcPs] }
: {- empty -} { [] }
diff --git a/compiler/parser/RdrHsSyn.hs b/compiler/parser/RdrHsSyn.hs
index e4f74d6b73..b43b0456bd 100644
--- a/compiler/parser/RdrHsSyn.hs
+++ b/compiler/parser/RdrHsSyn.hs
@@ -19,7 +19,7 @@ module RdrHsSyn (
mkTySynonym, mkTyFamInstEqn,
mkTyFamInst,
mkFamDecl, mkLHsSigType,
- splitCon, mkInlinePragma,
+ mkInlinePragma,
mkPatSynMatchGroup,
mkRecConstrOrUpdate, -- HsExp -> [HsFieldUpdate] -> P HsExp
mkTyClD, mkInstD,
@@ -46,7 +46,6 @@ module RdrHsSyn (
checkBlockArguments,
checkPrecP, -- Int -> P Int
checkContext, -- HsType -> P HsContext
- checkInfixConstr,
checkPattern, -- HsExp -> P HsPat
bang_RDR,
checkPatterns, -- SrcLoc -> [HsExp] -> P [HsPat]
@@ -58,8 +57,7 @@ module RdrHsSyn (
checkRecordSyntax,
checkEmptyGADTs,
parseErrorSDoc, hintBangPat,
- splitTilde,
- TyEl(..), mergeOps,
+ TyEl(..), mergeOps, mergeDataCon,
-- Help with processing exports
ImpExpSubSpec(..),
@@ -462,91 +460,92 @@ has_args ((L _ (XMatch _)) : _) = panic "has_args"
{- Note [Parsing data constructors is hard]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-We parse the RHS of the constructor declaration
- data T = C t1 t2
-as a btype_no_ops (treating C as a type constructor) and then convert C to be
-a data constructor. Reason: it might continue like this:
- data T = C t1 t2 :% D Int
-in which case C really /would/ be a type constructor. We can't resolve this
-ambiguity till we come across the constructor oprerator :% (or not, more usually)
-
-So the plan is:
-
-* Parse the data constructor declration as a type (actually btype_no_ops)
-
-* Use 'splitCon' to rejig it into the data constructor, the args, and possibly
- extract a docstring for the constructor
-
-* In doing so, we use 'tyConToDataCon' to convert the RdrName for
- the data con, which has been parsed as a tycon, back to a datacon.
- This is more than just adjusting the name space; for operators we
- need to check that it begins with a colon. E.g.
- data T = (+++)
- will parse ok (since tycons can be operators), but we should reject
- it (Trac #12051).
-
-'splitCon' takes a reversed list @apps@ of types as input, such that
-@foldl1 mkHsAppTy (reverse apps)@ yields the original type. This is because
-this is easy for the parser to produce and we avoid the overhead of unrolling
-'HsAppTy'.
+
+The problem with parsing data constructors is that they look a lot like types.
+Compare:
+
+ (s1) data T = C t1 t2
+ (s2) type T = C t1 t2
+
+Syntactically, there's little difference between these declarations, except in
+(s1) 'C' is a data constructor, but in (s2) 'C' is a type constructor.
+
+This similarity would pose no problem if we knew ahead of time if we are
+parsing a type or a constructor declaration. Looking at (s1) and (s2), a simple
+(but wrong!) rule comes to mind: in 'data' declarations assume we are parsing
+data constructors, and in other contexts (e.g. 'type' declarations) assume we
+are parsing type constructors.
+
+This simple rule does not work because of two problematic cases:
+
+ (p1) data T = C t1 t2 :+ t3
+ (p2) data T = C t1 t2 => t3
+
+In (p1) we encounter (:+) and it turns out we are parsing an infix data
+declaration, so (C t1 t2) is a type and 'C' is a type constructor.
+In (p2) we encounter (=>) and it turns out we are parsing an existential
+context, so (C t1 t2) is a constraint and 'C' is a type constructor.
+
+As the result, in order to determine whether (C t1 t2) declares a data
+constructor, a type, or a context, we would need unlimited lookahead which
+'happy' is not so happy with.
+
+To further complicate matters, the interpretation of (!) and (~) is different
+in constructors and types:
+
+ (b1) type T = C ! D
+ (b2) data T = C ! D
+ (b3) data T = C ! D => E
+
+In (b1) and (b3), (!) is a type operator with two arguments: 'C' and 'D'. At
+the same time, in (b2) it is a strictness annotation: 'C' is a data constructor
+with a single strict argument 'D'. For the programmer, these cases are usually
+easy to tell apart due to whitespace conventions:
+
+ (b2) data T = C !D -- no space after the bang hints that
+ -- it is a strictness annotation
+
+For the parser, on the other hand, this whitespace does not matter. We cannot
+tell apart (b2) from (b3) until we encounter (=>), so it requires unlimited
+lookahead.
+
+The solution that accounts for all of these issues is to initially parse data
+declarations and types as a reversed list of TyEl:
+
+ data TyEl = TyElOpr RdrName
+ | TyElOpd (HsType GhcPs)
+ | TyElBang | TyElTilde
+ | ...
+
+For example, both occurences of (C ! D) in the following example are parsed
+into equal lists of TyEl:
+
+ data T = C ! D => C ! D results in [ TyElOpd (HsTyVar "D")
+ , TyElBang
+ , TyElOpd (HsTyVar "C") ]
+
+Note that elements are in reverse order. Also, 'C' is parsed as a type
+constructor (HsTyVar) even when it is a data constructor. We fix this in
+`tyConToDataCon`.
+
+By the time the list of TyEl is assembled, we have looked ahead enough to
+decide whether to reduce using `mergeOps` (for types) or `mergeDataCon` (for
+data constructors). These functions are where the actual job of parsing is
+done.
-}
-splitCon :: [LHsType GhcPs]
- -> P ( Located RdrName -- constructor name
- , HsConDeclDetails GhcPs -- constructor field information
- , Maybe LHsDocString -- docstring to go on the constructor
- )
+-- | Reinterpret a type constructor, including type operators, as a data
+-- constructor.
-- See Note [Parsing data constructors is hard]
--- This gets given a "type" that should look like
--- C Int Bool
--- or C { x::Int, y::Bool }
--- and returns the pieces
-splitCon apps
- = split apps' []
- where
- oneDoc = [ () | L _ (HsDocTy{}) <- apps ] `lengthIs` 1
- ty = foldl1 mkHsAppTy (reverse apps)
-
- -- the trailing doc, if any, can be extracted first
- (apps', trailing_doc)
- = case apps of
- L _ (HsDocTy _ t ds) : ts | oneDoc -> (t : ts, Just ds)
- ts -> (ts, Nothing)
-
- -- A comment on the constructor is handled a bit differently - it doesn't
- -- remain an 'HsDocTy', but gets lifted out and returned as the third
- -- element of the tuple.
- split [ L _ (HsDocTy _ con con_doc) ] ts = do
- (data_con, con_details, con_doc') <- split [con] ts
- return (data_con, con_details, con_doc' `mplus` Just con_doc)
- split [ L l (HsTyVar _ _ (L _ tc)) ] ts = do
- data_con <- tyConToDataCon l tc
- return (data_con, mk_rest ts, trailing_doc)
- split [ L l (HsTupleTy _ HsBoxedOrConstraintTuple ts) ] []
- = return ( L l (getRdrName (tupleDataCon Boxed (length ts)))
- , PrefixCon ts
- , trailing_doc
- )
- split [ L l _ ] _ = parseErrorSDoc l (text msg <+> ppr ty)
- where msg = "Cannot parse data constructor in a data/newtype declaration:"
- split (u : us) ts = split us (u : ts)
- split _ _ = panic "RdrHsSyn:splitCon"
-
- mk_rest [L _ (HsDocTy _ t@(L _ HsRecTy{}) _)] = mk_rest [t]
- mk_rest [L l (HsRecTy _ flds)] = RecCon (L l flds)
- mk_rest ts = PrefixCon ts
-
-tyConToDataCon :: SrcSpan -> RdrName -> P (Located RdrName)
--- See Note [Parsing data constructors is hard]
--- Data constructor RHSs are parsed as types
+tyConToDataCon :: SrcSpan -> RdrName -> Either (SrcSpan, SDoc) (Located RdrName)
tyConToDataCon loc tc
- | isTcOcc occ
+ | isTcOcc occ || isDataOcc occ
, isLexCon (occNameFS occ)
= return (L loc (setRdrNameSpace tc srcDataName))
| otherwise
- = parseErrorSDoc loc (msg $$ extra)
+ = Left (loc, msg $$ extra)
where
occ = rdrNameOcc tc
@@ -555,22 +554,6 @@ tyConToDataCon loc tc
= text "Perhaps you intended to use ExistentialQuantification"
| otherwise = empty
--- | Split a type to extract the trailing doc string (if there is one) from a
--- type produced by the 'btype_no_ops' production.
-splitDocTy :: LHsType GhcPs -> (LHsType GhcPs, Maybe LHsDocString)
-splitDocTy (L l (HsAppTy x t1 t2)) = (L l (HsAppTy x t1 t2'), ds)
- where ~(t2', ds) = splitDocTy t2
-splitDocTy (L _ (HsDocTy _ ty ds)) = (ty, Just ds)
-splitDocTy ty = (ty, Nothing)
-
--- | Given a type that is a field to an infix data constructor, try to split
--- off a trailing docstring on the type, and check that there are no other
--- docstrings.
-checkInfixConstr :: LHsType GhcPs -> P (LHsType GhcPs, Maybe LHsDocString)
-checkInfixConstr ty = checkNoDocs msg ty' *> pure (ty', doc_string)
- where (ty', doc_string) = splitDocTy ty
- msg = text "infix constructor field"
-
mkPatSynMatchGroup :: Located RdrName
-> Located (OrdList (LHsDecl GhcPs))
-> P (MatchGroup GhcPs (LHsExpr GhcPs))
@@ -1235,6 +1218,7 @@ splitBang (L _ (OpApp _ l_arg bang@(L _ (HsVar _ (L _ op))) r_arg))
split_bang e es = (e,es)
splitBang _ = Nothing
+-- See Note [isFunLhs vs mergeDataCon]
isFunLhs :: LHsExpr GhcPs
-> P (Maybe (Located RdrName, LexicalFixity, [LHsExpr GhcPs],[AddAnn]))
-- A variable binding is parsed as a FunBind.
@@ -1295,38 +1279,64 @@ isFunLhs e = go e [] []
_ -> return Nothing }
go _ _ _ = return Nothing
--- | Transform a list of 'atype' with 'strict_mark' into
--- HsOpTy's of 'eqTyCon_RDR':
---
--- [~a, ~b, c, ~d] ==> (~a) ~ ((b c) ~ d)
---
--- See Note [Parsing ~]
-splitTilde :: [LHsType GhcPs] -> P (LHsType GhcPs)
-splitTilde [] = panic "splitTilde"
-splitTilde (x:xs) = go x xs
- where
- -- We accumulate applications in the LHS until we encounter a laziness
- -- annotation. For example, if we have [Foo, x, y, ~Bar, z], the 'lhs'
- -- accumulator will become '(Foo x) y'. Then we strip the laziness
- -- annotation off 'Bar' and process the tail [Bar, z] recursively.
- --
- -- This leaves us with 'lhs = (Foo x) y' and 'rhs = Bar z'.
- -- In case the tail contained more laziness annotations, they would be
- -- processed similarly. This makes '~' right-associative.
- go lhs [] = return lhs
- go lhs (x:xs)
- | L loc (HsBangTy _ (HsSrcBang NoSourceText NoSrcUnpack SrcLazy) t) <- x
- = do { rhs <- splitTilde (t:xs)
- ; let r = mkLHsOpTy lhs (tildeOp loc) rhs
- ; moveAnnotations loc (getLoc r)
- ; return r }
- | otherwise
- = go (mkHsAppTy lhs x) xs
-
- tildeOp loc = L (srcSpanFirstCharacter loc) eqTyCon_RDR
-
-- | Either an operator or an operand.
data TyEl = TyElOpr RdrName | TyElOpd (HsType GhcPs)
+ | TyElTilde | TyElBang
+ | TyElUnpackedness ([AddAnn], SourceText, SrcUnpackedness)
+ | TyElDocPrev HsDocString
+
+instance Outputable TyEl where
+ ppr (TyElOpr name) = ppr name
+ ppr (TyElOpd ty) = ppr ty
+ ppr TyElTilde = text "~"
+ ppr TyElBang = text "!"
+ ppr (TyElUnpackedness (_, _, unpk)) = ppr unpk
+ ppr (TyElDocPrev doc) = ppr doc
+
+tyElStrictness :: TyEl -> Maybe (AnnKeywordId, SrcStrictness)
+tyElStrictness TyElTilde = Just (AnnTilde, SrcLazy)
+tyElStrictness TyElBang = Just (AnnBang, SrcStrict)
+tyElStrictness _ = Nothing
+
+-- | Extract a strictness/unpackedness annotation from the front of a reversed
+-- 'TyEl' list.
+pStrictMark
+ :: [Located TyEl] -- reversed TyEl
+ -> Maybe ( Located HsSrcBang {- a strictness/upnackedness marker -}
+ , [AddAnn]
+ , [Located TyEl] {- remaining TyEl -})
+pStrictMark (L l1 x1 : L l2 x2 : xs)
+ | Just (strAnnId, str) <- tyElStrictness x1
+ , TyElUnpackedness (unpkAnns, prag, unpk) <- x2
+ = Just ( L (combineSrcSpans l1 l2) (HsSrcBang prag unpk str)
+ , unpkAnns ++ [\s -> addAnnotation s strAnnId l1]
+ , xs )
+pStrictMark (L l x1 : xs)
+ | Just (strAnnId, str) <- tyElStrictness x1
+ = Just ( L l (HsSrcBang NoSourceText NoSrcUnpack str)
+ , [\s -> addAnnotation s strAnnId l]
+ , xs )
+pStrictMark (L l x1 : xs)
+ | TyElUnpackedness (anns, prag, unpk) <- x1
+ = Just ( L l (HsSrcBang prag unpk NoSrcStrict)
+ , anns
+ , xs )
+pStrictMark _ = Nothing
+
+pBangTy
+ :: LHsType GhcPs -- a type to be wrapped inside HsBangTy
+ -> [Located TyEl] -- reversed TyEl
+ -> ( Bool {- has a strict mark been consumed? -}
+ , LHsType GhcPs {- the resulting BangTy -}
+ , P () {- add annotations -}
+ , [Located TyEl] {- remaining TyEl -})
+pBangTy lt@(L l1 _) xs =
+ case pStrictMark xs of
+ Nothing -> (False, lt, pure (), xs)
+ Just (L l2 strictMark, anns, xs') ->
+ let bl = combineSrcSpans l1 l2
+ bt = HsBangTy noExt strictMark lt
+ in (True, L bl bt, addAnnsAt bl anns, xs')
-- | Merge a /reversed/ and /non-empty/ soup of operators and operands
-- into a type.
@@ -1338,22 +1348,71 @@ data TyEl = TyElOpr RdrName | TyElOpd (HsType GhcPs)
--
-- It's a bit silly that we're doing it at all, as the renamer will have to
-- rearrange this, and it'd be easier to keep things separate.
+--
+-- See Note [Parsing data constructors is hard]
mergeOps :: [Located TyEl] -> P (LHsType GhcPs)
-mergeOps = go [] id
+mergeOps (L l1 (TyElOpd t) : xs)
+ | (_, t', addAnns, xs') <- pBangTy (L l1 t) xs
+ , null xs' -- We accept a BangTy only when there are no preceding TyEl.
+ = addAnns >> return t'
+mergeOps all_xs = go (0 :: Int) [] id all_xs
where
+ -- clause (err.1):
+ -- we do not expect to encounter any (NO)UNPACK pragmas
+ go k acc ops_acc (L l (TyElUnpackedness (_, unpkSrc, unpk)):_) =
+ if not (null acc) && (k > 1 || length acc > 1)
+ then failOpUnpackednessCompound (L l unpkSDoc) (ops_acc (mergeAcc acc))
+ else failOpUnpackednessPosition (L l unpkSDoc)
+ where
+ unpkSDoc = case unpkSrc of
+ NoSourceText -> ppr unpk
+ SourceText str -> text str <> text " #-}"
+
+ -- clause (err.2):
+ -- we do not expect to encounter any docs
+ go _ _ _ (L l (TyElDocPrev _):_) =
+ failOpDocPrev l
+
+ -- clause (err.3):
+ -- to improve error messages, we do a bit of guesswork to determine if the
+ -- user intended a '!' or a '~' as a strictness annotation
+ go k acc ops_acc (L l x : xs)
+ | Just (_, str) <- tyElStrictness x
+ , let guess [] = True
+ guess (L _ (TyElOpd _):_) = False
+ guess (L _ (TyElOpr _):_) = True
+ guess (L _ (TyElTilde):_) = True
+ guess (L _ (TyElBang):_) = True
+ guess (L _ (TyElUnpackedness _):_) = True
+ guess (L _ (TyElDocPrev _):xs') = guess xs'
+ in guess xs
+ = if not (null acc) && (k > 1 || length acc > 1)
+ then failOpStrictnessCompound (L l str) (ops_acc (mergeAcc acc))
+ else failOpStrictnessPosition (L l str)
+
-- clause (a):
-- when we encounter an operator, we must have accumulated
-- something for its rhs, and there must be something left
-- to build its lhs.
- go acc ops_acc (L l (TyElOpr op):xs) =
+ go k acc ops_acc (L l (TyElOpr op):xs) =
if null acc || null xs
then failOpFewArgs (L l op)
- else do { a <- splitTilde acc
- ; go [] (\c -> mkLHsOpTy c (L l op) (ops_acc a)) xs }
+ else do { let a = mergeAcc acc
+ ; go (k + 1) [] (\c -> mkLHsOpTy c (L l op) (ops_acc a)) xs }
+
+ -- clause (a.1): interpret 'TyElTilde' as an operator
+ go k acc ops_acc (L l TyElTilde:xs) =
+ let op = eqTyCon_RDR
+ in go k acc ops_acc (L l (TyElOpr op):xs)
+
+ -- clause (a.2): interpret 'TyElBang' as an operator
+ go k acc ops_acc (L l TyElBang:xs) =
+ let op = mkUnqual tcClsName (fsLit "!")
+ in go k acc ops_acc (L l (TyElOpr op):xs)
-- clause (b):
-- whenever an operand is encountered, it is added to the accumulator
- go acc ops_acc (L l (TyElOpd a):xs) = go (L l a:acc) ops_acc xs
+ go k acc ops_acc (L l (TyElOpd a):xs) = go k (L l a:acc) ops_acc xs
-- clause (c):
-- at this point we know that 'acc' is non-empty because
@@ -1364,9 +1423,211 @@ mergeOps = go [] id
-- operator, this is handled by clause (a)
-- 3. 'mergeOps' was called with a list where the head is an
-- operand, this is handled by clause (b)
- go acc ops_acc [] =
- do { a <- splitTilde acc
- ; return (ops_acc a) }
+ go _ acc ops_acc [] =
+ return (ops_acc (mergeAcc acc))
+
+ mergeAcc [] = panic "mergeOps.mergeAcc: empty input"
+ mergeAcc (x:xs) = mkHsAppTys x xs
+
+pInfixSide :: [Located TyEl] -> Maybe (LHsType GhcPs, P (), [Located TyEl])
+pInfixSide (L l (TyElOpd t):xs)
+ | (True, t', addAnns, xs') <- pBangTy (L l t) xs
+ = Just (t', addAnns, xs')
+pInfixSide (L l1 (TyElOpd t1):xs1) = go [L l1 t1] xs1
+ where
+ go acc (L l (TyElOpd t):xs) = go (L l t:acc) xs
+ go acc xs = Just (mergeAcc acc, pure (), xs)
+ mergeAcc [] = panic "pInfixSide.mergeAcc: empty input"
+ mergeAcc (x:xs) = mkHsAppTys x xs
+pInfixSide _ = Nothing
+
+pDocPrev :: [Located TyEl] -> (Maybe LHsDocString, [Located TyEl])
+pDocPrev = go Nothing
+ where
+ go mTrailingDoc (L l (TyElDocPrev doc):xs) =
+ go (mTrailingDoc `mplus` Just (L l doc)) xs
+ go mTrailingDoc xs = (mTrailingDoc, xs)
+
+orErr :: Maybe a -> b -> Either b a
+orErr (Just a) _ = Right a
+orErr Nothing b = Left b
+
+{- Note [isFunLhs vs mergeDataCon]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When parsing a function LHS, we do not know whether to treat (!) as
+a strictness annotation or an infix operator:
+
+ f ! a = ...
+
+Without -XBangPatterns, this parses as (!) f a = ...
+ with -XBangPatterns, this parses as f (!a) = ...
+
+So in function declarations we opted to always parse as if -XBangPatterns
+were off, and then rejig in 'isFunLhs'.
+
+There are two downsides to this approach:
+
+1. It is not particularly elegant, as there's a point in our pipeline where
+ the representation is awfully incorrect. For instance,
+ f !a b !c = ...
+ will be first parsed as
+ (f ! a b) ! c = ...
+
+2. There are cases that it fails to cover, for instance infix declarations:
+ !a + !b = ...
+ will trigger an error.
+
+Unfortunately, we cannot define different productions in the 'happy' grammar
+depending on whether -XBangPatterns are enabled.
+
+When parsing data constructors, we face a similar issue:
+ (a) data T1 = C ! D
+ (b) data T2 = C ! D => ...
+
+In (a) the first bang is a strictness annotation, but in (b) it is a type
+operator. A 'happy'-based parser does not have unlimited lookahead to check for
+=>, so we must first parse (C ! D) into a common representation.
+
+If we tried to mirror the approach used in functions, we would parse both sides
+of => as types, and then rejig. However, we take a different route and use an
+intermediate data structure, a reversed list of 'TyEl'.
+See Note [Parsing data constructors is hard] for details.
+
+This approach does not suffer from the issues of 'isFunLhs':
+
+1. A sequence of 'TyEl' is a dedicated intermediate representation, not an
+ incorrectly parsed type. Therefore, we do not have confusing states in our
+ pipeline. (Except for representing data constructors as type variables).
+
+2. We can handle infix data constructors with strictness annotations:
+ data T a b = !a :+ !b
+
+-}
+
+
+-- | Merge a /reversed/ and /non-empty/ soup of operators and operands
+-- into a data constructor.
+--
+-- User input: @C !A B -- ^ doc@
+-- Input to 'mergeDataCon': ["doc", B, !, A, C]
+-- Output: (C, PrefixCon [!A, B], "doc")
+--
+-- See Note [Parsing data constructors is hard]
+-- See Note [isFunLhs vs mergeDataCon]
+mergeDataCon
+ :: [Located TyEl]
+ -> P ( Located RdrName -- constructor name
+ , HsConDeclDetails GhcPs -- constructor field information
+ , Maybe LHsDocString -- docstring to go on the constructor
+ )
+mergeDataCon all_xs =
+ do { (addAnns, a) <- eitherToP res
+ ; addAnns
+ ; return a }
+ where
+ -- We start by splitting off the trailing documentation comment,
+ -- if any exists.
+ (mTrailingDoc, all_xs') = pDocPrev all_xs
+
+ -- Determine whether the trailing documentation comment exists and is the
+ -- only docstring in this constructor declaration.
+ --
+ -- When true, it means that it applies to the constructor itself:
+ -- data T = C
+ -- A
+ -- B -- ^ Comment on C (singleDoc == True)
+ --
+ -- When false, it means that it applies to the last field:
+ -- data T = C -- ^ Comment on C
+ -- A -- ^ Comment on A
+ -- B -- ^ Comment on B (singleDoc == False)
+ singleDoc = isJust mTrailingDoc &&
+ null [ () | L _ (TyElDocPrev _) <- all_xs' ]
+
+ -- The result of merging the list of reversed TyEl into a
+ -- data constructor, along with [AddAnn].
+ res = goFirst all_xs'
+
+ -- Take the trailing docstring into account when interpreting
+ -- the docstring near the constructor.
+ --
+ -- data T = C -- ^ docstring right after C
+ -- A
+ -- B -- ^ trailing docstring
+ --
+ -- 'mkConDoc' must be applied to the docstring right after C, so that it
+ -- falls back to the trailing docstring when appropriate (see singleDoc).
+ mkConDoc mDoc | singleDoc = mDoc `mplus` mTrailingDoc
+ | otherwise = mDoc
+
+ -- The docstring for the last field of a data constructor.
+ trailingFieldDoc | singleDoc = Nothing
+ | otherwise = mTrailingDoc
+
+ goFirst [ L l (TyElOpd (HsTyVar _ _ (L _ tc))) ]
+ = do { data_con <- tyConToDataCon l tc
+ ; return (pure (), (data_con, PrefixCon [], mTrailingDoc)) }
+ goFirst (L l (TyElOpd (HsRecTy _ fields)):xs)
+ | (mConDoc, xs') <- pDocPrev xs
+ , [ L l' (TyElOpd (HsTyVar _ _ (L _ tc))) ] <- xs'
+ = do { data_con <- tyConToDataCon l' tc
+ ; let mDoc = mTrailingDoc `mplus` mConDoc
+ ; return (pure (), (data_con, RecCon (L l fields), mDoc)) }
+ goFirst [L l (TyElOpd (HsTupleTy _ HsBoxedOrConstraintTuple ts))]
+ = return ( pure ()
+ , ( L l (getRdrName (tupleDataCon Boxed (length ts)))
+ , PrefixCon ts
+ , mTrailingDoc ) )
+ goFirst (L l (TyElOpd t):xs)
+ | (_, t', addAnns, xs') <- pBangTy (L l t) xs
+ = go addAnns Nothing [mkLHsDocTyMaybe t' trailingFieldDoc] xs'
+ goFirst xs =
+ go (pure ()) mTrailingDoc [] xs
+
+ go addAnns mLastDoc ts [ L l (TyElOpd (HsTyVar _ _ (L _ tc))) ]
+ = do { data_con <- tyConToDataCon l tc
+ ; return (addAnns, (data_con, PrefixCon ts, mkConDoc mLastDoc)) }
+ go addAnns mLastDoc ts (L l (TyElDocPrev doc):xs) =
+ go addAnns (mLastDoc `mplus` Just (L l doc)) ts xs
+ go addAnns mLastDoc ts (L l (TyElOpd t):xs)
+ | (_, t', addAnns', xs') <- pBangTy (L l t) xs
+ , t'' <- mkLHsDocTyMaybe t' mLastDoc
+ = go (addAnns >> addAnns') Nothing (t'':ts) xs'
+ go _ _ _ (L _ (TyElOpr _):_) =
+ -- Encountered an operator: backtrack to the beginning and attempt
+ -- to parse as an infix definition.
+ goInfix
+ go _ _ _ _ = Left malformedErr
+ where
+ malformedErr =
+ ( foldr combineSrcSpans noSrcSpan (map getLoc all_xs')
+ , text "Cannot parse data constructor" <+>
+ text "in a data/newtype declaration:" $$
+ nest 2 (hsep . reverse $ map ppr all_xs'))
+
+ goInfix =
+ do { let xs0 = all_xs'
+ ; (rhs_t, rhs_addAnns, xs1) <- pInfixSide xs0 `orErr` malformedErr
+ ; let (mOpDoc, xs2) = pDocPrev xs1
+ ; (op, xs3) <- case xs2 of
+ L l (TyElOpr op) : xs3 ->
+ do { data_con <- tyConToDataCon l op
+ ; return (data_con, xs3) }
+ _ -> Left malformedErr
+ ; let (mLhsDoc, xs4) = pDocPrev xs3
+ ; (lhs_t, lhs_addAnns, xs5) <- pInfixSide xs4 `orErr` malformedErr
+ ; unless (null xs5) (Left malformedErr)
+ ; let rhs = mkLHsDocTyMaybe rhs_t trailingFieldDoc
+ lhs = mkLHsDocTyMaybe lhs_t mLhsDoc
+ addAnns = lhs_addAnns >> rhs_addAnns
+ ; return (addAnns, (op, InfixCon lhs rhs, mkConDoc mOpDoc)) }
+ where
+ malformedErr =
+ ( foldr combineSrcSpans noSrcSpan (map getLoc all_xs')
+ , text "Cannot parse an infix data constructor" <+>
+ text "in a data/newtype declaration:" $$
+ nest 2 (hsep . reverse $ map ppr all_xs'))
---------------------------------------------------------------------------
-- Check for monad comprehensions
@@ -1785,6 +2046,35 @@ failOpFewArgs (L loc op) =
where
too_few = text "Operator applied to too few arguments:" <+> ppr op
+failOpDocPrev :: SrcSpan -> P a
+failOpDocPrev loc = parseErrorSDoc loc msg
+ where
+ msg = text "Unexpected documentation comment."
+
+failOpStrictnessCompound :: Located SrcStrictness -> LHsType GhcPs -> P a
+failOpStrictnessCompound (L _ str) (L loc ty) = parseErrorSDoc loc msg
+ where
+ msg = text "Strictness annotation applied to a compound type." $$
+ text "Did you mean to add parentheses?" $$
+ nest 2 (ppr str <> parens (ppr ty))
+
+failOpStrictnessPosition :: Located SrcStrictness -> P a
+failOpStrictnessPosition (L loc _) = parseErrorSDoc loc msg
+ where
+ msg = text "Strictness annotation cannot appear in this position."
+
+failOpUnpackednessCompound :: Located SDoc -> LHsType GhcPs -> P a
+failOpUnpackednessCompound (L _ unpkSDoc) (L loc ty) = parseErrorSDoc loc msg
+ where
+ msg = unpkSDoc <+> text "applied to a compound type." $$
+ text "Did you mean to add parentheses?" $$
+ nest 2 (unpkSDoc <+> parens (ppr ty))
+
+failOpUnpackednessPosition :: Located SDoc -> P a
+failOpUnpackednessPosition (L loc unpkSDoc) = parseErrorSDoc loc msg
+ where
+ msg = unpkSDoc <+> text "cannot appear in this position."
+
-----------------------------------------------------------------------------
-- Misc utils
@@ -1824,3 +2114,11 @@ mkLHsOpTy :: LHsType GhcPs -> Located RdrName -> LHsType GhcPs -> LHsType GhcPs
mkLHsOpTy x op y =
let loc = getLoc x `combineSrcSpans` getLoc op `combineSrcSpans` getLoc y
in L loc (mkHsOpTy x op y)
+
+mkLHsDocTy :: LHsType GhcPs -> LHsDocString -> LHsType GhcPs
+mkLHsDocTy t doc =
+ let loc = getLoc t `combineSrcSpans` getLoc doc
+ in L loc (HsDocTy noExt t doc)
+
+mkLHsDocTyMaybe :: LHsType GhcPs -> Maybe LHsDocString -> LHsType GhcPs
+mkLHsDocTyMaybe t = maybe t (mkLHsDocTy t)