3 files changed, 515 insertions, 227 deletions
diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x
index bceb48bf48..f820007bf2 100644
--- a/compiler/parser/Lexer.x
+++ b/compiler/parser/Lexer.x
@@ -72,8 +72,7 @@ module Lexer (
    addWarning,
    lexTokenStream,
    addAnnotation,AddAnn,addAnnsAt,mkParensApiAnn,
-   commentToAnnotation,
-   moveAnnotations
+   commentToAnnotation
   ) where
 
 import GhcPrelude
@@ -3069,23 +3068,6 @@ mkParensApiAnn s@(RealSrcSpan ss) = [mj AnnOpenP lo,mj AnnCloseP lc]
     lo = mkSrcSpan (srcSpanStart s)         (mkSrcLoc f sl (sc+1))
     lc = mkSrcSpan (mkSrcLoc f el (ec - 1)) (srcSpanEnd s)
 
--- | Move the annotations and comments belonging to the @old@ span to the @new@
---   one.
-moveAnnotations :: SrcSpan -> SrcSpan -> P ()
-moveAnnotations old new = P $ \s ->
-  let
-    updateAnn ((l,a),v)
-      | l == old = ((new,a),v)
-      | otherwise = ((l,a),v)
-    updateComment (l,c)
-      | l == old = (new,c)
-      | otherwise = (l,c)
-  in
-    POk s {
-       annotations = map updateAnn (annotations s)
-     , annotations_comments = map updateComment (annotations_comments s)
-     } ()
-
 queueComment :: Located Token -> P()
 queueComment c = P $ \s -> POk s {
   comment_q = commentToAnnotation c : comment_q s
diff --git a/compiler/parser/Parser.y b/compiler/parser/Parser.y
index dd9beadc4d..adfbf2c332 100644
--- a/compiler/parser/Parser.y
+++ b/compiler/parser/Parser.y
@@ -1772,19 +1772,6 @@ sigtypes1 :: { (OrdList (LHsSigType GhcPs)) }
 -----------------------------------------------------------------------------
 -- Types
 
-strict_mark :: { Located ([AddAnn],HsSrcBang) }
-        : strictness { sL1 $1 (let (a, str) = unLoc $1 in (a, HsSrcBang NoSourceText NoSrcUnpack str)) }
-        | unpackedness { sL1 $1 (let (a, prag, unpk) = unLoc $1 in (a, HsSrcBang prag unpk NoSrcStrict)) }
-        | unpackedness strictness { sLL $1 $> (let { (a, prag, unpk) = unLoc $1
-                                                   ; (a', str) = unLoc $2 }
-                                                in (a ++ a', HsSrcBang prag unpk str)) }
-        -- Although UNPACK with no '!' without StrictData and UNPACK with '~' are illegal,
-        -- we get a better error message if we parse them here
-
-strictness :: { Located ([AddAnn], SrcStrictness) }
-        : '!' { sL1 $1 ([mj AnnBang $1], SrcStrict) }
-        | '~' { sL1 $1 ([mj AnnTilde $1], SrcLazy) }
-
 unpackedness :: { Located ([AddAnn], SourceText, SrcUnpackedness) }
         : '{-# UNPACK' '#-}'   { sLL $1 $> ([mo $1, mc $2], getUNPACK_PRAGs $1, SrcUnpack) }
         | '{-# NOUNPACK' '#-}' { sLL $1 $> ([mo $1, mc $2], getNOUNPACK_PRAGs $1, SrcNoUnpack) }
@@ -1806,8 +1793,8 @@ ctype   :: { LHsType GhcPs }
                                              [mu AnnDcolon $2] }
         | type                        { $1 }
 
-----------------------
--- Notes for 'ctypedoc'
+-- Note [ctype and ctypedoc]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 -- It would have been nice to simplify the grammar by unifying `ctype` and
 -- ctypedoc` into one production, allowing comments on types everywhere (and
 -- rejecting them after parsing, where necessary).  This is however not possible
@@ -1840,11 +1827,6 @@ ctypedoc :: { LHsType GhcPs }
 --      (Eq a, Ord a)
 -- looks so much like a tuple type.  We can't tell until we find the =>
 
--- We have the t1 ~ t2 form both in 'context' and in type,
--- to permit an individual equational constraint without parenthesis.
--- Thus for some reason we allow    f :: a~b => blah
--- but not                          f :: ?x::Int => blah
--- See Note [Parsing ~]
 context :: { LHsContext GhcPs }
         :  btype                        {% do { (anns,ctx) <- checkContext $1
                                                 ; if null (unLoc ctx)
@@ -1853,14 +1835,14 @@ context :: { LHsContext GhcPs }
                                                 ; ams ctx anns
                                                 } }
 
-context_no_ops :: { LHsContext GhcPs }
-        : btype_no_ops                 {% do { ty <- splitTilde (reverse (unLoc $1))
-                                             ; (anns,ctx) <- checkContext ty
-                                             ; if null (unLoc ctx)
-                                                   then addAnnotation (gl ty) AnnUnit (gl ty)
+-- See Note [Constr variatons of non-terminals]
+constr_context :: { LHsContext GhcPs }
+        :  constr_btype                 {% do { (anns,ctx) <- checkContext $1
+                                                ; if null (unLoc ctx)
+                                                   then addAnnotation (gl $1) AnnUnit (gl $1)
                                                    else return ()
-                                             ; ams ctx anns
-                                             } }
+                                                ; ams ctx anns
+                                                } }
 
 {- Note [GADT decl discards annotations]
 ~~~~~~~~~~~~~~~~~~~~~
@@ -1906,23 +1888,26 @@ typedoc :: { LHsType GhcPs }
                                                          $4)
                                                 [mu AnnRarrow $3] }
 
+-- See Note [Constr variatons of non-terminals]
+constr_btype :: { LHsType GhcPs }
+        : constr_tyapps                 {% mergeOps (unLoc $1) }
 
+-- See Note [Constr variatons of non-terminals]
+constr_tyapps :: { Located [Located TyEl] } -- NB: This list is reversed
+        : constr_tyapp                  { sL1 $1 [$1] }
+        | constr_tyapps constr_tyapp    { sLL $1 $> $ $2 : (unLoc $1) }
 
--- See Note [Parsing ~]
-btype :: { LHsType GhcPs }
-      : tyapps                      {%  mergeOps (unLoc $1) }
+-- See Note [Constr variatons of non-terminals]
+constr_tyapp :: { Located TyEl }
+        : tyapp                         { $1 }
+        | docprev                       { sL1 $1 $ TyElDocPrev (unLoc $1) }
 
--- Used for parsing Haskell98-style data constructors,
--- in order to forbid the blasphemous
--- > data Foo = Int :+ Char :* Bool
--- See also Note [Parsing data constructors is hard] in RdrHsSyn
-btype_no_ops :: { Located [LHsType GhcPs] } -- NB: This list is reversed
-        : atype_docs                    { sL1 $1 [$1] }
-        | btype_no_ops atype_docs       { sLL $1 $> $ $2 : (unLoc $1) }
+btype :: { LHsType GhcPs }
+        : tyapps                        {% mergeOps $1 }
 
-tyapps :: { Located [Located TyEl] } -- NB: This list is reversed
-        : tyapp                         { sL1 $1 [$1] }
-        | tyapps tyapp                  { sLL $1 $> $ $2 : (unLoc $1) }
+tyapps :: { [Located TyEl] } -- NB: This list is reversed
+        : tyapp                         { [$1] }
+        | tyapps tyapp                  { $2 : $1 }
 
 tyapp :: { Located TyEl }
         : atype                         { sL1 $1 $ TyElOpd (unLoc $1) }
@@ -1932,18 +1917,15 @@ tyapp :: { Located TyEl }
                                                [mj AnnSimpleQuote $1,mj AnnVal $2] }
         | SIMPLEQUOTE varop             {% ams (sLL $1 $> $ TyElOpr (unLoc $2))
                                                [mj AnnSimpleQuote $1,mj AnnVal $2] }
-
-atype_docs :: { LHsType GhcPs }
-        : atype docprev                 { sLL $1 $> $ HsDocTy noExt $1 $2 }
-        | atype                         { $1 }
+        | '~'                           { sL1 $1 TyElTilde }
+        | '!'                           { sL1 $1 TyElBang }
+        | unpackedness                  { sL1 $1 $ TyElUnpackedness (unLoc $1) }
 
 atype :: { LHsType GhcPs }
         : ntgtycon                       { sL1 $1 (HsTyVar noExt NotPromoted $1) }      -- Not including unit tuples
         | tyvar                          { sL1 $1 (HsTyVar noExt NotPromoted $1) }      -- (See Note [Unit tuples])
         | '*'                            {% do { warnStarIsType (getLoc $1)
                                                ; return $ sL1 $1 (HsStarTy noExt (isUnicode $1)) } }
-        | strict_mark atype              {% ams (sLL $1 $> (HsBangTy noExt (snd $ unLoc $1) $2))
-                                                (fst $ unLoc $1) }  -- Constructor sigs only
         | '{' fielddecls '}'             {% amms (checkRecordSyntax
                                                     (sLL $1 $> $ HsRecTy noExt $2))
                                                         -- Constructor sigs only
@@ -2054,23 +2036,6 @@ varids0 :: { Located [Located RdrName] }
         : {- empty -}                   { noLoc [] }
         | varids0 tyvar                 { sLL $1 $> ($2 : unLoc $1) }
 
-{-
-Note [Parsing ~]
-~~~~~~~~~~~~~~~~
-
-Due to parsing conflicts between laziness annotations in data type
-declarations (see strict_mark) and equality types ~'s are always
-parsed as laziness annotations, and turned into HsOpTy's in the
-correct places using RdrHsSyn.splitTilde.
-
-Since strict_mark is parsed as part of atype which is part of type,
-typedoc and context (where HsEqTy previously appeared) it made most
-sense and was simplest to parse ~ as part of strict_mark and later
-turn them into HsOpTy's.
-
--}
-
-
 -----------------------------------------------------------------------------
 -- Kinds
 
@@ -2167,8 +2132,60 @@ constrs1 :: { Located [LConDecl GhcPs] }
                >> return (sLL $1 $> (addConDoc $5 $2 : addConDocFirst (unLoc $1) $4)) }
         | constr                                          { sL1 $1 [$1] }
 
+{- Note [Constr variatons of non-terminals]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+In record declarations we assume that 'ctype' used to parse the type will not
+consume the trailing docprev:
+
+  data R = R { field :: Int -- ^ comment on the field }
+
+In 'R' we expect the comment to apply to the entire field, not to 'Int'. The
+same issue is detailed in Note [ctype and ctypedoc].
+
+So, we do not want 'ctype'  to consume 'docprev', therefore
+    we do not want 'btype'  to consume 'docprev', therefore
+    we do not want 'tyapps' to consume 'docprev'.
+
+At the same time, when parsing a 'constr', we do want to consume 'docprev':
+
+  data T = C Int  -- ^ comment on Int
+             Bool -- ^ comment on Bool
+
+So, we do want 'constr_stuff' to consume 'docprev'.
+
+The problem arises because the clauses in 'constr' have the following
+structure:
+
+  (a)  context '=>' constr_stuff   (e.g.  data T a = Ord a => C a)
+  (b)               constr_stuff   (e.g.  data T a =          C a)
+
+and to avoid a reduce/reduce conflict, 'context' and 'constr_stuff' must be
+compatible. And for 'context' to be compatible with 'constr_stuff', it must
+consume 'docprev'.
+
+So, we want 'context'  to consume 'docprev', therefore
+    we want 'btype'    to consume 'docprev', therefore
+    we want 'tyapps'   to consume 'docprev'.
+
+Our requirements end up conflicting: for parsing record types, we want 'tyapps'
+to leave 'docprev' alone, but for parsing constructors, we want it to consume
+'docprev'.
+
+As the result, we maintain two parallel hierarchies of non-terminals that
+either consume 'docprev' or not:
+
+  tyapps      constr_tyapps
+  btype       constr_btype
+  context     constr_context
+  ...
+
+They must be kept identical except for their treatment of 'docprev'.
+
+-}
+
 constr :: { LConDecl GhcPs }
-        : maybe_docnext forall context_no_ops '=>' constr_stuff
+        : maybe_docnext forall constr_context '=>' constr_stuff
                 {% ams (let (con,details,doc_prev) = unLoc $5 in
                   addConDoc (L (comb4 $2 $3 $4 $5) (mkConDeclH98 con
                                                        (snd $ unLoc $2)
@@ -2190,17 +2207,8 @@ forall :: { Located ([AddAnn], Maybe [LHsTyVarBndr GhcPs]) }
         | {- empty -}                 { noLoc ([], Nothing) }
 
 constr_stuff :: { Located (Located RdrName, HsConDeclDetails GhcPs, Maybe LHsDocString) }
-    -- See Note [Parsing data constructors is hard] in RdrHsSyn
-        : btype_no_ops                     {% do { c <- splitCon (unLoc $1)
+        : constr_tyapps                    {% do { c <- mergeDataCon (unLoc $1)
                                                  ; return $ sL1 $1 c } }
-        | btype_no_ops conop maybe_docprev btype_no_ops
-            {% do { lhs <- splitTilde (reverse (unLoc $1))
-                  ; (_, ds_l) <- checkInfixConstr lhs
-                  ; let rhs1 = foldl1 mkHsAppTy (reverse (unLoc $4))
-                  ; (rhs, ds_r) <- checkInfixConstr rhs1
-                  ; return $ if isJust (ds_l `mplus` $3)
-                               then sLL $1 $> ($2, InfixCon lhs rhs1, $3)
-                               else sLL $1 $> ($2, InfixCon lhs rhs, ds_r) } }
 
 fielddecls :: { [LConDeclField GhcPs] }
         : {- empty -}     { [] }
diff --git a/compiler/parser/RdrHsSyn.hs b/compiler/parser/RdrHsSyn.hs
index e4f74d6b73..b43b0456bd 100644
--- a/compiler/parser/RdrHsSyn.hs
+++ b/compiler/parser/RdrHsSyn.hs
@@ -19,7 +19,7 @@ module   RdrHsSyn (
         mkTySynonym, mkTyFamInstEqn,
         mkTyFamInst,
         mkFamDecl, mkLHsSigType,
-        splitCon, mkInlinePragma,
+        mkInlinePragma,
         mkPatSynMatchGroup,
         mkRecConstrOrUpdate, -- HsExp -> [HsFieldUpdate] -> P HsExp
         mkTyClD, mkInstD,
@@ -46,7 +46,6 @@ module   RdrHsSyn (
         checkBlockArguments,
         checkPrecP,           -- Int -> P Int
         checkContext,         -- HsType -> P HsContext
-        checkInfixConstr,
         checkPattern,         -- HsExp -> P HsPat
         bang_RDR,
         checkPatterns,        -- SrcLoc -> [HsExp] -> P [HsPat]
@@ -58,8 +57,7 @@ module   RdrHsSyn (
         checkRecordSyntax,
         checkEmptyGADTs,
         parseErrorSDoc, hintBangPat,
-        splitTilde,
-        TyEl(..), mergeOps,
+        TyEl(..), mergeOps, mergeDataCon,
 
         -- Help with processing exports
         ImpExpSubSpec(..),
@@ -462,91 +460,92 @@ has_args ((L _ (XMatch _)) : _) = panic "has_args"
 
 {- Note [Parsing data constructors is hard]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-We parse the RHS of the constructor declaration
-     data T = C t1 t2
-as a btype_no_ops (treating C as a type constructor) and then convert C to be
-a data constructor.  Reason: it might continue like this:
-     data T = C t1 t2 :% D Int
-in which case C really /would/ be a type constructor.  We can't resolve this
-ambiguity till we come across the constructor oprerator :% (or not, more usually)
-
-So the plan is:
-
-* Parse the data constructor declration as a type (actually btype_no_ops)
-
-* Use 'splitCon' to rejig it into the data constructor, the args, and possibly
-  extract a docstring for the constructor
-
-* In doing so, we use 'tyConToDataCon' to convert the RdrName for
-  the data con, which has been parsed as a tycon, back to a datacon.
-  This is more than just adjusting the name space; for operators we
-  need to check that it begins with a colon.  E.g.
-     data T = (+++)
-  will parse ok (since tycons can be operators), but we should reject
-  it (Trac #12051).
-
-'splitCon' takes a reversed list @apps@ of types as input, such that
-@foldl1 mkHsAppTy (reverse apps)@ yields the original type. This is because
-this is easy for the parser to produce and we avoid the overhead of unrolling
-'HsAppTy'.
+
+The problem with parsing data constructors is that they look a lot like types.
+Compare:
+
+  (s1)   data T = C t1 t2
+  (s2)   type T = C t1 t2
+
+Syntactically, there's little difference between these declarations, except in
+(s1) 'C' is a data constructor, but in (s2) 'C' is a type constructor.
+
+This similarity would pose no problem if we knew ahead of time if we are
+parsing a type or a constructor declaration. Looking at (s1) and (s2), a simple
+(but wrong!) rule comes to mind: in 'data' declarations assume we are parsing
+data constructors, and in other contexts (e.g. 'type' declarations) assume we
+are parsing type constructors.
+
+This simple rule does not work because of two problematic cases:
+
+  (p1)   data T = C t1 t2 :+ t3
+  (p2)   data T = C t1 t2 => t3
+
+In (p1) we encounter (:+) and it turns out we are parsing an infix data
+declaration, so (C t1 t2) is a type and 'C' is a type constructor.
+In (p2) we encounter (=>) and it turns out we are parsing an existential
+context, so (C t1 t2) is a constraint and 'C' is a type constructor.
+
+As the result, in order to determine whether (C t1 t2) declares a data
+constructor, a type, or a context, we would need unlimited lookahead which
+'happy' is not so happy with.
+
+To further complicate matters, the interpretation of (!) and (~) is different
+in constructors and types:
+
+  (b1)   type T = C ! D
+  (b2)   data T = C ! D
+  (b3)   data T = C ! D => E
+
+In (b1) and (b3), (!) is a type operator with two arguments: 'C' and 'D'. At
+the same time, in (b2) it is a strictness annotation: 'C' is a data constructor
+with a single strict argument 'D'. For the programmer, these cases are usually
+easy to tell apart due to whitespace conventions:
+
+  (b2)   data T = C !D         -- no space after the bang hints that
+                               -- it is a strictness annotation
+
+For the parser, on the other hand, this whitespace does not matter. We cannot
+tell apart (b2) from (b3) until we encounter (=>), so it requires unlimited
+lookahead.
+
+The solution that accounts for all of these issues is to initially parse data
+declarations and types as a reversed list of TyEl:
+
+  data TyEl = TyElOpr RdrName
+            | TyElOpd (HsType GhcPs)
+            | TyElBang | TyElTilde
+            | ...
+
+For example, both occurences of (C ! D) in the following example are parsed
+into equal lists of TyEl:
+
+  data T = C ! D => C ! D   results in   [ TyElOpd (HsTyVar "D")
+                                         , TyElBang
+                                         , TyElOpd (HsTyVar "C") ]
+
+Note that elements are in reverse order. Also, 'C' is parsed as a type
+constructor (HsTyVar) even when it is a data constructor. We fix this in
+`tyConToDataCon`.
+
+By the time the list of TyEl is assembled, we have looked ahead enough to
+decide whether to reduce using `mergeOps` (for types) or `mergeDataCon` (for
+data constructors). These functions are where the actual job of parsing is
+done.
 
 -}
 
-splitCon :: [LHsType GhcPs]
-      -> P ( Located RdrName         -- constructor name
-           , HsConDeclDetails GhcPs  -- constructor field information
-           , Maybe LHsDocString      -- docstring to go on the constructor
-           )
+-- | Reinterpret a type constructor, including type operators, as a data
+--   constructor.
 -- See Note [Parsing data constructors is hard]
--- This gets given a "type" that should look like
---      C Int Bool
--- or   C { x::Int, y::Bool }
--- and returns the pieces
-splitCon apps
- = split apps' []
- where
-   oneDoc = [ () | L _ (HsDocTy{}) <- apps ] `lengthIs` 1
-   ty = foldl1 mkHsAppTy (reverse apps)
-
-   -- the trailing doc, if any, can be extracted first
-   (apps', trailing_doc)
-     = case apps of
-         L _ (HsDocTy _ t ds) : ts | oneDoc -> (t : ts, Just ds)
-         ts -> (ts, Nothing)
-
-   -- A comment on the constructor is handled a bit differently - it doesn't
-   -- remain an 'HsDocTy', but gets lifted out and returned as the third
-   -- element of the tuple.
-   split [ L _ (HsDocTy _ con con_doc) ] ts = do
-     (data_con, con_details, con_doc') <- split [con] ts
-     return (data_con, con_details, con_doc' `mplus` Just con_doc)
-   split [ L l (HsTyVar _ _ (L _ tc)) ] ts = do
-     data_con <- tyConToDataCon l tc
-     return (data_con, mk_rest ts, trailing_doc)
-   split [ L l (HsTupleTy _ HsBoxedOrConstraintTuple ts) ] []
-     = return ( L l (getRdrName (tupleDataCon Boxed (length ts)))
-              , PrefixCon ts
-              , trailing_doc
-              )
-   split [ L l _ ] _ = parseErrorSDoc l (text msg <+> ppr ty)
-     where msg = "Cannot parse data constructor in a data/newtype declaration:"
-   split (u : us) ts = split us (u : ts)
-   split _ _ = panic "RdrHsSyn:splitCon"
-
-   mk_rest [L _ (HsDocTy _ t@(L _ HsRecTy{}) _)] = mk_rest [t]
-   mk_rest [L l (HsRecTy _ flds)] = RecCon (L l flds)
-   mk_rest ts                     = PrefixCon ts
-
-tyConToDataCon :: SrcSpan -> RdrName -> P (Located RdrName)
--- See Note [Parsing data constructors is hard]
--- Data constructor RHSs are parsed as types
+tyConToDataCon :: SrcSpan -> RdrName -> Either (SrcSpan, SDoc) (Located RdrName)
 tyConToDataCon loc tc
-  | isTcOcc occ
+  | isTcOcc occ || isDataOcc occ
   , isLexCon (occNameFS occ)
   = return (L loc (setRdrNameSpace tc srcDataName))
 
   | otherwise
-  = parseErrorSDoc loc (msg $$ extra)
+  = Left (loc, msg $$ extra)
   where
     occ = rdrNameOcc tc
 
@@ -555,22 +554,6 @@ tyConToDataCon loc tc
           = text "Perhaps you intended to use ExistentialQuantification"
           | otherwise = empty
 
--- | Split a type to extract the trailing doc string (if there is one) from a
--- type produced by the 'btype_no_ops' production.
-splitDocTy :: LHsType GhcPs -> (LHsType GhcPs, Maybe LHsDocString)
-splitDocTy (L l (HsAppTy x t1 t2)) = (L l (HsAppTy x t1 t2'), ds)
-  where ~(t2', ds) = splitDocTy t2
-splitDocTy (L _ (HsDocTy _ ty ds)) = (ty, Just ds)
-splitDocTy ty = (ty, Nothing)
-
--- | Given a type that is a field to an infix data constructor, try to split
--- off a trailing docstring on the type, and check that there are no other
--- docstrings.
-checkInfixConstr :: LHsType GhcPs -> P (LHsType GhcPs, Maybe LHsDocString)
-checkInfixConstr ty = checkNoDocs msg ty' *> pure (ty', doc_string)
-  where (ty', doc_string) = splitDocTy ty
-        msg = text "infix constructor field"
-
 mkPatSynMatchGroup :: Located RdrName
                    -> Located (OrdList (LHsDecl GhcPs))
                    -> P (MatchGroup GhcPs (LHsExpr GhcPs))
@@ -1235,6 +1218,7 @@ splitBang (L _ (OpApp _ l_arg bang@(L _ (HsVar _ (L _ op))) r_arg))
     split_bang e                   es = (e,es)
 splitBang _ = Nothing
 
+-- See Note [isFunLhs vs mergeDataCon]
 isFunLhs :: LHsExpr GhcPs
       -> P (Maybe (Located RdrName, LexicalFixity, [LHsExpr GhcPs],[AddAnn]))
 -- A variable binding is parsed as a FunBind.
@@ -1295,38 +1279,64 @@ isFunLhs e = go e [] []
                  _ -> return Nothing }
    go _ _ _ = return Nothing
 
--- | Transform a list of 'atype' with 'strict_mark' into
--- HsOpTy's of 'eqTyCon_RDR':
---
---   [~a, ~b, c, ~d] ==> (~a) ~ ((b c) ~ d)
---
--- See Note [Parsing ~]
-splitTilde :: [LHsType GhcPs] -> P (LHsType GhcPs)
-splitTilde [] = panic "splitTilde"
-splitTilde (x:xs) = go x xs
-  where
-    -- We accumulate applications in the LHS until we encounter a laziness
-    -- annotation. For example, if we have [Foo, x, y, ~Bar, z], the 'lhs'
-    -- accumulator will become '(Foo x) y'. Then we strip the laziness
-    -- annotation off 'Bar' and process the tail [Bar, z] recursively.
-    --
-    -- This leaves us with 'lhs = (Foo x) y' and 'rhs = Bar z'.
-    -- In case the tail contained more laziness annotations, they would be
-    -- processed similarly. This makes '~' right-associative.
-    go lhs [] = return lhs
-    go lhs (x:xs)
-      | L loc (HsBangTy _ (HsSrcBang NoSourceText NoSrcUnpack SrcLazy) t) <- x
-      = do { rhs <- splitTilde (t:xs)
-           ; let r = mkLHsOpTy lhs (tildeOp loc) rhs
-           ; moveAnnotations loc (getLoc r)
-           ; return r }
-      | otherwise
-      = go (mkHsAppTy lhs x) xs
-
-    tildeOp loc = L (srcSpanFirstCharacter loc) eqTyCon_RDR
-
 -- | Either an operator or an operand.
 data TyEl = TyElOpr RdrName | TyElOpd (HsType GhcPs)
+          | TyElTilde | TyElBang
+          | TyElUnpackedness ([AddAnn], SourceText, SrcUnpackedness)
+          | TyElDocPrev HsDocString
+
+instance Outputable TyEl where
+  ppr (TyElOpr name) = ppr name
+  ppr (TyElOpd ty) = ppr ty
+  ppr TyElTilde = text "~"
+  ppr TyElBang = text "!"
+  ppr (TyElUnpackedness (_, _, unpk)) = ppr unpk
+  ppr (TyElDocPrev doc) = ppr doc
+
+tyElStrictness :: TyEl -> Maybe (AnnKeywordId, SrcStrictness)
+tyElStrictness TyElTilde = Just (AnnTilde, SrcLazy)
+tyElStrictness TyElBang = Just (AnnBang, SrcStrict)
+tyElStrictness _ = Nothing
+
+-- | Extract a strictness/unpackedness annotation from the front of a reversed
+-- 'TyEl' list.
+pStrictMark
+  :: [Located TyEl] -- reversed TyEl
+  -> Maybe ( Located HsSrcBang {- a strictness/upnackedness marker -}
+           , [AddAnn]
+           , [Located TyEl] {- remaining TyEl -})
+pStrictMark (L l1 x1 : L l2 x2 : xs)
+  | Just (strAnnId, str) <- tyElStrictness x1
+  , TyElUnpackedness (unpkAnns, prag, unpk) <- x2
+  = Just ( L (combineSrcSpans l1 l2) (HsSrcBang prag unpk str)
+         , unpkAnns ++ [\s -> addAnnotation s strAnnId l1]
+         , xs )
+pStrictMark (L l x1 : xs)
+  | Just (strAnnId, str) <- tyElStrictness x1
+  = Just ( L l (HsSrcBang NoSourceText NoSrcUnpack str)
+         , [\s -> addAnnotation s strAnnId l]
+         , xs )
+pStrictMark (L l x1 : xs)
+  | TyElUnpackedness (anns, prag, unpk) <- x1
+  = Just ( L l (HsSrcBang prag unpk NoSrcStrict)
+         , anns
+         , xs )
+pStrictMark _ = Nothing
+
+pBangTy
+  :: LHsType GhcPs  -- a type to be wrapped inside HsBangTy
+  -> [Located TyEl] -- reversed TyEl
+  -> ( Bool           {- has a strict mark been consumed? -}
+     , LHsType GhcPs  {- the resulting BangTy -}
+     , P ()           {- add annotations -}
+     , [Located TyEl] {- remaining TyEl -})
+pBangTy lt@(L l1 _) xs =
+  case pStrictMark xs of
+    Nothing -> (False, lt, pure (), xs)
+    Just (L l2 strictMark, anns, xs') ->
+      let bl = combineSrcSpans l1 l2
+          bt = HsBangTy noExt strictMark lt
+      in (True, L bl bt, addAnnsAt bl anns, xs')
 
 -- | Merge a /reversed/ and /non-empty/ soup of operators and operands
 --   into a type.
@@ -1338,22 +1348,71 @@ data TyEl = TyElOpr RdrName | TyElOpd (HsType GhcPs)
 --
 -- It's a bit silly that we're doing it at all, as the renamer will have to
 -- rearrange this, and it'd be easier to keep things separate.
+--
+-- See Note [Parsing data constructors is hard]
 mergeOps :: [Located TyEl] -> P (LHsType GhcPs)
-mergeOps = go [] id
+mergeOps (L l1 (TyElOpd t) : xs)
+  | (_, t', addAnns, xs') <- pBangTy (L l1 t) xs
+  , null xs' -- We accept a BangTy only when there are no preceding TyEl.
+  = addAnns >> return t'
+mergeOps all_xs = go (0 :: Int) [] id all_xs
   where
+    -- clause (err.1):
+    -- we do not expect to encounter any (NO)UNPACK pragmas
+    go k acc ops_acc (L l (TyElUnpackedness (_, unpkSrc, unpk)):_) =
+      if not (null acc) && (k > 1 || length acc > 1)
+      then failOpUnpackednessCompound (L l unpkSDoc) (ops_acc (mergeAcc acc))
+      else failOpUnpackednessPosition (L l unpkSDoc)
+      where
+        unpkSDoc = case unpkSrc of
+          NoSourceText -> ppr unpk
+          SourceText str -> text str <> text " #-}"
+
+    -- clause (err.2):
+    -- we do not expect to encounter any docs
+    go _ _ _ (L l (TyElDocPrev _):_) =
+      failOpDocPrev l
+
+    -- clause (err.3):
+    -- to improve error messages, we do a bit of guesswork to determine if the
+    -- user intended a '!' or a '~' as a strictness annotation
+    go k acc ops_acc (L l x : xs)
+      | Just (_, str) <- tyElStrictness x
+      , let guess [] = True
+            guess (L _ (TyElOpd _):_) = False
+            guess (L _ (TyElOpr _):_) = True
+            guess (L _ (TyElTilde):_) = True
+            guess (L _ (TyElBang):_) = True
+            guess (L _ (TyElUnpackedness _):_) = True
+            guess (L _ (TyElDocPrev _):xs') = guess xs'
+        in guess xs
+      = if not (null acc) && (k > 1 || length acc > 1)
+        then failOpStrictnessCompound (L l str) (ops_acc (mergeAcc acc))
+        else failOpStrictnessPosition (L l str)
+
     -- clause (a):
     -- when we encounter an operator, we must have accumulated
     -- something for its rhs, and there must be something left
     -- to build its lhs.
-    go acc ops_acc (L l (TyElOpr op):xs) =
+    go k acc ops_acc (L l (TyElOpr op):xs) =
       if null acc || null xs
         then failOpFewArgs (L l op)
-        else do { a <- splitTilde acc
-                ; go [] (\c -> mkLHsOpTy c (L l op) (ops_acc a)) xs }
+        else do { let a = mergeAcc acc
+                ; go (k + 1) [] (\c -> mkLHsOpTy c (L l op) (ops_acc a)) xs }
+
+    -- clause (a.1): interpret 'TyElTilde' as an operator
+    go k acc ops_acc (L l TyElTilde:xs) =
+      let op = eqTyCon_RDR
+      in go k acc ops_acc (L l (TyElOpr op):xs)
+
+    -- clause (a.2): interpret 'TyElBang' as an operator
+    go k acc ops_acc (L l TyElBang:xs) =
+      let op = mkUnqual tcClsName (fsLit "!")
+      in go k acc ops_acc (L l (TyElOpr op):xs)
 
     -- clause (b):
     -- whenever an operand is encountered, it is added to the accumulator
-    go acc ops_acc (L l (TyElOpd a):xs) = go (L l a:acc) ops_acc xs
+    go k acc ops_acc (L l (TyElOpd a):xs) = go k (L l a:acc) ops_acc xs
 
     -- clause (c):
     -- at this point we know that 'acc' is non-empty because
@@ -1364,9 +1423,211 @@ mergeOps = go [] id
     --    operator, this is handled by clause (a)
     -- 3. 'mergeOps' was called with a list where the head is an
     --    operand, this is handled by clause (b)
-    go acc ops_acc [] =
-      do { a <- splitTilde acc
-         ; return (ops_acc a) }
+    go _ acc ops_acc [] =
+      return (ops_acc (mergeAcc acc))
+
+    mergeAcc [] = panic "mergeOps.mergeAcc: empty input"
+    mergeAcc (x:xs) = mkHsAppTys x xs
+
+pInfixSide :: [Located TyEl] -> Maybe (LHsType GhcPs, P (), [Located TyEl])
+pInfixSide (L l (TyElOpd t):xs)
+  | (True, t', addAnns, xs') <- pBangTy (L l t) xs
+  = Just (t', addAnns, xs')
+pInfixSide (L l1 (TyElOpd t1):xs1) = go [L l1 t1] xs1
+  where
+    go acc (L l (TyElOpd t):xs) = go (L l t:acc) xs
+    go acc xs = Just (mergeAcc acc, pure (), xs)
+    mergeAcc [] = panic "pInfixSide.mergeAcc: empty input"
+    mergeAcc (x:xs) = mkHsAppTys x xs
+pInfixSide _ = Nothing
+
+pDocPrev :: [Located TyEl] -> (Maybe LHsDocString, [Located TyEl])
+pDocPrev = go Nothing
+  where
+    go mTrailingDoc (L l (TyElDocPrev doc):xs) =
+      go (mTrailingDoc `mplus` Just (L l doc)) xs
+    go mTrailingDoc xs = (mTrailingDoc, xs)
+
+orErr :: Maybe a -> b -> Either b a
+orErr (Just a) _ = Right a
+orErr Nothing b = Left b
+
+{- Note [isFunLhs vs mergeDataCon]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When parsing a function LHS, we do not know whether to treat (!) as
+a strictness annotation or an infix operator:
+
+  f ! a = ...
+
+Without -XBangPatterns, this parses as   (!) f a = ...
+   with -XBangPatterns, this parses as   f (!a) = ...
+
+So in function declarations we opted to always parse as if -XBangPatterns
+were off, and then rejig in 'isFunLhs'.
+
+There are two downsides to this approach:
+
+1. It is not particularly elegant, as there's a point in our pipeline where
+   the representation is awfully incorrect. For instance,
+      f !a b !c = ...
+   will be first parsed as
+      (f ! a b) ! c = ...
+
+2. There are cases that it fails to cover, for instance infix declarations:
+      !a + !b = ...
+   will trigger an error.
+
+Unfortunately, we cannot define different productions in the 'happy' grammar
+depending on whether -XBangPatterns are enabled.
+
+When parsing data constructors, we face a similar issue:
+  (a) data T1 = C ! D
+  (b) data T2 = C ! D => ...
+
+In (a) the first bang is a strictness annotation, but in (b) it is a type
+operator. A 'happy'-based parser does not have unlimited lookahead to check for
+=>, so we must first parse (C ! D) into a common representation.
+
+If we tried to mirror the approach used in functions, we would parse both sides
+of => as types, and then rejig. However, we take a different route and use an
+intermediate data structure, a reversed list of 'TyEl'.
+See Note [Parsing data constructors is hard] for details.
+
+This approach does not suffer from the issues of 'isFunLhs':
+
+1. A sequence of 'TyEl' is a dedicated intermediate representation, not an
+   incorrectly parsed type. Therefore, we do not have confusing states in our
+   pipeline. (Except for representing data constructors as type variables).
+
+2. We can handle infix data constructors with strictness annotations:
+    data T a b = !a :+ !b
+
+-}
+
+
+-- | Merge a /reversed/ and /non-empty/ soup of operators and operands
+--   into a data constructor.
+--
+-- User input: @C !A B -- ^ doc@
+-- Input to 'mergeDataCon': ["doc", B, !, A, C]
+-- Output: (C, PrefixCon [!A, B], "doc")
+--
+-- See Note [Parsing data constructors is hard]
+-- See Note [isFunLhs vs mergeDataCon]
+mergeDataCon
+      :: [Located TyEl]
+      -> P ( Located RdrName         -- constructor name
+           , HsConDeclDetails GhcPs  -- constructor field information
+           , Maybe LHsDocString      -- docstring to go on the constructor
+           )
+mergeDataCon all_xs =
+  do { (addAnns, a) <- eitherToP res
+     ; addAnns
+     ; return a }
+  where
+    -- We start by splitting off the trailing documentation comment,
+    -- if any exists.
+    (mTrailingDoc, all_xs') = pDocPrev all_xs
+
+    -- Determine whether the trailing documentation comment exists and is the
+    -- only docstring in this constructor declaration.
+    --
+    -- When true, it means that it applies to the constructor itself:
+    --    data T = C
+    --             A
+    --             B -- ^ Comment on C (singleDoc == True)
+    --
+    -- When false, it means that it applies to the last field:
+    --    data T = C -- ^ Comment on C
+    --             A -- ^ Comment on A
+    --             B -- ^ Comment on B (singleDoc == False)
+    singleDoc = isJust mTrailingDoc &&
+                null [ () | L _ (TyElDocPrev _) <- all_xs' ]
+
+    -- The result of merging the list of reversed TyEl into a
+    -- data constructor, along with [AddAnn].
+    res = goFirst all_xs'
+
+    -- Take the trailing docstring into account when interpreting
+    -- the docstring near the constructor.
+    --
+    --    data T = C -- ^ docstring right after C
+    --             A
+    --             B -- ^ trailing docstring
+    --
+    -- 'mkConDoc' must be applied to the docstring right after C, so that it
+    -- falls back to the trailing docstring when appropriate (see singleDoc).
+    mkConDoc mDoc | singleDoc = mDoc `mplus` mTrailingDoc
+                  | otherwise = mDoc
+
+    -- The docstring for the last field of a data constructor.
+    trailingFieldDoc | singleDoc = Nothing
+                     | otherwise = mTrailingDoc
+
+    goFirst [ L l (TyElOpd (HsTyVar _ _ (L _ tc))) ]
+      = do { data_con <- tyConToDataCon l tc
+           ; return (pure (), (data_con, PrefixCon [], mTrailingDoc)) }
+    goFirst (L l (TyElOpd (HsRecTy _ fields)):xs)
+      | (mConDoc, xs') <- pDocPrev xs
+      , [ L l' (TyElOpd (HsTyVar _ _ (L _ tc))) ] <- xs'
+      = do { data_con <- tyConToDataCon l' tc
+           ; let mDoc = mTrailingDoc `mplus` mConDoc
+           ; return (pure (), (data_con, RecCon (L l fields), mDoc)) }
+    goFirst [L l (TyElOpd (HsTupleTy _ HsBoxedOrConstraintTuple ts))]
+      = return ( pure ()
+               , ( L l (getRdrName (tupleDataCon Boxed (length ts)))
+                 , PrefixCon ts
+                 , mTrailingDoc ) )
+    goFirst (L l (TyElOpd t):xs)
+      | (_, t', addAnns, xs') <- pBangTy (L l t) xs
+      = go addAnns Nothing [mkLHsDocTyMaybe t' trailingFieldDoc] xs'
+    goFirst xs =
+      go (pure ()) mTrailingDoc [] xs
+
+    go addAnns mLastDoc ts [ L l (TyElOpd (HsTyVar _ _ (L _ tc))) ]
+      = do { data_con <- tyConToDataCon l tc
+           ; return (addAnns, (data_con, PrefixCon ts, mkConDoc mLastDoc)) }
+    go addAnns mLastDoc ts (L l (TyElDocPrev doc):xs) =
+      go addAnns (mLastDoc `mplus` Just (L l doc)) ts xs
+    go addAnns mLastDoc ts (L l (TyElOpd t):xs)
+      | (_, t', addAnns', xs') <- pBangTy (L l t) xs
+      , t'' <- mkLHsDocTyMaybe t' mLastDoc
+      = go (addAnns >> addAnns') Nothing (t'':ts) xs'
+    go _ _ _ (L _ (TyElOpr _):_) =
+      -- Encountered an operator: backtrack to the beginning and attempt
+      -- to parse as an infix definition.
+      goInfix
+    go _ _ _ _ = Left malformedErr
+      where
+        malformedErr =
+          ( foldr combineSrcSpans noSrcSpan (map getLoc all_xs')
+          , text "Cannot parse data constructor" <+>
+            text "in a data/newtype declaration:" $$
+            nest 2 (hsep . reverse $ map ppr all_xs'))
+
+    goInfix =
+      do { let xs0 = all_xs'
+         ; (rhs_t, rhs_addAnns, xs1) <- pInfixSide xs0 `orErr` malformedErr
+         ; let (mOpDoc, xs2) = pDocPrev xs1
+         ; (op, xs3) <- case xs2 of
+              L l (TyElOpr op) : xs3 ->
+                do { data_con <- tyConToDataCon l op
+                   ; return (data_con, xs3) }
+              _ -> Left malformedErr
+         ; let (mLhsDoc, xs4) = pDocPrev xs3
+         ; (lhs_t, lhs_addAnns, xs5) <- pInfixSide xs4 `orErr` malformedErr
+         ; unless (null xs5) (Left malformedErr)
+         ; let rhs = mkLHsDocTyMaybe rhs_t trailingFieldDoc
+               lhs = mkLHsDocTyMaybe lhs_t mLhsDoc
+               addAnns = lhs_addAnns >> rhs_addAnns
+         ; return (addAnns, (op, InfixCon lhs rhs, mkConDoc mOpDoc)) }
+      where
+        malformedErr =
+          ( foldr combineSrcSpans noSrcSpan (map getLoc all_xs')
+          , text "Cannot parse an infix data constructor" <+>
+            text "in a data/newtype declaration:" $$
+            nest 2 (hsep . reverse $ map ppr all_xs'))
 
 ---------------------------------------------------------------------------
 -- Check for monad comprehensions
@@ -1785,6 +2046,35 @@ failOpFewArgs (L loc op) =
   where
     too_few = text "Operator applied to too few arguments:" <+> ppr op
 
+failOpDocPrev :: SrcSpan -> P a
+failOpDocPrev loc = parseErrorSDoc loc msg
+  where
+    msg = text "Unexpected documentation comment."
+
+failOpStrictnessCompound :: Located SrcStrictness -> LHsType GhcPs -> P a
+failOpStrictnessCompound (L _ str) (L loc ty) = parseErrorSDoc loc msg
+  where
+    msg = text "Strictness annotation applied to a compound type." $$
+          text "Did you mean to add parentheses?" $$
+          nest 2 (ppr str <> parens (ppr ty))
+
+failOpStrictnessPosition :: Located SrcStrictness -> P a
+failOpStrictnessPosition (L loc _) = parseErrorSDoc loc msg
+  where
+    msg = text "Strictness annotation cannot appear in this position."
+
+failOpUnpackednessCompound :: Located SDoc -> LHsType GhcPs -> P a
+failOpUnpackednessCompound (L _ unpkSDoc) (L loc ty) = parseErrorSDoc loc msg
+  where
+    msg = unpkSDoc <+> text "applied to a compound type." $$
+          text "Did you mean to add parentheses?" $$
+          nest 2 (unpkSDoc <+> parens (ppr ty))
+
+failOpUnpackednessPosition :: Located SDoc -> P a
+failOpUnpackednessPosition (L loc unpkSDoc) = parseErrorSDoc loc msg
+  where
+    msg = unpkSDoc <+> text "cannot appear in this position."
+
 -----------------------------------------------------------------------------
 -- Misc utils
 
@@ -1824,3 +2114,11 @@ mkLHsOpTy :: LHsType GhcPs -> Located RdrName -> LHsType GhcPs -> LHsType GhcPs
 mkLHsOpTy x op y =
   let loc = getLoc x `combineSrcSpans` getLoc op `combineSrcSpans` getLoc y
   in L loc (mkHsOpTy x op y)
+
+mkLHsDocTy :: LHsType GhcPs -> LHsDocString -> LHsType GhcPs
+mkLHsDocTy t doc =
+  let loc = getLoc t `combineSrcSpans` getLoc doc
+  in L loc (HsDocTy noExt t doc)
+
+mkLHsDocTyMaybe :: LHsType GhcPs -> Maybe LHsDocString -> LHsType GhcPs
+mkLHsDocTyMaybe t = maybe t (mkLHsDocTy t)