EPA: update some comments in Annotations.

Follow-up from !2418, see #19579
author: Alan Zimmerman <alan.zimm@gmail.com> 2021-04-27 21:04:07 +0100
committer: Alan Zimmerman <alan.zimm@gmail.com> 2021-05-08 11:19:26 +0100
commit: ad5a3aeb1d3094acbc17f07a2cc8388676d59dd1 (patch)
tree: f25cc604e0e4e0a5328e2302ec0a2275a7c83e20
parent: a32eb0f3d5037b0c6fefa38ec19ff8c22076d102 (diff)
download: haskell-ad5a3aeb1d3094acbc17f07a2cc8388676d59dd1.tar.gz
1 files changed, 82 insertions, 119 deletions
diff --git a/compiler/GHC/Parser/Annotation.hs b/compiler/GHC/Parser/Annotation.hs
index 8dc12555a0..986ee8c197 100644
--- a/compiler/GHC/Parser/Annotation.hs
+++ b/compiler/GHC/Parser/Annotation.hs
@@ -28,8 +28,8 @@ module GHC.Parser.Annotation (
 
   -- ** Annotations in 'GenLocated'
   LocatedA, LocatedL, LocatedC, LocatedN, LocatedAn, LocatedP,
-  SrcSpanAnnA, SrcSpanAnnL, SrcSpanAnnP, SrcSpanAnnC, SrcSpanAnnN, SrcSpanAnn'(..),
-  SrcAnn,
+  SrcSpanAnnA, SrcSpanAnnL, SrcSpanAnnP, SrcSpanAnnC, SrcSpanAnnN,
+  SrcSpanAnn'(..), SrcAnn,
 
   -- ** Annotation data types used in 'GenLocated'
 
@@ -69,7 +69,8 @@ module GHC.Parser.Annotation (
   combineSrcSpansA,
   addCLocA, addCLocAA,
 
-  -- ** Constructing 'GenLocated' annotation types when we do not care about annotations.
+  -- ** Constructing 'GenLocated' annotation types when we do not care
+  -- about annotations.
   noLocA, getLocA,
   noSrcSpanA,
   noAnnSrcSpan,
@@ -184,11 +185,8 @@ https://gitlab.haskell.org/ghc/ghc/wikis/api-annotations
 
 -- | Exact print annotations exist so that tools can perform source to
 -- source conversions of Haskell code. They are used to keep track of
--- the various syntactic keywords that are not captured in the
--- existing AST.
---
--- The annotations, together with original source comments are made available in
--- the @'pm_parsed_source@ field of @'GHC.Driver.Env.HsParsedModule'@.
+-- the various syntactic keywords that are not otherwise captured in the
+-- AST.
 --
 -- The wiki page describing this feature is
 -- https://gitlab.haskell.org/ghc/ghc/wikis/api-annotations
@@ -311,49 +309,6 @@ data AnnKeywordId
 instance Outputable AnnKeywordId where
   ppr x = text (show x)
 
--- ---------------------------------------------------------------------
-
-data EpaComment =
-  EpaComment
-    { ac_tok :: EpaCommentTok
-    , ac_prior_tok :: RealSrcSpan
-    -- ^ The location of the prior token, used in exact printing.  The
-    -- 'EpaComment' appears as an 'LEpaComment' containing its
-    -- location.  The difference between the end of the prior token
-    -- and the start of this location is used for the spacing when
-    -- exact printing the comment.
-    }
-    deriving (Eq, Ord, Data, Show)
-
-data EpaCommentTok =
-  -- Documentation annotations
-    EpaDocCommentNext  String     -- ^ something beginning '-- |'
-  | EpaDocCommentPrev  String     -- ^ something beginning '-- ^'
-  | EpaDocCommentNamed String     -- ^ something beginning '-- $'
-  | EpaDocSection      Int String -- ^ a section heading
-  | EpaDocOptions      String     -- ^ doc options (prune, ignore-exports, etc)
-  | EpaLineComment     String     -- ^ comment starting by "--"
-  | EpaBlockComment    String     -- ^ comment in {- -}
-  | EpaEofComment                 -- ^ empty comment, capturing
-                                  -- location of EOF
-
-  -- See #19697 for a discussion of its use and how it should be
-  -- removed in favour of capturing it in the location for
-  -- 'Located HsModule' in the parser.
-
-    deriving (Eq, Ord, Data, Show)
--- Note: these are based on the Token versions, but the Token type is
--- defined in GHC.Parser.Lexer and bringing it in here would create a loop
-
-instance Outputable EpaComment where
-  ppr x = text (show x)
-
--- | - 'GHC.Parser.Annotation.AnnKeywordId' : 'GHC.Parser.Annotation.AnnOpen',
---             'GHC.Parser.Annotation.AnnClose','GHC.Parser.Annotation.AnnComma',
---             'GHC.Parser.Annotation.AnnRarrow'
---             'GHC.Parser.Annotation.AnnTilde'
---   - May have 'GHC.Parser.Annotation.AnnComma' when in a list
-
 -- | Certain tokens can have alternate representations when unicode syntax is
 -- enabled. This flag is attached to those tokens in the lexer so that the
 -- original source representation can be reproduced in the corresponding
@@ -391,6 +346,43 @@ data HasE = HasE | NoE
 
 -- ---------------------------------------------------------------------
 
+data EpaComment =
+  EpaComment
+    { ac_tok :: EpaCommentTok
+    , ac_prior_tok :: RealSrcSpan
+    -- ^ The location of the prior token, used in exact printing.  The
+    -- 'EpaComment' appears as an 'LEpaComment' containing its
+    -- location.  The difference between the end of the prior token
+    -- and the start of this location is used for the spacing when
+    -- exact printing the comment.
+    }
+    deriving (Eq, Ord, Data, Show)
+
+data EpaCommentTok =
+  -- Documentation annotations
+    EpaDocCommentNext  String     -- ^ something beginning '-- |'
+  | EpaDocCommentPrev  String     -- ^ something beginning '-- ^'
+  | EpaDocCommentNamed String     -- ^ something beginning '-- $'
+  | EpaDocSection      Int String -- ^ a section heading
+  | EpaDocOptions      String     -- ^ doc options (prune, ignore-exports, etc)
+  | EpaLineComment     String     -- ^ comment starting by "--"
+  | EpaBlockComment    String     -- ^ comment in {- -}
+  | EpaEofComment                 -- ^ empty comment, capturing
+                                  -- location of EOF
+
+  -- See #19697 for a discussion of EpaEofComment's use and how it
+  -- should be removed in favour of capturing it in the location for
+  -- 'Located HsModule' in the parser.
+
+    deriving (Eq, Ord, Data, Show)
+-- Note: these are based on the Token versions, but the Token type is
+-- defined in GHC.Parser.Lexer and bringing it in here would create a loop
+
+instance Outputable EpaComment where
+  ppr x = text (show x)
+
+-- ---------------------------------------------------------------------
+
 -- | Captures an annotation, storing the @'AnnKeywordId'@ and its
 -- location.  The parser only ever inserts @'EpaLocation'@ fields with a
 -- RealSrcSpan being the original location of the annotation in the
@@ -412,12 +404,16 @@ data EpaLocation = EpaSpan RealSrcSpan
                  | EpaDelta DeltaPos
                deriving (Data,Show,Eq,Ord)
 
--- | Relative position, line then column.  If 'deltaLine' is zero then
--- 'deltaColumn' gives the number of spaces between the end of the
--- preceding output element and the start of the one this is attached
--- to, on the same line.  If 'deltaLine' is > 0, then it is the number
--- of lines to advance, and 'deltaColumn' is the start column on the
--- new line.
+-- | Spacing between output items when exact printing.  It captures
+-- the spacing from the current print position on the page to the
+-- position required for the thing about to be printed.  This is
+-- either on the same line in which case is is simply the number of
+-- spaces to emit, or it is some number of lines down, with a given
+-- column offset.  The exact printing algorithm keeps track of the
+-- column offset pertaining to the current anchor position, so the
+-- `deltaColumn` is the additional spaces to add in this case.  See
+-- https://gitlab.haskell.org/ghc/ghc/wikis/api-annotations for
+-- details.
 data DeltaPos
   = SameLine { deltaColumn :: !Int }
   | DifferentLine
@@ -425,6 +421,8 @@ data DeltaPos
         deltaColumn :: !Int
       } deriving (Show,Eq,Ord,Data)
 
+-- | Smart constructor for a 'DeltaPos'. It preserves the invariant
+-- that for the 'DifferentLine' constructor 'deltaLine' is always > 0.
 deltaPos :: Int -> Int -> DeltaPos
 deltaPos l c = case l of
   0 -> SameLine c
@@ -450,40 +448,10 @@ instance Outputable AddEpAnn where
 
 -- ---------------------------------------------------------------------
 
-{-
-Note [In-tree Api annotations]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-GHC 7.10 brought in the concept of API Annotations,
-https://gitlab.haskell.org/ghc/ghc/-/wikis/api-annotations:
-
-  The hsSyn AST does not directly capture the locations of certain
-  keywords and punctuation, such as 'let', 'in', 'do', etc.
-
-  These locations are required by any tools wanting to parse a haskell
-  file, transform the AST in some way, and then regenerate the
-  original layout for the unchaged parts."
-
-These were returned in a separate data structure, linked to the main
-AST via a combination of SrcSpan and constructor name.
-
-This indirect linkage kept the AST uncluttered, but made working with
-the annotations complex, as two separate data structures had to be
-changed at the same time in a coherent way.
-
-From GHC 9.2.1, these annotations are captured directly in the AST,
-using the types in this file, and the Trees That Grow (TTG) extension
-points for GhcPs.
-
-See https://gitlab.haskell.org/ghc/ghc/wikis/api-annotations
-
-See Note [XRec and Anno in the AST] for details of how this is done.
--}
-
--- | The API Annotations are now kept in the HsSyn AST for the GhcPs
---   phase. We do not always have API Annotations though, only for
---   parsed code. This type captures that, and allows the
---   representation decision to be easily revisited as it evolves.
+-- | The exact print annotations (EPAs) are kept in the HsSyn AST for
+--   the GhcPs phase. We do not always have EPAs though, only for code
+--   that has been parsed as they do not exist for generated
+--   code. This type captures that they may be missing.
 --
 -- A goal of the annotations is that an AST can be edited, including
 -- moving subtrees from one place to another, duplicating them, and so
@@ -496,8 +464,8 @@ See Note [XRec and Anno in the AST] for details of how this is done.
 -- fragment are also captured here.
 --
 -- The 'ann' type parameter allows this general structure to be
--- specialised to the specific set of locations of original API
--- Annotation elements.  So for 'HsLet' we have
+-- specialised to the specific set of locations of original exact
+-- print annotation elements.  So for 'HsLet' we have
 --
 --    type instance XLet GhcPs = EpAnn AnnsLet
 --    data AnnsLet
@@ -507,11 +475,12 @@ See Note [XRec and Anno in the AST] for details of how this is done.
 --          } deriving Data
 --
 -- The spacing between the items under the scope of a given EpAnn is
--- derived from the original 'Anchor'.  But there is no requirement
--- that the items included in the sub-element have a "matching"
--- location in their relative anchors. This allows us to freely move
--- elements around, and stitch together new AST fragments out of old
--- ones, and have them still printed out in a reasonable way.
+-- normally derived from the original 'Anchor'.  But if a sub-element
+-- is not in its original position, the required spacing can be
+-- directly captured in the 'anchor_op' field of the 'entry' Anchor.
+-- This allows us to freely move elements around, and stitch together
+-- new AST fragments out of old ones, and have them still printed out
+-- in a precise way.
 data EpAnn ann
   = EpAnn { entry   :: Anchor
            -- ^ Base location for the start of the syntactic element
@@ -528,8 +497,11 @@ data EpAnn ann
 -- | An 'Anchor' records the base location for the start of the
 -- syntactic element holding the annotations, and is used as the point
 -- of reference for calculating delta positions for contained
--- annotations.  If an AST element is moved or deleted, the original
--- location is also tracked, for printing the source without gaps.
+-- annotations.
+-- It is also normally used as the reference point for the spacing of
+-- the element relative to its container. If it is moved, that
+-- relationship is tracked in the 'anchor_op' instead.
+
 data Anchor = Anchor        { anchor :: RealSrcSpan
                                  -- ^ Base location for the start of
                                  -- the syntactic element holding
@@ -557,8 +529,8 @@ realSpanAsAnchor s  = Anchor s UnchangedAnchor
 
 -- | When we are parsing we add comments that belong a particular AST
 -- element, and print them together with the element, interleaving
--- them into the output stream.  But when editin the AST, to move
--- fragments around, it is useful to be able to first separate the
+-- them into the output stream.  But when editing the AST to move
+-- fragments around it is useful to be able to first separate the
 -- comments into those occuring before the AST element and those
 -- following it.  The 'EpaCommentsBalanced' constructor is used to do
 -- this. The GHC parser will only insert the 'EpaComments' form.
@@ -587,14 +559,7 @@ data SrcSpanAnn' a = SrcSpanAnn { ann :: a, locA :: SrcSpan }
 
 -- | We mostly use 'SrcSpanAnn\'' with an 'EpAnn\''
 type SrcAnn ann = SrcSpanAnn' (EpAnn ann)
--- AZ: is SrcAnn the right abbreviation here? Any better suggestions?
-
--- AZ: should we rename LocatedA to LocatedL?  The name comes from
--- this being the most common usage, and hence being the default
--- annotation. It also has a matching set if utility functions such as
--- locA, noLocA, etc.  LocatedL would then need a new name, but it is
--- relatively rare, and captures a list having an openinc and closing
--- adorment, such as parens, braces, etc.
+
 type LocatedA = GenLocated SrcSpanAnnA
 type LocatedN = GenLocated SrcSpanAnnN
 
@@ -617,7 +582,7 @@ type LocatedAn an = GenLocated (SrcAnn an)
 Note [XRec and Anno in the AST]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-The API annotations are now captured directly inside the AST, using
+The exact print annotations are captured directly inside the AST, using
 TTG extension points. However certain annotations need to be captured
 on the Located versions too.  While there is a general form for these,
 captured in the type SrcSpanAnn', there are also specific usages in
@@ -683,8 +648,6 @@ data AnnListItem
 -- keywords such as 'where'.
 data AnnList
   = AnnList {
-      -- TODO:AZ: should we distinguish AnnList variants for lists
-      -- with layout and without?
       al_anchor    :: Maybe Anchor, -- ^ start point of a list having layout
       al_open      :: Maybe AddEpAnn,
       al_close     :: Maybe AddEpAnn,
@@ -696,7 +659,7 @@ data AnnList
 -- Annotations for parenthesised elements, such as tuples, lists
 -- ---------------------------------------------------------------------
 
--- | API Annotation for an item having surrounding "brackets", such as
+-- | exact print annotation for an item having surrounding "brackets", such as
 -- tuples or lists
 data AnnParen
   = AnnParen {
@@ -705,7 +668,7 @@ data AnnParen
       ap_close     :: EpaLocation
       } deriving (Data)
 
--- | Detail of the "brackets" used in an 'AnnParen' API Annotation.
+-- | Detail of the "brackets" used in an 'AnnParen' exact print annotation.
 data ParenType
   = AnnParens       -- ^ '(', ')'
   | AnnParensHash   -- ^ '(#', '#)'
@@ -721,7 +684,7 @@ parenTypeKws AnnParensSquare = (AnnOpenS, AnnCloseS)
 
 -- ---------------------------------------------------------------------
 
--- | API Annotation for the 'Context' data type.
+-- | Exact print annotation for the 'Context' data type.
 data AnnContext
   = AnnContext {
       ac_darrow    :: Maybe (IsUnicodeSyntax, EpaLocation),
@@ -735,7 +698,7 @@ data AnnContext
 -- Annotations for names
 -- ---------------------------------------------------------------------
 
--- | API Annotations for a 'RdrName'.  There are many kinds of
+-- | exact print annotations for a 'RdrName'.  There are many kinds of
 -- adornment that can be attached to a given 'RdrName'. This type
 -- captures them, as detailed on the individual constructors.
 data NameAnn
@@ -793,8 +756,8 @@ data NameAdornment
 
 -- ---------------------------------------------------------------------
 
--- | API Annotation used for capturing the locations of annotations in
--- pragmas.
+-- | exact print annotation used for capturing the locations of
+-- annotations in pragmas.
 data AnnPragma
   = AnnPragma {
       apr_open      :: AddEpAnn,
author	Alan Zimmerman <alan.zimm@gmail.com>	2021-04-27 21:04:07 +0100
committer	Alan Zimmerman <alan.zimm@gmail.com>	2021-05-08 11:19:26 +0100
commit	ad5a3aeb1d3094acbc17f07a2cc8388676d59dd1 (patch)
tree	f25cc604e0e4e0a5328e2302ec0a2275a7c83e20
parent	a32eb0f3d5037b0c6fefa38ec19ff8c22076d102 (diff)
download	haskell-ad5a3aeb1d3094acbc17f07a2cc8388676d59dd1.tar.gz