From 8d3ba23f2d9e6c87794d059412a0808c9cbacb25 Mon Sep 17 00:00:00 2001 From: Dmitri Gribenko Date: Fri, 6 Jul 2012 00:28:32 +0000 Subject: Implement AST classes for comments, a real parser for Doxygen comments and a very simple semantic analysis that just builds the AST; minor changes for lexer to pick up source locations I didn't think about before. Comments AST is modelled along the ideas of HTML AST: block and inline content. * Block content is a paragraph or a command that has a paragraph as an argument or verbatim command. * Inline content is placed within some block. Inline content includes plain text, inline commands and HTML as tag soup. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@159790 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/AST/ASTContext.h | 17 +- include/clang/AST/CMakeLists.txt | 5 + include/clang/AST/Comment.h | 742 ++++++++++++++++++ include/clang/AST/CommentLexer.h | 221 +++++- include/clang/AST/CommentParser.h | 112 +++ include/clang/AST/CommentSema.h | 121 +++ include/clang/AST/CommentVisitor.h | 63 ++ include/clang/AST/Makefile | 8 +- include/clang/Basic/CommentNodes.td | 26 + lib/ARCMigrate/CMakeLists.txt | 1 + lib/AST/ASTContext.cpp | 40 +- lib/AST/CMakeLists.txt | 5 + lib/AST/Comment.cpp | 90 +++ lib/AST/CommentDumper.cpp | 206 +++++ lib/AST/CommentLexer.cpp | 44 +- lib/AST/CommentParser.cpp | 414 ++++++++++ lib/AST/CommentSema.cpp | 268 +++++++ lib/Analysis/CMakeLists.txt | 1 + lib/CodeGen/CMakeLists.txt | 1 + lib/Frontend/CMakeLists.txt | 1 + lib/Parse/CMakeLists.txt | 1 + lib/Rewrite/CMakeLists.txt | 1 + lib/Sema/CMakeLists.txt | 1 + lib/Serialization/CMakeLists.txt | 1 + lib/StaticAnalyzer/Checkers/CMakeLists.txt | 1 + lib/StaticAnalyzer/Core/CMakeLists.txt | 1 + lib/StaticAnalyzer/Frontend/CMakeLists.txt | 1 + unittests/AST/CommentLexer.cpp | 190 ++++- unittests/AST/CommentParser.cpp | 1126 ++++++++++++++++++++++++++++ utils/TableGen/TableGen.cpp | 6 + 30 files changed, 3670 insertions(+), 45 deletions(-) 
create mode 100644 include/clang/AST/Comment.h create mode 100644 include/clang/AST/CommentParser.h create mode 100644 include/clang/AST/CommentSema.h create mode 100644 include/clang/AST/CommentVisitor.h create mode 100644 include/clang/Basic/CommentNodes.td create mode 100644 lib/AST/Comment.cpp create mode 100644 lib/AST/CommentDumper.cpp create mode 100644 lib/AST/CommentParser.cpp create mode 100644 lib/AST/CommentSema.cpp create mode 100644 unittests/AST/CommentParser.cpp diff --git a/include/clang/AST/ASTContext.h b/include/clang/AST/ASTContext.h index a2177273dd..b19d7ad42c 100644 --- a/include/clang/AST/ASTContext.h +++ b/include/clang/AST/ASTContext.h @@ -20,6 +20,7 @@ #include "clang/Basic/OperatorKinds.h" #include "clang/Basic/PartialDiagnostic.h" #include "clang/Basic/VersionTuple.h" +#include "clang/AST/Comment.h" #include "clang/AST/Decl.h" #include "clang/AST/LambdaMangleContext.h" #include "clang/AST/NestedNameSpecifier.h" @@ -424,10 +425,14 @@ public: /// \brief True if comments are already loaded from ExternalASTSource. mutable bool CommentsLoaded; - /// \brief Mapping from declarations to their comments (stored within - /// Comments list), once we have already looked up the comment associated - /// with a given declaration. - mutable llvm::DenseMap DeclComments; + typedef std::pair + RawAndParsedComment; + + /// \brief Mapping from declarations to their comments. + /// + /// Raw comments are owned by Comments list. This mapping is populated + /// lazily. + mutable llvm::DenseMap DeclComments; /// \brief Return the documentation comment attached to a given declaration, /// without looking into cache. @@ -442,6 +447,10 @@ public: /// Returns NULL if no comment is attached. const RawComment *getRawCommentForDecl(const Decl *D) const; + /// Return parsed documentation comment attached to a given declaration. + /// Returns NULL if no comment is attached. 
+ comments::FullComment *getCommentForDecl(const Decl *D) const; + /// \brief Retrieve the attributes for the given declaration. AttrVec& getDeclAttrs(const Decl *D); diff --git a/include/clang/AST/CMakeLists.txt b/include/clang/AST/CMakeLists.txt index c10cda84fb..d7458aa790 100644 --- a/include/clang/AST/CMakeLists.txt +++ b/include/clang/AST/CMakeLists.txt @@ -15,3 +15,8 @@ clang_tablegen(StmtNodes.inc -gen-clang-stmt-nodes clang_tablegen(DeclNodes.inc -gen-clang-decl-nodes SOURCE ../Basic/DeclNodes.td TARGET ClangDeclNodes) + +clang_tablegen(CommentNodes.inc -gen-clang-comment-nodes + SOURCE ../Basic/CommentNodes.td + TARGET ClangCommentNodes) + diff --git a/include/clang/AST/Comment.h b/include/clang/AST/Comment.h new file mode 100644 index 0000000000..3df8fb1338 --- /dev/null +++ b/include/clang/AST/Comment.h @@ -0,0 +1,742 @@ +//===--- Comment.h - Comment AST nodes --------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines comment AST nodes. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_COMMENT_H +#define LLVM_CLANG_AST_COMMENT_H + +#include "clang/Basic/SourceLocation.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" + +namespace clang { +namespace comments { + +/// Any part of the comment. +/// Abstract class. +class Comment { +protected: + /// Preferred location to show caret. + SourceLocation Loc; + + /// Source range of this AST node. + SourceRange Range; + + class CommentBitfields { + friend class Comment; + + /// Type of this AST node. 
+ unsigned Kind : 8; + }; + enum { NumCommentBits = 8 }; + + class InlineContentCommentBitfields { + friend class InlineContentComment; + + unsigned : NumCommentBits; + + /// True if there is a newline after this inline content node. + /// (There is no separate AST node for a newline.) + unsigned HasTrailingNewline : 1; + }; + enum { NumInlineContentCommentBitfields = 9 }; + + class ParamCommandCommentBitfields { + friend class ParamCommandComment; + + unsigned : NumCommentBits; + + /// Parameter passing direction, see ParamCommandComment::PassDirection. + unsigned Direction : 2; + + /// True if direction was specified explicitly in the comment. + unsigned IsDirectionExplicit : 1; + }; + enum { NumParamCommandCommentBitfields = 11 }; + + union { + CommentBitfields CommentBits; + InlineContentCommentBitfields InlineContentCommentBits; + ParamCommandCommentBitfields ParamCommandCommentBits; + }; + + void setSourceRange(SourceRange SR) { + Range = SR; + } + + void setLocation(SourceLocation L) { + Loc = L; + } + +public: + enum CommentKind { + NoCommentKind = 0, +#define COMMENT(CLASS, PARENT) CLASS##Kind, +#define COMMENT_RANGE(BASE, FIRST, LAST) \ + First##BASE##Constant=FIRST##Kind, Last##BASE##Constant=LAST##Kind, +#define LAST_COMMENT_RANGE(BASE, FIRST, LAST) \ + First##BASE##Constant=FIRST##Kind, Last##BASE##Constant=LAST##Kind +#define ABSTRACT_COMMENT(COMMENT) +#include "clang/AST/CommentNodes.inc" + }; + + Comment(CommentKind K, + SourceLocation LocBegin, + SourceLocation LocEnd) : + Loc(LocBegin), Range(SourceRange(LocBegin, LocEnd)) { + CommentBits.Kind = K; + } + + CommentKind getCommentKind() const { + return static_cast(CommentBits.Kind); + } + + const char *getCommentKindName() const; + + LLVM_ATTRIBUTE_USED void dump() const; + LLVM_ATTRIBUTE_USED void dump(SourceManager &SM) const; + + static bool classof(const Comment *) { return true; } + + typedef Comment * const *child_iterator; + + SourceRange getSourceRange() const LLVM_READONLY { return Range; 
} + + SourceLocation getLocStart() const LLVM_READONLY { + return Range.getBegin(); + } + + SourceLocation getLocEnd() const LLVM_READONLY { + return Range.getEnd(); + } + + SourceLocation getLocation() const LLVM_READONLY { return Loc; } + + child_iterator child_begin() const; + child_iterator child_end() const; + + unsigned child_count() const { + return child_end() - child_begin(); + } +}; + +/// Inline content (contained within a block). +/// Abstract class. +class InlineContentComment : public Comment { +protected: + InlineContentComment(CommentKind K, + SourceLocation LocBegin, + SourceLocation LocEnd) : + Comment(K, LocBegin, LocEnd) { + InlineContentCommentBits.HasTrailingNewline = 0; + } + +public: + static bool classof(const Comment *C) { + return C->getCommentKind() >= FirstInlineContentCommentConstant && + C->getCommentKind() <= LastInlineContentCommentConstant; + } + + static bool classof(const InlineContentComment *) { return true; } + + void addTrailingNewline() { + InlineContentCommentBits.HasTrailingNewline = 1; + } + + bool hasTrailingNewline() const { + return InlineContentCommentBits.HasTrailingNewline; + } +}; + +/// Plain text. +class TextComment : public InlineContentComment { + StringRef Text; + +public: + TextComment(SourceLocation LocBegin, + SourceLocation LocEnd, + StringRef Text) : + InlineContentComment(TextCommentKind, LocBegin, LocEnd), + Text(Text) + { } + + static bool classof(const Comment *C) { + return C->getCommentKind() == TextCommentKind; + } + + static bool classof(const TextComment *) { return true; } + + child_iterator child_begin() const { return NULL; } + + child_iterator child_end() const { return NULL; } + + StringRef getText() const LLVM_READONLY { return Text; } +}; + +/// A command with word-like arguments that is considered inline content. 
+class InlineCommandComment : public InlineContentComment {
+public:
+  /// A single word-like argument of an inline command: its source range and
+  /// its text.
+  struct Argument {
+    SourceRange Range;
+    StringRef Text;
+
+    Argument(SourceRange Range, StringRef Text) : Range(Range), Text(Text) { }
+  };
+
+protected:
+  /// Command name.
+  StringRef Name;
+
+  /// Command arguments.  This is a non-owning view; the storage is provided
+  /// by whoever constructs the node.
+  llvm::ArrayRef<Argument> Args;
+
+public:
+  /// \param LocBegin, LocEnd source range of the node.
+  /// \param Name command name.
+  /// \param Args command arguments (not copied).
+  InlineCommandComment(SourceLocation LocBegin,
+                       SourceLocation LocEnd,
+                       StringRef Name,
+                       llvm::ArrayRef<Argument> Args) :
+      InlineContentComment(InlineCommandCommentKind, LocBegin, LocEnd),
+      Name(Name), Args(Args)
+  { }
+
+  static bool classof(const Comment *C) {
+    return C->getCommentKind() == InlineCommandCommentKind;
+  }
+
+  static bool classof(const InlineCommandComment *) { return true; }
+
+  /// Inline commands have no child nodes.
+  child_iterator child_begin() const { return NULL; }
+
+  child_iterator child_end() const { return NULL; }
+
+  StringRef getCommandName() const {
+    return Name;
+  }
+
+  /// Range from one character before the start of this node up to its end.
+  SourceRange getCommandNameRange() const {
+    return SourceRange(getLocStart().getLocWithOffset(-1),
+                       getLocEnd());
+  }
+
+  unsigned getArgCount() const {
+    return Args.size();
+  }
+
+  /// Text of argument \p Idx.  \p Idx must be less than getArgCount().
+  StringRef getArgText(unsigned Idx) const {
+    return Args[Idx].Text;
+  }
+
+  /// Source range of argument \p Idx.  \p Idx must be less than
+  /// getArgCount().
+  SourceRange getArgRange(unsigned Idx) const {
+    return Args[Idx].Range;
+  }
+};
+
+/// Abstract class for opening and closing HTML tags.  HTML tags are always
+/// treated as inline content (regardless of HTML semantics); opening and
+/// closing tags are not matched.
+class HTMLTagComment : public InlineContentComment { +protected: + StringRef TagName; + SourceRange TagNameRange; + + HTMLTagComment(CommentKind K, + SourceLocation LocBegin, + SourceLocation LocEnd, + StringRef TagName, + SourceLocation TagNameBegin, + SourceLocation TagNameEnd) : + InlineContentComment(K, LocBegin, LocEnd), + TagName(TagName), + TagNameRange(TagNameBegin, TagNameEnd) { + setLocation(TagNameBegin); + } + +public: + static bool classof(const Comment *C) { + return C->getCommentKind() >= FirstHTMLTagCommentConstant && + C->getCommentKind() <= LastHTMLTagCommentConstant; + } + + static bool classof(const HTMLTagComment *) { return true; } + + StringRef getTagName() const LLVM_READONLY { return TagName; } + + SourceRange getTagNameSourceRange() const LLVM_READONLY { + SourceLocation L = getLocation(); + return SourceRange(L.getLocWithOffset(1), + L.getLocWithOffset(1 + TagName.size())); + } +}; + +/// An opening HTML tag with attributes. +class HTMLOpenTagComment : public HTMLTagComment { +public: + class Attribute { + public: + SourceLocation NameLocBegin; + StringRef Name; + + SourceLocation EqualsLoc; + + SourceRange ValueRange; + StringRef Value; + + Attribute() { } + + Attribute(SourceLocation NameLocBegin, StringRef Name) : + NameLocBegin(NameLocBegin), Name(Name), + EqualsLoc(SourceLocation()), + ValueRange(SourceRange()), Value(StringRef()) + { } + + Attribute(SourceLocation NameLocBegin, StringRef Name, + SourceLocation EqualsLoc, + SourceRange ValueRange, StringRef Value) : + NameLocBegin(NameLocBegin), Name(Name), + EqualsLoc(EqualsLoc), + ValueRange(ValueRange), Value(Value) + { } + + SourceLocation getNameLocEnd() const { + return NameLocBegin.getLocWithOffset(Name.size()); + } + + SourceRange getNameRange() const { + return SourceRange(NameLocBegin, getNameLocEnd()); + } + }; + +private: + ArrayRef Attributes; + +public: + HTMLOpenTagComment(SourceLocation LocBegin, + StringRef TagName) : + HTMLTagComment(HTMLOpenTagCommentKind, + 
LocBegin, LocBegin.getLocWithOffset(1 + TagName.size()), + TagName, + LocBegin.getLocWithOffset(1), + LocBegin.getLocWithOffset(1 + TagName.size())) + { } + + static bool classof(const Comment *C) { + return C->getCommentKind() == HTMLOpenTagCommentKind; + } + + static bool classof(const HTMLOpenTagComment *) { return true; } + + child_iterator child_begin() const { return NULL; } + + child_iterator child_end() const { return NULL; } + + unsigned getAttrCount() const { + return Attributes.size(); + } + + const Attribute &getAttr(unsigned Idx) const { + return Attributes[Idx]; + } + + void setAttrs(ArrayRef Attrs) { + Attributes = Attrs; + if (!Attrs.empty()) { + const Attribute &Attr = Attrs.back(); + SourceLocation L = Attr.ValueRange.getEnd(); + if (L.isValid()) + Range.setEnd(L); + else { + Range.setEnd(Attr.getNameLocEnd()); + } + } + } + + void setGreaterLoc(SourceLocation GreaterLoc) { + Range.setEnd(GreaterLoc); + } +}; + +/// A closing HTML tag. +class HTMLCloseTagComment : public HTMLTagComment { +public: + HTMLCloseTagComment(SourceLocation LocBegin, + SourceLocation LocEnd, + StringRef TagName) : + HTMLTagComment(HTMLCloseTagCommentKind, + LocBegin, LocEnd, + TagName, + LocBegin.getLocWithOffset(2), + LocBegin.getLocWithOffset(2 + TagName.size())) + { } + + static bool classof(const Comment *C) { + return C->getCommentKind() == HTMLCloseTagCommentKind; + } + + static bool classof(const HTMLCloseTagComment *) { return true; } + + child_iterator child_begin() const { return NULL; } + + child_iterator child_end() const { return NULL; } +}; + +/// Block content (contains inline content). +/// Abstract class. 
+class BlockContentComment : public Comment { +protected: + BlockContentComment(CommentKind K, + SourceLocation LocBegin, + SourceLocation LocEnd) : + Comment(K, LocBegin, LocEnd) + { } + +public: + static bool classof(const Comment *C) { + return C->getCommentKind() >= FirstBlockContentCommentConstant && + C->getCommentKind() <= LastBlockContentCommentConstant; + } + + static bool classof(const BlockContentComment *) { return true; } +}; + +/// A single paragraph that contains inline content. +class ParagraphComment : public BlockContentComment { + llvm::ArrayRef Content; + +public: + ParagraphComment(llvm::ArrayRef Content) : + BlockContentComment(ParagraphCommentKind, + SourceLocation(), + SourceLocation()), + Content(Content) { + if (Content.empty()) + return; + + setSourceRange(SourceRange(Content.front()->getLocStart(), + Content.back()->getLocEnd())); + setLocation(Content.front()->getLocStart()); + } + + static bool classof(const Comment *C) { + return C->getCommentKind() == ParagraphCommentKind; + } + + static bool classof(const ParagraphComment *) { return true; } + + child_iterator child_begin() const { + return reinterpret_cast(Content.begin()); + } + + child_iterator child_end() const { + return reinterpret_cast(Content.end()); + } +}; + +/// A command that has zero or more word-like arguments (number of word-like +/// arguments depends on command name) and a paragraph as an argument +/// (e. g., \\brief). +class BlockCommandComment : public BlockContentComment { +public: + struct Argument { + SourceRange Range; + StringRef Text; + + Argument() { } + Argument(SourceRange Range, StringRef Text) : Range(Range), Text(Text) { } + }; + +protected: + /// Command name. + StringRef Name; + + /// Word-like arguments. + llvm::ArrayRef Args; + + /// Paragraph argument. 
+ ParagraphComment *Paragraph; + + BlockCommandComment(CommentKind K, + SourceLocation LocBegin, + SourceLocation LocEnd, + StringRef Name) : + BlockContentComment(K, LocBegin, LocEnd), + Name(Name), + Paragraph(NULL) { + setLocation(getCommandNameRange().getBegin()); + } + +public: + BlockCommandComment(SourceLocation LocBegin, + SourceLocation LocEnd, + StringRef Name) : + BlockContentComment(BlockCommandCommentKind, LocBegin, LocEnd), + Name(Name), + Paragraph(NULL) { + setLocation(getCommandNameRange().getBegin()); + } + + static bool classof(const Comment *C) { + return C->getCommentKind() == BlockCommandCommentKind; + } + + static bool classof(const BlockCommandComment *) { return true; } + + child_iterator child_begin() const { + return reinterpret_cast(&Paragraph); + } + + child_iterator child_end() const { + return reinterpret_cast(&Paragraph + 1); + } + + StringRef getCommandName() const { + return Name; + } + + SourceRange getCommandNameRange() const { + return SourceRange(getLocStart().getLocWithOffset(1), + getLocStart().getLocWithOffset(1 + Name.size())); + } + + unsigned getArgCount() const { + return Args.size(); + } + + StringRef getArgText(unsigned Idx) const { + return Args[Idx].Text; + } + + SourceRange getArgRange(unsigned Idx) const { + return Args[Idx].Range; + } + + void setArgs(llvm::ArrayRef A) { + Args = A; + } + + ParagraphComment *getParagraph() const LLVM_READONLY { + return Paragraph; + } + + void setParagraph(ParagraphComment *PC) { + Paragraph = PC; + SourceLocation NewLocEnd = PC->getLocEnd(); + if (NewLocEnd.isValid()) + setSourceRange(SourceRange(getLocStart(), NewLocEnd)); + } +}; + +/// Doxygen \\param command. 
+class ParamCommandComment : public BlockCommandComment {
+public:
+  /// Parameter passing direction.
+  enum PassDirection {
+    In,
+    Out,
+    InOut
+  };
+
+public:
+  /// Creates a \\param command node.  The direction defaults to \c In and is
+  /// marked as not explicitly specified.
+  ParamCommandComment(SourceLocation LocBegin,
+                      SourceLocation LocEnd,
+                      StringRef Name) :
+      BlockCommandComment(ParamCommandCommentKind, LocBegin, LocEnd, Name) {
+    ParamCommandCommentBits.Direction = In;
+    ParamCommandCommentBits.IsDirectionExplicit = false;
+  }
+
+  static bool classof(const Comment *C) {
+    return C->getCommentKind() == ParamCommandCommentKind;
+  }
+
+  static bool classof(const ParamCommandComment *) { return true; }
+
+  PassDirection getDirection() const LLVM_READONLY {
+    return static_cast<PassDirection>(ParamCommandCommentBits.Direction);
+  }
+
+  /// True if the direction was set with \c Explicit == true.
+  bool isDirectionExplicit() const LLVM_READONLY {
+    return ParamCommandCommentBits.IsDirectionExplicit;
+  }
+
+  void setDirection(PassDirection Direction, bool Explicit) {
+    ParamCommandCommentBits.Direction = Direction;
+    ParamCommandCommentBits.IsDirectionExplicit = Explicit;
+  }
+
+  /// True if at least one word-like argument is present; the first argument
+  /// is the parameter name.
+  bool hasParamName() const {
+    return getArgCount() > 0;
+  }
+
+  /// Parameter name (first argument).  Not checked here: call only when
+  /// hasParamName() is true.
+  StringRef getParamName() const {
+    return Args[0].Text;
+  }
+
+  /// Source range of the parameter name.  Not checked here: call only when
+  /// hasParamName() is true.
+  SourceRange getParamNameRange() const {
+    return Args[0].Range;
+  }
+};
+
+/// A line of text contained in a verbatim block.
+class VerbatimBlockLineComment : public Comment {
+  StringRef Text;
+
+public:
+  /// The node's range starts at \p LocBegin and extends by the length of
+  /// \p Text.
+  VerbatimBlockLineComment(SourceLocation LocBegin,
+                           StringRef Text) :
+      Comment(VerbatimBlockLineCommentKind,
+              LocBegin,
+              LocBegin.getLocWithOffset(Text.size())),
+      Text(Text)
+  { }
+
+  static bool classof(const Comment *C) {
+    return C->getCommentKind() == VerbatimBlockLineCommentKind;
+  }
+
+  static bool classof(const VerbatimBlockLineComment *) { return true; }
+
+  /// Verbatim lines have no child nodes.
+  child_iterator child_begin() const { return NULL; }
+
+  child_iterator child_end() const { return NULL; }
+
+  StringRef getText() const LLVM_READONLY {
+    return Text;
+  }
+};
+
+/// Verbatim block (e. g., preformatted code).
Verbatim block has an opening +/// and a closing command and contains multiple lines of text +/// (VerbatimBlockLineComment nodes). +class VerbatimBlockComment : public BlockCommandComment { +protected: + StringRef CloseName; + SourceLocation CloseNameLocBegin; + llvm::ArrayRef Lines; + +public: + VerbatimBlockComment(SourceLocation LocBegin, + SourceLocation LocEnd, + StringRef Name) : + BlockCommandComment(VerbatimBlockCommentKind, + LocBegin, LocEnd, Name) + { } + + static bool classof(const Comment *C) { + return C->getCommentKind() == VerbatimBlockCommentKind; + } + + static bool classof(const VerbatimBlockComment *) { return true; } + + child_iterator child_begin() const { + return reinterpret_cast(Lines.begin()); + } + + child_iterator child_end() const { + return reinterpret_cast(Lines.end()); + } + + void setCloseName(StringRef Name, SourceLocation LocBegin) { + CloseName = Name; + CloseNameLocBegin = LocBegin; + } + + void setLines(llvm::ArrayRef L) { + Lines = L; + } + + StringRef getCloseName() const { + return CloseName; + } + + unsigned getLineCount() const { + return Lines.size(); + } + + StringRef getText(unsigned LineIdx) const { + return Lines[LineIdx]->getText(); + } +}; + +/// Verbatim line. Verbatim line has an opening command and a single line of +/// text (up to the newline after the opening command). 
+class VerbatimLineComment : public BlockCommandComment { +protected: + StringRef Text; + SourceLocation TextBegin; + +public: + VerbatimLineComment(SourceLocation LocBegin, + SourceLocation LocEnd, + StringRef Name, + SourceLocation TextBegin, + StringRef Text) : + BlockCommandComment(VerbatimLineCommentKind, + LocBegin, LocEnd, + Name), + Text(Text), + TextBegin(TextBegin) + { } + + static bool classof(const Comment *C) { + return C->getCommentKind() == VerbatimLineCommentKind; + } + + static bool classof(const VerbatimLineComment *) { return true; } + + child_iterator child_begin() const { return NULL; } + + child_iterator child_end() const { return NULL; } + + StringRef getText() const { + return Text; + } + + SourceRange getTextRange() const { + return SourceRange(TextBegin, getLocEnd()); + } +}; + +/// A full comment attached to a declaration, contains block content. +class FullComment : public Comment { + llvm::ArrayRef Blocks; + +public: + FullComment(llvm::ArrayRef Blocks) : + Comment(FullCommentKind, SourceLocation(), SourceLocation()), + Blocks(Blocks) { + if (Blocks.empty()) + return; + + setSourceRange(SourceRange(Blocks.front()->getLocStart(), + Blocks.back()->getLocEnd())); + setLocation(Blocks.front()->getLocStart()); + } + + static bool classof(const Comment *C) { + return C->getCommentKind() == FullCommentKind; + } + + static bool classof(const FullComment *) { return true; } + + child_iterator child_begin() const { + return reinterpret_cast(Blocks.begin()); + } + + child_iterator child_end() const { + return reinterpret_cast(Blocks.end()); + } +}; + +} // end namespace comments +} // end namespace clang + +#endif + diff --git a/include/clang/AST/CommentLexer.h b/include/clang/AST/CommentLexer.h index 226bc73351..f8dfd278ee 100644 --- a/include/clang/AST/CommentLexer.h +++ b/include/clang/AST/CommentLexer.h @@ -18,12 +18,14 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" +#include 
"llvm/Support/Allocator.h" #include "llvm/Support/raw_ostream.h" namespace clang { namespace comments { class Lexer; +class TextTokenRetokenizer; namespace tok { enum TokenKind { @@ -41,7 +43,7 @@ enum TokenKind { html_equals, // = html_quoted_string, // "blah\"blah" or 'blah\'blah' html_greater, // > - html_tag_close // + html_tag_close // '. + LS_HTMLCloseTag }; /// Current lexing mode. @@ -319,6 +331,8 @@ private: void lexHTMLOpenTag(Token &T); + void setupAndLexHTMLCloseTag(Token &T); + void lexHTMLCloseTag(Token &T); public: @@ -338,6 +352,209 @@ public: void addVerbatimLineCommand(StringRef Name); }; +/// Re-lexes a sequence of tok::text tokens. +class TextTokenRetokenizer { + llvm::BumpPtrAllocator &Allocator; + static const unsigned MaxTokens = 16; + SmallVector Toks; + + struct Position { + unsigned CurToken; + const char *BufferStart; + const char *BufferEnd; + const char *BufferPtr; + SourceLocation BufferStartLoc; + }; + + /// Current position in Toks. + Position Pos; + + bool isEnd() const { + return Pos.CurToken >= Toks.size(); + } + + /// Sets up the buffer pointers to point to current token. 
+  void setupBuffer() {
+    assert(Pos.CurToken < Toks.size());
+    const Token &Tok = Toks[Pos.CurToken];
+
+    Pos.BufferStart = Tok.getText().begin();
+    Pos.BufferEnd = Tok.getText().end();
+    Pos.BufferPtr = Pos.BufferStart;
+    Pos.BufferStartLoc = Tok.getLocation();
+  }
+
+  /// Source location of the current read position: the current token's
+  /// location advanced by the number of characters already consumed from it.
+  SourceLocation getSourceLocation() const {
+    const unsigned CharNo = Pos.BufferPtr - Pos.BufferStart;
+    return Pos.BufferStartLoc.getLocWithOffset(CharNo);
+  }
+
+  /// Returns the character at the current position without consuming it.
+  /// Must not be called at the end of input.
+  char peek() const {
+    assert(!isEnd());
+    assert(Pos.BufferPtr != Pos.BufferEnd);
+    return *Pos.BufferPtr;
+  }
+
+  /// Consumes one character.  When the current token's text is exhausted,
+  /// moves on to the next token (if any) and re-points the buffer at it.
+  void consumeChar() {
+    assert(!isEnd());
+    assert(Pos.BufferPtr != Pos.BufferEnd);
+    Pos.BufferPtr++;
+    if (Pos.BufferPtr == Pos.BufferEnd) {
+      Pos.CurToken++;
+      if (Pos.CurToken < Toks.size())
+        setupBuffer();
+    }
+  }
+
+  /// True for space, newline, carriage return, tab, form feed and vertical
+  /// tab.
+  static bool isWhitespace(char C) {
+    return C == ' ' || C == '\n' || C == '\r' ||
+           C == '\t' || C == '\f' || C == '\v';
+  }
+
+  /// Consumes characters until a non-whitespace character or the end of
+  /// input is reached.
+  void consumeWhitespace() {
+    while (!isEnd()) {
+      if (isWhitespace(peek()))
+        consumeChar();
+      else
+        break;
+    }
+  }
+
+  /// Fills \p Result as a tok::text token with the given location, length
+  /// and text.  \p TokBegin is currently unused.
+  void formTokenWithChars(Token &Result,
+                          SourceLocation Loc,
+                          const char *TokBegin,
+                          unsigned TokLength,
+                          StringRef Text) {
+    Result.setLocation(Loc);
+    Result.setKind(tok::text);
+    Result.setLength(TokLength);
+#ifndef NDEBUG
+    // Debug placeholder; TextLen1 must equal the length of this literal.
+    Result.TextPtr1 = "<UNSET>";
+    Result.TextLen1 = 7;
+#endif
+    Result.setText(Text);
+  }
+
+public:
+  /// \param Allocator used to allocate the text of re-lexed tokens.
+  TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator):
+      Allocator(Allocator) {
+    Pos.CurToken = 0;
+  }
+
+  /// Add a token.
+  /// Returns true on success, false if it seems like we have enough tokens.
+  bool addToken(const Token &Tok) {
+    assert(Tok.is(tok::text));
+    if (Toks.size() >= MaxTokens)
+      return false;
+
+    Toks.push_back(Tok);
+    // The first token added becomes the current buffer.
+    if (Toks.size() == 1)
+      setupBuffer();
+    return true;
+  }
+
+  /// Extract a word -- sequence of non-whitespace characters.
+  bool lexWord(Token &Tok) {
+    if (isEnd())
+      return false;
+
+    // Remember where we started so that a failed attempt consumes nothing.
+    Position SavedPos = Pos;
+
+    consumeWhitespace();
+    SmallString<32> WordText;
+    const char *WordBegin = Pos.BufferPtr;
+    SourceLocation Loc = getSourceLocation();
+    while (!isEnd()) {
+      const char C = peek();
+      if (!isWhitespace(C)) {
+        WordText.push_back(C);
+        consumeChar();
+      } else
+        break;
+    }
+    const unsigned Length = WordText.size();
+    if (Length == 0) {
+      Pos = SavedPos;
+      return false;
+    }
+
+    char *TextPtr = new (Allocator) char[Length + 1];
+
+    // c_str() null-terminates, so copying Length + 1 bytes includes the
+    // terminator.
+    memcpy(TextPtr, WordText.c_str(), Length + 1);
+    StringRef Text = StringRef(TextPtr, Length);
+
+    // Use the number of characters collected rather than
+    // Pos.BufferPtr - WordBegin: consumeChar() may have switched to the next
+    // token's buffer, in which case the two pointers are unrelated.
+    formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
+    return true;
+  }
+
+  /// Extract a sequence that starts with \p OpenDelim and runs up to and
+  /// including the first \p CloseDelim.  Leading whitespace is skipped.
+  ///
+  /// Returns true and fills \p Tok on success.  Returns false, with the
+  /// position restored, if the input is exhausted, does not start with
+  /// \p OpenDelim, or ends before \p CloseDelim is seen.
+  bool lexDelimitedSeq(Token &Tok, char OpenDelim, char CloseDelim) {
+    if (isEnd())
+      return false;
+
+    Position SavedPos = Pos;
+
+    consumeWhitespace();
+    SmallString<32> WordText;
+    const char *WordBegin = Pos.BufferPtr;
+    SourceLocation Loc = getSourceLocation();
+    bool Error = false;
+    if (!isEnd()) {
+      const char C = peek();
+      if (C == OpenDelim) {
+        WordText.push_back(C);
+        consumeChar();
+      } else
+        Error = true;
+    }
+    // Initialized so that the check below is well-defined when the loop body
+    // never runs (input ended after whitespace or after the open delimiter).
+    char C = '\0';
+    while (!Error && !isEnd()) {
+      C = peek();
+      WordText.push_back(C);
+      consumeChar();
+      if (C == CloseDelim)
+        break;
+    }
+    if (!Error && C != CloseDelim)
+      Error = true;
+
+    if (Error) {
+      Pos = SavedPos;
+      return false;
+    }
+
+    const unsigned Length = WordText.size();
+    char *TextPtr = new (Allocator) char[Length + 1];
+
+    memcpy(TextPtr, WordText.c_str(), Length + 1);
+    StringRef Text = StringRef(TextPtr, Length);
+
+    // See lexWord(): the collected length is used instead of a pointer
+    // difference that may span two different token buffers.
+    formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
+    return true;
+  }
+
+  /// Return a text token.  Useful to take tokens back.
+ bool lexText(Token &Tok) { + if (isEnd()) + return false; + + if (Pos.BufferPtr != Pos.BufferStart) + formTokenWithChars(Tok, getSourceLocation(), + Pos.BufferPtr, Pos.BufferEnd - Pos.BufferPtr, + StringRef(Pos.BufferPtr, + Pos.BufferEnd - Pos.BufferPtr)); + else + Tok = Toks[Pos.CurToken]; + + Pos.CurToken++; + if (Pos.CurToken < Toks.size()) + setupBuffer(); + return true; + } +}; + } // end namespace comments } // end namespace clang diff --git a/include/clang/AST/CommentParser.h b/include/clang/AST/CommentParser.h new file mode 100644 index 0000000000..53c58662bf --- /dev/null +++ b/include/clang/AST/CommentParser.h @@ -0,0 +1,112 @@ +//===--- CommentParser.h - Doxygen comment parser ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Doxygen comment parser. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_COMMENT_PARSER_H +#define LLVM_CLANG_AST_COMMENT_PARSER_H + +#include "clang/AST/CommentLexer.h" +#include "clang/AST/Comment.h" +#include "clang/AST/CommentSema.h" +#include "llvm/Support/Allocator.h" + +namespace clang { +namespace comments { + +/// Doxygen comment parser. +class Parser { + Lexer &L; + + Sema &S; + + llvm::BumpPtrAllocator &Allocator; + + template + ArrayRef copyArray(ArrayRef Source) { + size_t Size = Source.size(); + if (Size != 0) { + T *Mem = new (Allocator) T[Size]; + std::copy(Source.begin(), Source.end(), Mem); + return llvm::makeArrayRef(Mem, Size); + } else + return llvm::makeArrayRef(static_cast(NULL), 0); + } + + /// Current lookahead token. We can safely assume that all tokens are from + /// a single source file. + Token Tok; + + /// A stack of additional lookahead tokens. 
+ SmallVector MoreLATokens; + + SourceLocation consumeToken() { + SourceLocation Loc = Tok.getLocation(); + if (MoreLATokens.empty()) + L.lex(Tok); + else { + Tok = MoreLATokens.back(); + MoreLATokens.pop_back(); + } + return Loc; + } + + void putBack(const Token &OldTok) { + MoreLATokens.push_back(Tok); + Tok = OldTok; + } + + void putBack(ArrayRef Toks) { + if (Toks.empty()) + return; + + MoreLATokens.push_back(Tok); + for (const Token *I = &Toks.back(), + *B = &Toks.front() + 1; + I != B; --I) { + MoreLATokens.push_back(*I); + } + + Tok = Toks[0]; + } + +public: + Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator); + + /// Parse arguments for \\param command. + ParamCommandComment *parseParamCommandArgs( + ParamCommandComment *PC, + TextTokenRetokenizer &Retokenizer); + + BlockCommandComment *parseBlockCommandArgs( + BlockCommandComment *BC, + TextTokenRetokenizer &Retokenizer, + unsigned NumArgs); + + BlockCommandComment *parseBlockCommand(); + InlineCommandComment *parseInlineCommand(); + + HTMLOpenTagComment *parseHTMLOpenTag(); + HTMLCloseTagComment *parseHTMLCloseTag(); + + BlockContentComment *parseParagraphOrBlockCommand(); + + VerbatimBlockComment *parseVerbatimBlock(); + VerbatimLineComment *parseVerbatimLine(); + BlockContentComment *parseBlockContent(); + FullComment *parseFullComment(); +}; + +} // end namespace comments +} // end namespace clang + +#endif + diff --git a/include/clang/AST/CommentSema.h b/include/clang/AST/CommentSema.h new file mode 100644 index 0000000000..4d853eb086 --- /dev/null +++ b/include/clang/AST/CommentSema.h @@ -0,0 +1,121 @@ +//===--- CommentSema.h - Doxygen comment semantic analysis ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines the semantic analysis class for Doxygen comments. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_COMMENT_SEMA_H +#define LLVM_CLANG_AST_COMMENT_SEMA_H + +#include "clang/Basic/SourceLocation.h" +#include "clang/AST/Comment.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Allocator.h" + +namespace clang { +namespace comments { + +class Sema { + llvm::BumpPtrAllocator &Allocator; + +public: + Sema(llvm::BumpPtrAllocator &Allocator); + + ParagraphComment *actOnParagraphComment( + ArrayRef Content); + + BlockCommandComment *actOnBlockCommandStart(SourceLocation LocBegin, + SourceLocation LocEnd, + StringRef Name); + + BlockCommandComment *actOnBlockCommandArgs( + BlockCommandComment *Command, + ArrayRef Args); + + BlockCommandComment *actOnBlockCommandFinish(BlockCommandComment *Command, + ParagraphComment *Paragraph); + + ParamCommandComment *actOnParamCommandStart(SourceLocation LocBegin, + SourceLocation LocEnd, + StringRef Name); + + ParamCommandComment *actOnParamCommandArg(ParamCommandComment *Command, + SourceLocation ArgLocBegin, + SourceLocation ArgLocEnd, + StringRef Arg, + bool IsDirection); + + ParamCommandComment *actOnParamCommandFinish(ParamCommandComment *Command, + ParagraphComment *Paragraph); + + InlineCommandComment *actOnInlineCommand(SourceLocation CommandLocBegin, + SourceLocation CommandLocEnd, + StringRef CommandName); + + InlineCommandComment *actOnInlineCommand(SourceLocation CommandLocBegin, + SourceLocation CommandLocEnd, + StringRef CommandName, + SourceLocation ArgLocBegin, + SourceLocation ArgLocEnd, + StringRef Arg); + + InlineContentComment *actOnUnknownCommand(SourceLocation LocBegin, + SourceLocation LocEnd, + StringRef Name); + + TextComment *actOnText(SourceLocation LocBegin, + SourceLocation LocEnd, + StringRef 
Text); + + VerbatimBlockComment *actOnVerbatimBlockStart(SourceLocation Loc, + StringRef Name); + + VerbatimBlockLineComment *actOnVerbatimBlockLine(SourceLocation Loc, + StringRef Text); + + VerbatimBlockComment *actOnVerbatimBlockFinish( + VerbatimBlockComment *Block, + SourceLocation CloseNameLocBegin, + StringRef CloseName, + ArrayRef Lines); + + VerbatimLineComment *actOnVerbatimLine(SourceLocation LocBegin, + StringRef Name, + SourceLocation TextBegin, + StringRef Text); + + HTMLOpenTagComment *actOnHTMLOpenTagStart(SourceLocation LocBegin, + StringRef TagName); + + HTMLOpenTagComment *actOnHTMLOpenTagFinish( + HTMLOpenTagComment *Tag, + ArrayRef Attrs, + SourceLocation GreaterLoc); + + HTMLCloseTagComment *actOnHTMLCloseTag(SourceLocation LocBegin, + SourceLocation LocEnd, + StringRef TagName); + + FullComment *actOnFullComment(ArrayRef Blocks); + + bool isBlockCommand(StringRef Name); + bool isParamCommand(StringRef Name); + unsigned getBlockCommandNumArgs(StringRef Name); + + bool isInlineCommand(StringRef Name); + bool HTMLOpenTagNeedsClosing(StringRef Name); +}; + +} // end namespace comments +} // end namespace clang + +#endif + diff --git a/include/clang/AST/CommentVisitor.h b/include/clang/AST/CommentVisitor.h new file mode 100644 index 0000000000..159725af26 --- /dev/null +++ b/include/clang/AST/CommentVisitor.h @@ -0,0 +1,63 @@ +//===--- CommentVisitor.h - Visitor for Comment subclasses ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/Comment.h" +#include "llvm/Support/ErrorHandling.h" + +namespace clang { +namespace comments { + +template struct make_ptr { typedef T *type; }; +template struct make_const_ptr { typedef const T *type; }; + +template