diff options
author | Dmitri Gribenko <gribozavr@gmail.com> | 2012-07-06 00:28:32 +0000 |
---|---|---|
committer | Dmitri Gribenko <gribozavr@gmail.com> | 2012-07-06 00:28:32 +0000 |
commit | 8d3ba23f2d9e6c87794d059412a0808c9cbacb25 (patch) | |
tree | c72c618faeffa1c098c4df33857bd12a72c62fb1 | |
parent | 1838703fea568b394407b83d1055b4c7f52fb105 (diff) | |
download | clang-8d3ba23f2d9e6c87794d059412a0808c9cbacb25.tar.gz |
Implement AST classes for comments, a real parser for Doxygen comments, and a
very simple semantic analysis that just builds the AST; also make minor changes
to the lexer so that it picks up source locations I didn't think about before.
The comment AST is modelled on the ideas of an HTML AST: block and inline content.
* Block content is a paragraph, a command that has a paragraph as an argument,
or a verbatim command.
* Inline content is placed within some block. Inline content includes plain
text, inline commands and HTML as tag soup.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@159790 91177308-0d34-0410-b5e6-96231b3b80d8
30 files changed, 3670 insertions, 45 deletions
diff --git a/include/clang/AST/ASTContext.h b/include/clang/AST/ASTContext.h index a2177273dd..b19d7ad42c 100644 --- a/include/clang/AST/ASTContext.h +++ b/include/clang/AST/ASTContext.h @@ -20,6 +20,7 @@ #include "clang/Basic/OperatorKinds.h" #include "clang/Basic/PartialDiagnostic.h" #include "clang/Basic/VersionTuple.h" +#include "clang/AST/Comment.h" #include "clang/AST/Decl.h" #include "clang/AST/LambdaMangleContext.h" #include "clang/AST/NestedNameSpecifier.h" @@ -424,10 +425,14 @@ public: /// \brief True if comments are already loaded from ExternalASTSource. mutable bool CommentsLoaded; - /// \brief Mapping from declarations to their comments (stored within - /// Comments list), once we have already looked up the comment associated - /// with a given declaration. - mutable llvm::DenseMap<const Decl *, const RawComment *> DeclComments; + typedef std::pair<const RawComment *, comments::FullComment *> + RawAndParsedComment; + + /// \brief Mapping from declarations to their comments. + /// + /// Raw comments are owned by Comments list. This mapping is populated + /// lazily. + mutable llvm::DenseMap<const Decl *, RawAndParsedComment> DeclComments; /// \brief Return the documentation comment attached to a given declaration, /// without looking into cache. @@ -442,6 +447,10 @@ public: /// Returns NULL if no comment is attached. const RawComment *getRawCommentForDecl(const Decl *D) const; + /// Return parsed documentation comment attached to a given declaration. + /// Returns NULL if no comment is attached. + comments::FullComment *getCommentForDecl(const Decl *D) const; + /// \brief Retrieve the attributes for the given declaration. 
AttrVec& getDeclAttrs(const Decl *D); diff --git a/include/clang/AST/CMakeLists.txt b/include/clang/AST/CMakeLists.txt index c10cda84fb..d7458aa790 100644 --- a/include/clang/AST/CMakeLists.txt +++ b/include/clang/AST/CMakeLists.txt @@ -15,3 +15,8 @@ clang_tablegen(StmtNodes.inc -gen-clang-stmt-nodes clang_tablegen(DeclNodes.inc -gen-clang-decl-nodes SOURCE ../Basic/DeclNodes.td TARGET ClangDeclNodes) + +clang_tablegen(CommentNodes.inc -gen-clang-comment-nodes + SOURCE ../Basic/CommentNodes.td + TARGET ClangCommentNodes) + diff --git a/include/clang/AST/Comment.h b/include/clang/AST/Comment.h new file mode 100644 index 0000000000..3df8fb1338 --- /dev/null +++ b/include/clang/AST/Comment.h @@ -0,0 +1,742 @@ +//===--- Comment.h - Comment AST nodes --------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines comment AST nodes. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_COMMENT_H +#define LLVM_CLANG_AST_COMMENT_H + +#include "clang/Basic/SourceLocation.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" + +namespace clang { +namespace comments { + +/// Any part of the comment. +/// Abstract class. +class Comment { +protected: + /// Preferred location to show caret. + SourceLocation Loc; + + /// Source range of this AST node. + SourceRange Range; + + class CommentBitfields { + friend class Comment; + + /// Type of this AST node. + unsigned Kind : 8; + }; + enum { NumCommentBits = 8 }; + + class InlineContentCommentBitfields { + friend class InlineContentComment; + + unsigned : NumCommentBits; + + /// True if there is a newline after this inline content node. + /// (There is no separate AST node for a newline.) 
+ unsigned HasTrailingNewline : 1; + }; + enum { NumInlineContentCommentBitfields = 9 }; + + class ParamCommandCommentBitfields { + friend class ParamCommandComment; + + unsigned : NumCommentBits; + + /// Parameter passing direction, see ParamCommandComment::PassDirection. + unsigned Direction : 2; + + /// True if direction was specified explicitly in the comment. + unsigned IsDirectionExplicit : 1; + }; + enum { NumParamCommandCommentBitfields = 11 }; + + union { + CommentBitfields CommentBits; + InlineContentCommentBitfields InlineContentCommentBits; + ParamCommandCommentBitfields ParamCommandCommentBits; + }; + + void setSourceRange(SourceRange SR) { + Range = SR; + } + + void setLocation(SourceLocation L) { + Loc = L; + } + +public: + enum CommentKind { + NoCommentKind = 0, +#define COMMENT(CLASS, PARENT) CLASS##Kind, +#define COMMENT_RANGE(BASE, FIRST, LAST) \ + First##BASE##Constant=FIRST##Kind, Last##BASE##Constant=LAST##Kind, +#define LAST_COMMENT_RANGE(BASE, FIRST, LAST) \ + First##BASE##Constant=FIRST##Kind, Last##BASE##Constant=LAST##Kind +#define ABSTRACT_COMMENT(COMMENT) +#include "clang/AST/CommentNodes.inc" + }; + + Comment(CommentKind K, + SourceLocation LocBegin, + SourceLocation LocEnd) : + Loc(LocBegin), Range(SourceRange(LocBegin, LocEnd)) { + CommentBits.Kind = K; + } + + CommentKind getCommentKind() const { + return static_cast<CommentKind>(CommentBits.Kind); + } + + const char *getCommentKindName() const; + + LLVM_ATTRIBUTE_USED void dump() const; + LLVM_ATTRIBUTE_USED void dump(SourceManager &SM) const; + + static bool classof(const Comment *) { return true; } + + typedef Comment * const *child_iterator; + + SourceRange getSourceRange() const LLVM_READONLY { return Range; } + + SourceLocation getLocStart() const LLVM_READONLY { + return Range.getBegin(); + } + + SourceLocation getLocEnd() const LLVM_READONLY { + return Range.getEnd(); + } + + SourceLocation getLocation() const LLVM_READONLY { return Loc; } + + child_iterator child_begin() 
const; + child_iterator child_end() const; + + unsigned child_count() const { + return child_end() - child_begin(); + } +}; + +/// Inline content (contained within a block). +/// Abstract class. +class InlineContentComment : public Comment { +protected: + InlineContentComment(CommentKind K, + SourceLocation LocBegin, + SourceLocation LocEnd) : + Comment(K, LocBegin, LocEnd) { + InlineContentCommentBits.HasTrailingNewline = 0; + } + +public: + static bool classof(const Comment *C) { + return C->getCommentKind() >= FirstInlineContentCommentConstant && + C->getCommentKind() <= LastInlineContentCommentConstant; + } + + static bool classof(const InlineContentComment *) { return true; } + + void addTrailingNewline() { + InlineContentCommentBits.HasTrailingNewline = 1; + } + + bool hasTrailingNewline() const { + return InlineContentCommentBits.HasTrailingNewline; + } +}; + +/// Plain text. +class TextComment : public InlineContentComment { + StringRef Text; + +public: + TextComment(SourceLocation LocBegin, + SourceLocation LocEnd, + StringRef Text) : + InlineContentComment(TextCommentKind, LocBegin, LocEnd), + Text(Text) + { } + + static bool classof(const Comment *C) { + return C->getCommentKind() == TextCommentKind; + } + + static bool classof(const TextComment *) { return true; } + + child_iterator child_begin() const { return NULL; } + + child_iterator child_end() const { return NULL; } + + StringRef getText() const LLVM_READONLY { return Text; } +}; + +/// A command with word-like arguments that is considered inline content. +class InlineCommandComment : public InlineContentComment { +public: + struct Argument { + SourceRange Range; + StringRef Text; + + Argument(SourceRange Range, StringRef Text) : Range(Range), Text(Text) { } + }; + +protected: + /// Command name. + StringRef Name; + + /// Command arguments. 
+ llvm::ArrayRef<Argument> Args; + +public: + InlineCommandComment(SourceLocation LocBegin, + SourceLocation LocEnd, + StringRef Name, + llvm::ArrayRef<Argument> Args) : + InlineContentComment(InlineCommandCommentKind, LocBegin, LocEnd), + Name(Name), Args(Args) + { } + + static bool classof(const Comment *C) { + return C->getCommentKind() == InlineCommandCommentKind; + } + + static bool classof(const InlineCommandComment *) { return true; } + + child_iterator child_begin() const { return NULL; } + + child_iterator child_end() const { return NULL; } + + StringRef getCommandName() const { + return Name; + } + + SourceRange getCommandNameRange() const { + return SourceRange(getLocStart().getLocWithOffset(-1), + getLocEnd()); + } + + unsigned getArgCount() const { + return Args.size(); + } + + StringRef getArgText(unsigned Idx) const { + return Args[Idx].Text; + } + + SourceRange getArgRange(unsigned Idx) const { + return Args[Idx].Range; + } +}; + +/// Abstract class for opening and closing HTML tags. HTML tags are always +/// treated as inline content (regardless HTML semantics); opening and closing +/// tags are not matched. 
+class HTMLTagComment : public InlineContentComment { +protected: + StringRef TagName; + SourceRange TagNameRange; + + HTMLTagComment(CommentKind K, + SourceLocation LocBegin, + SourceLocation LocEnd, + StringRef TagName, + SourceLocation TagNameBegin, + SourceLocation TagNameEnd) : + InlineContentComment(K, LocBegin, LocEnd), + TagName(TagName), + TagNameRange(TagNameBegin, TagNameEnd) { + setLocation(TagNameBegin); + } + +public: + static bool classof(const Comment *C) { + return C->getCommentKind() >= FirstHTMLTagCommentConstant && + C->getCommentKind() <= LastHTMLTagCommentConstant; + } + + static bool classof(const HTMLTagComment *) { return true; } + + StringRef getTagName() const LLVM_READONLY { return TagName; } + + SourceRange getTagNameSourceRange() const LLVM_READONLY { + SourceLocation L = getLocation(); + return SourceRange(L.getLocWithOffset(1), + L.getLocWithOffset(1 + TagName.size())); + } +}; + +/// An opening HTML tag with attributes. +class HTMLOpenTagComment : public HTMLTagComment { +public: + class Attribute { + public: + SourceLocation NameLocBegin; + StringRef Name; + + SourceLocation EqualsLoc; + + SourceRange ValueRange; + StringRef Value; + + Attribute() { } + + Attribute(SourceLocation NameLocBegin, StringRef Name) : + NameLocBegin(NameLocBegin), Name(Name), + EqualsLoc(SourceLocation()), + ValueRange(SourceRange()), Value(StringRef()) + { } + + Attribute(SourceLocation NameLocBegin, StringRef Name, + SourceLocation EqualsLoc, + SourceRange ValueRange, StringRef Value) : + NameLocBegin(NameLocBegin), Name(Name), + EqualsLoc(EqualsLoc), + ValueRange(ValueRange), Value(Value) + { } + + SourceLocation getNameLocEnd() const { + return NameLocBegin.getLocWithOffset(Name.size()); + } + + SourceRange getNameRange() const { + return SourceRange(NameLocBegin, getNameLocEnd()); + } + }; + +private: + ArrayRef<Attribute> Attributes; + +public: + HTMLOpenTagComment(SourceLocation LocBegin, + StringRef TagName) : + 
HTMLTagComment(HTMLOpenTagCommentKind, + LocBegin, LocBegin.getLocWithOffset(1 + TagName.size()), + TagName, + LocBegin.getLocWithOffset(1), + LocBegin.getLocWithOffset(1 + TagName.size())) + { } + + static bool classof(const Comment *C) { + return C->getCommentKind() == HTMLOpenTagCommentKind; + } + + static bool classof(const HTMLOpenTagComment *) { return true; } + + child_iterator child_begin() const { return NULL; } + + child_iterator child_end() const { return NULL; } + + unsigned getAttrCount() const { + return Attributes.size(); + } + + const Attribute &getAttr(unsigned Idx) const { + return Attributes[Idx]; + } + + void setAttrs(ArrayRef<Attribute> Attrs) { + Attributes = Attrs; + if (!Attrs.empty()) { + const Attribute &Attr = Attrs.back(); + SourceLocation L = Attr.ValueRange.getEnd(); + if (L.isValid()) + Range.setEnd(L); + else { + Range.setEnd(Attr.getNameLocEnd()); + } + } + } + + void setGreaterLoc(SourceLocation GreaterLoc) { + Range.setEnd(GreaterLoc); + } +}; + +/// A closing HTML tag. +class HTMLCloseTagComment : public HTMLTagComment { +public: + HTMLCloseTagComment(SourceLocation LocBegin, + SourceLocation LocEnd, + StringRef TagName) : + HTMLTagComment(HTMLCloseTagCommentKind, + LocBegin, LocEnd, + TagName, + LocBegin.getLocWithOffset(2), + LocBegin.getLocWithOffset(2 + TagName.size())) + { } + + static bool classof(const Comment *C) { + return C->getCommentKind() == HTMLCloseTagCommentKind; + } + + static bool classof(const HTMLCloseTagComment *) { return true; } + + child_iterator child_begin() const { return NULL; } + + child_iterator child_end() const { return NULL; } +}; + +/// Block content (contains inline content). +/// Abstract class. 
+class BlockContentComment : public Comment { +protected: + BlockContentComment(CommentKind K, + SourceLocation LocBegin, + SourceLocation LocEnd) : + Comment(K, LocBegin, LocEnd) + { } + +public: + static bool classof(const Comment *C) { + return C->getCommentKind() >= FirstBlockContentCommentConstant && + C->getCommentKind() <= LastBlockContentCommentConstant; + } + + static bool classof(const BlockContentComment *) { return true; } +}; + +/// A single paragraph that contains inline content. +class ParagraphComment : public BlockContentComment { + llvm::ArrayRef<InlineContentComment *> Content; + +public: + ParagraphComment(llvm::ArrayRef<InlineContentComment *> Content) : + BlockContentComment(ParagraphCommentKind, + SourceLocation(), + SourceLocation()), + Content(Content) { + if (Content.empty()) + return; + + setSourceRange(SourceRange(Content.front()->getLocStart(), + Content.back()->getLocEnd())); + setLocation(Content.front()->getLocStart()); + } + + static bool classof(const Comment *C) { + return C->getCommentKind() == ParagraphCommentKind; + } + + static bool classof(const ParagraphComment *) { return true; } + + child_iterator child_begin() const { + return reinterpret_cast<child_iterator>(Content.begin()); + } + + child_iterator child_end() const { + return reinterpret_cast<child_iterator>(Content.end()); + } +}; + +/// A command that has zero or more word-like arguments (number of word-like +/// arguments depends on command name) and a paragraph as an argument +/// (e. g., \\brief). +class BlockCommandComment : public BlockContentComment { +public: + struct Argument { + SourceRange Range; + StringRef Text; + + Argument() { } + Argument(SourceRange Range, StringRef Text) : Range(Range), Text(Text) { } + }; + +protected: + /// Command name. + StringRef Name; + + /// Word-like arguments. + llvm::ArrayRef<Argument> Args; + + /// Paragraph argument. 
+ ParagraphComment *Paragraph; + + BlockCommandComment(CommentKind K, + SourceLocation LocBegin, + SourceLocation LocEnd, + StringRef Name) : + BlockContentComment(K, LocBegin, LocEnd), + Name(Name), + Paragraph(NULL) { + setLocation(getCommandNameRange().getBegin()); + } + +public: + BlockCommandComment(SourceLocation LocBegin, + SourceLocation LocEnd, + StringRef Name) : + BlockContentComment(BlockCommandCommentKind, LocBegin, LocEnd), + Name(Name), + Paragraph(NULL) { + setLocation(getCommandNameRange().getBegin()); + } + + static bool classof(const Comment *C) { + return C->getCommentKind() == BlockCommandCommentKind; + } + + static bool classof(const BlockCommandComment *) { return true; } + + child_iterator child_begin() const { + return reinterpret_cast<child_iterator>(&Paragraph); + } + + child_iterator child_end() const { + return reinterpret_cast<child_iterator>(&Paragraph + 1); + } + + StringRef getCommandName() const { + return Name; + } + + SourceRange getCommandNameRange() const { + return SourceRange(getLocStart().getLocWithOffset(1), + getLocStart().getLocWithOffset(1 + Name.size())); + } + + unsigned getArgCount() const { + return Args.size(); + } + + StringRef getArgText(unsigned Idx) const { + return Args[Idx].Text; + } + + SourceRange getArgRange(unsigned Idx) const { + return Args[Idx].Range; + } + + void setArgs(llvm::ArrayRef<Argument> A) { + Args = A; + } + + ParagraphComment *getParagraph() const LLVM_READONLY { + return Paragraph; + } + + void setParagraph(ParagraphComment *PC) { + Paragraph = PC; + SourceLocation NewLocEnd = PC->getLocEnd(); + if (NewLocEnd.isValid()) + setSourceRange(SourceRange(getLocStart(), NewLocEnd)); + } +}; + +/// Doxygen \\param command. 
+class ParamCommandComment : public BlockCommandComment { +public: + enum PassDirection { + In, + Out, + InOut + }; + +public: + ParamCommandComment(SourceLocation LocBegin, + SourceLocation LocEnd, + StringRef Name) : + BlockCommandComment(ParamCommandCommentKind, LocBegin, LocEnd, Name) { + ParamCommandCommentBits.Direction = In; + ParamCommandCommentBits.IsDirectionExplicit = false; + } + + static bool classof(const Comment *C) { + return C->getCommentKind() == ParamCommandCommentKind; + } + + static bool classof(const ParamCommandComment *) { return true; } + + PassDirection getDirection() const LLVM_READONLY { + return static_cast<PassDirection>(ParamCommandCommentBits.Direction); + } + + bool isDirectionExplicit() const LLVM_READONLY { + return ParamCommandCommentBits.IsDirectionExplicit; + } + + void setDirection(PassDirection Direction, bool Explicit) { + ParamCommandCommentBits.Direction = Direction; + ParamCommandCommentBits.IsDirectionExplicit = Explicit; + } + + bool hasParamName() const { + return getArgCount() > 0; + } + + StringRef getParamName() const { + return Args[0].Text; + } + + SourceRange getParamNameRange() const { + return Args[0].Range; + } +}; + +/// A line of text contained in a verbatim block. +class VerbatimBlockLineComment : public Comment { + StringRef Text; + +public: + VerbatimBlockLineComment(SourceLocation LocBegin, + StringRef Text) : + Comment(VerbatimBlockLineCommentKind, + LocBegin, + LocBegin.getLocWithOffset(Text.size())), + Text(Text) + { } + + static bool classof(const Comment *C) { + return C->getCommentKind() == VerbatimBlockLineCommentKind; + } + + static bool classof(const VerbatimBlockLineComment *) { return true; } + + child_iterator child_begin() const { return NULL; } + + child_iterator child_end() const { return NULL; } + + StringRef getText() const LLVM_READONLY { + return Text; + } +}; + +/// Verbatim block (e. g., preformatted code). 
Verbatim block has an opening +/// and a closing command and contains multiple lines of text +/// (VerbatimBlockLineComment nodes). +class VerbatimBlockComment : public BlockCommandComment { +protected: + StringRef CloseName; + SourceLocation CloseNameLocBegin; + llvm::ArrayRef<VerbatimBlockLineComment *> Lines; + +public: + VerbatimBlockComment(SourceLocation LocBegin, + SourceLocation LocEnd, + StringRef Name) : + BlockCommandComment(VerbatimBlockCommentKind, + LocBegin, LocEnd, Name) + { } + + static bool classof(const Comment *C) { + return C->getCommentKind() == VerbatimBlockCommentKind; + } + + static bool classof(const VerbatimBlockComment *) { return true; } + + child_iterator child_begin() const { + return reinterpret_cast<child_iterator>(Lines.begin()); + } + + child_iterator child_end() const { + return reinterpret_cast<child_iterator>(Lines.end()); + } + + void setCloseName(StringRef Name, SourceLocation LocBegin) { + CloseName = Name; + CloseNameLocBegin = LocBegin; + } + + void setLines(llvm::ArrayRef<VerbatimBlockLineComment *> L) { + Lines = L; + } + + StringRef getCloseName() const { + return CloseName; + } + + unsigned getLineCount() const { + return Lines.size(); + } + + StringRef getText(unsigned LineIdx) const { + return Lines[LineIdx]->getText(); + } +}; + +/// Verbatim line. Verbatim line has an opening command and a single line of +/// text (up to the newline after the opening command). 
+class VerbatimLineComment : public BlockCommandComment { +protected: + StringRef Text; + SourceLocation TextBegin; + +public: + VerbatimLineComment(SourceLocation LocBegin, + SourceLocation LocEnd, + StringRef Name, + SourceLocation TextBegin, + StringRef Text) : + BlockCommandComment(VerbatimLineCommentKind, + LocBegin, LocEnd, + Name), + Text(Text), + TextBegin(TextBegin) + { } + + static bool classof(const Comment *C) { + return C->getCommentKind() == VerbatimLineCommentKind; + } + + static bool classof(const VerbatimLineComment *) { return true; } + + child_iterator child_begin() const { return NULL; } + + child_iterator child_end() const { return NULL; } + + StringRef getText() const { + return Text; + } + + SourceRange getTextRange() const { + return SourceRange(TextBegin, getLocEnd()); + } +}; + +/// A full comment attached to a declaration, contains block content. +class FullComment : public Comment { + llvm::ArrayRef<BlockContentComment *> Blocks; + +public: + FullComment(llvm::ArrayRef<BlockContentComment *> Blocks) : + Comment(FullCommentKind, SourceLocation(), SourceLocation()), + Blocks(Blocks) { + if (Blocks.empty()) + return; + + setSourceRange(SourceRange(Blocks.front()->getLocStart(), + Blocks.back()->getLocEnd())); + setLocation(Blocks.front()->getLocStart()); + } + + static bool classof(const Comment *C) { + return C->getCommentKind() == FullCommentKind; + } + + static bool classof(const FullComment *) { return true; } + + child_iterator child_begin() const { + return reinterpret_cast<child_iterator>(Blocks.begin()); + } + + child_iterator child_end() const { + return reinterpret_cast<child_iterator>(Blocks.end()); + } +}; + +} // end namespace comments +} // end namespace clang + +#endif + diff --git a/include/clang/AST/CommentLexer.h b/include/clang/AST/CommentLexer.h index 226bc73351..f8dfd278ee 100644 --- a/include/clang/AST/CommentLexer.h +++ b/include/clang/AST/CommentLexer.h @@ -18,12 +18,14 @@ #include "llvm/ADT/StringRef.h" #include 
"llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Allocator.h" #include "llvm/Support/raw_ostream.h" namespace clang { namespace comments { class Lexer; +class TextTokenRetokenizer; namespace tok { enum TokenKind { @@ -41,7 +43,7 @@ enum TokenKind { html_equals, // = html_quoted_string, // "blah\"blah" or 'blah\'blah' html_greater, // > - html_tag_close // </tag> + html_tag_close // </tag }; } // end namespace tok @@ -53,6 +55,7 @@ public: /// \brief Comment token. class Token { friend class Lexer; + friend class TextTokenRetokenizer; /// The location of the token. SourceLocation Loc; @@ -72,6 +75,12 @@ public: SourceLocation getLocation() const LLVM_READONLY { return Loc; } void setLocation(SourceLocation SL) { Loc = SL; } + SourceLocation getEndLocation() const LLVM_READONLY { + if (Length == 0 || Length == 1) + return Loc; + return Loc.getLocWithOffset(Length - 1); + } + tok::TokenKind getKind() const LLVM_READONLY { return Kind; } void setKind(tok::TokenKind K) { Kind = K; } @@ -239,7 +248,10 @@ private: LS_VerbatimLineText, /// Finished lexing \verbatim <TAG \endverbatim part, lexing tag attributes. - LS_HTMLOpenTag + LS_HTMLOpenTag, + + /// Finished lexing \verbatim </TAG \endverbatim part, lexing '>'. + LS_HTMLCloseTag }; /// Current lexing mode. @@ -319,6 +331,8 @@ private: void lexHTMLOpenTag(Token &T); + void setupAndLexHTMLCloseTag(Token &T); + void lexHTMLCloseTag(Token &T); public: @@ -338,6 +352,209 @@ public: void addVerbatimLineCommand(StringRef Name); }; +/// Re-lexes a sequence of tok::text tokens. +class TextTokenRetokenizer { + llvm::BumpPtrAllocator &Allocator; + static const unsigned MaxTokens = 16; + SmallVector<Token, MaxTokens> Toks; + + struct Position { + unsigned CurToken; + const char *BufferStart; + const char *BufferEnd; + const char *BufferPtr; + SourceLocation BufferStartLoc; + }; + + /// Current position in Toks. 
+  Position Pos;
+
+  /// Returns true once the current token index is at or past the number of
+  /// stored tokens, i.e. there is nothing left to read.
+  bool isEnd() const {
+    return Pos.CurToken >= Toks.size();
+  }
+
+  /// Sets up the buffer pointers to point to current token.
+  ///
+  /// The read pointer is reset to the start of the token text and the token's
+  /// location is recorded as the location of the buffer start.
+  void setupBuffer() {
+    assert(Pos.CurToken < Toks.size());
+    const Token &Tok = Toks[Pos.CurToken];
+
+    Pos.BufferStart = Tok.getText().begin();
+    Pos.BufferEnd = Tok.getText().end();
+    Pos.BufferPtr = Pos.BufferStart;
+    Pos.BufferStartLoc = Tok.getLocation();
+  }
+
+  /// Returns the source location of the current read position: the recorded
+  /// buffer start location offset by the number of characters already
+  /// consumed from the current buffer.
+  SourceLocation getSourceLocation() const {
+    const unsigned CharNo = Pos.BufferPtr - Pos.BufferStart;
+    return Pos.BufferStartLoc.getLocWithOffset(CharNo);
+  }
+
+  /// Returns the character at the current position without consuming it.
+  /// Must not be called at the end of input (asserted).
+  char peek() const {
+    assert(!isEnd());
+    assert(Pos.BufferPtr != Pos.BufferEnd);
+    return *Pos.BufferPtr;
+  }
+
+  /// Consumes one character.  When this exhausts the current token's text,
+  /// advances to the next token and, if there is one, re-points the buffer at
+  /// it; otherwise isEnd() becomes true.
+  void consumeChar() {
+    assert(!isEnd());
+    assert(Pos.BufferPtr != Pos.BufferEnd);
+    Pos.BufferPtr++;
+    if (Pos.BufferPtr == Pos.BufferEnd) {
+      Pos.CurToken++;
+      if (Pos.CurToken < Toks.size())
+        setupBuffer();
+    }
+  }
+
+  /// Returns true for space, newline, carriage return, horizontal tab, form
+  /// feed and vertical tab.
+  static bool isWhitespace(char C) {
+    return C == ' ' || C == '\n' || C == '\r' ||
+           C == '\t' || C == '\f' || C == '\v';
+  }
+
+  /// Consumes characters for as long as they are whitespace, stopping at the
+  /// first non-whitespace character or at the end of input.  Because it goes
+  /// through consumeChar(), it may move on to following tokens.
+  void consumeWhitespace() {
+    while (!isEnd()) {
+      if (isWhitespace(peek()))
+        consumeChar();
+      else
+        break;
+    }
+  }
+
+  /// Fills in \p Result as a tok::text token with the given location, length
+  /// and text.
+  ///
+  /// \p TokBegin is accepted but not used by this function.
+  void formTokenWithChars(Token &Result,
+                          SourceLocation Loc,
+                          const char *TokBegin,
+                          unsigned TokLength,
+                          StringRef Text) {
+    Result.setLocation(Loc);
+    Result.setKind(tok::text);
+    Result.setLength(TokLength);
+#ifndef NDEBUG
+    // When NDEBUG is not defined, store a recognizable placeholder in the raw
+    // text fields before setText() is called.
+    Result.TextPtr1 = "<UNSET>";
+    Result.TextLen1 = 7;
+#endif
+    Result.setText(Text);
+  }
+
+public:
+  /// Creates an empty retokenizer that keeps a reference to \p Allocator.
+  ///
+  /// Only the token index is initialized here; the buffer pointers in Pos are
+  /// first set when addToken() stores the first token.
+  TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator):
+      Allocator(Allocator) {
+    Pos.CurToken = 0;
+  }
+
+  /// Add a token.
+  /// Returns true on success, false if it seems like we have enough tokens.
+  ///
+  /// Only tok::text tokens are accepted (asserted), and at most MaxTokens
+  /// tokens are stored.  Adding the first token points the buffer at it.
+  bool addToken(const Token &Tok) {
+    assert(Tok.is(tok::text));
+    if (Toks.size() >= MaxTokens)
+      return false;
+
+    Toks.push_back(Tok);
+    if (Toks.size() == 1)
+      setupBuffer();
+    return true;
+  }
+
+  /// Extract a word -- sequence of non-whitespace characters.
+  ///
+  /// Leading whitespace is skipped first.  The word may continue across the
+  /// boundary between adjacent text tokens.  On success the characters are
+  /// copied into memory obtained from Allocator, \p Tok is filled in as a
+  /// tok::text token and true is returned.  If there is no input left or no
+  /// word could be extracted, the position is restored and false is returned.
+  bool lexWord(Token &Tok) {
+    if (isEnd())
+      return false;
+
+    Position SavedPos = Pos;
+
+    consumeWhitespace();
+    SmallString<32> WordText;
+    const char *WordBegin = Pos.BufferPtr;
+    SourceLocation Loc = getSourceLocation();
+    while (!isEnd()) {
+      const char C = peek();
+      if (!isWhitespace(C)) {
+        WordText.push_back(C);
+        consumeChar();
+      } else
+        break;
+    }
+    const unsigned Length = WordText.size();
+    if (Length == 0) {
+      Pos = SavedPos;
+      return false;
+    }
+
+    char *TextPtr = new (Allocator) char[Length + 1];
+
+    memcpy(TextPtr, WordText.c_str(), Length + 1);
+    StringRef Text = StringRef(TextPtr, Length);
+
+    // Use the number of collected characters as the token length.
+    // Pos.BufferPtr may already point into the *next* token's buffer (see
+    // consumeChar()), so subtracting WordBegin from it is not meaningful.
+    formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
+    return true;
+  }
+
+  /// Extract a sequence that starts with \p OpenDelim and runs up to and
+  /// including the first following \p CloseDelim.
+  ///
+  /// Leading whitespace is skipped first.  On success the characters
+  /// (delimiters included) are copied into memory obtained from Allocator,
+  /// \p Tok is filled in as a tok::text token and true is returned.  If the
+  /// input does not start with \p OpenDelim, or runs out before \p CloseDelim
+  /// is seen, the position is restored and false is returned.
+  bool lexDelimitedSeq(Token &Tok, char OpenDelim, char CloseDelim) {
+    if (isEnd())
+      return false;
+
+    Position SavedPos = Pos;
+
+    consumeWhitespace();
+    SmallString<32> WordText;
+    const char *WordBegin = Pos.BufferPtr;
+    SourceLocation Loc = getSourceLocation();
+    bool Error = false;
+    if (!isEnd()) {
+      const char C = peek();
+      if (C == OpenDelim) {
+        WordText.push_back(C);
+        consumeChar();
+      } else
+        Error = true;
+    }
+
+    // Remember explicitly whether the closing delimiter was seen.  The scan
+    // loop may not run at all (only whitespace was left, or nothing follows
+    // the opening delimiter), so there may be no "last character" to inspect.
+    bool FoundClose = false;
+    while (!Error && !FoundClose && !isEnd()) {
+      const char C = peek();
+      WordText.push_back(C);
+      consumeChar();
+      FoundClose = (C == CloseDelim);
+    }
+
+    if (Error || !FoundClose) {
+      Pos = SavedPos;
+      return false;
+    }
+
+    const unsigned Length = WordText.size();
+    char *TextPtr = new (Allocator) char[Length + 1];
+
+    memcpy(TextPtr, WordText.c_str(), Length + 1);
+    StringRef Text = StringRef(TextPtr, Length);
+
+    // As in lexWord(): Pos.BufferPtr may belong to a different token's buffer
+    // than WordBegin, so use the collected length instead of a pointer
+    // difference.
+    formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
+    return true;
+  }
+
+  /// Return a text token. Useful to take tokens back.
+ bool lexText(Token &Tok) { + if (isEnd()) + return false; + + if (Pos.BufferPtr != Pos.BufferStart) + formTokenWithChars(Tok, getSourceLocation(), + Pos.BufferPtr, Pos.BufferEnd - Pos.BufferPtr, + StringRef(Pos.BufferPtr, + Pos.BufferEnd - Pos.BufferPtr)); + else + Tok = Toks[Pos.CurToken]; + + Pos.CurToken++; + if (Pos.CurToken < Toks.size()) + setupBuffer(); + return true; + } +}; + } // end namespace comments } // end namespace clang diff --git a/include/clang/AST/CommentParser.h b/include/clang/AST/CommentParser.h new file mode 100644 index 0000000000..53c58662bf --- /dev/null +++ b/include/clang/AST/CommentParser.h @@ -0,0 +1,112 @@ +//===--- CommentParser.h - Doxygen comment parser ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Doxygen comment parser. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_COMMENT_PARSER_H +#define LLVM_CLANG_AST_COMMENT_PARSER_H + +#include "clang/AST/CommentLexer.h" +#include "clang/AST/Comment.h" +#include "clang/AST/CommentSema.h" +#include "llvm/Support/Allocator.h" + +namespace clang { +namespace comments { + +/// Doxygen comment parser. +class Parser { + Lexer &L; + + Sema &S; + + llvm::BumpPtrAllocator &Allocator; + + template<typename T> + ArrayRef<T> copyArray(ArrayRef<T> Source) { + size_t Size = Source.size(); + if (Size != 0) { + T *Mem = new (Allocator) T[Size]; + std::copy(Source.begin(), Source.end(), Mem); + return llvm::makeArrayRef(Mem, Size); + } else + return llvm::makeArrayRef(static_cast<T *>(NULL), 0); + } + + /// Current lookahead token. We can safely assume that all tokens are from + /// a single source file. + Token Tok; + + /// A stack of additional lookahead tokens. 
+ SmallVector<Token, 8> MoreLATokens; + + SourceLocation consumeToken() { + SourceLocation Loc = Tok.getLocation(); + if (MoreLATokens.empty()) + L.lex(Tok); + else { + Tok = MoreLATokens.back(); + MoreLATokens.pop_back(); + } + return Loc; + } + + void putBack(const Token &OldTok) { + MoreLATokens.push_back(Tok); + Tok = OldTok; + } + + void putBack(ArrayRef<Token> Toks) { + if (Toks.empty()) + return; + + MoreLATokens.push_back(Tok); + for (const Token *I = &Toks.back(), + *B = &Toks.front() + 1; + I != B; --I) { + MoreLATokens.push_back(*I); + } + + Tok = Toks[0]; + } + +public: + Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator); + + /// Parse arguments for \\param command. + ParamCommandComment *parseParamCommandArgs( + ParamCommandComment *PC, + TextTokenRetokenizer &Retokenizer); + + BlockCommandComment *parseBlockCommandArgs( + BlockCommandComment *BC, + TextTokenRetokenizer &Retokenizer, + unsigned NumArgs); + + BlockCommandComment *parseBlockCommand(); + InlineCommandComment *parseInlineCommand(); + + HTMLOpenTagComment *parseHTMLOpenTag(); + HTMLCloseTagComment *parseHTMLCloseTag(); + + BlockContentComment *parseParagraphOrBlockCommand(); + + VerbatimBlockComment *parseVerbatimBlock(); + VerbatimLineComment *parseVerbatimLine(); + BlockContentComment *parseBlockContent(); + FullComment *parseFullComment(); +}; + +} // end namespace comments +} // end namespace clang + +#endif + diff --git a/include/clang/AST/CommentSema.h b/include/clang/AST/CommentSema.h new file mode 100644 index 0000000000..4d853eb086 --- /dev/null +++ b/include/clang/AST/CommentSema.h @@ -0,0 +1,121 @@ +//===--- CommentSema.h - Doxygen comment semantic analysis ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines the semantic analysis class for Doxygen comments. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_COMMENT_SEMA_H +#define LLVM_CLANG_AST_COMMENT_SEMA_H + +#include "clang/Basic/SourceLocation.h" +#include "clang/AST/Comment.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Allocator.h" + +namespace clang { +namespace comments { + +class Sema { + llvm::BumpPtrAllocator &Allocator; + +public: + Sema(llvm::BumpPtrAllocator &Allocator); + + ParagraphComment *actOnParagraphComment( + ArrayRef<InlineContentComment *> Content); + + BlockCommandComment *actOnBlockCommandStart(SourceLocation LocBegin, + SourceLocation LocEnd, + StringRef Name); + + BlockCommandComment *actOnBlockCommandArgs( + BlockCommandComment *Command, + ArrayRef<BlockCommandComment::Argument> Args); + + BlockCommandComment *actOnBlockCommandFinish(BlockCommandComment *Command, + ParagraphComment *Paragraph); + + ParamCommandComment *actOnParamCommandStart(SourceLocation LocBegin, + SourceLocation LocEnd, + StringRef Name); + + ParamCommandComment *actOnParamCommandArg(ParamCommandComment *Command, + SourceLocation ArgLocBegin, + SourceLocation ArgLocEnd, + StringRef Arg, + bool IsDirection); + + ParamCommandComment *actOnParamCommandFinish(ParamCommandComment *Command, + ParagraphComment *Paragraph); + + InlineCommandComment *actOnInlineCommand(SourceLocation CommandLocBegin, + SourceLocation CommandLocEnd, + StringRef CommandName); + + InlineCommandComment *actOnInlineCommand(SourceLocation CommandLocBegin, + SourceLocation CommandLocEnd, + StringRef CommandName, + SourceLocation ArgLocBegin, + SourceLocation ArgLocEnd, + StringRef Arg); + + InlineContentComment *actOnUnknownCommand(SourceLocation LocBegin, + SourceLocation LocEnd, + StringRef Name); + + TextComment 
*actOnText(SourceLocation LocBegin, + SourceLocation LocEnd, + StringRef Text); + + VerbatimBlockComment *actOnVerbatimBlockStart(SourceLocation Loc, + StringRef Name); + + VerbatimBlockLineComment *actOnVerbatimBlockLine(SourceLocation Loc, + StringRef Text); + + VerbatimBlockComment *actOnVerbatimBlockFinish( + VerbatimBlockComment *Block, + SourceLocation CloseNameLocBegin, + StringRef CloseName, + ArrayRef<VerbatimBlockLineComment *> Lines); + + VerbatimLineComment *actOnVerbatimLine(SourceLocation LocBegin, + StringRef Name, + SourceLocation TextBegin, + StringRef Text); + + HTMLOpenTagComment *actOnHTMLOpenTagStart(SourceLocation LocBegin, + StringRef TagName); + + HTMLOpenTagComment *actOnHTMLOpenTagFinish( + HTMLOpenTagComment *Tag, + ArrayRef<HTMLOpenTagComment::Attribute> Attrs, + SourceLocation GreaterLoc); + + HTMLCloseTagComment *actOnHTMLCloseTag(SourceLocation LocBegin, + SourceLocation LocEnd, + StringRef TagName); + + FullComment *actOnFullComment(ArrayRef<BlockContentComment *> Blocks); + + bool isBlockCommand(StringRef Name); + bool isParamCommand(StringRef Name); + unsigned getBlockCommandNumArgs(StringRef Name); + + bool isInlineCommand(StringRef Name); + bool HTMLOpenTagNeedsClosing(StringRef Name); +}; + +} // end namespace comments +} // end namespace clang + +#endif + diff --git a/include/clang/AST/CommentVisitor.h b/include/clang/AST/CommentVisitor.h new file mode 100644 index 0000000000..159725af26 --- /dev/null +++ b/include/clang/AST/CommentVisitor.h @@ -0,0 +1,63 @@ +//===--- CommentVisitor.h - Visitor for Comment subclasses ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/AST/Comment.h" +#include "llvm/Support/ErrorHandling.h" + +namespace clang { +namespace comments { + +template <typename T> struct make_ptr { typedef T *type; }; +template <typename T> struct make_const_ptr { typedef const T *type; }; + +template<template <typename> class Ptr, typename ImplClass, typename RetTy=void> +class CommentVisitorBase { +public: +#define PTR(CLASS) typename Ptr<CLASS>::type +#define DISPATCH(NAME, CLASS) \ + return static_cast<ImplClass*>(this)->visit ## NAME(static_cast<PTR(CLASS)>(C)) + + RetTy visit(PTR(Comment) C) { + switch (C->getCommentKind()) { + default: llvm_unreachable("Unknown comment kind!"); +#define ABSTRACT_COMMENT(COMMENT) +#define COMMENT(CLASS, PARENT) \ + case Comment::CLASS##Kind: DISPATCH(CLASS, CLASS); +#include "clang/AST/CommentNodes.inc" +#undef ABSTRACT_COMMENT +#undef COMMENT + } + } + + // If the derived class does not implement a certain Visit* method, fall back + // on Visit* method for the superclass. +#define ABSTRACT_COMMENT(COMMENT) COMMENT +#define COMMENT(CLASS, PARENT) \ + RetTy visit ## CLASS(PTR(CLASS) C) { DISPATCH(PARENT, PARENT); } +#include "clang/AST/CommentNodes.inc" +#undef ABSTRACT_COMMENT +#undef COMMENT + + RetTy visitComment(PTR(Comment) C) { return RetTy(); } + +#undef PTR +#undef DISPATCH +}; + +template<typename ImplClass, typename RetTy=void> +class CommentVisitor : + public CommentVisitorBase<make_ptr, ImplClass, RetTy> {}; + +template<typename ImplClass, typename RetTy=void> +class ConstCommentVisitor : + public CommentVisitorBase<make_const_ptr, ImplClass, RetTy> {}; + +} // end namespace comments +} // end namespace clang + diff --git a/include/clang/AST/Makefile b/include/clang/AST/Makefile index 2854b7f406..f6dd4ce6e6 100644 --- a/include/clang/AST/Makefile +++ b/include/clang/AST/Makefile @@ -1,6 +1,6 @@ CLANG_LEVEL := ../../.. 
TD_SRC_DIR = $(PROJ_SRC_DIR)/../Basic -BUILT_SOURCES = Attrs.inc AttrImpl.inc StmtNodes.inc DeclNodes.inc +BUILT_SOURCES = Attrs.inc AttrImpl.inc StmtNodes.inc DeclNodes.inc CommentNodes.inc TABLEGEN_INC_FILES_COMMON = 1 @@ -27,3 +27,9 @@ $(ObjDir)/DeclNodes.inc.tmp : $(TD_SRC_DIR)/DeclNodes.td $(CLANG_TBLGEN) \ $(ObjDir)/.dir $(Echo) "Building Clang declaration node tables with tblgen" $(Verb) $(ClangTableGen) -gen-clang-decl-nodes -o $(call SYSPATH, $@) $< + +$(ObjDir)/CommentNodes.inc.tmp : $(TD_SRC_DIR)/CommentNodes.td $(CLANG_TBLGEN) \ + $(ObjDir)/.dir + $(Echo) "Building Clang comment node tables with tblgen" + $(Verb) $(ClangTableGen) -gen-clang-comment-nodes -o $(call SYSPATH, $@) $< + diff --git a/include/clang/Basic/CommentNodes.td b/include/clang/Basic/CommentNodes.td new file mode 100644 index 0000000000..bcadbace56 --- /dev/null +++ b/include/clang/Basic/CommentNodes.td @@ -0,0 +1,26 @@ +class Comment<bit abstract = 0> { + bit Abstract = abstract; +} + +class DComment<Comment base, bit abstract = 0> : Comment<abstract> { + Comment Base = base; +} + +def InlineContentComment : Comment<1>; + def TextComment : DComment<InlineContentComment>; + def InlineCommandComment : DComment<InlineContentComment>; + def HTMLTagComment : DComment<InlineContentComment, 1>; + def HTMLOpenTagComment : DComment<HTMLTagComment>; + def HTMLCloseTagComment : DComment<HTMLTagComment>; + +def BlockContentComment : Comment<1>; + def ParagraphComment : DComment<BlockContentComment>; + def BlockCommandComment : DComment<BlockContentComment>; + def ParamCommandComment : DComment<BlockCommandComment>; + def VerbatimBlockComment : DComment<BlockCommandComment>; + def VerbatimLineComment : DComment<BlockCommandComment>; + +def VerbatimBlockLineComment : Comment; + +def FullComment : Comment; + diff --git a/lib/ARCMigrate/CMakeLists.txt b/lib/ARCMigrate/CMakeLists.txt index 6527e0fdfc..2cce53d2fd 100644 --- a/lib/ARCMigrate/CMakeLists.txt +++ b/lib/ARCMigrate/CMakeLists.txt @@ -23,6 
+23,7 @@ add_clang_library(clangARCMigrate add_dependencies(clangARCMigrate ClangAttrClasses ClangAttrList + ClangCommentNodes ClangDeclNodes ClangStmtNodes) diff --git a/lib/AST/ASTContext.cpp b/lib/AST/ASTContext.cpp index 66096f34e2..f65f9c0afc 100644 --- a/lib/AST/ASTContext.cpp +++ b/lib/AST/ASTContext.cpp @@ -13,6 +13,9 @@ #include "clang/AST/ASTContext.h" #include "clang/AST/CharUnits.h" +#include "clang/AST/CommentLexer.h" +#include "clang/AST/CommentSema.h" +#include "clang/AST/CommentParser.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclTemplate.h" @@ -149,18 +152,47 @@ const RawComment *ASTContext::getRawCommentForDeclNoCache(const Decl *D) const { const RawComment *ASTContext::getRawCommentForDecl(const Decl *D) const { // Check whether we have cached a comment string for this declaration // already. - llvm::DenseMap<const Decl *, const RawComment *>::iterator Pos + llvm::DenseMap<const Decl *, RawAndParsedComment>::iterator Pos = DeclComments.find(D); - if (Pos != DeclComments.end()) - return Pos->second; + if (Pos != DeclComments.end()) { + RawAndParsedComment C = Pos->second; + return C.first; + } const RawComment *RC = getRawCommentForDeclNoCache(D); // If we found a comment, it should be a documentation comment. 
assert(!RC || RC->isDocumentation()); - DeclComments[D] = RC; + DeclComments[D] = RawAndParsedComment(RC, NULL); return RC; } +comments::FullComment *ASTContext::getCommentForDecl(const Decl *D) const { + llvm::DenseMap<const Decl *, RawAndParsedComment>::iterator Pos + = DeclComments.find(D); + const RawComment *RC; + if (Pos != DeclComments.end()) { + RawAndParsedComment C = Pos->second; + if (comments::FullComment *FC = C.second) + return FC; + RC = C.first; + } else + RC = getRawCommentForDecl(D); + + if (!RC) + return NULL; + + const StringRef RawText = RC->getRawText(SourceMgr); + comments::Lexer L(RC->getSourceRange().getBegin(), comments::CommentOptions(), + RawText.begin(), RawText.end()); + + comments::Sema S(this->BumpAlloc); + comments::Parser P(L, S, this->BumpAlloc); + + comments::FullComment *FC = P.parseFullComment(); + DeclComments[D].second = FC; + return FC; +} + void ASTContext::CanonicalTemplateTemplateParm::Profile(llvm::FoldingSetNodeID &ID, TemplateTemplateParmDecl *Parm) { diff --git a/lib/AST/CMakeLists.txt b/lib/AST/CMakeLists.txt index 5dad60c490..c45f721f97 100644 --- a/lib/AST/CMakeLists.txt +++ b/lib/AST/CMakeLists.txt @@ -8,8 +8,12 @@ add_clang_library(clangAST ASTImporter.cpp AttrImpl.cpp CXXInheritance.cpp + Comment.cpp CommentBriefParser.cpp + CommentDumper.cpp CommentLexer.cpp + CommentParser.cpp + CommentSema.cpp Decl.cpp DeclarationName.cpp DeclBase.cpp @@ -60,6 +64,7 @@ add_dependencies(clangAST ClangAttrList ClangAttrImpl ClangDiagnosticAST + ClangCommentNodes ClangDeclNodes ClangStmtNodes ) diff --git a/lib/AST/Comment.cpp b/lib/AST/Comment.cpp new file mode 100644 index 0000000000..22277ad88a --- /dev/null +++ b/lib/AST/Comment.cpp @@ -0,0 +1,90 @@ +//===--- Comment.cpp - Comment AST node implementation --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/AST/Comment.h" +#include "llvm/Support/ErrorHandling.h" + +namespace clang { +namespace comments { + +const char *Comment::getCommentKindName() const { + switch (getCommentKind()) { + case NoCommentKind: return "NoCommentKind"; +#define ABSTRACT_COMMENT(COMMENT) +#define COMMENT(CLASS, PARENT) \ + case CLASS##Kind: \ + return #CLASS; +#include "clang/AST/CommentNodes.inc" +#undef COMMENT +#undef ABSTRACT_COMMENT + } + llvm_unreachable("Unknown comment kind!"); +} + +namespace { +struct good {}; +struct bad {}; + +template <typename T> +good implements_child_begin_end(Comment::child_iterator (T::*)() const) { + return good(); +} + +static inline bad implements_child_begin_end( + Comment::child_iterator (Comment::*)() const) { + return bad(); +} + +#define ASSERT_IMPLEMENTS_child_begin(function) \ + (void) sizeof(good(implements_child_begin_end(function))) + +static inline void CheckCommentASTNodes() { +#define ABSTRACT_COMMENT(COMMENT) +#define COMMENT(CLASS, PARENT) \ + ASSERT_IMPLEMENTS_child_begin(&CLASS::child_begin); \ + ASSERT_IMPLEMENTS_child_begin(&CLASS::child_end); +#include "clang/AST/CommentNodes.inc" +#undef COMMENT +#undef ABSTRACT_COMMENT +} + +#undef ASSERT_IMPLEMENTS_child_begin + +} // end unnamed namespace + +Comment::child_iterator Comment::child_begin() const { + switch (getCommentKind()) { + case NoCommentKind: llvm_unreachable("comment without a kind"); +#define ABSTRACT_COMMENT(COMMENT) +#define COMMENT(CLASS, PARENT) \ + case CLASS##Kind: \ + return static_cast<const CLASS *>(this)->child_begin(); +#include "clang/AST/CommentNodes.inc" +#undef COMMENT +#undef ABSTRACT_COMMENT + } +} + +Comment::child_iterator Comment::child_end() const { + switch (getCommentKind()) { + case NoCommentKind: llvm_unreachable("comment without a kind"); +#define ABSTRACT_COMMENT(COMMENT) +#define COMMENT(CLASS, PARENT) \ + case CLASS##Kind: \ + return 
static_cast<const CLASS *>(this)->child_end(); +#include "clang/AST/CommentNodes.inc" +#undef COMMENT +#undef ABSTRACT_COMMENT + } +} + + +} // end namespace comments +} // end namespace clang + diff --git a/lib/AST/CommentDumper.cpp b/lib/AST/CommentDumper.cpp new file mode 100644 index 0000000000..fd7a3942a4 --- /dev/null +++ b/lib/AST/CommentDumper.cpp @@ -0,0 +1,206 @@ +//===--- CommentDumper.cpp - Dumping implementation for Comment ASTs ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/CommentVisitor.h" +#include "llvm/Support/raw_ostream.h" + +namespace clang { +namespace comments { + +namespace { +class CommentDumper: public comments::ConstCommentVisitor<CommentDumper> { + raw_ostream &OS; + SourceManager *SM; + unsigned IndentLevel; + +public: + CommentDumper(raw_ostream &OS, SourceManager *SM) : + OS(OS), SM(SM), IndentLevel(0) + { } + + void dumpIndent() const { + for (unsigned i = 1, e = IndentLevel; i < e; ++i) + OS << " "; + } + + void dumpLocation(SourceLocation Loc) { + if (SM) + Loc.print(OS, *SM); + } + + void dumpSourceRange(const Comment *C); + + void dumpComment(const Comment *C); + + void dumpSubtree(const Comment *C); + + // Inline content. + void visitTextComment(const TextComment *C); + void visitInlineCommandComment(const InlineCommandComment *C); + void visitHTMLOpenTagComment(const HTMLOpenTagComment *C); + void visitHTMLCloseTagComment(const HTMLCloseTagComment *C); + + // Block content. 
+ void visitParagraphComment(const ParagraphComment *C); + void visitBlockCommandComment(const BlockCommandComment *C); + void visitParamCommandComment(const ParamCommandComment *C); + void visitVerbatimBlockComment(const VerbatimBlockComment *C); + void visitVerbatimBlockLineComment(const VerbatimBlockLineComment *C); + void visitVerbatimLineComment(const VerbatimLineComment *C); + + void visitFullComment(const FullComment *C); +}; + +void CommentDumper::dumpSourceRange(const Comment *C) { + if (!SM) + return; + + SourceRange SR = C->getSourceRange(); + + OS << " <"; + dumpLocation(SR.getBegin()); + if (SR.getBegin() != SR.getEnd()) { + OS << ", "; + dumpLocation(SR.getEnd()); + } + OS << ">"; +} + +void CommentDumper::dumpComment(const Comment *C) { + dumpIndent(); + OS << "(" << C->getCommentKindName() + << " " << (void *) C; + dumpSourceRange(C); +} + +void CommentDumper::dumpSubtree(const Comment *C) { + ++IndentLevel; + if (C) { + visit(C); + for (Comment::child_iterator I = C->child_begin(), + E = C->child_end(); + I != E; ++I) { + OS << '\n'; + dumpSubtree(*I); + } + OS << ')'; + } else { + dumpIndent(); + OS << "<<<NULL>>>"; + } + --IndentLevel; +} + +void CommentDumper::visitTextComment(const TextComment *C) { + dumpComment(C); + + OS << " Text=\"" << C->getText() << "\""; +} + +void CommentDumper::visitInlineCommandComment(const InlineCommandComment *C) { + dumpComment(C); + + for (unsigned i = 0, e = C->getArgCount(); i != e; ++i) + OS << " Arg[" << i << "]=\"" << C->getArgText(i) << "\""; +} + +void CommentDumper::visitHTMLOpenTagComment(const HTMLOpenTagComment *C) { + dumpComment(C); + + OS << " Name=\"" << C->getTagName() << "\""; + if (C->getAttrCount() != 0) { + OS << " Attrs: "; + for (unsigned i = 0, e = C->getAttrCount(); i != e; ++i) { + const HTMLOpenTagComment::Attribute &Attr = C->getAttr(i); + OS << " \"" << Attr.Name << "=\"" << Attr.Value << "\""; + } + } +} + +void CommentDumper::visitHTMLCloseTagComment(const HTMLCloseTagComment *C) { 
+ dumpComment(C); + + OS << " Name=\"" << C->getTagName() << "\""; +} + +void CommentDumper::visitParagraphComment(const ParagraphComment *C) { + dumpComment(C); +} + +void CommentDumper::visitBlockCommandComment(const BlockCommandComment *C) { + dumpComment(C); + + OS << " Name=\"" << C->getCommandName() << "\""; +} + +void CommentDumper::visitParamCommandComment(const ParamCommandComment *C) { + dumpComment(C); + + switch (C->getDirection()) { + case ParamCommandComment::In: + OS << " [in]"; + break; + case ParamCommandComment::Out: + OS << " [out]"; + break; + case ParamCommandComment::InOut: + OS << " [in,out]"; + break; + } + + if (C->isDirectionExplicit()) + OS << " explicitly"; + else + OS << " implicitly"; + + if (C->hasParamName()) { + OS << " Param=\"" << C->getParamName() << "\""; + } +} + +void CommentDumper::visitVerbatimBlockComment(const VerbatimBlockComment *C) { + dumpComment(C); + + OS << " Name=\"" << C->getCommandName() << "\"" + " CloseName=\"" << C->getCloseName() << "\""; +} + +void CommentDumper::visitVerbatimBlockLineComment(const VerbatimBlockLineComment *C) { + dumpComment(C); + + OS << " Text=\"" << C->getText() << "\""; +} + +void CommentDumper::visitVerbatimLineComment(const VerbatimLineComment *C) { + dumpComment(C); + + OS << " Text=\"" << C->getText() << "\""; +} + +void CommentDumper::visitFullComment(const FullComment *C) { + dumpComment(C); +} + +} // unnamed namespace + +void Comment::dump() const { + CommentDumper D(llvm::errs(), NULL); + D.dumpSubtree(this); + llvm::errs() << '\n'; +} + +void Comment::dump(SourceManager &SM) const { + CommentDumper D(llvm::errs(), &SM); + D.dumpSubtree(this); + llvm::errs() << '\n'; +} + +} // end namespace comments +} // end namespace clang + diff --git a/lib/AST/CommentLexer.cpp b/lib/AST/CommentLexer.cpp index c3a801d924..77d2a9b72d 100644 --- a/lib/AST/CommentLexer.cpp +++ b/lib/AST/CommentLexer.cpp @@ -122,6 +122,7 @@ void Lexer::skipLineStartingDecorations() { } namespace { +/// Returns 
pointer to the first newline character in the string. const char *findNewline(const char *BufferPtr, const char *BufferEnd) { for ( ; BufferPtr != BufferEnd; ++BufferPtr) { const char C = *BufferPtr; @@ -270,6 +271,9 @@ void Lexer::lexCommentText(Token &T) { case LS_HTMLOpenTag: lexHTMLOpenTag(T); return; + case LS_HTMLCloseTag: + lexHTMLCloseTag(T); + return; } assert(State == LS_Normal); @@ -356,7 +360,7 @@ void Lexer::lexCommentText(Token &T) { if (isHTMLIdentifierCharacter(C)) setupAndLexHTMLOpenTag(T); else if (C == '/') - lexHTMLCloseTag(T); + setupAndLexHTMLCloseTag(T); else { StringRef Text(BufferPtr, TokenPtr - BufferPtr); formTokenWithChars(T, TokenPtr, tok::text); @@ -404,6 +408,18 @@ void Lexer::setupAndLexVerbatimBlock(Token &T, formTokenWithChars(T, TextBegin, tok::verbatim_block_begin); T.setVerbatimBlockName(Name); + // If there is a newline following the verbatim opening command, skip the + // newline so that we don't create an tok::verbatim_block_line with empty + // text content. + if (BufferPtr != CommentEnd) { + const char C = *BufferPtr; + if (C == '\n' || C == '\r') { + BufferPtr = skipNewline(BufferPtr, CommentEnd); + State = LS_VerbatimBlockBody; + return; + } + } + State = LS_VerbatimBlockFirstLine; } @@ -419,9 +435,11 @@ void Lexer::lexVerbatimBlockFirstLine(Token &T) { // Look for end command in current line. size_t Pos = Line.find(VerbatimBlockEndCommandName); + const char *TextEnd; const char *NextLine; if (Pos == StringRef::npos) { // Current line is completely verbatim. + TextEnd = Newline; NextLine = skipNewline(Newline, CommentEnd); } else if (Pos == 0) { // Current line contains just an end command. @@ -433,10 +451,11 @@ void Lexer::lexVerbatimBlockFirstLine(Token &T) { return; } else { // There is some text, followed by end command. Extract text first. 
- NextLine = BufferPtr + Pos; + TextEnd = BufferPtr + Pos; + NextLine = TextEnd; } - StringRef Text(BufferPtr, NextLine - BufferPtr); + StringRef Text(BufferPtr, TextEnd - BufferPtr); formTokenWithChars(T, NextLine, tok::verbatim_block_line); T.setVerbatimBlockText(Text); @@ -542,18 +561,26 @@ void Lexer::lexHTMLOpenTag(Token &T) { } } -void Lexer::lexHTMLCloseTag(Token &T) { +void Lexer::setupAndLexHTMLCloseTag(Token &T) { assert(BufferPtr[0] == '<' && BufferPtr[1] == '/'); const char *TagNameBegin = skipWhitespace(BufferPtr + 2, CommentEnd); const char *TagNameEnd = skipHTMLIdentifier(TagNameBegin, CommentEnd); const char *End = skipWhitespace(TagNameEnd, CommentEnd); - if (End != CommentEnd && *End == '>') - End++; formTokenWithChars(T, End, tok::html_tag_close); T.setHTMLTagCloseName(StringRef(TagNameBegin, TagNameEnd - TagNameBegin)); + + if (BufferPtr != CommentEnd && *BufferPtr == '>') + State = LS_HTMLCloseTag; +} + +void Lexer::lexHTMLCloseTag(Token &T) { + assert(BufferPtr != CommentEnd && *BufferPtr == '>'); + + formTokenWithChars(T, BufferPtr + 1, tok::html_greater); + State = LS_Normal; } Lexer::Lexer(SourceLocation FileLoc, const CommentOptions &CommOpts, @@ -595,7 +622,8 @@ again: BufferPtr++; CommentState = LCS_InsideBCPLComment; - State = LS_Normal; + if (State != LS_VerbatimBlockBody && State != LS_VerbatimBlockFirstLine) + State = LS_Normal; CommentEnd = findBCPLCommentEnd(BufferPtr, BufferEnd); goto again; } @@ -628,7 +656,7 @@ again: EndWhitespace++; // Turn any whitespace between comments (and there is only whitespace - // between them) into a newline. We have two newlines between comments + // between them) into a newline. We have two newlines between C comments // in total (first one was synthesized after a comment). 
formTokenWithChars(T, EndWhitespace, tok::newline); diff --git a/lib/AST/CommentParser.cpp b/lib/AST/CommentParser.cpp new file mode 100644 index 0000000000..701b6fa1e1 --- /dev/null +++ b/lib/AST/CommentParser.cpp @@ -0,0 +1,414 @@ +//===--- CommentParser.cpp - Doxygen comment parser -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/CommentParser.h" +#include "clang/AST/CommentSema.h" +#include "llvm/Support/ErrorHandling.h" + +namespace clang { +namespace comments { + +Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator): + L(L), S(S), Allocator(Allocator) { + consumeToken(); +} + +ParamCommandComment *Parser::parseParamCommandArgs( + ParamCommandComment *PC, + TextTokenRetokenizer &Retokenizer) { + Token Arg; + // Check if argument looks like direction specification: [dir] + // e.g., [in], [out], [in,out] + if (Retokenizer.lexDelimitedSeq(Arg, '[', ']')) + PC = S.actOnParamCommandArg(PC, + Arg.getLocation(), + Arg.getEndLocation(), + Arg.getText(), + /* IsDirection = */ true); + + // Report a parameter name only if a word was actually lexed; otherwise Arg + // does not hold a parameter name (it is either untouched or still the + // direction token from above). + if (Retokenizer.lexWord(Arg)) + PC = S.actOnParamCommandArg(PC, + Arg.getLocation(), + Arg.getEndLocation(), + Arg.getText(), + /* IsDirection = */ false); + + return PC; +} + +BlockCommandComment *Parser::parseBlockCommandArgs( + BlockCommandComment *BC, + TextTokenRetokenizer &Retokenizer, + unsigned NumArgs) { + typedef BlockCommandComment::Argument Argument; + Argument *Args = new (Allocator) Argument[NumArgs]; + unsigned ParsedArgs = 0; + Token Arg; + while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) { + Args[ParsedArgs] = Argument(SourceRange(Arg.getLocation(), + Arg.getEndLocation()), + Arg.getText()); + ParsedArgs++; + } + + return S.actOnBlockCommandArgs(BC, 
llvm::makeArrayRef(Args, ParsedArgs)); +} + +BlockCommandComment *Parser::parseBlockCommand() { + assert(Tok.is(tok::command)); + + ParamCommandComment *PC; + BlockCommandComment *BC; + bool IsParam = false; + unsigned NumArgs = 0; + if (S.isParamCommand(Tok.getCommandName())) { + IsParam = true; + PC = S.actOnParamCommandStart(Tok.getLocation(), + Tok.getEndLocation(), + Tok.getCommandName()); + } else { + NumArgs = S.getBlockCommandNumArgs(Tok.getCommandName()); + BC = S.actOnBlockCommandStart(Tok.getLocation(), + Tok.getEndLocation(), + Tok.getCommandName()); + } + consumeToken(); + + if (Tok.is(tok::command) && S.isBlockCommand(Tok.getCommandName())) { + // Block command ahead. We can't nest block commands, so pretend that this + // command has an empty argument. + // TODO: Diag() Warn empty arg to block command + ParagraphComment *Paragraph = S.actOnParagraphComment( + ArrayRef<InlineContentComment *>()); + // Finish whichever command was started above: BC is not set for a + // \\param command, and PC is not set for any other block command. + if (IsParam) + return S.actOnParamCommandFinish(PC, Paragraph); + else + return S.actOnBlockCommandFinish(BC, Paragraph); + } + + if (IsParam || NumArgs > 0) { + // In order to parse command arguments we need to retokenize a few + // following text tokens. + TextTokenRetokenizer Retokenizer(Allocator); + while (Tok.is(tok::text)) { + if (Retokenizer.addToken(Tok)) + consumeToken(); + } + + if (IsParam) + PC = parseParamCommandArgs(PC, Retokenizer); + else + BC = parseBlockCommandArgs(BC, Retokenizer, NumArgs); + + // Put back tokens we didn't use. + Token Text; + while (Retokenizer.lexText(Text)) + putBack(Text); + } + + BlockContentComment *Block = parseParagraphOrBlockCommand(); + // Since we have checked for a block command, we should have parsed a + // paragraph. 
+ if (IsParam) + return S.actOnParamCommandFinish(PC, cast<ParagraphComment>(Block)); + else + return S.actOnBlockCommandFinish(BC, cast<ParagraphComment>(Block)); +} + +InlineCommandComment *Parser::parseInlineCommand() { + assert(Tok.is(tok::command)); + + const Token CommandTok = Tok; + consumeToken(); + + TextTokenRetokenizer Retokenizer(Allocator); + while (Tok.is(tok::text)) { + if (Retokenizer.addToken(Tok)) + consumeToken(); + } + + Token ArgTok; + bool ArgTokValid = Retokenizer.lexWord(ArgTok); + + InlineCommandComment *IC; + if (ArgTokValid) { + IC = S.actOnInlineCommand(CommandTok.getLocation(), + CommandTok.getEndLocation(), + CommandTok.getCommandName(), + ArgTok.getLocation(), + ArgTok.getEndLocation(), + ArgTok.getText()); + } else { + IC = S.actOnInlineCommand(CommandTok.getLocation(), + CommandTok.getEndLocation(), + CommandTok.getCommandName()); + } + + Token Text; + while (Retokenizer.lexText(Text)) + putBack(Text); + + return IC; +} + +HTMLOpenTagComment *Parser::parseHTMLOpenTag() { + assert(Tok.is(tok::html_tag_open)); + HTMLOpenTagComment *HOT = + S.actOnHTMLOpenTagStart(Tok.getLocation(), + Tok.getHTMLTagOpenName()); + consumeToken(); + + SmallVector<HTMLOpenTagComment::Attribute, 2> Attrs; + while (true) { + if (Tok.is(tok::html_ident)) { + Token Ident = Tok; + consumeToken(); + if (Tok.isNot(tok::html_equals)) { + Attrs.push_back(HTMLOpenTagComment::Attribute(Ident.getLocation(), + Ident.getHTMLIdent())); + continue; + } + Token Equals = Tok; + consumeToken(); + if (Tok.isNot(tok::html_quoted_string)) { + // TODO: Diag() expected quoted string + Attrs.push_back(HTMLOpenTagComment::Attribute(Ident.getLocation(), + Ident.getHTMLIdent())); + continue; + } + Attrs.push_back(HTMLOpenTagComment::Attribute( + Ident.getLocation(), + Ident.getHTMLIdent(), + Equals.getLocation(), + SourceRange(Tok.getLocation(), + Tok.getEndLocation()), + Tok.getHTMLQuotedString())); + consumeToken(); + continue; + } else if (Tok.is(tok::html_greater)) { + HOT = 
S.actOnHTMLOpenTagFinish(HOT, + copyArray(llvm::makeArrayRef(Attrs)), + Tok.getLocation()); + consumeToken(); + return HOT; + } else if (Tok.is(tok::html_equals) || + Tok.is(tok::html_quoted_string)) { + // TODO: Diag() Err expected ident + while (Tok.is(tok::html_equals) || + Tok.is(tok::html_quoted_string)) + consumeToken(); + } else { + // Not a token from HTML open tag. Thus HTML tag prematurely ended. + // TODO: Diag() Err HTML tag prematurely ended + return S.actOnHTMLOpenTagFinish(HOT, + copyArray(llvm::makeArrayRef(Attrs)), + SourceLocation()); + } + } +} + +// Parse an HTML closing tag: a tok::html_tag_close token optionally followed +// by tok::html_greater. If the '>' token is missing, a default-constructed +// SourceLocation is passed to Sema as the end location. +HTMLCloseTagComment *Parser::parseHTMLCloseTag() { + assert(Tok.is(tok::html_tag_close)); + Token TokTagOpen = Tok; + consumeToken(); + SourceLocation Loc; + if (Tok.is(tok::html_greater)) { + Loc = Tok.getLocation(); + consumeToken(); + } + + return S.actOnHTMLCloseTag(TokTagOpen.getLocation(), + Loc, + TokTagOpen.getHTMLTagCloseName()); +} + +BlockContentComment *Parser::parseParagraphOrBlockCommand() { + SmallVector<InlineContentComment *, 8> Content; + + while (true) { + switch (Tok.getKind()) { + case tok::verbatim_block_begin: + case tok::verbatim_line_name: + case tok::eof: + assert(Content.size() != 0); + break; // Block content or EOF ahead, finish this paragraph. + + case tok::command: + if (S.isBlockCommand(Tok.getCommandName())) { + if (Content.size() == 0) + return parseBlockCommand(); + break; // Block command ahead, finish this paragraph. + } + if (S.isInlineCommand(Tok.getCommandName())) { + Content.push_back(parseInlineCommand()); + continue; + } + + // Not a block command, not an inline command ==> an unknown command. + Content.push_back(S.actOnUnknownCommand(Tok.getLocation(), + Tok.getEndLocation(), + Tok.getCommandName())); + consumeToken(); + continue; + + case tok::newline: { + consumeToken(); + if (Tok.is(tok::newline) || Tok.is(tok::eof)) { + consumeToken(); + break; // Two newlines -- end of paragraph. 
+ } + if (Content.size() > 0) + Content.back()->addTrailingNewline(); + continue; + } + + // Don't deal with HTML tag soup now. + case tok::html_tag_open: + Content.push_back(parseHTMLOpenTag()); + continue; + + case tok::html_tag_close: + Content.push_back(parseHTMLCloseTag()); + continue; + + case tok::text: + Content.push_back(S.actOnText(Tok.getLocation(), + Tok.getEndLocation(), + Tok.getText())); + consumeToken(); + continue; + + case tok::verbatim_block_line: + case tok::verbatim_block_end: + case tok::verbatim_line_text: + case tok::html_ident: + case tok::html_equals: + case tok::html_quoted_string: + case tok::html_greater: + llvm_unreachable("should not see this token"); + } + break; + } + + return S.actOnParagraphComment(copyArray(llvm::makeArrayRef(Content))); +} + +VerbatimBlockComment *Parser::parseVerbatimBlock() { + assert(Tok.is(tok::verbatim_block_begin)); + + VerbatimBlockComment *VB = + S.actOnVerbatimBlockStart(Tok.getLocation(), + Tok.getVerbatimBlockName()); + consumeToken(); + + // Don't create an empty line if verbatim opening command is followed + // by a newline. + if (Tok.is(tok::newline)) + consumeToken(); + + SmallVector<VerbatimBlockLineComment *, 8> Lines; + while (Tok.is(tok::verbatim_block_line) || + Tok.is(tok::newline)) { + VerbatimBlockLineComment *Line; + if (Tok.is(tok::verbatim_block_line)) { + Line = S.actOnVerbatimBlockLine(Tok.getLocation(), + Tok.getVerbatimBlockText()); + consumeToken(); + if (Tok.is(tok::newline)) { + consumeToken(); + } + } else { + // Empty line, just a tok::newline. 
+ Line = S.actOnVerbatimBlockLine(Tok.getLocation(), + ""); + consumeToken(); + } + Lines.push_back(Line); + } + + assert(Tok.is(tok::verbatim_block_end)); + VB = S.actOnVerbatimBlockFinish(VB, Tok.getLocation(), + Tok.getVerbatimBlockName(), + copyArray(llvm::makeArrayRef(Lines))); + consumeToken(); + + return VB; +} + +VerbatimLineComment *Parser::parseVerbatimLine() { + assert(Tok.is(tok::verbatim_line_name)); + + Token NameTok = Tok; + consumeToken(); + + SourceLocation TextBegin; + StringRef Text; + // Next token might not be a tok::verbatim_line_text if verbatim line + // starting command comes just before a newline or comment end. + if (Tok.is(tok::verbatim_line_text)) { + TextBegin = Tok.getLocation(); + Text = Tok.getVerbatimLineText(); + } else { + TextBegin = NameTok.getEndLocation(); + Text = ""; + } + + VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(), + NameTok.getVerbatimLineName(), + TextBegin, + Text); + consumeToken(); + return VL; +} + +BlockContentComment *Parser::parseBlockContent() { + switch (Tok.getKind()) { + case tok::text: + case tok::command: + case tok::html_tag_open: + case tok::html_tag_close: + return parseParagraphOrBlockCommand(); + + case tok::verbatim_block_begin: + return parseVerbatimBlock(); + + case tok::verbatim_line_name: + return parseVerbatimLine(); + + case tok::eof: + case tok::newline: + case tok::verbatim_block_line: + case tok::verbatim_block_end: + case tok::verbatim_line_text: + case tok::html_ident: + case tok::html_equals: + case tok::html_quoted_string: + case tok::html_greater: + llvm_unreachable("should not see this token"); + } +} + +FullComment *Parser::parseFullComment() { + // Skip newlines at the beginning of the comment. + while (Tok.is(tok::newline)) + consumeToken(); + + SmallVector<BlockContentComment *, 8> Blocks; + while (Tok.isNot(tok::eof)) { + Blocks.push_back(parseBlockContent()); + + // Skip extra newlines after paragraph end. 
+ while (Tok.is(tok::newline)) + consumeToken(); + } + return S.actOnFullComment(copyArray(llvm::makeArrayRef(Blocks))); +} + +} // end namespace comments +} // end namespace clang + + diff --git a/lib/AST/CommentSema.cpp b/lib/AST/CommentSema.cpp new file mode 100644 index 0000000000..1193e0404a --- /dev/null +++ b/lib/AST/CommentSema.cpp @@ -0,0 +1,268 @@ +//===--- CommentSema.cpp - Doxygen comment semantic analysis --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/CommentSema.h" +#include "llvm/ADT/StringSwitch.h" + +namespace clang { +namespace comments { + +Sema::Sema(llvm::BumpPtrAllocator &Allocator) : + Allocator(Allocator) { +} + +ParagraphComment *Sema::actOnParagraphComment( + ArrayRef<InlineContentComment *> Content) { + return new (Allocator) ParagraphComment(Content); +} + +BlockCommandComment *Sema::actOnBlockCommandStart(SourceLocation LocBegin, + SourceLocation LocEnd, + StringRef Name) { + return new (Allocator) BlockCommandComment(LocBegin, LocEnd, Name); +} + +BlockCommandComment *Sema::actOnBlockCommandArgs( + BlockCommandComment *Command, + ArrayRef<BlockCommandComment::Argument> Args) { + Command->setArgs(Args); + return Command; +} + +BlockCommandComment *Sema::actOnBlockCommandFinish( + BlockCommandComment *Command, + ParagraphComment *Paragraph) { + Command->setParagraph(Paragraph); + return Command; +} + +ParamCommandComment *Sema::actOnParamCommandStart(SourceLocation LocBegin, + SourceLocation LocEnd, + StringRef Name) { + return new (Allocator) ParamCommandComment(LocBegin, LocEnd, Name); +} + +ParamCommandComment *Sema::actOnParamCommandArg(ParamCommandComment *Command, + SourceLocation ArgLocBegin, + SourceLocation ArgLocEnd, + StringRef Arg, + bool IsDirection) { + if (IsDirection) { + 
ParamCommandComment::PassDirection Direction; + std::string ArgLower = Arg.lower(); + // TODO: optimize: lower Name first (need an API in SmallString for that), + // after that StringSwitch. + if (ArgLower == "[in]") + Direction = ParamCommandComment::In; + else if (ArgLower == "[out]") + Direction = ParamCommandComment::Out; + else if (ArgLower == "[in,out]" || ArgLower == "[out,in]") + Direction = ParamCommandComment::InOut; + else { + // Remove spaces. + std::string::iterator O = ArgLower.begin(); + for (std::string::iterator I = ArgLower.begin(), E = ArgLower.end(); + I != E; ++I) { + const char C = *I; + if (C != ' ' && C != '\n' && C != '\r' && + C != '\t' && C != '\v' && C != '\f') + *O++ = C; + } + ArgLower.resize(O - ArgLower.begin()); + + bool RemovingWhitespaceHelped = false; + if (ArgLower == "[in]") { + Direction = ParamCommandComment::In; + RemovingWhitespaceHelped = true; + } else if (ArgLower == "[out]") { + Direction = ParamCommandComment::Out; + RemovingWhitespaceHelped = true; + } else if (ArgLower == "[in,out]" || ArgLower == "[out,in]") { + Direction = ParamCommandComment::InOut; + RemovingWhitespaceHelped = true; + } else { + Direction = ParamCommandComment::In; + RemovingWhitespaceHelped = false; + } + // Diag() unrecognized parameter passing direction, valid directions are ... + // if (RemovingWhitespaceHelped) FixIt + } + Command->setDirection(Direction, /* Explicit = */ true); + } else { + if (Command->getArgCount() == 0) { + if (!Command->isDirectionExplicit()) { + // User didn't provide a direction argument. + Command->setDirection(ParamCommandComment::In, /* Explicit = */ false); + } + typedef BlockCommandComment::Argument Argument; + Argument *A = new (Allocator) Argument(SourceRange(ArgLocBegin, + ArgLocEnd), + Arg); + Command->setArgs(llvm::makeArrayRef(A, 1)); + // if (...) 
Diag() unrecognized parameter name + } else { + // Diag() \\param command requires at most 2 arguments + } + } + return Command; +} + +ParamCommandComment *Sema::actOnParamCommandFinish(ParamCommandComment *Command, + ParagraphComment *Paragraph) { + Command->setParagraph(Paragraph); + return Command; +} + +InlineCommandComment *Sema::actOnInlineCommand(SourceLocation CommandLocBegin, + SourceLocation CommandLocEnd, + StringRef CommandName) { + ArrayRef<InlineCommandComment::Argument> Args; + return new (Allocator) InlineCommandComment(CommandLocBegin, + CommandLocEnd, + CommandName, + Args); +} + +InlineCommandComment *Sema::actOnInlineCommand(SourceLocation CommandLocBegin, + SourceLocation CommandLocEnd, + StringRef CommandName, + SourceLocation ArgLocBegin, + SourceLocation ArgLocEnd, + StringRef Arg) { + typedef InlineCommandComment::Argument Argument; + Argument *A = new (Allocator) Argument(SourceRange(ArgLocBegin, + ArgLocEnd), + Arg); + + return new (Allocator) InlineCommandComment(CommandLocBegin, + CommandLocEnd, + CommandName, + llvm::makeArrayRef(A, 1)); +} + +InlineContentComment *Sema::actOnUnknownCommand(SourceLocation LocBegin, + SourceLocation LocEnd, + StringRef Name) { + ArrayRef<InlineCommandComment::Argument> Args; + return new (Allocator) InlineCommandComment(LocBegin, LocEnd, Name, Args); +} + +TextComment *Sema::actOnText(SourceLocation LocBegin, + SourceLocation LocEnd, + StringRef Text) { + return new (Allocator) TextComment(LocBegin, LocEnd, Text); +} + +VerbatimBlockComment *Sema::actOnVerbatimBlockStart(SourceLocation Loc, + StringRef Name) { + return new (Allocator) VerbatimBlockComment( + Loc, + Loc.getLocWithOffset(1 + Name.size()), + Name); +} + +VerbatimBlockLineComment *Sema::actOnVerbatimBlockLine(SourceLocation Loc, + StringRef Text) { + return new (Allocator) VerbatimBlockLineComment(Loc, Text); +} + +VerbatimBlockComment *Sema::actOnVerbatimBlockFinish( + VerbatimBlockComment *Block, + SourceLocation CloseNameLocBegin, + 
StringRef CloseName, + ArrayRef<VerbatimBlockLineComment *> Lines) { + Block->setCloseName(CloseName, CloseNameLocBegin); + Block->setLines(Lines); + return Block; +} + +VerbatimLineComment *Sema::actOnVerbatimLine(SourceLocation LocBegin, + StringRef Name, + SourceLocation TextBegin, + StringRef Text) { + return new (Allocator) VerbatimLineComment( + LocBegin, + TextBegin.getLocWithOffset(Text.size()), + Name, + TextBegin, + Text); +} + +HTMLOpenTagComment *Sema::actOnHTMLOpenTagStart(SourceLocation LocBegin, + StringRef TagName) { + return new (Allocator) HTMLOpenTagComment(LocBegin, TagName); +} + +HTMLOpenTagComment *Sema::actOnHTMLOpenTagFinish( + HTMLOpenTagComment *Tag, + ArrayRef<HTMLOpenTagComment::Attribute> Attrs, + SourceLocation GreaterLoc) { + Tag->setAttrs(Attrs); + Tag->setGreaterLoc(GreaterLoc); + return Tag; +} + +HTMLCloseTagComment *Sema::actOnHTMLCloseTag(SourceLocation LocBegin, + SourceLocation LocEnd, + StringRef TagName) { + return new (Allocator) HTMLCloseTagComment(LocBegin, LocEnd, TagName); +} + +FullComment *Sema::actOnFullComment( + ArrayRef<BlockContentComment *> Blocks) { + return new (Allocator) FullComment(Blocks); +} + +// TODO: tablegen +bool Sema::isBlockCommand(StringRef Name) { + return llvm::StringSwitch<bool>(Name) + .Case("brief", true) + .Case("result", true) + .Case("return", true) + .Case("returns", true) + .Case("author", true) + .Case("authors", true) + .Case("pre", true) + .Case("post", true) + .Default(false) || isParamCommand(Name); +} + +bool Sema::isParamCommand(StringRef Name) { + return llvm::StringSwitch<bool>(Name) + .Case("param", true) + .Case("arg", true) + .Default(false); +} + +unsigned Sema::getBlockCommandNumArgs(StringRef Name) { + return llvm::StringSwitch<unsigned>(Name) + .Case("brief", 0) + .Case("pre", 0) + .Case("post", 0) + .Case("author", 0) + .Case("authors", 0) + .Default(0); +} + +bool Sema::isInlineCommand(StringRef Name) { + return llvm::StringSwitch<bool>(Name) + .Case("c", true) + 
.Case("em", true) + .Default(false); +} + +bool Sema::HTMLOpenTagNeedsClosing(StringRef Name) { + return llvm::StringSwitch<bool>(Name) + .Case("br", true) + .Default(true); +} + +} // end namespace comments +} // end namespace clang + diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index 0a22792bf9..07cc9f03cb 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -22,6 +22,7 @@ add_dependencies(clangAnalysis ClangAttrClasses ClangAttrList ClangDiagnosticAnalysis + ClangCommentNodes ClangDeclNodes ClangStmtNodes ) diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index b659436c43..6aa487d4b6 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -53,6 +53,7 @@ add_clang_library(clangCodeGen add_dependencies(clangCodeGen ClangAttrClasses ClangAttrList + ClangCommentNodes ClangDeclNodes ClangStmtNodes ) diff --git a/lib/Frontend/CMakeLists.txt b/lib/Frontend/CMakeLists.txt index ff658e0248..345dda068b 100644 --- a/lib/Frontend/CMakeLists.txt +++ b/lib/Frontend/CMakeLists.txt @@ -37,6 +37,7 @@ add_dependencies(clangFrontend ClangDiagnosticLex ClangDiagnosticSema ClangDriverOptions + ClangCommentNodes ClangDeclNodes ClangStmtNodes ) diff --git a/lib/Parse/CMakeLists.txt b/lib/Parse/CMakeLists.txt index 0a69a7e1aa..38338bc6b6 100644 --- a/lib/Parse/CMakeLists.txt +++ b/lib/Parse/CMakeLists.txt @@ -17,6 +17,7 @@ add_clang_library(clangParse add_dependencies(clangParse ClangAttrClasses ClangAttrList + ClangCommentNodes ClangDeclNodes ClangDiagnosticParse ClangStmtNodes diff --git a/lib/Rewrite/CMakeLists.txt b/lib/Rewrite/CMakeLists.txt index daf9605834..d8d90fa278 100644 --- a/lib/Rewrite/CMakeLists.txt +++ b/lib/Rewrite/CMakeLists.txt @@ -17,6 +17,7 @@ add_clang_library(clangRewrite add_dependencies(clangRewrite ClangAttrClasses ClangAttrList + ClangCommentNodes ClangDeclNodes ClangStmtNodes ) diff --git a/lib/Sema/CMakeLists.txt b/lib/Sema/CMakeLists.txt index b3ecaef2e7..ad684d3f99 
100644 --- a/lib/Sema/CMakeLists.txt +++ b/lib/Sema/CMakeLists.txt @@ -47,6 +47,7 @@ add_dependencies(clangSema ClangAttrClasses ClangAttrList ClangDiagnosticSema + ClangCommentNodes ClangDeclNodes ClangStmtNodes ClangAttrTemplateInstantiate diff --git a/lib/Serialization/CMakeLists.txt b/lib/Serialization/CMakeLists.txt index f5ade70024..a0f4ac8ba6 100644 --- a/lib/Serialization/CMakeLists.txt +++ b/lib/Serialization/CMakeLists.txt @@ -21,6 +21,7 @@ add_dependencies(clangSerialization ClangDiagnosticLex ClangDiagnosticSema ClangDiagnosticSerialization + ClangCommentNodes ClangDeclNodes ClangStmtNodes ) diff --git a/lib/StaticAnalyzer/Checkers/CMakeLists.txt b/lib/StaticAnalyzer/Checkers/CMakeLists.txt index 79b0539ba0..86eb53361a 100644 --- a/lib/StaticAnalyzer/Checkers/CMakeLists.txt +++ b/lib/StaticAnalyzer/Checkers/CMakeLists.txt @@ -74,6 +74,7 @@ add_dependencies(clangStaticAnalyzerCheckers clangStaticAnalyzerCore ClangAttrClasses ClangAttrList + ClangCommentNodes ClangDeclNodes ClangStmtNodes ClangSACheckers diff --git a/lib/StaticAnalyzer/Core/CMakeLists.txt b/lib/StaticAnalyzer/Core/CMakeLists.txt index 4506d2791a..bf5d1dfe8b 100644 --- a/lib/StaticAnalyzer/Core/CMakeLists.txt +++ b/lib/StaticAnalyzer/Core/CMakeLists.txt @@ -43,6 +43,7 @@ add_clang_library(clangStaticAnalyzerCore add_dependencies(clangStaticAnalyzerCore ClangAttrClasses ClangAttrList + ClangCommentNodes ClangDeclNodes ClangStmtNodes ) diff --git a/lib/StaticAnalyzer/Frontend/CMakeLists.txt b/lib/StaticAnalyzer/Frontend/CMakeLists.txt index d1f12a621f..c45326f6f4 100644 --- a/lib/StaticAnalyzer/Frontend/CMakeLists.txt +++ b/lib/StaticAnalyzer/Frontend/CMakeLists.txt @@ -13,6 +13,7 @@ add_dependencies(clangStaticAnalyzerFrontend clangStaticAnalyzerCore ClangAttrClasses ClangAttrList + ClangCommentNodes ClangDeclNodes ClangStmtNodes ) diff --git a/unittests/AST/CommentLexer.cpp b/unittests/AST/CommentLexer.cpp index 5b4712df25..0a52364987 100644 --- a/unittests/AST/CommentLexer.cpp +++ 
b/unittests/AST/CommentLexer.cpp @@ -359,6 +359,23 @@ TEST_F(CommentLexerTest, DoxygenCommand6) { ASSERT_EQ(tok::newline, Toks[7].getKind()); } +TEST_F(CommentLexerTest, DoxygenCommand7) { + const char *Source = "// \\c\n"; + std::vector<Token> Toks; + + lexString(Source, Toks); + + ASSERT_EQ(3U, Toks.size()); + + ASSERT_EQ(tok::text, Toks[0].getKind()); + ASSERT_EQ(StringRef(" "), Toks[0].getText()); + + ASSERT_EQ(tok::command, Toks[1].getKind()); + ASSERT_EQ(StringRef("c"), Toks[1].getCommandName()); + + ASSERT_EQ(tok::newline, Toks[2].getKind()); +} + // Empty verbatim block. TEST_F(CommentLexerTest, VerbatimBlock1) { const char *Sources[] = { @@ -389,31 +406,45 @@ TEST_F(CommentLexerTest, VerbatimBlock1) { // Empty verbatim block without an end command. TEST_F(CommentLexerTest, VerbatimBlock2) { - const char *Sources[] = { - "/// \\verbatim\n//", - "/** \\verbatim*/" - }; + const char *Source = "/// \\verbatim"; - for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { - std::vector<Token> Toks; + std::vector<Token> Toks; - lexString(Sources[i], Toks); + lexString(Source, Toks); - ASSERT_EQ(4U, Toks.size()); + ASSERT_EQ(3U, Toks.size()); - ASSERT_EQ(tok::text, Toks[0].getKind()); - ASSERT_EQ(StringRef(" "), Toks[0].getText()); + ASSERT_EQ(tok::text, Toks[0].getKind()); + ASSERT_EQ(StringRef(" "), Toks[0].getText()); - ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); - ASSERT_EQ(StringRef("verbatim"), Toks[1].getVerbatimBlockName()); + ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); + ASSERT_EQ(StringRef("verbatim"), Toks[1].getVerbatimBlockName()); - ASSERT_EQ(tok::newline, Toks[2].getKind()); - ASSERT_EQ(tok::newline, Toks[3].getKind()); - } + ASSERT_EQ(tok::newline, Toks[2].getKind()); } -// Single-line verbatim block. +// Empty verbatim block without an end command. 
TEST_F(CommentLexerTest, VerbatimBlock3) { + const char *Source = "/** \\verbatim*/"; + + std::vector<Token> Toks; + + lexString(Source, Toks); + + ASSERT_EQ(4U, Toks.size()); + + ASSERT_EQ(tok::text, Toks[0].getKind()); + ASSERT_EQ(StringRef(" "), Toks[0].getText()); + + ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); + ASSERT_EQ(StringRef("verbatim"), Toks[1].getVerbatimBlockName()); + + ASSERT_EQ(tok::newline, Toks[2].getKind()); + ASSERT_EQ(tok::newline, Toks[3].getKind()); +} + +// Single-line verbatim block. +TEST_F(CommentLexerTest, VerbatimBlock4) { const char *Sources[] = { "/// Meow \\verbatim aaa \\endverbatim\n//", "/** Meow \\verbatim aaa \\endverbatim*/" @@ -444,7 +475,7 @@ TEST_F(CommentLexerTest, VerbatimBlock3) { } // Single-line verbatim block without an end command. -TEST_F(CommentLexerTest, VerbatimBlock4) { +TEST_F(CommentLexerTest, VerbatimBlock5) { const char *Sources[] = { "/// Meow \\verbatim aaa \n//", "/** Meow \\verbatim aaa */" @@ -471,8 +502,96 @@ TEST_F(CommentLexerTest, VerbatimBlock4) { } } +TEST_F(CommentLexerTest, VerbatimBlock6) { + const char *Source = + "// \\verbatim\n" + "// Aaa\n" + "//\n" + "// Bbb\n" + "// \\endverbatim\n"; + + std::vector<Token> Toks; + + lexString(Source, Toks); + + ASSERT_EQ(11U, Toks.size()); + + ASSERT_EQ(tok::text, Toks[0].getKind()); + ASSERT_EQ(StringRef(" "), Toks[0].getText()); + + ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); + ASSERT_EQ(StringRef("verbatim"), Toks[1].getVerbatimBlockName()); + + ASSERT_EQ(tok::newline, Toks[2].getKind()); + + ASSERT_EQ(tok::verbatim_block_line, Toks[3].getKind()); + ASSERT_EQ(StringRef(" Aaa"), Toks[3].getVerbatimBlockText()); + + ASSERT_EQ(tok::newline, Toks[4].getKind()); + + ASSERT_EQ(tok::newline, Toks[5].getKind()); + + ASSERT_EQ(tok::verbatim_block_line, Toks[6].getKind()); + ASSERT_EQ(StringRef(" Bbb"), Toks[6].getVerbatimBlockText()); + + ASSERT_EQ(tok::newline, Toks[7].getKind()); + + ASSERT_EQ(tok::verbatim_block_line, 
Toks[8].getKind()); + ASSERT_EQ(StringRef(" "), Toks[8].getVerbatimBlockText()); + + ASSERT_EQ(tok::verbatim_block_end, Toks[9].getKind()); + ASSERT_EQ(StringRef("endverbatim"), Toks[9].getVerbatimBlockName()); + + ASSERT_EQ(tok::newline, Toks[10].getKind()); +} + +TEST_F(CommentLexerTest, VerbatimBlock7) { + const char *Source = + "/* \\verbatim\n" + " * Aaa\n" + " *\n" + " * Bbb\n" + " * \\endverbatim\n" + " */"; + + std::vector<Token> Toks; + + lexString(Source, Toks); + + ASSERT_EQ(11U, Toks.size()); + + ASSERT_EQ(tok::text, Toks[0].getKind()); + ASSERT_EQ(StringRef(" "), Toks[0].getText()); + + ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); + ASSERT_EQ(StringRef("verbatim"), Toks[1].getVerbatimBlockName()); + + ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind()); + ASSERT_EQ(StringRef(" Aaa"), Toks[2].getVerbatimBlockText()); + + ASSERT_EQ(tok::verbatim_block_line, Toks[3].getKind()); + ASSERT_EQ(StringRef(""), Toks[3].getVerbatimBlockText()); + + ASSERT_EQ(tok::verbatim_block_line, Toks[4].getKind()); + ASSERT_EQ(StringRef(" Bbb"), Toks[4].getVerbatimBlockText()); + + ASSERT_EQ(tok::verbatim_block_line, Toks[5].getKind()); + ASSERT_EQ(StringRef(" "), Toks[5].getVerbatimBlockText()); + + ASSERT_EQ(tok::verbatim_block_end, Toks[6].getKind()); + ASSERT_EQ(StringRef("endverbatim"), Toks[6].getVerbatimBlockName()); + + ASSERT_EQ(tok::newline, Toks[7].getKind()); + + ASSERT_EQ(tok::text, Toks[8].getKind()); + ASSERT_EQ(StringRef(" "), Toks[8].getText()); + + ASSERT_EQ(tok::newline, Toks[9].getKind()); + ASSERT_EQ(tok::newline, Toks[10].getKind()); +} + // Complex test for verbatim blocks. 
-TEST_F(CommentLexerTest, VerbatimBlock5) { +TEST_F(CommentLexerTest, VerbatimBlock8) { const char *Source = "/* Meow \\verbatim aaa\\$\\@\n" "bbb \\endverbati\r" @@ -492,13 +611,13 @@ TEST_F(CommentLexerTest, VerbatimBlock5) { ASSERT_EQ(StringRef("verbatim"), Toks[1].getVerbatimBlockName()); ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind()); - ASSERT_EQ(StringRef(" aaa\\$\\@\n"), Toks[2].getVerbatimBlockText()); + ASSERT_EQ(StringRef(" aaa\\$\\@"), Toks[2].getVerbatimBlockText()); ASSERT_EQ(tok::verbatim_block_line, Toks[3].getKind()); - ASSERT_EQ(StringRef("bbb \\endverbati\r"), Toks[3].getVerbatimBlockText()); + ASSERT_EQ(StringRef("bbb \\endverbati"), Toks[3].getVerbatimBlockText()); ASSERT_EQ(tok::verbatim_block_line, Toks[4].getKind()); - ASSERT_EQ(StringRef("ccc\r\n"), Toks[4].getVerbatimBlockText()); + ASSERT_EQ(StringRef("ccc"), Toks[4].getVerbatimBlockText()); ASSERT_EQ(tok::verbatim_block_line, Toks[5].getKind()); ASSERT_EQ(StringRef("ddd "), Toks[5].getVerbatimBlockText()); @@ -513,7 +632,7 @@ TEST_F(CommentLexerTest, VerbatimBlock5) { ASSERT_EQ(StringRef("verbatim"), Toks[8].getVerbatimBlockName()); ASSERT_EQ(tok::verbatim_block_line, Toks[9].getKind()); - ASSERT_EQ(StringRef(" eee\n"), Toks[9].getVerbatimBlockText()); + ASSERT_EQ(StringRef(" eee"), Toks[9].getVerbatimBlockText()); ASSERT_EQ(tok::verbatim_block_end, Toks[10].getKind()); ASSERT_EQ(StringRef("endverbatim"), Toks[10].getVerbatimBlockName()); @@ -526,7 +645,7 @@ TEST_F(CommentLexerTest, VerbatimBlock5) { } // LaTeX verbatim blocks. 
-TEST_F(CommentLexerTest, VerbatimBlock6) { +TEST_F(CommentLexerTest, VerbatimBlock9) { const char *Source = "/// \\f$ Aaa \\f$ \\f[ Bbb \\f] \\f{ Ccc \\f}"; std::vector<Token> Toks; @@ -968,8 +1087,25 @@ TEST_F(CommentLexerTest, HTML12) { } TEST_F(CommentLexerTest, HTML13) { + const char *Source = "// </tag"; + + std::vector<Token> Toks; + + lexString(Source, Toks); + + ASSERT_EQ(3U, Toks.size()); + + ASSERT_EQ(tok::text, Toks[0].getKind()); + ASSERT_EQ(StringRef(" "), Toks[0].getText()); + + ASSERT_EQ(tok::html_tag_close, Toks[1].getKind()); + ASSERT_EQ(StringRef("tag"), Toks[1].getHTMLTagCloseName()); + + ASSERT_EQ(tok::newline, Toks[2].getKind()); +} + +TEST_F(CommentLexerTest, HTML14) { const char *Sources[] = { - "// </tag", "// </tag>", "// </ tag>", "// </ tag >" @@ -980,7 +1116,7 @@ TEST_F(CommentLexerTest, HTML13) { lexString(Sources[i], Toks); - ASSERT_EQ(3U, Toks.size()); + ASSERT_EQ(4U, Toks.size()); ASSERT_EQ(tok::text, Toks[0].getKind()); ASSERT_EQ(StringRef(" "), Toks[0].getText()); @@ -988,7 +1124,9 @@ TEST_F(CommentLexerTest, HTML13) { ASSERT_EQ(tok::html_tag_close, Toks[1].getKind()); ASSERT_EQ(StringRef("tag"), Toks[1].getHTMLTagCloseName()); - ASSERT_EQ(tok::newline, Toks[2].getKind()); + ASSERT_EQ(tok::html_greater, Toks[2].getKind()); + + ASSERT_EQ(tok::newline, Toks[3].getKind()); } } diff --git a/unittests/AST/CommentParser.cpp b/unittests/AST/CommentParser.cpp new file mode 100644 index 0000000000..d5dd0a9c56 --- /dev/null +++ b/unittests/AST/CommentParser.cpp @@ -0,0 +1,1126 @@ +//===- unittests/AST/CommentParser.cpp ------ Comment parser tests --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/FileManager.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/AST/Comment.h" +#include "clang/AST/CommentLexer.h" +#include "clang/AST/CommentParser.h" +#include "clang/AST/CommentSema.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Allocator.h" +#include <vector> + +#include "gtest/gtest.h" + +using namespace llvm; +using namespace clang; + +namespace clang { +namespace comments { + +namespace { + +const bool DEBUG = true; + +class CommentParserTest : public ::testing::Test { +protected: + CommentParserTest() + : FileMgr(FileMgrOpts), + DiagID(new DiagnosticIDs()), + Diags(DiagID, new IgnoringDiagConsumer()), + SourceMgr(Diags, FileMgr) { + } + + FileSystemOptions FileMgrOpts; + FileManager FileMgr; + IntrusiveRefCntPtr<DiagnosticIDs> DiagID; + DiagnosticsEngine Diags; + SourceManager SourceMgr; + llvm::BumpPtrAllocator Allocator; + + FullComment *parseString(const char *Source); +}; + +FullComment *CommentParserTest::parseString(const char *Source) { + MemoryBuffer *Buf = MemoryBuffer::getMemBuffer(Source); + FileID File = SourceMgr.createFileIDForMemBuffer(Buf); + SourceLocation Begin = SourceMgr.getLocForStartOfFile(File); + + comments::Lexer L(Begin, CommentOptions(), + Source, Source + strlen(Source)); + + comments::Sema S(Allocator); + comments::Parser P(L, S, Allocator); + comments::FullComment *FC = P.parseFullComment(); + + if (DEBUG) { + llvm::errs() << "=== Source:\n" << Source << "\n=== AST:\n"; + FC->dump(SourceMgr); + } + + Token Tok; + L.lex(Tok); + if (Tok.is(tok::eof)) + return FC; + else + return NULL; +} + +::testing::AssertionResult HasChildCount(const Comment *C, size_t Count) { + if (!C) + return ::testing::AssertionFailure() << "Comment is NULL"; + + if (Count != C->child_count()) + return ::testing::AssertionFailure() + << "Count = " << Count + << ", child_count = " << 
C->child_count(); + + return ::testing::AssertionSuccess(); +} + +template <typename T> +::testing::AssertionResult GetChildAt(const Comment *C, + size_t Idx, + T *&Child) { + if (!C) + return ::testing::AssertionFailure() << "Comment is NULL"; + + if (Idx >= C->child_count()) + return ::testing::AssertionFailure() + << "Idx out of range. Idx = " << Idx + << ", child_count = " << C->child_count(); + + Comment::child_iterator I = C->child_begin() + Idx; + Comment *CommentChild = *I; + if (!CommentChild) + return ::testing::AssertionFailure() << "Child is NULL"; + + Child = dyn_cast<T>(CommentChild); + if (!Child) + return ::testing::AssertionFailure() + << "Child is not of requested type, but a " + << CommentChild->getCommentKindName(); + + return ::testing::AssertionSuccess(); +} + +::testing::AssertionResult HasTextAt(const Comment *C, + size_t Idx, + StringRef Text) { + TextComment *TC; + ::testing::AssertionResult AR = GetChildAt(C, Idx, TC); + if (!AR) + return AR; + + StringRef ActualText = TC->getText(); + if (ActualText != Text) + return ::testing::AssertionFailure() + << "TextComment has text \"" << ActualText.str() << "\", " + "expected \"" << Text.str() << "\""; + + if (TC->hasTrailingNewline()) + return ::testing::AssertionFailure() + << "TextComment has a trailing newline"; + + return ::testing::AssertionSuccess(); +} + +::testing::AssertionResult HasTextWithNewlineAt(const Comment *C, + size_t Idx, + StringRef Text) { + TextComment *TC; + ::testing::AssertionResult AR = GetChildAt(C, Idx, TC); + if (!AR) + return AR; + + StringRef ActualText = TC->getText(); + if (ActualText != Text) + return ::testing::AssertionFailure() + << "TextComment has text \"" << ActualText.str() << "\", " + "expected \"" << Text.str() << "\""; + + if (!TC->hasTrailingNewline()) + return ::testing::AssertionFailure() + << "TextComment has no trailing newline"; + + return ::testing::AssertionSuccess(); +} + +::testing::AssertionResult HasBlockCommandAt(const Comment *C, + 
size_t Idx, + BlockCommandComment *&BCC, + StringRef Name, + ParagraphComment *&Paragraph) { + ::testing::AssertionResult AR = GetChildAt(C, Idx, BCC); + if (!AR) + return AR; + + StringRef ActualName = BCC->getCommandName(); + if (ActualName != Name) + return ::testing::AssertionFailure() + << "BlockCommandComment has name \"" << ActualName.str() << "\", " + "expected \"" << Name.str() << "\""; + + Paragraph = BCC->getParagraph(); + + return ::testing::AssertionSuccess(); +} + +::testing::AssertionResult HasParamCommandAt( + const Comment *C, + size_t Idx, + ParamCommandComment *&PCC, + StringRef CommandName, + ParamCommandComment::PassDirection Direction, + bool IsDirectionExplicit, + StringRef ParamName, + ParagraphComment *&Paragraph) { + ::testing::AssertionResult AR = GetChildAt(C, Idx, PCC); + if (!AR) + return AR; + + StringRef ActualCommandName = PCC->getCommandName(); + if (ActualCommandName != CommandName) + return ::testing::AssertionFailure() + << "ParamCommandComment has name \"" << ActualCommandName.str() << "\", " + "expected \"" << CommandName.str() << "\""; + + if (PCC->getDirection() != Direction) + return ::testing::AssertionFailure() + << "ParamCommandComment has direction " << PCC->getDirection() << ", " + "expected " << Direction; + + if (PCC->isDirectionExplicit() != IsDirectionExplicit) + return ::testing::AssertionFailure() + << "ParamCommandComment has " + << (PCC->isDirectionExplicit() ? "explicit" : "implicit") + << " direction, " + "expected " << (IsDirectionExplicit ? 
"explicit" : "implicit"); + + StringRef ActualParamName = PCC->getParamName(); + if (ActualParamName != ParamName) + return ::testing::AssertionFailure() + << "ParamCommandComment has name \"" << ActualParamName.str() << "\", " + "expected \"" << ParamName.str() << "\""; + + Paragraph = PCC->getParagraph(); + + return ::testing::AssertionSuccess(); +} + +::testing::AssertionResult HasInlineCommandAt(const Comment *C, + size_t Idx, + InlineCommandComment *&ICC, + StringRef Name) { + ::testing::AssertionResult AR = GetChildAt(C, Idx, ICC); + if (!AR) + return AR; + + StringRef ActualName = ICC->getCommandName(); + if (ActualName != Name) + return ::testing::AssertionFailure() + << "InlineCommandComment has name \"" << ActualName.str() << "\", " + "expected \"" << Name.str() << "\""; + + return ::testing::AssertionSuccess(); +} + +struct NoArgs {}; + +::testing::AssertionResult HasInlineCommandAt(const Comment *C, + size_t Idx, + InlineCommandComment *&ICC, + StringRef Name, + NoArgs) { + ::testing::AssertionResult AR = HasInlineCommandAt(C, Idx, ICC, Name); + if (!AR) + return AR; + + if (ICC->getArgCount() != 0) + return ::testing::AssertionFailure() + << "InlineCommandComment has " << ICC->getArgCount() << " arg(s), " + "expected 0"; + + return ::testing::AssertionSuccess(); +} + +::testing::AssertionResult HasInlineCommandAt(const Comment *C, + size_t Idx, + InlineCommandComment *&ICC, + StringRef Name, + StringRef Arg) { + ::testing::AssertionResult AR = HasInlineCommandAt(C, Idx, ICC, Name); + if (!AR) + return AR; + + if (ICC->getArgCount() != 1) + return ::testing::AssertionFailure() + << "InlineCommandComment has " << ICC->getArgCount() << " arg(s), " + "expected 1"; + + StringRef ActualArg = ICC->getArgText(0); + if (ActualArg != Arg) + return ::testing::AssertionFailure() + << "InlineCommandComment has argument \"" << ActualArg.str() << "\", " + "expected \"" << Arg.str() << "\""; + + return ::testing::AssertionSuccess(); +} + +::testing::AssertionResult 
HasHTMLOpenTagAt(const Comment *C, + size_t Idx, + HTMLOpenTagComment *&HOT, + StringRef TagName) { + ::testing::AssertionResult AR = GetChildAt(C, Idx, HOT); + if (!AR) + return AR; + + StringRef ActualTagName = HOT->getTagName(); + if (ActualTagName != TagName) + return ::testing::AssertionFailure() + << "HTMLOpenTagComment has name \"" << ActualTagName.str() << "\", " + "expected \"" << TagName.str() << "\""; + + return ::testing::AssertionSuccess(); +} + +struct NoAttrs {}; + +::testing::AssertionResult HasHTMLOpenTagAt(const Comment *C, + size_t Idx, + HTMLOpenTagComment *&HOT, + StringRef TagName, + NoAttrs) { + ::testing::AssertionResult AR = HasHTMLOpenTagAt(C, Idx, HOT, TagName); + if (!AR) + return AR; + + if (HOT->getAttrCount() != 0) + return ::testing::AssertionFailure() + << "HTMLOpenTagComment has " << HOT->getAttrCount() << " attr(s), " + "expected 0"; + + return ::testing::AssertionSuccess(); +} + +::testing::AssertionResult HasHTMLOpenTagAt(const Comment *C, + size_t Idx, + HTMLOpenTagComment *&HOT, + StringRef TagName, + StringRef AttrName, + StringRef AttrValue) { + ::testing::AssertionResult AR = HasHTMLOpenTagAt(C, Idx, HOT, TagName); + if (!AR) + return AR; + + if (HOT->getAttrCount() != 1) + return ::testing::AssertionFailure() + << "HTMLOpenTagComment has " << HOT->getAttrCount() << " attr(s), " + "expected 1"; + + StringRef ActualName = HOT->getAttr(0).Name; + if (ActualName != AttrName) + return ::testing::AssertionFailure() + << "HTMLOpenTagComment has attr \"" << ActualName.str() << "\", " + "expected \"" << AttrName.str() << "\""; + + StringRef ActualValue = HOT->getAttr(0).Value; + if (ActualValue != AttrValue) + return ::testing::AssertionFailure() + << "HTMLOpenTagComment has attr value \"" << ActualValue.str() << "\", " + "expected \"" << AttrValue.str() << "\""; + + return ::testing::AssertionSuccess(); +} + +::testing::AssertionResult HasHTMLCloseTagAt(const Comment *C, + size_t Idx, + HTMLCloseTagComment *&HCT, + StringRef 
TagName) { + ::testing::AssertionResult AR = GetChildAt(C, Idx, HCT); + if (!AR) + return AR; + + StringRef ActualTagName = HCT->getTagName(); + if (ActualTagName != TagName) + return ::testing::AssertionFailure() + << "HTMLCloseTagComment has name \"" << ActualTagName.str() << "\", " + "expected \"" << TagName.str() << "\""; + + return ::testing::AssertionSuccess(); +} + +::testing::AssertionResult HasVerbatimBlockAt(const Comment *C, + size_t Idx, + VerbatimBlockComment *&VBC, + StringRef Name) { + ::testing::AssertionResult AR = GetChildAt(C, Idx, VBC); + if (!AR) + return AR; + + StringRef ActualName = VBC->getCommandName(); + if (ActualName != Name) + return ::testing::AssertionFailure() + << "VerbatimBlockComment has name \"" << ActualName.str() << "\", " + "expected \"" << Name.str() << "\""; + + return ::testing::AssertionSuccess(); +} + +struct NoLines {}; + +::testing::AssertionResult HasVerbatimBlockAt(const Comment *C, + size_t Idx, + VerbatimBlockComment *&VBC, + StringRef Name, + NoLines) { + ::testing::AssertionResult AR = HasVerbatimBlockAt(C, Idx, VBC, Name); + if (!AR) + return AR; + + if (VBC->getLineCount() != 0) + return ::testing::AssertionFailure() + << "VerbatimBlockComment has " << VBC->getLineCount() << " lines(s), " + "expected 0"; + + return ::testing::AssertionSuccess(); +} + +::testing::AssertionResult HasVerbatimBlockAt(const Comment *C, + size_t Idx, + VerbatimBlockComment *&VBC, + StringRef Name, + StringRef Line0) { + ::testing::AssertionResult AR = HasVerbatimBlockAt(C, Idx, VBC, Name); + if (!AR) + return AR; + + if (VBC->getLineCount() != 1) + return ::testing::AssertionFailure() + << "VerbatimBlockComment has " << VBC->getLineCount() << " lines(s), " + "expected 1"; + + StringRef ActualLine0 = VBC->getText(0); + if (ActualLine0 != Line0) + return ::testing::AssertionFailure() + << "VerbatimBlockComment has lines[0] \"" << ActualLine0.str() << "\", " + "expected \"" << Line0.str() << "\""; + + return 
::testing::AssertionSuccess(); +} + +::testing::AssertionResult HasVerbatimBlockAt(const Comment *C, + size_t Idx, + VerbatimBlockComment *&VBC, + StringRef Name, + StringRef Line0, + StringRef Line1) { + ::testing::AssertionResult AR = HasVerbatimBlockAt(C, Idx, VBC, Name); + if (!AR) + return AR; + + if (VBC->getLineCount() != 2) + return ::testing::AssertionFailure() + << "VerbatimBlockComment has " << VBC->getLineCount() << " lines(s), " + "expected 2"; + + StringRef ActualLine0 = VBC->getText(0); + if (ActualLine0 != Line0) + return ::testing::AssertionFailure() + << "VerbatimBlockComment has lines[0] \"" << ActualLine0.str() << "\", " + "expected \"" << Line0.str() << "\""; + + StringRef ActualLine1 = VBC->getText(1); + if (ActualLine1 != Line1) + return ::testing::AssertionFailure() + << "VerbatimBlockComment has lines[1] \"" << ActualLine1.str() << "\", " + "expected \"" << Line1.str() << "\""; + + return ::testing::AssertionSuccess(); +} + +::testing::AssertionResult HasVerbatimLineAt(const Comment *C, + size_t Idx, + VerbatimLineComment *&VLC, + StringRef Name, + StringRef Text) { + ::testing::AssertionResult AR = GetChildAt(C, Idx, VLC); + if (!AR) + return AR; + + StringRef ActualName = VLC->getCommandName(); + if (ActualName != Name) + return ::testing::AssertionFailure() + << "VerbatimLineComment has name \"" << ActualName.str() << "\", " + "expected \"" << Name.str() << "\""; + + StringRef ActualText = VLC->getText(); + if (ActualText != Text) + return ::testing::AssertionFailure() + << "VerbatimLineComment has text \"" << ActualText.str() << "\", " + "expected \"" << Text.str() << "\""; + + return ::testing::AssertionSuccess(); +} + + +TEST_F(CommentParserTest, Basic1) { + const char *Source = "//"; + + FullComment *FC = parseString(Source); + ASSERT_TRUE(HasChildCount(FC, 0)); +} + +TEST_F(CommentParserTest, Basic2) { + const char *Source = "// Meow"; + + FullComment *FC = parseString(Source); + ASSERT_TRUE(HasChildCount(FC, 1)); + + { + 
ParagraphComment *PC; + ASSERT_TRUE(GetChildAt(FC, 0, PC)); + + ASSERT_TRUE(HasChildCount(PC, 1)); + ASSERT_TRUE(HasTextAt(PC, 0, " Meow")); + } +} + +TEST_F(CommentParserTest, Basic3) { + const char *Source = + "// Aaa\n" + "// Bbb"; + + FullComment *FC = parseString(Source); + ASSERT_TRUE(HasChildCount(FC, 1)); + + { + ParagraphComment *PC; + ASSERT_TRUE(GetChildAt(FC, 0, PC)); + + ASSERT_TRUE(HasChildCount(PC, 2)); + ASSERT_TRUE(HasTextWithNewlineAt(PC, 0, " Aaa")); + ASSERT_TRUE(HasTextAt(PC, 1, " Bbb")); + } +} + +TEST_F(CommentParserTest, Paragraph1) { + const char *Sources[] = { + "// Aaa\n" + "//\n" + "// Bbb", + + "// Aaa\n" + "//\n" + "//\n" + "// Bbb", + }; + + + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { + FullComment *FC = parseString(Sources[i]); + ASSERT_TRUE(HasChildCount(FC, 2)); + + { + ParagraphComment *PC; + ASSERT_TRUE(GetChildAt(FC, 0, PC)); + + ASSERT_TRUE(HasChildCount(PC, 1)); + ASSERT_TRUE(HasTextAt(PC, 0, " Aaa")); + } + { + ParagraphComment *PC; + ASSERT_TRUE(GetChildAt(FC, 1, PC)); + + ASSERT_TRUE(HasChildCount(PC, 1)); + ASSERT_TRUE(HasTextAt(PC, 0, " Bbb")); + } + } +} + +TEST_F(CommentParserTest, Paragraph2) { + const char *Source = + "// \\brief Aaa\n" + "//\n" + "// Bbb"; + + FullComment *FC = parseString(Source); + ASSERT_TRUE(HasChildCount(FC, 3)); + + { + ParagraphComment *PC; + ASSERT_TRUE(GetChildAt(FC, 0, PC)); + + ASSERT_TRUE(HasChildCount(PC, 1)); + ASSERT_TRUE(HasTextAt(PC, 0, " ")); + } + { + BlockCommandComment *BCC; + ParagraphComment *PC; + ASSERT_TRUE(HasBlockCommandAt(FC, 1, BCC, "brief", PC)); + + ASSERT_TRUE(GetChildAt(BCC, 0, PC)); + + ASSERT_TRUE(HasChildCount(PC, 1)); + ASSERT_TRUE(HasTextAt(PC, 0, " Aaa")); + } + { + ParagraphComment *PC; + ASSERT_TRUE(GetChildAt(FC, 2, PC)); + + ASSERT_TRUE(HasChildCount(PC, 1)); + ASSERT_TRUE(HasTextAt(PC, 0, " Bbb")); + } +} + +TEST_F(CommentParserTest, Paragraph3) { + const char *Source = "// \\brief \\author"; + + FullComment *FC = parseString(Source); 
+ ASSERT_TRUE(HasChildCount(FC, 3)); + + { + ParagraphComment *PC; + ASSERT_TRUE(GetChildAt(FC, 0, PC)); + + ASSERT_TRUE(HasChildCount(PC, 1)); + ASSERT_TRUE(HasTextAt(PC, 0, " ")); + } + { + BlockCommandComment *BCC; + ParagraphComment *PC; + ASSERT_TRUE(HasBlockCommandAt(FC, 1, BCC, "brief", PC)); + + ASSERT_TRUE(GetChildAt(BCC, 0, PC)); + ASSERT_TRUE(HasChildCount(PC, 1)); + ASSERT_TRUE(HasTextAt(PC, 0, " ")); + } + { + BlockCommandComment *BCC; + ParagraphComment *PC; + ASSERT_TRUE(HasBlockCommandAt(FC, 2, BCC, "author", PC)); + + ASSERT_TRUE(GetChildAt(BCC, 0, PC)); + ASSERT_TRUE(HasChildCount(PC, 0)); + } +} + +TEST_F(CommentParserTest, Paragraph4) { + const char *Source = + "// \\brief Aaa\n" + "// Bbb \\author\n" + "// Ccc"; + + FullComment *FC = parseString(Source); + ASSERT_TRUE(HasChildCount(FC, 3)); + + { + ParagraphComment *PC; + ASSERT_TRUE(GetChildAt(FC, 0, PC)); + + ASSERT_TRUE(HasChildCount(PC, 1)); + ASSERT_TRUE(HasTextAt(PC, 0, " ")); + } + { + BlockCommandComment *BCC; + ParagraphComment *PC; + ASSERT_TRUE(HasBlockCommandAt(FC, 1, BCC, "brief", PC)); + + ASSERT_TRUE(GetChildAt(BCC, 0, PC)); + ASSERT_TRUE(HasChildCount(PC, 2)); + ASSERT_TRUE(HasTextWithNewlineAt(PC, 0, " Aaa")); + ASSERT_TRUE(HasTextAt(PC, 1, " Bbb ")); + } + { + BlockCommandComment *BCC; + ParagraphComment *PC; + ASSERT_TRUE(HasBlockCommandAt(FC, 2, BCC, "author", PC)); + + ASSERT_TRUE(GetChildAt(BCC, 0, PC)); + ASSERT_TRUE(HasChildCount(PC, 1)); + ASSERT_TRUE(HasTextAt(PC, 0, " Ccc")); + } +} + +TEST_F(CommentParserTest, ParamCommand1) { + const char *Source = + "// \\param aaa\n" + "// \\param [in] aaa\n" + "// \\param [out] aaa\n" + "// \\param [in,out] aaa\n" + "// \\param [in, out] aaa\n"; + + FullComment *FC = parseString(Source); + ASSERT_TRUE(HasChildCount(FC, 6)); + + { + ParagraphComment *PC; + ASSERT_TRUE(GetChildAt(FC, 0, PC)); + + ASSERT_TRUE(HasChildCount(PC, 1)); + ASSERT_TRUE(HasTextAt(PC, 0, " ")); + } + { + ParamCommandComment *PCC; + ParagraphComment *PC; + 
ASSERT_TRUE(HasParamCommandAt(FC, 1, PCC, "param", + ParamCommandComment::In, + /* IsDirectionExplicit = */ false, + "aaa", PC)); + ASSERT_TRUE(HasChildCount(PCC, 1)); + ASSERT_TRUE(HasChildCount(PC, 1)); + ASSERT_TRUE(HasTextAt(PC, 0, " ")); + } + { + ParamCommandComment *PCC; + ParagraphComment *PC; + ASSERT_TRUE(HasParamCommandAt(FC, 2, PCC, "param", + ParamCommandComment::In, + /* IsDirectionExplicit = */ true, + "aaa", PC)); + ASSERT_TRUE(HasChildCount(PCC, 1)); + ASSERT_TRUE(HasChildCount(PC, 1)); + ASSERT_TRUE(HasTextAt(PC, 0, " ")); + } + { + ParamCommandComment *PCC; + ParagraphComment *PC; + ASSERT_TRUE(HasParamCommandAt(FC, 3, PCC, "param", + ParamCommandComment::Out, + /* IsDirectionExplicit = */ true, + "aaa", PC)); + ASSERT_TRUE(HasChildCount(PCC, 1)); + ASSERT_TRUE(HasChildCount(PC, 1)); + ASSERT_TRUE(HasTextAt(PC, 0, " ")); + } + { + ParamCommandComment *PCC; + ParagraphComment *PC; + ASSERT_TRUE(HasParamCommandAt(FC, 4, PCC, "param", + ParamCommandComment::InOut, + /* IsDirectionExplicit = */ true, + "aaa", PC)); + ASSERT_TRUE(HasChildCount(PCC, 1)); + ASSERT_TRUE(HasChildCount(PC, 1)); + ASSERT_TRUE(HasTextAt(PC, 0, " ")); + } + { + ParamCommandComment *PCC; + ParagraphComment *PC; + ASSERT_TRUE(HasParamCommandAt(FC, 5, PCC, "param", + ParamCommandComment::InOut, + /* IsDirectionExplicit = */ true, + "aaa", PC)); + ASSERT_TRUE(HasChildCount(PCC, 1)); + ASSERT_TRUE(HasChildCount(PC, 0)); + } +} + +TEST_F(CommentParserTest, InlineCommand1) { + const char *Source = "// \\c"; + + FullComment *FC = parseString(Source); + ASSERT_TRUE(HasChildCount(FC, 1)); + + { + ParagraphComment *PC; + InlineCommandComment *ICC; + ASSERT_TRUE(GetChildAt(FC, 0, PC)); + + ASSERT_TRUE(HasChildCount(PC, 2)); + ASSERT_TRUE(HasTextAt(PC, 0, " ")); + ASSERT_TRUE(HasInlineCommandAt(PC, 1, ICC, "c", NoArgs())); + } +} + +TEST_F(CommentParserTest, InlineCommand2) { + const char *Source = "// \\c "; + + FullComment *FC = parseString(Source); + ASSERT_TRUE(HasChildCount(FC, 1)); 
+ + { + ParagraphComment *PC; + InlineCommandComment *ICC; + ASSERT_TRUE(GetChildAt(FC, 0, PC)); + + ASSERT_TRUE(HasChildCount(PC, 3)); + ASSERT_TRUE(HasTextAt(PC, 0, " ")); + ASSERT_TRUE(HasInlineCommandAt(PC, 1, ICC, "c", NoArgs())); + ASSERT_TRUE(HasTextAt(PC, 2, " ")); + } +} + +TEST_F(CommentParserTest, InlineCommand3) { + const char *Source = "// \\c aaa\n"; + + FullComment *FC = parseString(Source); + ASSERT_TRUE(HasChildCount(FC, 1)); + + { + ParagraphComment *PC; + InlineCommandComment *ICC; + ASSERT_TRUE(GetChildAt(FC, 0, PC)); + + ASSERT_TRUE(HasChildCount(PC, 2)); + ASSERT_TRUE(HasTextAt(PC, 0, " ")); + ASSERT_TRUE(HasInlineCommandAt(PC, 1, ICC, "c", "aaa")); + } +} + +TEST_F(CommentParserTest, InlineCommand4) { + const char *Source = "// \\c aaa bbb"; + + FullComment *FC = parseString(Source); + ASSERT_TRUE(HasChildCount(FC, 1)); + + { + ParagraphComment *PC; + InlineCommandComment *ICC; + ASSERT_TRUE(GetChildAt(FC, 0, PC)); + + ASSERT_TRUE(HasChildCount(PC, 3)); + ASSERT_TRUE(HasTextAt(PC, 0, " ")); + ASSERT_TRUE(HasInlineCommandAt(PC, 1, ICC, "c", "aaa")); + ASSERT_TRUE(HasTextAt(PC, 2, " bbb")); + } +} + +TEST_F(CommentParserTest, InlineCommand5) { + const char *Source = "// \\unknown aaa\n"; + + FullComment *FC = parseString(Source); + ASSERT_TRUE(HasChildCount(FC, 1)); + + { + ParagraphComment *PC; + InlineCommandComment *ICC; + ASSERT_TRUE(GetChildAt(FC, 0, PC)); + + ASSERT_TRUE(HasChildCount(PC, 3)); + ASSERT_TRUE(HasTextAt(PC, 0, " ")); + ASSERT_TRUE(HasInlineCommandAt(PC, 1, ICC, "unknown", NoArgs())); + ASSERT_TRUE(HasTextAt(PC, 2, " aaa")); + } +} + +TEST_F(CommentParserTest, HTML1) { + const char *Sources[] = { + "// <a", + "// <a>", + "// <a >" + }; + + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { + FullComment *FC = parseString(Sources[i]); + ASSERT_TRUE(HasChildCount(FC, 1)); + + { + ParagraphComment *PC; + HTMLOpenTagComment *HOT; + ASSERT_TRUE(GetChildAt(FC, 0, PC)); + + ASSERT_TRUE(HasChildCount(PC, 2)); + 
ASSERT_TRUE(HasTextAt(PC, 0, " ")); + ASSERT_TRUE(HasHTMLOpenTagAt(PC, 1, HOT, "a", NoAttrs())); + } + } +} + +TEST_F(CommentParserTest, HTML2) { + const char *Sources[] = { + "// <a href", + "// <a href ", + "// <a href>", + "// <a href >", + }; + + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { + FullComment *FC = parseString(Sources[i]); + ASSERT_TRUE(HasChildCount(FC, 1)); + + { + ParagraphComment *PC; + HTMLOpenTagComment *HOT; + ASSERT_TRUE(GetChildAt(FC, 0, PC)); + + ASSERT_TRUE(HasChildCount(PC, 2)); + ASSERT_TRUE(HasTextAt(PC, 0, " ")); + ASSERT_TRUE(HasHTMLOpenTagAt(PC, 1, HOT, "a", "href", "")); + } + } +} + +TEST_F(CommentParserTest, HTML3) { + const char *Sources[] = { + "// <a href=\"bbb\"", + "// <a href=\"bbb\">", + }; + + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { + FullComment *FC = parseString(Sources[i]); + ASSERT_TRUE(HasChildCount(FC, 1)); + + { + ParagraphComment *PC; + HTMLOpenTagComment *HOT; + ASSERT_TRUE(GetChildAt(FC, 0, PC)); + + ASSERT_TRUE(HasChildCount(PC, 2)); + ASSERT_TRUE(HasTextAt(PC, 0, " ")); + ASSERT_TRUE(HasHTMLOpenTagAt(PC, 1, HOT, "a", "href", "bbb")); + } + } +} + +TEST_F(CommentParserTest, HTML4) { + const char *Sources[] = { + "// </a", + "// </a>", + "// </a >" + }; + + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { + FullComment *FC = parseString(Sources[i]); + ASSERT_TRUE(HasChildCount(FC, 1)); + + { + ParagraphComment *PC; + HTMLCloseTagComment *HCT; + ASSERT_TRUE(GetChildAt(FC, 0, PC)); + + ASSERT_TRUE(HasChildCount(PC, 2)); + ASSERT_TRUE(HasTextAt(PC, 0, " ")); + ASSERT_TRUE(HasHTMLCloseTagAt(PC, 1, HCT, "a")); + } + } +} + +TEST_F(CommentParserTest, HTML5) { + const char *Source = + "// <pre>\n" + "// Aaa\n" + "// Bbb\n" + "// </pre>\n"; + + FullComment *FC = parseString(Source); + ASSERT_TRUE(HasChildCount(FC, 1)); + + { + ParagraphComment *PC; + HTMLOpenTagComment *HOT; + HTMLCloseTagComment *HCT; + ASSERT_TRUE(GetChildAt(FC, 0, PC)); + + 
ASSERT_TRUE(HasChildCount(PC, 6)); + ASSERT_TRUE(HasTextAt(PC, 0, " ")); + ASSERT_TRUE(HasHTMLOpenTagAt(PC, 1, HOT, "pre", NoAttrs())); + ASSERT_TRUE(HasTextWithNewlineAt(PC, 2, " Aaa")); + ASSERT_TRUE(HasTextWithNewlineAt(PC, 3, " Bbb")); + ASSERT_TRUE(HasTextAt(PC, 4, " ")); + ASSERT_TRUE(HasHTMLCloseTagAt(PC, 5, HCT, "pre")); + } +} + +TEST_F(CommentParserTest, VerbatimBlock1) { + const char *Source = "// \\verbatim\\endverbatim\n"; + + FullComment *FC = parseString(Source); + ASSERT_TRUE(HasChildCount(FC, 2)); + + { + ParagraphComment *PC; + ASSERT_TRUE(GetChildAt(FC, 0, PC)); + + ASSERT_TRUE(HasChildCount(PC, 1)); + ASSERT_TRUE(HasTextAt(PC, 0, " ")); + } + { + VerbatimBlockComment *VCC; + ASSERT_TRUE(HasVerbatimBlockAt(FC, 1, VCC, "verbatim", NoLines())); + } +} + +TEST_F(CommentParserTest, VerbatimBlock2) { + const char *Source = "// \\verbatim Aaa \\endverbatim\n"; + + FullComment *FC = parseString(Source); + ASSERT_TRUE(HasChildCount(FC, 2)); + + { + ParagraphComment *PC; + ASSERT_TRUE(GetChildAt(FC, 0, PC)); + + ASSERT_TRUE(HasChildCount(PC, 1)); + ASSERT_TRUE(HasTextAt(PC, 0, " ")); + } + { + VerbatimBlockComment *VBC; + ASSERT_TRUE(HasVerbatimBlockAt(FC, 1, VBC, "verbatim", " Aaa ")); + } +} + +TEST_F(CommentParserTest, VerbatimBlock3) { + const char *Source = + "//\\verbatim\n" + "//\\endverbatim\n"; + + FullComment *FC = parseString(Source); + ASSERT_TRUE(HasChildCount(FC, 1)); + + { + VerbatimBlockComment *VBC; + ASSERT_TRUE(HasVerbatimBlockAt(FC, 0, VBC, "verbatim", NoLines())); + } +} + +TEST_F(CommentParserTest, VerbatimBlock4) { + const char *Sources[] = { + "//\\verbatim\n" + "// Aaa\n" + "//\\endverbatim\n", + + "/*\\verbatim\n" + " * Aaa\n" + " *\\endverbatim*/" + }; + + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { + FullComment *FC = parseString(Sources[i]); + ASSERT_TRUE(HasChildCount(FC, 1)); + + { + VerbatimBlockComment *VBC; + ASSERT_TRUE(HasVerbatimBlockAt(FC, 0, VBC, "verbatim", " Aaa")); + } + } +} + 
+TEST_F(CommentParserTest, VerbatimBlock5) { + const char *Sources[] = { + "// \\verbatim\n" + "// Aaa\n" + "// \\endverbatim\n", + + "/* \\verbatim\n" + " * Aaa\n" + " * \\endverbatim*/" + }; + + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { + FullComment *FC = parseString(Sources[i]); + ASSERT_TRUE(HasChildCount(FC, 2)); + + { + ParagraphComment *PC; + ASSERT_TRUE(GetChildAt(FC, 0, PC)); + + ASSERT_TRUE(HasChildCount(PC, 1)); + ASSERT_TRUE(HasTextAt(PC, 0, " ")); + } + { + VerbatimBlockComment *VBC; + ASSERT_TRUE(HasVerbatimBlockAt(FC, 1, VBC, "verbatim", " Aaa", " ")); + } + } +} + +TEST_F(CommentParserTest, VerbatimBlock6) { + const char *Sources[] = { + "// \\verbatim\n" + "// Aaa\n" + "//\n" + "// Bbb\n" + "// \\endverbatim\n", + + "/* \\verbatim\n" + " * Aaa\n" + " *\n" + " * Bbb\n" + " * \\endverbatim*/" + }; + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { + FullComment *FC = parseString(Sources[i]); + ASSERT_TRUE(HasChildCount(FC, 2)); + + { + ParagraphComment *PC; + ASSERT_TRUE(GetChildAt(FC, 0, PC)); + + ASSERT_TRUE(HasChildCount(PC, 1)); + ASSERT_TRUE(HasTextAt(PC, 0, " ")); + } + { + VerbatimBlockComment *VBC; + ASSERT_TRUE(HasVerbatimBlockAt(FC, 1, VBC, "verbatim")); + ASSERT_EQ(4U, VBC->getLineCount()); + ASSERT_EQ(" Aaa", VBC->getText(0)); + ASSERT_EQ("", VBC->getText(1)); + ASSERT_EQ(" Bbb", VBC->getText(2)); + ASSERT_EQ(" ", VBC->getText(3)); + } + } +} + +TEST_F(CommentParserTest, VerbatimLine1) { + const char *Sources[] = { + "// \\fn", + "// \\fn\n" + }; + + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { + FullComment *FC = parseString(Sources[i]); + ASSERT_TRUE(HasChildCount(FC, 2)); + + { + ParagraphComment *PC; + ASSERT_TRUE(GetChildAt(FC, 0, PC)); + + ASSERT_TRUE(HasChildCount(PC, 1)); + ASSERT_TRUE(HasTextAt(PC, 0, " ")); + } + { + VerbatimLineComment *VLC; + ASSERT_TRUE(HasVerbatimLineAt(FC, 1, VLC, "fn", "")); + } + } +} + +TEST_F(CommentParserTest, VerbatimLine2) { + const char 
*Sources[] = { + "/// \\fn void *foo(const char *zzz = \"\\$\");\n//", + "/** \\fn void *foo(const char *zzz = \"\\$\");*/" + }; + + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { + FullComment *FC = parseString(Sources[i]); + ASSERT_TRUE(HasChildCount(FC, 2)); + + { + ParagraphComment *PC; + ASSERT_TRUE(GetChildAt(FC, 0, PC)); + + ASSERT_TRUE(HasChildCount(PC, 1)); + ASSERT_TRUE(HasTextAt(PC, 0, " ")); + } + { + VerbatimLineComment *VLC; + ASSERT_TRUE(HasVerbatimLineAt(FC, 1, VLC, "fn", + " void *foo(const char *zzz = \"\\$\");")); + } + } +} + +} // unnamed namespace + +} // end namespace comments +} // end namespace clang + diff --git a/utils/TableGen/TableGen.cpp b/utils/TableGen/TableGen.cpp index d98cc72cf8..d3408ed20f 100644 --- a/utils/TableGen/TableGen.cpp +++ b/utils/TableGen/TableGen.cpp @@ -38,6 +38,7 @@ enum ActionType { GenClangDiagsDefs, GenClangDiagGroups, GenClangDiagsIndexName, + GenClangCommentNodes, GenClangDeclNodes, GenClangStmtNodes, GenClangSACheckers, @@ -86,6 +87,8 @@ namespace { clEnumValN(GenClangDiagsIndexName, "gen-clang-diags-index-name", "Generate Clang diagnostic name index"), + clEnumValN(GenClangCommentNodes, "gen-clang-comment-nodes", + "Generate Clang AST comment nodes"), clEnumValN(GenClangDeclNodes, "gen-clang-decl-nodes", "Generate Clang AST declaration nodes"), clEnumValN(GenClangStmtNodes, "gen-clang-stmt-nodes", @@ -148,6 +151,9 @@ public: case GenClangDiagsIndexName: EmitClangDiagsIndexName(Records, OS); break; + case GenClangCommentNodes: + EmitClangASTNodes(Records, OS, "Comment", ""); + break; case GenClangDeclNodes: EmitClangASTNodes(Records, OS, "Decl", "Decl"); EmitClangDeclContext(Records, OS); |