[AST] Added a helper to extract a user-friendly text of a comment.

Summary: The helper is used in clangd for documentation shown in code completion and storing the docs in the symbols. See D45999. This patch reuses the code of the Doxygen comment lexer, disabling the bits that do command and html tag parsing. The new helper works on all comments, including non-doxygen comments. However, it does not understand or transform any doxygen directives, i.e. cannot extract brief text, etc. Reviewers: sammccall, hokein, ioeric Reviewed By: ioeric Subscribers: mgorny, cfe-commits Differential Revision: https://reviews.llvm.org/D46000 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@332458 91177308-0d34-0410-b5e6-96231b3b80d8
author: Ilya Biryukov <ibiryukov@google.com> 2018-05-16 12:30:09 +0000
committer: Ilya Biryukov <ibiryukov@google.com> 2018-05-16 12:30:09 +0000
commit: bfc7cfab82c332ec0070bf8cfb2c24359bef640d (patch)
tree: 00280f2b6232b877cd3ac843373a21f4080c2ef9 /lib/AST/RawCommentList.cpp
parent: 983ac4e991973db70b0e0ef336baa525f293ed99 (diff)
download: clang-bfc7cfab82c332ec0070bf8cfb2c24359bef640d.tar.gz
1 files changed, 91 insertions, 0 deletions
diff --git a/lib/AST/RawCommentList.cpp b/lib/AST/RawCommentList.cpp
index d6a640b7dc..95da9ed6d2 100644
--- a/lib/AST/RawCommentList.cpp
+++ b/lib/AST/RawCommentList.cpp
@@ -335,3 +335,94 @@ void RawCommentList::addDeserializedComments(ArrayRef<RawComment *> Deserialized
              BeforeThanCompare<RawComment>(SourceMgr));
   std::swap(Comments, MergedComments);
 }
+
+std::string RawComment::getFormattedText(const SourceManager &SourceMgr,
+                                         DiagnosticsEngine &Diags) const {
+  llvm::StringRef CommentText = getRawText(SourceMgr);
+  if (CommentText.empty())
+    return "";
+
+  llvm::BumpPtrAllocator Allocator;
+  // We do not parse any commands, so CommentOptions are ignored by
+  // comments::Lexer. Therefore, we just use default-constructed options.
+  CommentOptions DefOpts;
+  comments::CommandTraits EmptyTraits(Allocator, DefOpts);
+  comments::Lexer L(Allocator, Diags, EmptyTraits, getSourceRange().getBegin(),
+                    CommentText.begin(), CommentText.end(),
+                    /*ParseCommands=*/false);
+
+  std::string Result;
+  // A column number of the first non-whitespace token in the comment text.
+  // We skip whitespace up to this column, but keep the whitespace after this
+  // column. IndentColumn is calculated when lexing the first line and reused
+  // for the rest of lines.
+  unsigned IndentColumn = 0;
+
+  // Processes one line of the comment and adds it to the result.
+  // Handles skipping the indent at the start of the line.
+  // Returns false when eof is reached and true otherwise.
+  auto LexLine = [&](bool IsFirstLine) -> bool {
+    comments::Token Tok;
+    // Lex the first token on the line. We handle it separately, because we to
+    // fix up its indentation.
+    L.lex(Tok);
+    if (Tok.is(comments::tok::eof))
+      return false;
+    if (Tok.is(comments::tok::newline)) {
+      Result += "\n";
+      return true;
+    }
+    llvm::StringRef TokText = L.getSpelling(Tok, SourceMgr);
+    bool LocInvalid = false;
+    unsigned TokColumn =
+        SourceMgr.getSpellingColumnNumber(Tok.getLocation(), &LocInvalid);
+    assert(!LocInvalid && "getFormattedText for invalid location");
+
+    // Amount of leading whitespace in TokText.
+    size_t WhitespaceLen = TokText.find_first_not_of(" \t");
+    if (WhitespaceLen == StringRef::npos)
+      WhitespaceLen = TokText.size();
+    // Remember the amount of whitespace we skipped in the first line to remove
+    // indent up to that column in the following lines.
+    if (IsFirstLine)
+      IndentColumn = TokColumn + WhitespaceLen;
+
+    // Amount of leading whitespace we actually want to skip.
+    // For the first line we skip all the whitespace.
+    // For the rest of the lines, we skip whitespace up to IndentColumn.
+    unsigned SkipLen =
+        IsFirstLine
+            ? WhitespaceLen
+            : std::min<size_t>(
+                  WhitespaceLen,
+                  std::max<int>(static_cast<int>(IndentColumn) - TokColumn, 0));
+    llvm::StringRef Trimmed = TokText.drop_front(SkipLen);
+    Result += Trimmed;
+    // Lex all tokens in the rest of the line.
+    for (L.lex(Tok); Tok.isNot(comments::tok::eof); L.lex(Tok)) {
+      if (Tok.is(comments::tok::newline)) {
+        Result += "\n";
+        return true;
+      }
+      Result += L.getSpelling(Tok, SourceMgr);
+    }
+    // We've reached the end of file token.
+    return false;
+  };
+
+  auto DropTrailingNewLines = [](std::string &Str) {
+    while (Str.back() == '\n')
+      Str.pop_back();
+  };
+
+  // Proces first line separately to remember indent for the following lines.
+  if (!LexLine(/*IsFirstLine=*/true)) {
+    DropTrailingNewLines(Result);
+    return Result;
+  }
+  // Process the rest of the lines.
+  while (LexLine(/*IsFirstLine=*/false))
+    ;
+  DropTrailingNewLines(Result);
+  return Result;
+}
author	Ilya Biryukov <ibiryukov@google.com>	2018-05-16 12:30:09 +0000
committer	Ilya Biryukov <ibiryukov@google.com>	2018-05-16 12:30:09 +0000
commit	bfc7cfab82c332ec0070bf8cfb2c24359bef640d (patch)
tree	00280f2b6232b877cd3ac843373a21f4080c2ef9 /lib/AST/RawCommentList.cpp
parent	983ac4e991973db70b0e0ef336baa525f293ed99 (diff)
download	clang-bfc7cfab82c332ec0070bf8cfb2c24359bef640d.tar.gz