summaryrefslogtreecommitdiff
path: root/lib/AST/RawCommentList.cpp
diff options
context:
space:
mode:
authorIlya Biryukov <ibiryukov@google.com>2018-05-16 12:30:09 +0000
committerIlya Biryukov <ibiryukov@google.com>2018-05-16 12:30:09 +0000
commitbfc7cfab82c332ec0070bf8cfb2c24359bef640d (patch)
tree00280f2b6232b877cd3ac843373a21f4080c2ef9 /lib/AST/RawCommentList.cpp
parent983ac4e991973db70b0e0ef336baa525f293ed99 (diff)
downloadclang-bfc7cfab82c332ec0070bf8cfb2c24359bef640d.tar.gz
[AST] Added a helper to extract a user-friendly text of a comment.
Summary: The helper is used in clangd for documentation shown in code completion and storing the docs in the symbols. See D45999. This patch reuses the code of the Doxygen comment lexer, disabling the bits that do command and html tag parsing. The new helper works on all comments, including non-doxygen comments. However, it does not understand or transform any doxygen directives, i.e. cannot extract brief text, etc. Reviewers: sammccall, hokein, ioeric Reviewed By: ioeric Subscribers: mgorny, cfe-commits Differential Revision: https://reviews.llvm.org/D46000 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@332458 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/AST/RawCommentList.cpp')
-rw-r--r--lib/AST/RawCommentList.cpp91
1 files changed, 91 insertions, 0 deletions
diff --git a/lib/AST/RawCommentList.cpp b/lib/AST/RawCommentList.cpp
index d6a640b7dc..95da9ed6d2 100644
--- a/lib/AST/RawCommentList.cpp
+++ b/lib/AST/RawCommentList.cpp
@@ -335,3 +335,94 @@ void RawCommentList::addDeserializedComments(ArrayRef<RawComment *> Deserialized
BeforeThanCompare<RawComment>(SourceMgr));
std::swap(Comments, MergedComments);
}
+
+std::string RawComment::getFormattedText(const SourceManager &SourceMgr,
+ DiagnosticsEngine &Diags) const {
+ llvm::StringRef CommentText = getRawText(SourceMgr);
+ if (CommentText.empty())
+ return "";
+
+ llvm::BumpPtrAllocator Allocator;
+ // We do not parse any commands, so CommentOptions are ignored by
+ // comments::Lexer. Therefore, we just use default-constructed options.
+ CommentOptions DefOpts;
+ comments::CommandTraits EmptyTraits(Allocator, DefOpts);
+ comments::Lexer L(Allocator, Diags, EmptyTraits, getSourceRange().getBegin(),
+ CommentText.begin(), CommentText.end(),
+ /*ParseCommands=*/false);
+
+ std::string Result;
+ // A column number of the first non-whitespace token in the comment text.
+ // We skip whitespace up to this column, but keep the whitespace after this
+ // column. IndentColumn is calculated when lexing the first line and reused
+ // for the rest of lines.
+ unsigned IndentColumn = 0;
+
+ // Processes one line of the comment and adds it to the result.
+ // Handles skipping the indent at the start of the line.
+ // Returns false when eof is reached and true otherwise.
+ auto LexLine = [&](bool IsFirstLine) -> bool {
+ comments::Token Tok;
+ // Lex the first token on the line. We handle it separately, because we to
+ // fix up its indentation.
+ L.lex(Tok);
+ if (Tok.is(comments::tok::eof))
+ return false;
+ if (Tok.is(comments::tok::newline)) {
+ Result += "\n";
+ return true;
+ }
+ llvm::StringRef TokText = L.getSpelling(Tok, SourceMgr);
+ bool LocInvalid = false;
+ unsigned TokColumn =
+ SourceMgr.getSpellingColumnNumber(Tok.getLocation(), &LocInvalid);
+ assert(!LocInvalid && "getFormattedText for invalid location");
+
+ // Amount of leading whitespace in TokText.
+ size_t WhitespaceLen = TokText.find_first_not_of(" \t");
+ if (WhitespaceLen == StringRef::npos)
+ WhitespaceLen = TokText.size();
+ // Remember the amount of whitespace we skipped in the first line to remove
+ // indent up to that column in the following lines.
+ if (IsFirstLine)
+ IndentColumn = TokColumn + WhitespaceLen;
+
+ // Amount of leading whitespace we actually want to skip.
+ // For the first line we skip all the whitespace.
+ // For the rest of the lines, we skip whitespace up to IndentColumn.
+ unsigned SkipLen =
+ IsFirstLine
+ ? WhitespaceLen
+ : std::min<size_t>(
+ WhitespaceLen,
+ std::max<int>(static_cast<int>(IndentColumn) - TokColumn, 0));
+ llvm::StringRef Trimmed = TokText.drop_front(SkipLen);
+ Result += Trimmed;
+ // Lex all tokens in the rest of the line.
+ for (L.lex(Tok); Tok.isNot(comments::tok::eof); L.lex(Tok)) {
+ if (Tok.is(comments::tok::newline)) {
+ Result += "\n";
+ return true;
+ }
+ Result += L.getSpelling(Tok, SourceMgr);
+ }
+ // We've reached the end of file token.
+ return false;
+ };
+
+ auto DropTrailingNewLines = [](std::string &Str) {
+ while (Str.back() == '\n')
+ Str.pop_back();
+ };
+
+ // Proces first line separately to remember indent for the following lines.
+ if (!LexLine(/*IsFirstLine=*/true)) {
+ DropTrailingNewLines(Result);
+ return Result;
+ }
+ // Process the rest of the lines.
+ while (LexLine(/*IsFirstLine=*/false))
+ ;
+ DropTrailingNewLines(Result);
+ return Result;
+}