//===-- clang-format/ClangFormat.cpp - Clang format tool ------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// /// \file /// This file implements a clang-format tool that automatically formats /// (fragments of) C++ code. /// //===----------------------------------------------------------------------===// #include "clang/Basic/Diagnostic.h" #include "clang/Basic/DiagnosticOptions.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/Version.h" #include "clang/Format/Format.h" #include "clang/Frontend/TextDiagnosticPrinter.h" #include "clang/Rewrite/Core/Rewriter.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/InitLLVM.h" #include "llvm/Support/Process.h" using namespace llvm; using clang::tooling::Replacements; static cl::opt Help("h", cl::desc("Alias for -help"), cl::Hidden); // Mark all our options with this category, everything else (except for -version // and -help) will be hidden. static cl::OptionCategory ClangFormatCategory("Clang-format options"); static cl::list Offsets("offset", cl::desc("Format a range starting at this byte offset.\n" "Multiple ranges can be formatted by specifying\n" "several -offset and -length pairs.\n" "Can only be used with one input file."), cl::cat(ClangFormatCategory)); static cl::list Lengths("length", cl::desc("Format a range of this length (in bytes).\n" "Multiple ranges can be formatted by specifying\n" "several -offset and -length pairs.\n" "When only a single -offset is specified without\n" "-length, clang-format will format up to the end\n" "of the file.\n" "Can only be used with one input file."), cl::cat(ClangFormatCategory)); static cl::list LineRanges("lines", cl::desc(": - format a range of\n" "lines (both 1-based).\n" "Multiple ranges can be formatted by specifying\n" "several -lines arguments.\n" "Can't be used with -offset and -length.\n" "Can only be used with one input file."), cl::cat(ClangFormatCategory)); static cl::opt Style("style", cl::desc(clang::format::StyleOptionHelpDescription), cl::init(clang::format::DefaultFormatStyle), cl::cat(ClangFormatCategory)); static cl::opt FallbackStyle("fallback-style", cl::desc("The name of the predefined style used as a\n" "fallback in case clang-format is invoked with\n" "-style=file, but can not find the .clang-format\n" "file to use.\n" "Use -fallback-style=none to skip formatting."), cl::init(clang::format::DefaultFallbackStyle), cl::cat(ClangFormatCategory)); static cl::opt AssumeFileName( "assume-filename", cl::desc("When reading from stdin, clang-format assumes this\n" "filename to look for a style config file (with\n" "-style=file) and to determine the language."), cl::init(""), cl::cat(ClangFormatCategory)); static cl::opt Inplace("i", cl::desc("Inplace edit s, if specified."), cl::cat(ClangFormatCategory)); static cl::opt OutputXML("output-replacements-xml", cl::desc("Output replacements as XML."), cl::cat(ClangFormatCategory)); static cl::opt DumpConfig("dump-config", cl::desc("Dump configuration options to stdout and exit.\n" "Can be used with -style option."), cl::cat(ClangFormatCategory)); static cl::opt Cursor("cursor", cl::desc("The position of the cursor when invoking\n" "clang-format from an editor integration"), cl::init(0), cl::cat(ClangFormatCategory)); static cl::opt SortIncludes( "sort-includes", cl::desc("If set, overrides the include sorting behavior determined by the " "SortIncludes style flag"), cl::cat(ClangFormatCategory)); static cl::opt Verbose("verbose", cl::desc("If set, shows the list of processed files"), cl::cat(ClangFormatCategory)); // Use --dry-run to match other LLVM tools when you mean do it but don't // actually do it static cl::opt DryRun("dry-run", cl::desc("If set, do not actually make the formatting changes"), cl::cat(ClangFormatCategory)); // Use -n as a common command as an alias for --dry-run. (git and make use -n) static cl::alias DryRunShort("n", cl::desc("Alias for --dry-run"), cl::cat(ClangFormatCategory), cl::aliasopt(DryRun), cl::NotHidden); // Emulate being able to turn on/off the warning. static cl::opt WarnFormat("Wclang-format-violations", cl::desc("Warnings about individual formatting changes needed. " "Used only with --dry-run or -n"), cl::init(true), cl::cat(ClangFormatCategory), cl::Hidden); static cl::opt NoWarnFormat("Wno-clang-format-violations", cl::desc("Do not warn about individual formatting changes " "needed. Used only with --dry-run or -n"), cl::init(false), cl::cat(ClangFormatCategory), cl::Hidden); static cl::opt ErrorLimit( "ferror-limit", cl::desc("Set the maximum number of clang-format errors to emit before " "stopping (0 = no limit). Used only with --dry-run or -n"), cl::init(0), cl::cat(ClangFormatCategory)); static cl::opt WarningsAsErrors("Werror", cl::desc("If set, changes formatting warnings to errors"), cl::cat(ClangFormatCategory)); static cl::opt ShowColors("fcolor-diagnostics", cl::desc("If set, and on a color-capable terminal controls " "whether or not to print diagnostics in color"), cl::init(true), cl::cat(ClangFormatCategory), cl::Hidden); static cl::opt NoShowColors("fno-color-diagnostics", cl::desc("If set, and on a color-capable terminal controls " "whether or not to print diagnostics in color"), cl::init(false), cl::cat(ClangFormatCategory), cl::Hidden); static cl::list FileNames(cl::Positional, cl::desc("[ ...]"), cl::cat(ClangFormatCategory)); namespace clang { namespace format { static FileID createInMemoryFile(StringRef FileName, MemoryBuffer *Source, SourceManager &Sources, FileManager &Files, llvm::vfs::InMemoryFileSystem *MemFS) { MemFS->addFileNoOwn(FileName, 0, Source); auto File = Files.getFile(FileName); return Sources.createFileID(File ? *File : nullptr, SourceLocation(), SrcMgr::C_User); } // Parses : input to a pair of line numbers. // Returns true on error. static bool parseLineRange(StringRef Input, unsigned &FromLine, unsigned &ToLine) { std::pair LineRange = Input.split(':'); return LineRange.first.getAsInteger(0, FromLine) || LineRange.second.getAsInteger(0, ToLine); } static bool fillRanges(MemoryBuffer *Code, std::vector &Ranges) { IntrusiveRefCntPtr InMemoryFileSystem( new llvm::vfs::InMemoryFileSystem); FileManager Files(FileSystemOptions(), InMemoryFileSystem); DiagnosticsEngine Diagnostics( IntrusiveRefCntPtr(new DiagnosticIDs), new DiagnosticOptions); SourceManager Sources(Diagnostics, Files); FileID ID = createInMemoryFile("", Code, Sources, Files, InMemoryFileSystem.get()); if (!LineRanges.empty()) { if (!Offsets.empty() || !Lengths.empty()) { errs() << "error: cannot use -lines with -offset/-length\n"; return true; } for (unsigned i = 0, e = LineRanges.size(); i < e; ++i) { unsigned FromLine, ToLine; if (parseLineRange(LineRanges[i], FromLine, ToLine)) { errs() << "error: invalid : pair\n"; return true; } if (FromLine > ToLine) { errs() << "error: start line should be less than end line\n"; return true; } SourceLocation Start = Sources.translateLineCol(ID, FromLine, 1); SourceLocation End = Sources.translateLineCol(ID, ToLine, UINT_MAX); if (Start.isInvalid() || End.isInvalid()) return true; unsigned Offset = Sources.getFileOffset(Start); unsigned Length = Sources.getFileOffset(End) - Offset; Ranges.push_back(tooling::Range(Offset, Length)); } return false; } if (Offsets.empty()) Offsets.push_back(0); if (Offsets.size() != Lengths.size() && !(Offsets.size() == 1 && Lengths.empty())) { errs() << "error: number of -offset and -length arguments must match.\n"; return true; } for (unsigned i = 0, e = Offsets.size(); i != e; ++i) { if (Offsets[i] >= Code->getBufferSize()) { errs() << "error: offset " << Offsets[i] << " is outside the file\n"; return true; } SourceLocation Start = Sources.getLocForStartOfFile(ID).getLocWithOffset(Offsets[i]); SourceLocation End; if (i < Lengths.size()) { if (Offsets[i] + Lengths[i] > Code->getBufferSize()) { errs() << "error: invalid length " << Lengths[i] << ", offset + length (" << Offsets[i] + Lengths[i] << ") is outside the file.\n"; return true; } End = Start.getLocWithOffset(Lengths[i]); } else { End = Sources.getLocForEndOfFile(ID); } unsigned Offset = Sources.getFileOffset(Start); unsigned Length = Sources.getFileOffset(End) - Offset; Ranges.push_back(tooling::Range(Offset, Length)); } return false; } static void outputReplacementXML(StringRef Text) { // FIXME: When we sort includes, we need to make sure the stream is correct // utf-8. size_t From = 0; size_t Index; while ((Index = Text.find_first_of("\n\r<&", From)) != StringRef::npos) { outs() << Text.substr(From, Index - From); switch (Text[Index]) { case '\n': outs() << " "; break; case '\r': outs() << " "; break; case '<': outs() << "<"; break; case '&': outs() << "&"; break; default: llvm_unreachable("Unexpected character encountered!"); } From = Index + 1; } outs() << Text.substr(From); } static void outputReplacementsXML(const Replacements &Replaces) { for (const auto &R : Replaces) { outs() << ""; outputReplacementXML(R.getReplacementText()); outs() << "\n"; } } // If BufStr has an invalid BOM, returns the BOM name; otherwise, returns // nullptr. static const char *getInValidBOM(StringRef BufStr) { // Check to see if the buffer has a UTF Byte Order Mark (BOM). // We only support UTF-8 with and without a BOM right now. See // https://en.wikipedia.org/wiki/Byte_order_mark#Byte_order_marks_by_encoding // for more information. const char *InvalidBOM = llvm::StringSwitch(BufStr) .StartsWith(llvm::StringLiteral::withInnerNUL("\x00\x00\xFE\xFF"), "UTF-32 (BE)") .StartsWith(llvm::StringLiteral::withInnerNUL("\xFF\xFE\x00\x00"), "UTF-32 (LE)") .StartsWith("\xFE\xFF", "UTF-16 (BE)") .StartsWith("\xFF\xFE", "UTF-16 (LE)") .StartsWith("\x2B\x2F\x76", "UTF-7") .StartsWith("\xF7\x64\x4C", "UTF-1") .StartsWith("\xDD\x73\x66\x73", "UTF-EBCDIC") .StartsWith("\x0E\xFE\xFF", "SCSU") .StartsWith("\xFB\xEE\x28", "BOCU-1") .StartsWith("\x84\x31\x95\x33", "GB-18030") .Default(nullptr); return InvalidBOM; } static bool emitReplacementWarnings(const Replacements &Replaces, StringRef AssumedFileName, const std::unique_ptr &Code) { if (Replaces.empty()) { return false; } IntrusiveRefCntPtr DiagOpts = new DiagnosticOptions(); DiagOpts->ShowColors = (ShowColors && !NoShowColors); TextDiagnosticPrinter *DiagsBuffer = new TextDiagnosticPrinter(llvm::errs(), &*DiagOpts, false); IntrusiveRefCntPtr DiagID(new DiagnosticIDs()); IntrusiveRefCntPtr Diags( new DiagnosticsEngine(DiagID, &*DiagOpts, DiagsBuffer)); IntrusiveRefCntPtr InMemoryFileSystem( new llvm::vfs::InMemoryFileSystem); FileManager Files(FileSystemOptions(), InMemoryFileSystem); SourceManager Sources(*Diags, Files); FileID FileID = createInMemoryFile(AssumedFileName, Code.get(), Sources, Files, InMemoryFileSystem.get()); const unsigned ID = Diags->getCustomDiagID( WarningsAsErrors ? clang::DiagnosticsEngine::Error : clang::DiagnosticsEngine::Warning, "code should be clang-formatted [-Wclang-format-violations]"); unsigned Errors = 0; DiagsBuffer->BeginSourceFile(LangOptions(), nullptr); if (WarnFormat && !NoWarnFormat) { for (const auto &R : Replaces) { Diags->Report( Sources.getLocForStartOfFile(FileID).getLocWithOffset(R.getOffset()), ID); Errors++; if (ErrorLimit && Errors >= ErrorLimit) break; } } DiagsBuffer->EndSourceFile(); return WarningsAsErrors; } static void outputXML(const Replacements &Replaces, const Replacements &FormatChanges, const FormattingAttemptStatus &Status, const cl::opt &Cursor, unsigned CursorPosition) { outs() << "\n\n"; if (Cursor.getNumOccurrences() != 0) outs() << "" << FormatChanges.getShiftedCodePosition(CursorPosition) << "\n"; outputReplacementsXML(Replaces); outs() << "\n"; } // Returns true on error. static bool format(StringRef FileName) { if (!OutputXML && Inplace && FileName == "-") { errs() << "error: cannot use -i when reading from stdin.\n"; return false; } // On Windows, overwriting a file with an open file mapping doesn't work, // so read the whole file into memory when formatting in-place. ErrorOr> CodeOrErr = !OutputXML && Inplace ? MemoryBuffer::getFileAsStream(FileName) : MemoryBuffer::getFileOrSTDIN(FileName); if (std::error_code EC = CodeOrErr.getError()) { errs() << EC.message() << "\n"; return true; } std::unique_ptr Code = std::move(CodeOrErr.get()); if (Code->getBufferSize() == 0) return false; // Empty files are formatted correctly. StringRef BufStr = Code->getBuffer(); const char *InvalidBOM = getInValidBOM(BufStr); if (InvalidBOM) { errs() << "error: encoding with unsupported byte order mark \"" << InvalidBOM << "\" detected"; if (FileName != "-") errs() << " in file '" << FileName << "'"; errs() << ".\n"; return true; } std::vector Ranges; if (fillRanges(Code.get(), Ranges)) return true; StringRef AssumedFileName = (FileName == "-") ? AssumeFileName : FileName; llvm::Expected FormatStyle = getStyle(Style, AssumedFileName, FallbackStyle, Code->getBuffer()); if (!FormatStyle) { llvm::errs() << llvm::toString(FormatStyle.takeError()) << "\n"; return true; } if (SortIncludes.getNumOccurrences() != 0) FormatStyle->SortIncludes = SortIncludes; unsigned CursorPosition = Cursor; Replacements Replaces = sortIncludes(*FormatStyle, Code->getBuffer(), Ranges, AssumedFileName, &CursorPosition); auto ChangedCode = tooling::applyAllReplacements(Code->getBuffer(), Replaces); if (!ChangedCode) { llvm::errs() << llvm::toString(ChangedCode.takeError()) << "\n"; return true; } // Get new affected ranges after sorting `#includes`. Ranges = tooling::calculateRangesAfterReplacements(Replaces, Ranges); FormattingAttemptStatus Status; Replacements FormatChanges = reformat(*FormatStyle, *ChangedCode, Ranges, AssumedFileName, &Status); Replaces = Replaces.merge(FormatChanges); if (OutputXML || DryRun) { if (DryRun) { return emitReplacementWarnings(Replaces, AssumedFileName, Code); } else { outputXML(Replaces, FormatChanges, Status, Cursor, CursorPosition); } } else { IntrusiveRefCntPtr InMemoryFileSystem( new llvm::vfs::InMemoryFileSystem); FileManager Files(FileSystemOptions(), InMemoryFileSystem); DiagnosticsEngine Diagnostics( IntrusiveRefCntPtr(new DiagnosticIDs), new DiagnosticOptions); SourceManager Sources(Diagnostics, Files); FileID ID = createInMemoryFile(AssumedFileName, Code.get(), Sources, Files, InMemoryFileSystem.get()); Rewriter Rewrite(Sources, LangOptions()); tooling::applyAllReplacements(Replaces, Rewrite); if (Inplace) { if (Rewrite.overwriteChangedFiles()) return true; } else { if (Cursor.getNumOccurrences() != 0) { outs() << "{ \"Cursor\": " << FormatChanges.getShiftedCodePosition(CursorPosition) << ", \"IncompleteFormat\": " << (Status.FormatComplete ? "false" : "true"); if (!Status.FormatComplete) outs() << ", \"Line\": " << Status.Line; outs() << " }\n"; } Rewrite.getEditBuffer(ID).write(outs()); } } return false; } } // namespace format } // namespace clang static void PrintVersion(raw_ostream &OS) { OS << clang::getClangToolFullVersion("clang-format") << '\n'; } // Dump the configuration. static int dumpConfig() { StringRef FileName; std::unique_ptr Code; if (FileNames.empty()) { // We can't read the code to detect the language if there's no // file name, so leave Code empty here. FileName = AssumeFileName; } else { // Read in the code in case the filename alone isn't enough to // detect the language. ErrorOr> CodeOrErr = MemoryBuffer::getFileOrSTDIN(FileNames[0]); if (std::error_code EC = CodeOrErr.getError()) { llvm::errs() << EC.message() << "\n"; return 1; } FileName = (FileNames[0] == "-") ? AssumeFileName : FileNames[0]; Code = std::move(CodeOrErr.get()); } llvm::Expected FormatStyle = clang::format::getStyle(Style, FileName, FallbackStyle, Code ? Code->getBuffer() : ""); if (!FormatStyle) { llvm::errs() << llvm::toString(FormatStyle.takeError()) << "\n"; return 1; } std::string Config = clang::format::configurationAsText(*FormatStyle); outs() << Config << "\n"; return 0; } int main(int argc, const char **argv) { llvm::InitLLVM X(argc, argv); cl::HideUnrelatedOptions(ClangFormatCategory); cl::SetVersionPrinter(PrintVersion); cl::ParseCommandLineOptions( argc, argv, "A tool to format C/C++/Java/JavaScript/Objective-C/Protobuf/C# code.\n\n" "If no arguments are specified, it formats the code from standard input\n" "and writes the result to the standard output.\n" "If s are given, it reformats the files. If -i is specified\n" "together with s, the files are edited in-place. Otherwise, the\n" "result is written to the standard output.\n"); if (Help) { cl::PrintHelpMessage(); return 0; } if (DumpConfig) { return dumpConfig(); } bool Error = false; if (FileNames.empty()) { Error = clang::format::format("-"); return Error ? 1 : 0; } if (FileNames.size() != 1 && (!Offsets.empty() || !Lengths.empty() || !LineRanges.empty())) { errs() << "error: -offset, -length and -lines can only be used for " "single file.\n"; return 1; } for (const auto &FileName : FileNames) { if (Verbose) errs() << "Formatting " << FileName << "\n"; Error |= clang::format::format(FileName); } return Error ? 1 : 0; }