diff options
author | Kostya Serebryany <kcc@google.com> | 2019-06-13 21:17:49 +0000 |
---|---|---|
committer | Kostya Serebryany <kcc@google.com> | 2019-06-13 21:17:49 +0000 |
commit | 3346bd6120200e9950db2a4aa3f31cf3d8ac2443 (patch) | |
tree | 823f05f5196f02ac7ea94377cf29c43d85b79e9c /lib/fuzzer | |
parent | 6ac86bdc1602faac865d13039fd000e9e5ef1f58 (diff) | |
download | compiler-rt-3346bd6120200e9950db2a4aa3f31cf3d8ac2443.tar.gz |
[libFuzzer] simplify the DFT trace collection using the new faster DFSan mode that traces up to 16 labels at a time and never runs out of labels.
git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@363326 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/fuzzer')
-rw-r--r-- | lib/fuzzer/FuzzerDataFlowTrace.cpp | 72 |
-rw-r--r-- | lib/fuzzer/FuzzerFork.cpp | 19 |
-rw-r--r-- | lib/fuzzer/dataflow/DataFlow.cpp | 120 |
3 files changed, 75 insertions, 136 deletions
diff --git a/lib/fuzzer/FuzzerDataFlowTrace.cpp b/lib/fuzzer/FuzzerDataFlowTrace.cpp index 1fba3913c..311f53a31 100644 --- a/lib/fuzzer/FuzzerDataFlowTrace.cpp +++ b/lib/fuzzer/FuzzerDataFlowTrace.cpp @@ -120,12 +120,6 @@ static Vector<uint8_t> DFTStringToVector(const std::string &DFTString) { return DFT; } -static std::ostream &operator<<(std::ostream &OS, const Vector<uint8_t> &DFT) { - for (auto B : DFT) - OS << (B ? "1" : "0"); - return OS; -} - static bool ParseError(const char *Err, const std::string &Line) { Printf("DataFlowTrace: parse error: %s: Line: %s\n", Err, Line.c_str()); return false; @@ -246,74 +240,24 @@ int CollectDataFlow(const std::string &DFTBinary, const std::string &DirPath, const Vector<SizedFile> &CorporaFiles) { Printf("INFO: collecting data flow: bin: %s dir: %s files: %zd\n", DFTBinary.c_str(), DirPath.c_str(), CorporaFiles.size()); + setenv("DFSAN_OPTIONS", "fast16labels=1:warn_unimplemented=0", 1); MkDir(DirPath); - auto Temp = TempPath(".dft"); for (auto &F : CorporaFiles) { // For every input F we need to collect the data flow and the coverage. // Data flow collection may fail if we request too many DFSan tags at once. // So, we start from requesting all tags in range [0,Size) and if that fails // we then request tags in [0,Size/2) and [Size/2, Size), and so on. // Function number => DFT. 
+ auto OutPath = DirPlusFile(DirPath, Hash(FileToVector(F.File))); std::unordered_map<size_t, Vector<uint8_t>> DFTMap; std::unordered_set<std::string> Cov; - std::queue<std::pair<size_t, size_t>> Q; - Q.push({0, F.Size}); - while (!Q.empty()) { - auto R = Q.front(); - Printf("\n\n\n********* Trying: [%zd, %zd)\n", R.first, R.second); - Q.pop(); - Command Cmd; - Cmd.addArgument(DFTBinary); - Cmd.addArgument(std::to_string(R.first)); - Cmd.addArgument(std::to_string(R.second)); - Cmd.addArgument(F.File); - Cmd.addArgument(Temp); - Printf("CMD: %s\n", Cmd.toString().c_str()); - if (ExecuteCommand(Cmd)) { - // DFSan has failed, collect tags for two subsets. - if (R.second - R.first >= 2) { - size_t Mid = (R.second + R.first) / 2; - Q.push({R.first, Mid}); - Q.push({Mid, R.second}); - } - } else { - Printf("********* Success: [%zd, %zd)\n", R.first, R.second); - std::ifstream IF(Temp); - std::string L; - while (std::getline(IF, L, '\n')) { - // Data flow collection has succeeded. - // Merge the results with the other runs. - if (L.empty()) continue; - if (L[0] == 'C') { - // Take coverage lines as is, they will be the same in all attempts. - Cov.insert(L); - } else if (L[0] == 'F') { - size_t FunctionNum = 0; - std::string DFTString; - if (ParseDFTLine(L, &FunctionNum, &DFTString)) { - auto &DFT = DFTMap[FunctionNum]; - if (DFT.empty()) { - // Haven't seen this function before, take DFT as is. - DFT = DFTStringToVector(DFTString); - } else if (DFT.size() == DFTString.size()) { - // Have seen this function already, merge DFTs. - DFTStringAppendToVector(&DFT, DFTString); - } - } - } - } - } - } - auto OutPath = DirPlusFile(DirPath, Hash(FileToVector(F.File))); - // Dump combined DFT to disk. 
- Printf("Producing DFT for %s\n", OutPath.c_str()); - std::ofstream OF(OutPath); - for (auto &DFT: DFTMap) - OF << "F" << DFT.first << " " << DFT.second << std::endl; - for (auto &C : Cov) - OF << C << std::endl; + Command Cmd; + Cmd.addArgument(DFTBinary); + Cmd.addArgument(F.File); + Cmd.addArgument(OutPath); + Printf("CMD: %s\n", Cmd.toString().c_str()); + ExecuteCommand(Cmd); } - RemoveFile(Temp); // Write functions.txt if it's currently empty or doesn't exist. auto FunctionsTxtPath = DirPlusFile(DirPath, kFunctionsTxt); if (FileToString(FunctionsTxtPath).empty()) { diff --git a/lib/fuzzer/FuzzerFork.cpp b/lib/fuzzer/FuzzerFork.cpp index 870a22448..5c4855f82 100644 --- a/lib/fuzzer/FuzzerFork.cpp +++ b/lib/fuzzer/FuzzerFork.cpp @@ -89,6 +89,7 @@ struct GlobalEnv { std::string DFTDir; std::string DataFlowBinary; Set<uint32_t> Features, Cov; + Set<std::string> FilesWithDFT; Vector<std::string> Files; Random *Rand; std::chrono::system_clock::time_point ProcessStartTime; @@ -126,10 +127,13 @@ struct GlobalEnv { auto Job = new FuzzJob; std::string Seeds; if (size_t CorpusSubsetSize = - std::min(Files.size(), (size_t)sqrt(Files.size() + 2))) - for (size_t i = 0; i < CorpusSubsetSize; i++) - Seeds += (Seeds.empty() ? "" : ",") + - Files[Rand->SkewTowardsLast(Files.size())]; + std::min(Files.size(), (size_t)sqrt(Files.size() + 2))) { + for (size_t i = 0; i < CorpusSubsetSize; i++) { + auto &SF = Files[Rand->SkewTowardsLast(Files.size())]; + Seeds += (Seeds.empty() ? 
"" : ",") + SF; + CollectDFT(SF); + } + } if (!Seeds.empty()) { Job->SeedListPath = DirPlusFile(TempDir, std::to_string(JobId) + ".seeds"); @@ -196,7 +200,6 @@ struct GlobalEnv { auto NewPath = DirPlusFile(MainCorpusDir, Hash(U)); WriteToFile(U, NewPath); Files.push_back(NewPath); - CollectDFT(NewPath); } Features.insert(NewFeatures.begin(), NewFeatures.end()); Cov.insert(NewCov.begin(), NewCov.end()); @@ -217,6 +220,7 @@ struct GlobalEnv { void CollectDFT(const std::string &InputPath) { if (DataFlowBinary.empty()) return; + if (!FilesWithDFT.insert(InputPath).second) return; Command Cmd(Args); Cmd.removeFlag("fork"); Cmd.removeFlag("runs"); @@ -226,7 +230,7 @@ struct GlobalEnv { Cmd.removeArgument(C); Cmd.setOutputFile(DirPlusFile(TempDir, "dft.log")); Cmd.combineOutAndErr(); - // Printf("CollectDFT: %s %s\n", InputPath.c_str(), Cmd.toString().c_str()); + // Printf("CollectDFT: %s\n", Cmd.toString().c_str()); ExecuteCommand(Cmd); } @@ -296,9 +300,6 @@ void FuzzWithFork(Random &Rand, const FuzzingOptions &Options, CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, {}, &Env.Features, {}, &Env.Cov, CFPath, false); - for (auto &F : Env.Files) - Env.CollectDFT(F); - RemoveFile(CFPath); Printf("INFO: -fork=%d: %zd seed inputs, starting to fuzz in %s\n", NumJobs, Env.Files.size(), Env.TempDir.c_str()); diff --git a/lib/fuzzer/dataflow/DataFlow.cpp b/lib/fuzzer/dataflow/DataFlow.cpp index 989675e81..8a5d69509 100644 --- a/lib/fuzzer/dataflow/DataFlow.cpp +++ b/lib/fuzzer/dataflow/DataFlow.cpp @@ -35,7 +35,8 @@ // Run: // # Collect data flow and coverage for INPUT_FILE // # write to OUTPUT_FILE (default: stdout) -// ./a.out FIRST_LABEL LAST_LABEL INPUT_FILE [OUTPUT_FILE] +// export DFSAN_OPTIONS=fast16labels=1:warn_unimplemented=0 +// ./a.out INPUT_FILE [OUTPUT_FILE] // // # Print all instrumented functions. 
llvm-symbolizer must be present in PATH // ./a.out @@ -48,8 +49,6 @@ // C1 8 // =============== // "FN xxxxxxxxxx": tells what bytes of the input does the function N depend on. -// The byte string is LEN+1 bytes. The last byte is set if the function -// depends on the input length. // "CN X Y Z T": tells that a function N has basic blocks X, Y, and Z covered // in addition to the function's entry block, out of T total instrumented // blocks. @@ -72,22 +71,20 @@ __attribute__((weak)) extern int LLVMFuzzerInitialize(int *argc, char ***argv); } // extern "C" static size_t InputLen; -static size_t InputLabelBeg; -static size_t InputLabelEnd; -static size_t InputSizeLabel; +static size_t NumIterations; static size_t NumFuncs, NumGuards; static uint32_t *GuardsBeg, *GuardsEnd; static const uintptr_t *PCsBeg, *PCsEnd; -static __thread size_t CurrentFunc; -static dfsan_label *FuncLabels; // Array of NumFuncs elements. +static __thread size_t CurrentFunc, CurrentIteration; +static dfsan_label **FuncLabels; // NumFuncs x NumIterations. static bool *BBExecuted; // Array of NumGuards elements. -static char *PrintableStringForLabel; // InputLen + 2 bytes. 
-static bool LabelSeen[1 << 8 * sizeof(dfsan_label)]; enum { PCFLAG_FUNC_ENTRY = 1, }; +const int kNumLabels = 16; + static inline bool BlockIsEntry(size_t BlockIdx) { return PCsBeg[BlockIdx * 2 + 1] & PCFLAG_FUNC_ENTRY; } @@ -112,35 +109,32 @@ static int PrintFunctions() { return 0; } -extern "C" -void SetBytesForLabel(dfsan_label L, char *Bytes) { - if (LabelSeen[L]) - return; - LabelSeen[L] = true; - assert(L); - if (L < InputSizeLabel) { - Bytes[L + InputLabelBeg - 1] = '1'; - } else if (L == InputSizeLabel) { - Bytes[InputLen] = '1'; - } else { - auto *DLI = dfsan_get_label_info(L); - SetBytesForLabel(DLI->l1, Bytes); - SetBytesForLabel(DLI->l2, Bytes); - } -} - -static char *GetPrintableStringForLabel(dfsan_label L) { - memset(PrintableStringForLabel, '0', InputLen + 1); - PrintableStringForLabel[InputLen + 1] = 0; - memset(LabelSeen, 0, sizeof(LabelSeen)); - SetBytesForLabel(L, PrintableStringForLabel); - return PrintableStringForLabel; +static void PrintBinary(FILE *Out, dfsan_label L, size_t Len) { + char buf[kNumLabels + 1]; + assert(Len <= kNumLabels); + for (int i = 0; i < kNumLabels; i++) + buf[i] = (L & (1 << i)) ? '1' : '0'; + buf[Len] = 0; + fprintf(Out, "%s", buf); } static void PrintDataFlow(FILE *Out) { - for (size_t I = 0; I < NumFuncs; I++) - if (FuncLabels[I]) - fprintf(Out, "F%zd %s\n", I, GetPrintableStringForLabel(FuncLabels[I])); + for (size_t Func = 0; Func < NumFuncs; Func++) { + bool HasAny = false; + for (size_t Iter = 0; Iter < NumIterations; Iter++) + if (FuncLabels[Func][Iter]) + HasAny = true; + if (!HasAny) + continue; + fprintf(Out, "F%zd ", Func); + size_t LenOfLastIteration = kNumLabels; + if (auto Tail = InputLen % kNumLabels) + LenOfLastIteration = Tail; + for (size_t Iter = 0; Iter < NumIterations; Iter++) + PrintBinary(Out, FuncLabels[Func][Iter], + Iter == NumIterations - 1 ? 
LenOfLastIteration : kNumLabels); + fprintf(Out, "\n"); + } } static void PrintCoverage(FILE *Out) { @@ -169,12 +163,9 @@ int main(int argc, char **argv) { LLVMFuzzerInitialize(&argc, &argv); if (argc == 1) return PrintFunctions(); - assert(argc == 4 || argc == 5); - InputLabelBeg = atoi(argv[1]); - InputLabelEnd = atoi(argv[2]); - assert(InputLabelBeg < InputLabelEnd); + assert(argc == 2 || argc == 3); - const char *Input = argv[3]; + const char *Input = argv[1]; fprintf(stderr, "INFO: reading '%s'\n", Input); FILE *In = fopen(Input, "r"); assert(In); @@ -184,30 +175,35 @@ int main(int argc, char **argv) { unsigned char *Buf = (unsigned char*)malloc(InputLen); size_t NumBytesRead = fread(Buf, 1, InputLen, In); assert(NumBytesRead == InputLen); - PrintableStringForLabel = (char*)malloc(InputLen + 2); fclose(In); - fprintf(stderr, "INFO: running '%s'\n", Input); - for (size_t I = 1; I <= InputLen; I++) { - size_t Idx = I - 1; - if (Idx >= InputLabelBeg && Idx < InputLabelEnd) { - dfsan_label L = dfsan_create_label("", nullptr); - assert(L == I - InputLabelBeg); - dfsan_set_label(L, Buf + Idx, 1); - } - } - dfsan_label SizeL = dfsan_create_label("", nullptr); - InputSizeLabel = SizeL; - assert(InputSizeLabel == InputLabelEnd - InputLabelBeg + 1); - dfsan_set_label(SizeL, &InputLen, sizeof(InputLen)); + NumIterations = (NumBytesRead + kNumLabels - 1) / kNumLabels; + FuncLabels = (dfsan_label**)calloc(NumFuncs, sizeof(dfsan_label*)); + for (size_t Func = 0; Func < NumFuncs; Func++) + FuncLabels[Func] = + (dfsan_label *)calloc(NumIterations, sizeof(dfsan_label)); - LLVMFuzzerTestOneInput(Buf, InputLen); + for (CurrentIteration = 0; CurrentIteration < NumIterations; + CurrentIteration++) { + fprintf(stderr, "INFO: running '%s' %zd/%zd\n", Input, CurrentIteration, + NumIterations); + dfsan_flush(); + dfsan_set_label(0, Buf, InputLen); + + size_t BaseIdx = CurrentIteration * kNumLabels; + size_t LastIdx = BaseIdx + kNumLabels < NumBytesRead ? 
BaseIdx + kNumLabels + : NumBytesRead; + assert(BaseIdx < LastIdx); + for (size_t Idx = BaseIdx; Idx < LastIdx; Idx++) + dfsan_set_label(1 << (Idx - BaseIdx), Buf + Idx, 1); + LLVMFuzzerTestOneInput(Buf, InputLen); + } free(Buf); - bool OutIsStdout = argc == 4; + bool OutIsStdout = argc == 2; fprintf(stderr, "INFO: writing dataflow to %s\n", - OutIsStdout ? "<stdout>" : argv[4]); - FILE *Out = OutIsStdout ? stdout : fopen(argv[4], "w"); + OutIsStdout ? "<stdout>" : argv[2]); + FILE *Out = OutIsStdout ? stdout : fopen(argv[2], "w"); PrintDataFlow(Out); PrintCoverage(Out); if (!OutIsStdout) fclose(Out); @@ -237,7 +233,6 @@ void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg, GuardsBeg[i] = NumFuncs; } } - FuncLabels = (dfsan_label*)calloc(NumFuncs, sizeof(dfsan_label)); BBExecuted = (bool*)calloc(NumGuards, sizeof(bool)); fprintf(stderr, "INFO: %zd instrumented function(s) observed " "and %zd basic blocks\n", NumFuncs, NumGuards); @@ -258,14 +253,13 @@ void __sanitizer_cov_trace_pc_guard(uint32_t *guard) { void __dfsw___sanitizer_cov_trace_switch(uint64_t Val, uint64_t *Cases, dfsan_label L1, dfsan_label UnusedL) { assert(CurrentFunc < NumFuncs); - FuncLabels[CurrentFunc] = dfsan_union(FuncLabels[CurrentFunc], L1); + FuncLabels[CurrentFunc][CurrentIteration] |= L1; } #define HOOK(Name, Type) \ void Name(Type Arg1, Type Arg2, dfsan_label L1, dfsan_label L2) { \ assert(CurrentFunc < NumFuncs); \ - FuncLabels[CurrentFunc] = \ - dfsan_union(FuncLabels[CurrentFunc], dfsan_union(L1, L2)); \ + FuncLabels[CurrentFunc][CurrentIteration] |= L1 | L2; \ } HOOK(__dfsw___sanitizer_cov_trace_const_cmp1, uint8_t) |