[libFuzzer] Make -merge=1 reuse coverage information from the control file.

Summary: This change allows corpus merging to be performed in two steps. This is useful when the user wants to address the following two points simultaneously: 1) Get trustworthy incremental stats for the coverage and corpus size changes when adding new corpus units. 2) Make sure that shorter units are preferred when two or more units give the same unique signal (equivalent to the `REDUCE` logic). This solution was brainstormed together with @kcc; hopefully it looks good to other people too. The proposed use case scenario: 1) We have a `fuzz_target` binary and an `existing_corpus` directory. 2) We do fuzzing and write new units into the `new_corpus` directory. 3) We want to merge the new corpus into the existing corpus and satisfy the points mentioned above. 4) We create an empty directory `merged_corpus` and run the first merge step: `./fuzz_target -merge=1 -merge_control_file=MCF ./merged_corpus ./existing_corpus` This provides the initial stats for `existing_corpus`, e.g. from the output: `MERGE-OUTER: 3 new files with 11 new features added; 11 new coverage edges` 5) We recreate the `merged_corpus` directory and run the second merge step: `./fuzz_target -merge=1 -merge_control_file=MCF ./merged_corpus ./existing_corpus ./new_corpus` This provides the final stats for the merged corpus, e.g. from the output: `MERGE-OUTER: 6 new files with 14 new features added; 14 new coverage edges` Alternative solutions to this approach are: A) Store precise coverage information for every unit (not only the unique signal). B) Execute the same two steps without reusing the control file. Either of these would be suboptimal, as it would impose extra disk or CPU load, respectively, which is bad given the quadratic complexity in the worst case. Tested on Linux, Mac, Windows.
Reviewers: morehouse, metzman, hctim, kcc Reviewed By: morehouse Subscribers: JDevlieghere, delcypher, mgrang, #sanitizers, llvm-commits, kcc Tags: #llvm, #sanitizers Differential Revision: https://reviews.llvm.org/D66107 git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@371620 91177308-0d34-0410-b5e6-96231b3b80d8
author: Max Moroz <mmoroz@chromium.org> 2019-09-11 14:11:08 +0000
committer: Max Moroz <mmoroz@chromium.org> 2019-09-11 14:11:08 +0000
commit: 460c5df7e72dc468923186dde9f8c3e45d0f54b2 (patch)
tree: 84b847a87bc8593a58572235d0ea6cb8c5e9baff
parent: 95fd2921ca612ac342dcb486c910cdaa4598bb20 (diff)
download: compiler-rt-460c5df7e72dc468923186dde9f8c3e45d0f54b2.tar.gz
4 files changed, 88 insertions, 23 deletions
diff --git a/lib/fuzzer/FuzzerDefs.h b/lib/fuzzer/FuzzerDefs.h
index 320b37d5f..5dc2d8e1a 100644
--- a/lib/fuzzer/FuzzerDefs.h
+++ b/lib/fuzzer/FuzzerDefs.h
@@ -15,10 +15,11 @@
 #include <cstddef>
 #include <cstdint>
 #include <cstring>
+#include <memory>
+#include <set>
 #include <string>
 #include <vector>
-#include <set>
-#include <memory>
+
 
 // Platform detection.
 #ifdef __linux__
diff --git a/lib/fuzzer/FuzzerMerge.cpp b/lib/fuzzer/FuzzerMerge.cpp
index c05c49225..e3ad8b385 100644
--- a/lib/fuzzer/FuzzerMerge.cpp
+++ b/lib/fuzzer/FuzzerMerge.cpp
@@ -19,6 +19,7 @@
 #include <iterator>
 #include <set>
 #include <sstream>
+#include <unordered_set>
 
 namespace fuzzer {
 
@@ -221,7 +222,7 @@ void Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath) {
       U.resize(MaxInputLen);
       U.shrink_to_fit();
     }
-    std::ostringstream StartedLine;
+
     // Write the pre-run marker.
     OF << "STARTED " << i << " " << U.size() << "\n";
     OF.flush();  // Flush is important since Command::Execute may crash.
@@ -260,22 +261,39 @@ void Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath) {
   PrintStatsWrapper("DONE  ");
 }
 
-static void WriteNewControlFile(const std::string &CFPath,
-                                const Vector<SizedFile> &OldCorpus,
-                                const Vector<SizedFile> &NewCorpus) {
-  RemoveFile(CFPath);
-  std::ofstream ControlFile(CFPath);
-  ControlFile << (OldCorpus.size() + NewCorpus.size()) << "\n";
-  ControlFile << OldCorpus.size() << "\n";
+static size_t WriteNewControlFile(const std::string &CFPath,
+                                  const Vector<SizedFile> &OldCorpus,
+                                  const Vector<SizedFile> &NewCorpus,
+                                  const Vector<MergeFileInfo> &KnownFiles) {
+  std::unordered_set<std::string> FilesToSkip;
+  for (auto &SF: KnownFiles)
+    FilesToSkip.insert(SF.Name);
+
+  Vector<std::string> FilesToUse;
+  auto MaybeUseFile = [=, &FilesToUse](std::string Name) {
+    if (FilesToSkip.find(Name) == FilesToSkip.end())
+      FilesToUse.push_back(Name);
+  };
   for (auto &SF: OldCorpus)
-    ControlFile << SF.File << "\n";
+    MaybeUseFile(SF.File);
+  auto FilesToUseFromOldCorpus = FilesToUse.size();
   for (auto &SF: NewCorpus)
-    ControlFile << SF.File << "\n";
+    MaybeUseFile(SF.File);
+
+  RemoveFile(CFPath);
+  std::ofstream ControlFile(CFPath);
+  ControlFile << FilesToUse.size() << "\n";
+  ControlFile << FilesToUseFromOldCorpus << "\n";
+  for (auto &FN: FilesToUse)
+    ControlFile << FN << "\n";
+
   if (!ControlFile) {
     Printf("MERGE-OUTER: failed to write to the control file: %s\n",
            CFPath.c_str());
     exit(1);
   }
+
+  return FilesToUse.size();
 }
 
 // Outer process. Does not call the target code and thus should not fail.
@@ -291,12 +309,13 @@ void CrashResistantMerge(const Vector<std::string> &Args,
                          bool V /*Verbose*/) {
   if (NewCorpus.empty() && OldCorpus.empty()) return;  // Nothing to merge.
   size_t NumAttempts = 0;
+  Vector<MergeFileInfo> KnownFiles;
   if (FileSize(CFPath)) {
     VPrintf(V, "MERGE-OUTER: non-empty control file provided: '%s'\n",
            CFPath.c_str());
     Merger M;
     std::ifstream IF(CFPath);
-    if (M.Parse(IF, /*ParseCoverage=*/false)) {
+    if (M.Parse(IF, /*ParseCoverage=*/true)) {
       VPrintf(V, "MERGE-OUTER: control file ok, %zd files total,"
              " first not processed file %zd\n",
              M.Files.size(), M.FirstNotProcessedFile);
@@ -305,12 +324,25 @@ void CrashResistantMerge(const Vector<std::string> &Args,
                "(merge has stumbled on it the last time)\n",
                M.LastFailure.c_str());
       if (M.FirstNotProcessedFile >= M.Files.size()) {
+        // Merge has already been completed with the given merge control file.
+        if (M.Files.size() == OldCorpus.size() + NewCorpus.size()) {
+          VPrintf(
+              V,
+              "MERGE-OUTER: nothing to do, merge has been completed before\n");
+          exit(0);
+        }
+
+        // Number of input files likely changed, start merge from scratch, but
+        // reuse coverage information from the given merge control file.
         VPrintf(
-            V, "MERGE-OUTER: nothing to do, merge has been completed before\n");
-        exit(0);
+            V,
+            "MERGE-OUTER: starting merge from scratch, but reusing coverage "
+            "information from the given control file\n");
+        KnownFiles = M.Files;
+      } else {
+        // There is a merge in progress, continue.
+        NumAttempts = M.Files.size() - M.FirstNotProcessedFile;
       }
-
-      NumAttempts = M.Files.size() - M.FirstNotProcessedFile;
     } else {
       VPrintf(V, "MERGE-OUTER: bad control file, will overwrite it\n");
     }
@@ -318,10 +350,11 @@ void CrashResistantMerge(const Vector<std::string> &Args,
 
   if (!NumAttempts) {
     // The supplied control file is empty or bad, create a fresh one.
-    NumAttempts = OldCorpus.size() + NewCorpus.size();
-    VPrintf(V, "MERGE-OUTER: %zd files, %zd in the initial corpus\n",
-            NumAttempts, OldCorpus.size());
-    WriteNewControlFile(CFPath, OldCorpus, NewCorpus);
+    VPrintf(V, "MERGE-OUTER: "
+            "%zd files, %zd in the initial corpus, %zd processed earlier\n",
+            OldCorpus.size() + NewCorpus.size(), OldCorpus.size(),
+            KnownFiles.size());
+    NumAttempts = WriteNewControlFile(CFPath, OldCorpus, NewCorpus, KnownFiles);
   }
 
   // Execute the inner process until it passes.
@@ -358,6 +391,8 @@ void CrashResistantMerge(const Vector<std::string> &Args,
   VPrintf(V,
           "MERGE-OUTER: consumed %zdMb (%zdMb rss) to parse the control file\n",
           M.ApproximateMemoryConsumption() >> 20, GetPeakRSSMb());
+
+  M.Files.insert(M.Files.end(), KnownFiles.begin(), KnownFiles.end());
   M.Merge(InitialFeatures, NewFeatures, InitialCov, NewCov, NewFiles);
   VPrintf(V, "MERGE-OUTER: %zd new files with %zd new features added; "
           "%zd new coverage edges\n",
diff --git a/test/fuzzer/merge.test b/test/fuzzer/merge.test
index c003df282..ef5597a25 100644
--- a/test/fuzzer/merge.test
+++ b/test/fuzzer/merge.test
@@ -1,5 +1,3 @@
-CHECK: BINGO
-
 RUN: %cpp_compiler %S/FullCoverageSetTest.cpp -o %t-FullCoverageSetTest
 
 RUN: rm -rf %t/T0 %t/T1 %t/T2
diff --git a/test/fuzzer/merge_two_step.test b/test/fuzzer/merge_two_step.test
new file mode 100644
index 000000000..42abe0b3d
--- /dev/null
+++ b/test/fuzzer/merge_two_step.test
@@ -0,0 +1,31 @@
+RUN: %cpp_compiler %S/FullCoverageSetTest.cpp -o %t-FullCoverageSetTest
+
+RUN: rm -rf %t/T0 %t/T1 %t/T2
+RUN: mkdir -p %t/T0 %t/T1 %t/T2
+RUN: echo F..... > %t/T1/1
+RUN: echo .U.... > %t/T1/2
+RUN: echo ..Z... > %t/T1/3
+
+# T1 has 3 elements, T0 is empty.
+RUN: rm -f %t/MCF
+RUN: %run %t-FullCoverageSetTest -merge=1 -merge_control_file=%t/MCF %t/T0 %t/T1 2>&1 | FileCheck %s --check-prefix=CHECK1
+CHECK1: MERGE-OUTER: 3 files, 0 in the initial corpus
+CHECK1: MERGE-OUTER: 3 new files with 11 new features added; 11 new coverage edges
+
+RUN: echo ...Z.. > %t/T2/1
+RUN: echo ....E. > %t/T2/2
+RUN: echo .....R > %t/T2/3
+RUN: echo F..... > %t/T2/a
+
+RUN: rm -rf %t/T0
+RUN: mkdir -p %t/T0
+
+# T1 has 3 elements, T2 has 4 elements, T0 is empty.
+RUN: %run %t-FullCoverageSetTest -merge=1 -merge_control_file=%t/MCF %t/T0 %t/T1 %t/T2 2>&1 | FileCheck %s --check-prefix=CHECK2
+CHECK2: MERGE-OUTER: non-empty control file provided
+CHECK2: MERGE-OUTER: control file ok, 3 files total, first not processed file 3
+CHECK2: MERGE-OUTER: starting merge from scratch, but reusing coverage information from the given control file
+CHECK2: MERGE-OUTER: 7 files, 0 in the initial corpus, 3 processed earlier
+CHECK2: MERGE-INNER: using the control file 
+CHECK2: MERGE-INNER: 4 total files; 0 processed earlier; will process 4 files now
+CHECK2: MERGE-OUTER: 6 new files with 14 new features added; 14 new coverage edges
author	Max Moroz <mmoroz@chromium.org>	2019-09-11 14:11:08 +0000
committer	Max Moroz <mmoroz@chromium.org>	2019-09-11 14:11:08 +0000
commit	460c5df7e72dc468923186dde9f8c3e45d0f54b2 (patch)
tree	84b847a87bc8593a58572235d0ea6cb8c5e9baff
parent	95fd2921ca612ac342dcb486c910cdaa4598bb20 (diff)
download	compiler-rt-460c5df7e72dc468923186dde9f8c3e45d0f54b2.tar.gz