summaryrefslogtreecommitdiff
path: root/lib/fuzzer
diff options
context:
space:
mode:
authorKostya Serebryany <kcc@google.com>2019-06-13 21:17:49 +0000
committerKostya Serebryany <kcc@google.com>2019-06-13 21:17:49 +0000
commit3346bd6120200e9950db2a4aa3f31cf3d8ac2443 (patch)
tree823f05f5196f02ac7ea94377cf29c43d85b79e9c /lib/fuzzer
parent6ac86bdc1602faac865d13039fd000e9e5ef1f58 (diff)
downloadcompiler-rt-3346bd6120200e9950db2a4aa3f31cf3d8ac2443.tar.gz
[libFuzzer] simplify the DFT trace collection using the new faster DFSan mode that traces up to 16 labels at a time and never runs out of labels.
git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@363326 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/fuzzer')
-rw-r--r--lib/fuzzer/FuzzerDataFlowTrace.cpp72
-rw-r--r--lib/fuzzer/FuzzerFork.cpp19
-rw-r--r--lib/fuzzer/dataflow/DataFlow.cpp120
3 files changed, 75 insertions, 136 deletions
diff --git a/lib/fuzzer/FuzzerDataFlowTrace.cpp b/lib/fuzzer/FuzzerDataFlowTrace.cpp
index 1fba3913c..311f53a31 100644
--- a/lib/fuzzer/FuzzerDataFlowTrace.cpp
+++ b/lib/fuzzer/FuzzerDataFlowTrace.cpp
@@ -120,12 +120,6 @@ static Vector<uint8_t> DFTStringToVector(const std::string &DFTString) {
return DFT;
}
-static std::ostream &operator<<(std::ostream &OS, const Vector<uint8_t> &DFT) {
- for (auto B : DFT)
- OS << (B ? "1" : "0");
- return OS;
-}
-
static bool ParseError(const char *Err, const std::string &Line) {
Printf("DataFlowTrace: parse error: %s: Line: %s\n", Err, Line.c_str());
return false;
@@ -246,74 +240,24 @@ int CollectDataFlow(const std::string &DFTBinary, const std::string &DirPath,
const Vector<SizedFile> &CorporaFiles) {
Printf("INFO: collecting data flow: bin: %s dir: %s files: %zd\n",
DFTBinary.c_str(), DirPath.c_str(), CorporaFiles.size());
+ setenv("DFSAN_OPTIONS", "fast16labels=1:warn_unimplemented=0", 1);
MkDir(DirPath);
- auto Temp = TempPath(".dft");
for (auto &F : CorporaFiles) {
// For every input F we need to collect the data flow and the coverage.
// Data flow collection may fail if we request too many DFSan tags at once.
// So, we start from requesting all tags in range [0,Size) and if that fails
// we then request tags in [0,Size/2) and [Size/2, Size), and so on.
// Function number => DFT.
+ auto OutPath = DirPlusFile(DirPath, Hash(FileToVector(F.File)));
std::unordered_map<size_t, Vector<uint8_t>> DFTMap;
std::unordered_set<std::string> Cov;
- std::queue<std::pair<size_t, size_t>> Q;
- Q.push({0, F.Size});
- while (!Q.empty()) {
- auto R = Q.front();
- Printf("\n\n\n********* Trying: [%zd, %zd)\n", R.first, R.second);
- Q.pop();
- Command Cmd;
- Cmd.addArgument(DFTBinary);
- Cmd.addArgument(std::to_string(R.first));
- Cmd.addArgument(std::to_string(R.second));
- Cmd.addArgument(F.File);
- Cmd.addArgument(Temp);
- Printf("CMD: %s\n", Cmd.toString().c_str());
- if (ExecuteCommand(Cmd)) {
- // DFSan has failed, collect tags for two subsets.
- if (R.second - R.first >= 2) {
- size_t Mid = (R.second + R.first) / 2;
- Q.push({R.first, Mid});
- Q.push({Mid, R.second});
- }
- } else {
- Printf("********* Success: [%zd, %zd)\n", R.first, R.second);
- std::ifstream IF(Temp);
- std::string L;
- while (std::getline(IF, L, '\n')) {
- // Data flow collection has succeeded.
- // Merge the results with the other runs.
- if (L.empty()) continue;
- if (L[0] == 'C') {
- // Take coverage lines as is, they will be the same in all attempts.
- Cov.insert(L);
- } else if (L[0] == 'F') {
- size_t FunctionNum = 0;
- std::string DFTString;
- if (ParseDFTLine(L, &FunctionNum, &DFTString)) {
- auto &DFT = DFTMap[FunctionNum];
- if (DFT.empty()) {
- // Haven't seen this function before, take DFT as is.
- DFT = DFTStringToVector(DFTString);
- } else if (DFT.size() == DFTString.size()) {
- // Have seen this function already, merge DFTs.
- DFTStringAppendToVector(&DFT, DFTString);
- }
- }
- }
- }
- }
- }
- auto OutPath = DirPlusFile(DirPath, Hash(FileToVector(F.File)));
- // Dump combined DFT to disk.
- Printf("Producing DFT for %s\n", OutPath.c_str());
- std::ofstream OF(OutPath);
- for (auto &DFT: DFTMap)
- OF << "F" << DFT.first << " " << DFT.second << std::endl;
- for (auto &C : Cov)
- OF << C << std::endl;
+ Command Cmd;
+ Cmd.addArgument(DFTBinary);
+ Cmd.addArgument(F.File);
+ Cmd.addArgument(OutPath);
+ Printf("CMD: %s\n", Cmd.toString().c_str());
+ ExecuteCommand(Cmd);
}
- RemoveFile(Temp);
// Write functions.txt if it's currently empty or doesn't exist.
auto FunctionsTxtPath = DirPlusFile(DirPath, kFunctionsTxt);
if (FileToString(FunctionsTxtPath).empty()) {
diff --git a/lib/fuzzer/FuzzerFork.cpp b/lib/fuzzer/FuzzerFork.cpp
index 870a22448..5c4855f82 100644
--- a/lib/fuzzer/FuzzerFork.cpp
+++ b/lib/fuzzer/FuzzerFork.cpp
@@ -89,6 +89,7 @@ struct GlobalEnv {
std::string DFTDir;
std::string DataFlowBinary;
Set<uint32_t> Features, Cov;
+ Set<std::string> FilesWithDFT;
Vector<std::string> Files;
Random *Rand;
std::chrono::system_clock::time_point ProcessStartTime;
@@ -126,10 +127,13 @@ struct GlobalEnv {
auto Job = new FuzzJob;
std::string Seeds;
if (size_t CorpusSubsetSize =
- std::min(Files.size(), (size_t)sqrt(Files.size() + 2)))
- for (size_t i = 0; i < CorpusSubsetSize; i++)
- Seeds += (Seeds.empty() ? "" : ",") +
- Files[Rand->SkewTowardsLast(Files.size())];
+ std::min(Files.size(), (size_t)sqrt(Files.size() + 2))) {
+ for (size_t i = 0; i < CorpusSubsetSize; i++) {
+ auto &SF = Files[Rand->SkewTowardsLast(Files.size())];
+ Seeds += (Seeds.empty() ? "" : ",") + SF;
+ CollectDFT(SF);
+ }
+ }
if (!Seeds.empty()) {
Job->SeedListPath =
DirPlusFile(TempDir, std::to_string(JobId) + ".seeds");
@@ -196,7 +200,6 @@ struct GlobalEnv {
auto NewPath = DirPlusFile(MainCorpusDir, Hash(U));
WriteToFile(U, NewPath);
Files.push_back(NewPath);
- CollectDFT(NewPath);
}
Features.insert(NewFeatures.begin(), NewFeatures.end());
Cov.insert(NewCov.begin(), NewCov.end());
@@ -217,6 +220,7 @@ struct GlobalEnv {
void CollectDFT(const std::string &InputPath) {
if (DataFlowBinary.empty()) return;
+ if (!FilesWithDFT.insert(InputPath).second) return;
Command Cmd(Args);
Cmd.removeFlag("fork");
Cmd.removeFlag("runs");
@@ -226,7 +230,7 @@ struct GlobalEnv {
Cmd.removeArgument(C);
Cmd.setOutputFile(DirPlusFile(TempDir, "dft.log"));
Cmd.combineOutAndErr();
- // Printf("CollectDFT: %s %s\n", InputPath.c_str(), Cmd.toString().c_str());
+ // Printf("CollectDFT: %s\n", Cmd.toString().c_str());
ExecuteCommand(Cmd);
}
@@ -296,9 +300,6 @@ void FuzzWithFork(Random &Rand, const FuzzingOptions &Options,
CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, {}, &Env.Features,
{}, &Env.Cov,
CFPath, false);
- for (auto &F : Env.Files)
- Env.CollectDFT(F);
-
RemoveFile(CFPath);
Printf("INFO: -fork=%d: %zd seed inputs, starting to fuzz in %s\n", NumJobs,
Env.Files.size(), Env.TempDir.c_str());
diff --git a/lib/fuzzer/dataflow/DataFlow.cpp b/lib/fuzzer/dataflow/DataFlow.cpp
index 989675e81..8a5d69509 100644
--- a/lib/fuzzer/dataflow/DataFlow.cpp
+++ b/lib/fuzzer/dataflow/DataFlow.cpp
@@ -35,7 +35,8 @@
// Run:
// # Collect data flow and coverage for INPUT_FILE
// # write to OUTPUT_FILE (default: stdout)
-// ./a.out FIRST_LABEL LAST_LABEL INPUT_FILE [OUTPUT_FILE]
+// export DFSAN_OPTIONS=fast16labels=1:warn_unimplemented=0
+// ./a.out INPUT_FILE [OUTPUT_FILE]
//
// # Print all instrumented functions. llvm-symbolizer must be present in PATH
// ./a.out
@@ -48,8 +49,6 @@
// C1 8
// ===============
// "FN xxxxxxxxxx": tells what bytes of the input does the function N depend on.
-// The byte string is LEN+1 bytes. The last byte is set if the function
-// depends on the input length.
// "CN X Y Z T": tells that a function N has basic blocks X, Y, and Z covered
// in addition to the function's entry block, out of T total instrumented
// blocks.
@@ -72,22 +71,20 @@ __attribute__((weak)) extern int LLVMFuzzerInitialize(int *argc, char ***argv);
} // extern "C"
static size_t InputLen;
-static size_t InputLabelBeg;
-static size_t InputLabelEnd;
-static size_t InputSizeLabel;
+static size_t NumIterations;
static size_t NumFuncs, NumGuards;
static uint32_t *GuardsBeg, *GuardsEnd;
static const uintptr_t *PCsBeg, *PCsEnd;
-static __thread size_t CurrentFunc;
-static dfsan_label *FuncLabels; // Array of NumFuncs elements.
+static __thread size_t CurrentFunc, CurrentIteration;
+static dfsan_label **FuncLabels; // NumFuncs x NumIterations.
static bool *BBExecuted; // Array of NumGuards elements.
-static char *PrintableStringForLabel; // InputLen + 2 bytes.
-static bool LabelSeen[1 << 8 * sizeof(dfsan_label)];
enum {
PCFLAG_FUNC_ENTRY = 1,
};
+const int kNumLabels = 16;
+
static inline bool BlockIsEntry(size_t BlockIdx) {
return PCsBeg[BlockIdx * 2 + 1] & PCFLAG_FUNC_ENTRY;
}
@@ -112,35 +109,32 @@ static int PrintFunctions() {
return 0;
}
-extern "C"
-void SetBytesForLabel(dfsan_label L, char *Bytes) {
- if (LabelSeen[L])
- return;
- LabelSeen[L] = true;
- assert(L);
- if (L < InputSizeLabel) {
- Bytes[L + InputLabelBeg - 1] = '1';
- } else if (L == InputSizeLabel) {
- Bytes[InputLen] = '1';
- } else {
- auto *DLI = dfsan_get_label_info(L);
- SetBytesForLabel(DLI->l1, Bytes);
- SetBytesForLabel(DLI->l2, Bytes);
- }
-}
-
-static char *GetPrintableStringForLabel(dfsan_label L) {
- memset(PrintableStringForLabel, '0', InputLen + 1);
- PrintableStringForLabel[InputLen + 1] = 0;
- memset(LabelSeen, 0, sizeof(LabelSeen));
- SetBytesForLabel(L, PrintableStringForLabel);
- return PrintableStringForLabel;
+static void PrintBinary(FILE *Out, dfsan_label L, size_t Len) {
+ char buf[kNumLabels + 1];
+ assert(Len <= kNumLabels);
+ for (int i = 0; i < kNumLabels; i++)
+ buf[i] = (L & (1 << i)) ? '1' : '0';
+ buf[Len] = 0;
+ fprintf(Out, "%s", buf);
}
static void PrintDataFlow(FILE *Out) {
- for (size_t I = 0; I < NumFuncs; I++)
- if (FuncLabels[I])
- fprintf(Out, "F%zd %s\n", I, GetPrintableStringForLabel(FuncLabels[I]));
+ for (size_t Func = 0; Func < NumFuncs; Func++) {
+ bool HasAny = false;
+ for (size_t Iter = 0; Iter < NumIterations; Iter++)
+ if (FuncLabels[Func][Iter])
+ HasAny = true;
+ if (!HasAny)
+ continue;
+ fprintf(Out, "F%zd ", Func);
+ size_t LenOfLastIteration = kNumLabels;
+ if (auto Tail = InputLen % kNumLabels)
+ LenOfLastIteration = Tail;
+ for (size_t Iter = 0; Iter < NumIterations; Iter++)
+ PrintBinary(Out, FuncLabels[Func][Iter],
+ Iter == NumIterations - 1 ? LenOfLastIteration : kNumLabels);
+ fprintf(Out, "\n");
+ }
}
static void PrintCoverage(FILE *Out) {
@@ -169,12 +163,9 @@ int main(int argc, char **argv) {
LLVMFuzzerInitialize(&argc, &argv);
if (argc == 1)
return PrintFunctions();
- assert(argc == 4 || argc == 5);
- InputLabelBeg = atoi(argv[1]);
- InputLabelEnd = atoi(argv[2]);
- assert(InputLabelBeg < InputLabelEnd);
+ assert(argc == 2 || argc == 3);
- const char *Input = argv[3];
+ const char *Input = argv[1];
fprintf(stderr, "INFO: reading '%s'\n", Input);
FILE *In = fopen(Input, "r");
assert(In);
@@ -184,30 +175,35 @@ int main(int argc, char **argv) {
unsigned char *Buf = (unsigned char*)malloc(InputLen);
size_t NumBytesRead = fread(Buf, 1, InputLen, In);
assert(NumBytesRead == InputLen);
- PrintableStringForLabel = (char*)malloc(InputLen + 2);
fclose(In);
- fprintf(stderr, "INFO: running '%s'\n", Input);
- for (size_t I = 1; I <= InputLen; I++) {
- size_t Idx = I - 1;
- if (Idx >= InputLabelBeg && Idx < InputLabelEnd) {
- dfsan_label L = dfsan_create_label("", nullptr);
- assert(L == I - InputLabelBeg);
- dfsan_set_label(L, Buf + Idx, 1);
- }
- }
- dfsan_label SizeL = dfsan_create_label("", nullptr);
- InputSizeLabel = SizeL;
- assert(InputSizeLabel == InputLabelEnd - InputLabelBeg + 1);
- dfsan_set_label(SizeL, &InputLen, sizeof(InputLen));
+ NumIterations = (NumBytesRead + kNumLabels - 1) / kNumLabels;
+ FuncLabels = (dfsan_label**)calloc(NumFuncs, sizeof(dfsan_label*));
+ for (size_t Func = 0; Func < NumFuncs; Func++)
+ FuncLabels[Func] =
+ (dfsan_label *)calloc(NumIterations, sizeof(dfsan_label));
- LLVMFuzzerTestOneInput(Buf, InputLen);
+ for (CurrentIteration = 0; CurrentIteration < NumIterations;
+ CurrentIteration++) {
+ fprintf(stderr, "INFO: running '%s' %zd/%zd\n", Input, CurrentIteration,
+ NumIterations);
+ dfsan_flush();
+ dfsan_set_label(0, Buf, InputLen);
+
+ size_t BaseIdx = CurrentIteration * kNumLabels;
+ size_t LastIdx = BaseIdx + kNumLabels < NumBytesRead ? BaseIdx + kNumLabels
+ : NumBytesRead;
+ assert(BaseIdx < LastIdx);
+ for (size_t Idx = BaseIdx; Idx < LastIdx; Idx++)
+ dfsan_set_label(1 << (Idx - BaseIdx), Buf + Idx, 1);
+ LLVMFuzzerTestOneInput(Buf, InputLen);
+ }
free(Buf);
- bool OutIsStdout = argc == 4;
+ bool OutIsStdout = argc == 2;
fprintf(stderr, "INFO: writing dataflow to %s\n",
- OutIsStdout ? "<stdout>" : argv[4]);
- FILE *Out = OutIsStdout ? stdout : fopen(argv[4], "w");
+ OutIsStdout ? "<stdout>" : argv[2]);
+ FILE *Out = OutIsStdout ? stdout : fopen(argv[2], "w");
PrintDataFlow(Out);
PrintCoverage(Out);
if (!OutIsStdout) fclose(Out);
@@ -237,7 +233,6 @@ void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg,
GuardsBeg[i] = NumFuncs;
}
}
- FuncLabels = (dfsan_label*)calloc(NumFuncs, sizeof(dfsan_label));
BBExecuted = (bool*)calloc(NumGuards, sizeof(bool));
fprintf(stderr, "INFO: %zd instrumented function(s) observed "
"and %zd basic blocks\n", NumFuncs, NumGuards);
@@ -258,14 +253,13 @@ void __sanitizer_cov_trace_pc_guard(uint32_t *guard) {
void __dfsw___sanitizer_cov_trace_switch(uint64_t Val, uint64_t *Cases,
dfsan_label L1, dfsan_label UnusedL) {
assert(CurrentFunc < NumFuncs);
- FuncLabels[CurrentFunc] = dfsan_union(FuncLabels[CurrentFunc], L1);
+ FuncLabels[CurrentFunc][CurrentIteration] |= L1;
}
#define HOOK(Name, Type) \
void Name(Type Arg1, Type Arg2, dfsan_label L1, dfsan_label L2) { \
assert(CurrentFunc < NumFuncs); \
- FuncLabels[CurrentFunc] = \
- dfsan_union(FuncLabels[CurrentFunc], dfsan_union(L1, L2)); \
+ FuncLabels[CurrentFunc][CurrentIteration] |= L1 | L2; \
}
HOOK(__dfsw___sanitizer_cov_trace_const_cmp1, uint8_t)