diff options
author | Kostya Serebryany <kcc@google.com> | 2018-06-06 01:23:29 +0000 |
---|---|---|
committer | Kostya Serebryany <kcc@google.com> | 2018-06-06 01:23:29 +0000 |
commit | 9c4a366945d9fe7ed89e679c045f01bb311158ce (patch) | |
tree | ff1f3a69b540445b5cb900312f2144c102d2a8ae /lib/fuzzer | |
parent | 7f47a0bea177db24ecfc75b9daf3874e39a869df (diff) | |
download | compiler-rt-9c4a366945d9fe7ed89e679c045f01bb311158ce.tar.gz |
[libFuzzer] initial implementation of -data_flow_trace. It parses the data flow trace and prints the summary, but doesn't use the information in any other way yet.
git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@334058 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/fuzzer')
-rw-r--r-- | lib/fuzzer/CMakeLists.txt | 1 | ||||
-rw-r--r-- | lib/fuzzer/FuzzerDataFlowTrace.cpp | 90 | ||||
-rw-r--r-- | lib/fuzzer/FuzzerDataFlowTrace.h | 40 | ||||
-rw-r--r-- | lib/fuzzer/FuzzerDriver.cpp | 2 | ||||
-rw-r--r-- | lib/fuzzer/FuzzerFlags.def | 2 | ||||
-rw-r--r-- | lib/fuzzer/FuzzerIO.cpp | 8 | ||||
-rw-r--r-- | lib/fuzzer/FuzzerIO.h | 2 | ||||
-rw-r--r-- | lib/fuzzer/FuzzerInternal.h | 2 | ||||
-rw-r--r-- | lib/fuzzer/FuzzerLoop.cpp | 1 | ||||
-rw-r--r-- | lib/fuzzer/FuzzerOptions.h | 1 | ||||
-rw-r--r-- | lib/fuzzer/dataflow/DataFlow.cpp | 8 | ||||
-rwxr-xr-x | lib/fuzzer/scripts/collect_data_flow.py | 25 | ||||
-rw-r--r-- | lib/fuzzer/tests/FuzzerUnittest.cpp | 8 |
13 files changed, 188 insertions, 2 deletions
diff --git a/lib/fuzzer/CMakeLists.txt b/lib/fuzzer/CMakeLists.txt index 7e696de12..7ec0dd551 100644 --- a/lib/fuzzer/CMakeLists.txt +++ b/lib/fuzzer/CMakeLists.txt @@ -1,5 +1,6 @@ set(LIBFUZZER_SOURCES FuzzerCrossOver.cpp + FuzzerDataFlowTrace.cpp FuzzerDriver.cpp FuzzerExtFunctionsDlsym.cpp FuzzerExtFunctionsDlsymWin.cpp diff --git a/lib/fuzzer/FuzzerDataFlowTrace.cpp b/lib/fuzzer/FuzzerDataFlowTrace.cpp new file mode 100644 index 000000000..69efd6f38 --- /dev/null +++ b/lib/fuzzer/FuzzerDataFlowTrace.cpp @@ -0,0 +1,90 @@ +//===- FuzzerDataFlowTrace.cpp - DataFlowTrace ---*- C++ -* ===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// fuzzer::DataFlowTrace +//===----------------------------------------------------------------------===// + +#include "FuzzerDataFlowTrace.h" +#include "FuzzerIO.h" + +#include <cstdlib> +#include <fstream> +#include <string> +#include <vector> + +namespace fuzzer { + +void DataFlowTrace::Init(const std::string &DirPath, + const std::string &FocusFunction) { + if (DirPath.empty()) return; + const char *kFunctionsTxt = "functions.txt"; + Printf("INFO: DataFlowTrace: reading from '%s'\n", DirPath.c_str()); + Vector<SizedFile> Files; + GetSizedFilesFromDir(DirPath, &Files); + std::string L; + + // Read functions.txt + std::ifstream IF(DirPlusFile(DirPath, kFunctionsTxt)); + size_t FocusFuncIdx = SIZE_MAX; + size_t NumFunctions = 0; + while (std::getline(IF, L, '\n')) { + NumFunctions++; + if (FocusFunction == L) + FocusFuncIdx = NumFunctions - 1; + } + if (!NumFunctions || FocusFuncIdx == SIZE_MAX || Files.size() <= 1) + return; + // Read traces. 
+ size_t NumTraceFiles = 0; + size_t NumTracesWithFocusFunction = 0; + for (auto &SF : Files) { + auto Name = Basename(SF.File); + if (Name == kFunctionsTxt) continue; + auto ParseError = [&](const char *Err) { + Printf("DataFlowTrace: parse error: %s\n File: %s\n Line: %s\n", Err, + Name.c_str(), L.c_str()); + }; + NumTraceFiles++; + // Printf("=== %s\n", Name.c_str()); + std::ifstream IF(SF.File); + while (std::getline(IF, L, '\n')) { + size_t SpacePos = L.find(' '); + if (SpacePos == std::string::npos) + return ParseError("no space in the trace line"); + if (L.empty() || L[0] != 'F') + return ParseError("the trace line doesn't start with 'F'"); + size_t N = std::atol(L.c_str() + 1); + if (N >= NumFunctions) + return ParseError("N is greater than the number of functions"); + if (N == FocusFuncIdx) { + NumTracesWithFocusFunction++; + const char *Beg = L.c_str() + SpacePos + 1; + const char *End = L.c_str() + L.size(); + assert(Beg < End); + size_t Len = End - Beg; + Vector<bool> V(Len); + for (size_t I = 0; I < Len; I++) { + if (Beg[I] != '0' && Beg[I] != '1') + ParseError("the trace should contain only 0 or 1"); + V[I] = Beg[I] == '1'; + } + // Print just a few small traces. + if (NumTracesWithFocusFunction <= 3 && Len <= 16) + Printf("%s => |%s|\n", Name.c_str(), L.c_str() + SpacePos + 1); + break; // No need to parse the following lines. 
+ } + } + } + assert(NumTraceFiles == Files.size() - 1); + Printf("INFO: DataFlowTrace: %zd trace files, %zd functions, " + "%zd traces with focus function\n", + NumTraceFiles, NumFunctions, NumTracesWithFocusFunction); +} + +} // namespace fuzzer + diff --git a/lib/fuzzer/FuzzerDataFlowTrace.h b/lib/fuzzer/FuzzerDataFlowTrace.h new file mode 100644 index 000000000..2b7b71fdb --- /dev/null +++ b/lib/fuzzer/FuzzerDataFlowTrace.h @@ -0,0 +1,40 @@ +//===- FuzzerDataFlowTrace.h - Internal header for the Fuzzer ---*- C++ -* ===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// fuzzer::DataFlowTrace; reads and handles a data-flow trace. +// +// A data flow trace is generated by e.g. dataflow/DataFlow.cpp +// and is stored on disk in a separate directory. +// +// The trace dir contains a file 'functions.txt' which lists function names, +// one per line, e.g. +// ==> functions.txt <== +// Func2 +// LLVMFuzzerTestOneInput +// Func1 +// +// All other files in the dir are the traces, see dataflow/DataFlow.cpp. +// The name of the file is sha1 of the input used to generate the trace. +// +// Current status: +// the data is parsed and the summary is printed, but the data is not yet +// used in any other way. 
+//===----------------------------------------------------------------------===// + +#ifndef LLVM_FUZZER_DATA_FLOW_TRACE +#define LLVM_FUZZER_DATA_FLOW_TRACE + +#include "FuzzerDefs.h" + +namespace fuzzer { +struct DataFlowTrace { + void Init(const std::string &DirPath, const std::string &FocusFunction); +}; +} // namespace fuzzer + +#endif // LLVM_FUZZER_DATA_FLOW_TRACE diff --git a/lib/fuzzer/FuzzerDriver.cpp b/lib/fuzzer/FuzzerDriver.cpp index dfb3d492c..d7b957349 100644 --- a/lib/fuzzer/FuzzerDriver.cpp +++ b/lib/fuzzer/FuzzerDriver.cpp @@ -623,6 +623,8 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { Options.ExitOnItem = Flags.exit_on_item; if (Flags.focus_function) Options.FocusFunction = Flags.focus_function; + if (Flags.data_flow_trace) + Options.DataFlowTrace = Flags.data_flow_trace; unsigned Seed = Flags.seed; // Initialize Seed. diff --git a/lib/fuzzer/FuzzerFlags.def b/lib/fuzzer/FuzzerFlags.def index 139e6187f..5be6d2641 100644 --- a/lib/fuzzer/FuzzerFlags.def +++ b/lib/fuzzer/FuzzerFlags.def @@ -153,3 +153,5 @@ FUZZER_DEPRECATED_FLAG(use_equivalence_server) FUZZER_FLAG_INT(analyze_dict, 0, "Experimental") FUZZER_DEPRECATED_FLAG(use_clang_coverage) FUZZER_FLAG_INT(use_feature_frequency, 0, "Experimental/internal") + +FUZZER_FLAG_STRING(data_flow_trace, "Experimental: use the data flow trace") diff --git a/lib/fuzzer/FuzzerIO.cpp b/lib/fuzzer/FuzzerIO.cpp index dac5ec658..f3ead0ec5 100644 --- a/lib/fuzzer/FuzzerIO.cpp +++ b/lib/fuzzer/FuzzerIO.cpp @@ -100,6 +100,14 @@ std::string DirPlusFile(const std::string &DirPath, return DirPath + GetSeparator() + FileName; } +std::string Basename(const std::string &Path, char Separator) { + size_t Pos = Path.rfind(Separator); + if (Pos == std::string::npos) + return Path; + assert(Pos < Path.size()); + return Path.substr(Pos + 1); +} + void DupAndCloseStderr() { int OutputFd = DuplicateFile(2); if (OutputFd > 0) { diff --git a/lib/fuzzer/FuzzerIO.h b/lib/fuzzer/FuzzerIO.h index 
ea9f0d5a6..6d7757435 100644 --- a/lib/fuzzer/FuzzerIO.h +++ b/lib/fuzzer/FuzzerIO.h @@ -67,6 +67,8 @@ struct SizedFile { void GetSizedFilesFromDir(const std::string &Dir, Vector<SizedFile> *V); char GetSeparator(); +// Similar to the basename utility: returns the file name w/o the dir prefix. +std::string Basename(const std::string &Path, char Separator = GetSeparator()); FILE* OpenFile(int Fd, const char *Mode); diff --git a/lib/fuzzer/FuzzerInternal.h b/lib/fuzzer/FuzzerInternal.h index 2b2638f1f..ec098a78f 100644 --- a/lib/fuzzer/FuzzerInternal.h +++ b/lib/fuzzer/FuzzerInternal.h @@ -12,6 +12,7 @@ #ifndef LLVM_FUZZER_INTERNAL_H #define LLVM_FUZZER_INTERNAL_H +#include "FuzzerDataFlowTrace.h" #include "FuzzerDefs.h" #include "FuzzerExtFunctions.h" #include "FuzzerInterface.h" @@ -134,6 +135,7 @@ private: InputCorpus &Corpus; MutationDispatcher &MD; FuzzingOptions Options; + DataFlowTrace DFT; system_clock::time_point ProcessStartTime = system_clock::now(); system_clock::time_point UnitStartTime, UnitStopTime; diff --git a/lib/fuzzer/FuzzerLoop.cpp b/lib/fuzzer/FuzzerLoop.cpp index 9c19ba913..27bd5ee65 100644 --- a/lib/fuzzer/FuzzerLoop.cpp +++ b/lib/fuzzer/FuzzerLoop.cpp @@ -160,6 +160,7 @@ Fuzzer::Fuzzer(UserCallback CB, InputCorpus &Corpus, MutationDispatcher &MD, CurrentUnitSize = 0; memset(BaseSha1, 0, sizeof(BaseSha1)); TPC.SetFocusFunction(Options.FocusFunction); + DFT.Init(Options.DataFlowTrace, Options.FocusFunction); } Fuzzer::~Fuzzer() {} diff --git a/lib/fuzzer/FuzzerOptions.h b/lib/fuzzer/FuzzerOptions.h index 946f0b9d6..7a52d3624 100644 --- a/lib/fuzzer/FuzzerOptions.h +++ b/lib/fuzzer/FuzzerOptions.h @@ -46,6 +46,7 @@ struct FuzzingOptions { std::string ExitOnSrcPos; std::string ExitOnItem; std::string FocusFunction; + std::string DataFlowTrace; bool SaveArtifacts = true; bool PrintNEW = true; // Print a status line when new units are found; bool PrintNewCovPcs = false; diff --git a/lib/fuzzer/dataflow/DataFlow.cpp b/lib/fuzzer/dataflow/DataFlow.cpp 
index 99863074d..a79c796ac 100644 --- a/lib/fuzzer/dataflow/DataFlow.cpp +++ b/lib/fuzzer/dataflow/DataFlow.cpp @@ -69,6 +69,7 @@ static const uintptr_t *FuncsBeg; static __thread size_t CurrentFunc; static dfsan_label *FuncLabels; // Array of NumFuncs elements. static char *PrintableStringForLabel; // InputLen + 2 bytes. +static bool LabelSeen[1 << 8 * sizeof(dfsan_label)]; // Prints all instrumented functions. static int PrintFunctions() { @@ -89,7 +90,11 @@ static int PrintFunctions() { return 0; } -static void SetBytesForLabel(dfsan_label L, char *Bytes) { +extern "C" +void SetBytesForLabel(dfsan_label L, char *Bytes) { + if (LabelSeen[L]) + return; + LabelSeen[L] = true; assert(L); if (L <= InputLen + 1) { Bytes[L - 1] = '1'; @@ -103,6 +108,7 @@ static void SetBytesForLabel(dfsan_label L, char *Bytes) { static char *GetPrintableStringForLabel(dfsan_label L) { memset(PrintableStringForLabel, '0', InputLen + 1); PrintableStringForLabel[InputLen + 1] = 0; + memset(LabelSeen, 0, sizeof(LabelSeen)); SetBytesForLabel(L, PrintableStringForLabel); return PrintableStringForLabel; } diff --git a/lib/fuzzer/scripts/collect_data_flow.py b/lib/fuzzer/scripts/collect_data_flow.py index d13f6dcc4..c3faf71c0 100755 --- a/lib/fuzzer/scripts/collect_data_flow.py +++ b/lib/fuzzer/scripts/collect_data_flow.py @@ -11,9 +11,15 @@ # the complete trace for all input bytes (running it on all bytes at once # may fail if DFSan runs out of labels). 
# Usage: -# collect_data_flow.py BINARY INPUT [RESULT] +# +# # Collect dataflow for one input, store it in OUTPUT (default is stdout) +# collect_data_flow.py BINARY INPUT [OUTPUT] +# +# # Collect dataflow for all inputs in CORPUS_DIR, store them in OUTPUT_DIR +# collect_data_flow.py BINARY CORPUS_DIR OUTPUT_DIR #===------------------------------------------------------------------------===# import atexit +import hashlib import sys import os import subprocess @@ -26,9 +32,26 @@ def cleanup(d): print "removing: ", d shutil.rmtree(d) +def collect_dataflow_for_corpus(self, exe, corpus_dir, output_dir): + print "Collecting dataflow for corpus:", corpus_dir, \ + "output_dir:", output_dir + assert not os.path.exists(output_dir) + os.mkdir(output_dir) + for root, dirs, files in os.walk(corpus_dir): + for f in files: + path = os.path.join(root, f) + sha1 = hashlib.sha1(open(path).read()).hexdigest() + output = os.path.join(output_dir, sha1) + subprocess.call([self, exe, path, output]) + functions_txt = open(os.path.join(output_dir, "functions.txt"), "w") + subprocess.call([exe], stdout=functions_txt) + + def main(argv): exe = argv[1] inp = argv[2] + if os.path.isdir(inp): + return collect_dataflow_for_corpus(argv[0], exe, inp, argv[3]) size = os.path.getsize(inp) q = [[0, size]] tmpdir = tempfile.mkdtemp(prefix="libfuzzer-tmp-") diff --git a/lib/fuzzer/tests/FuzzerUnittest.cpp b/lib/fuzzer/tests/FuzzerUnittest.cpp index a38a45344..0b8673876 100644 --- a/lib/fuzzer/tests/FuzzerUnittest.cpp +++ b/lib/fuzzer/tests/FuzzerUnittest.cpp @@ -28,6 +28,14 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { abort(); } +TEST(Fuzzer, Basename) { + EXPECT_EQ(Basename("foo/bar"), "bar"); + EXPECT_EQ(Basename("bar"), "bar"); + EXPECT_EQ(Basename("/bar"), "bar"); + EXPECT_EQ(Basename("foo/x"), "x"); + EXPECT_EQ(Basename("foo/"), ""); +} + TEST(Fuzzer, CrossOver) { std::unique_ptr<ExternalFunctions> t(new ExternalFunctions()); fuzzer::EF = t.get(); |