#!/usr/bin/env python
#===- lib/fuzzer/scripts/collect_data_flow.py ------------------------------===#
#
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#
# Runs the data-flow tracer several times on the same input in order to collect
# the complete trace for all input bytes (running it on all bytes at once
# may fail if DFSan runs out of labels).
# Usage:
#
#   # Collect dataflow for one input, store it in OUTPUT (default is stdout)
#   collect_data_flow.py BINARY INPUT [OUTPUT]
#
#   # Collect dataflow for all inputs in CORPUS_DIR, store them in OUTPUT_DIR
#   collect_data_flow.py BINARY CORPUS_DIR OUTPUT_DIR
#===------------------------------------------------------------------------===#
import atexit
import hashlib
import sys
import os
import subprocess
import tempfile
import shutil

# Module-level placeholder. main() assigns to its own local `tmpdir` (it has no
# `global` declaration), so this value is never updated.
tmpdir = ""

def cleanup(d):
  """Report and then recursively delete the directory `d`.

  main() registers this function with atexit so that the scratch directory
  it creates is removed when the script exits.
  """
  message = "removing: %s" % d
  print(message)
  shutil.rmtree(d)

def collect_dataflow_for_corpus(self, exe, corpus_dir, output_dir):
  """Collect a data-flow trace for every file found under `corpus_dir`.

  Args:
    self: path of the program that is re-invoked once per corpus file as
      `self exe INPUT OUTPUT` (main() passes argv[0], i.e. this script).
    exe: path of the binary handed to each re-invocation; it is also run once
      with no arguments to produce functions.txt.
    corpus_dir: directory that is walked recursively for input files.
    output_dir: directory to create for the results. It must not exist yet.
      Each trace is written to a file named after the SHA-1 of the
      corresponding input's contents.

  Raises:
    AssertionError: if `output_dir` already exists.
  """
  print("Collecting dataflow for corpus: %s output_dir: %s" % (corpus_dir,
                                                               output_dir))
  assert not os.path.exists(output_dir), (
      "output_dir already exists: %s" % output_dir)
  os.mkdir(output_dir)
  for root, _, files in os.walk(corpus_dir):
    for f in files:
      path = os.path.join(root, f)
      # Corpus inputs are arbitrary bytes: read them in binary mode so the
      # digest is computed over the exact file contents (hashlib requires
      # bytes on Python 3), and close the handle promptly.
      with open(path, "rb") as corpus_file:
        sha1 = hashlib.sha1(corpus_file.read()).hexdigest()
      output = os.path.join(output_dir, sha1)
      subprocess.call([self, exe, path, output])
  # Store whatever `exe` prints when run without arguments (presumably its
  # function list -- confirm against the tracer) next to the traces.
  with open(os.path.join(output_dir, "functions.txt"), "w") as functions_txt:
    subprocess.call([exe], stdout=functions_txt)


def main(argv):
  """Trace a single input file, or dispatch to corpus mode for a directory.

  For a single input the tracer is first run on the whole byte range
  [0, size). Whenever a run fails and the range still spans at least two
  bytes, the range is halved and both halves are retried. The per-range
  outputs are finally combined by merge_data_flow.py, which is looked up in
  the same directory as this script.

  Args:
    argv: command line in sys.argv form:
      [script, BINARY, INPUT] or [script, BINARY, INPUT, OUTPUT] for one
      input (the merged trace goes to OUTPUT, or to stdout if omitted), or
      [script, BINARY, CORPUS_DIR, OUTPUT_DIR] for a whole corpus.

  Raises:
    SystemExit: with a usage message if required arguments are missing.
  """
  usage = ("usage: collect_data_flow.py BINARY INPUT [OUTPUT]\n"
           "       collect_data_flow.py BINARY CORPUS_DIR OUTPUT_DIR")
  if len(argv) < 3:
    sys.exit(usage)
  exe = argv[1]
  inp = argv[2]
  if os.path.isdir(inp):
    if len(argv) < 4:
      sys.exit(usage)
    return collect_dataflow_for_corpus(argv[0], exe, inp, argv[3])
  size = os.path.getsize(inp)
  q = [[0, size]]
  tmpdir = tempfile.mkdtemp(prefix="libfuzzer-tmp-")
  atexit.register(cleanup, tmpdir)
  print("tmpdir:  %s" % tmpdir)
  outputs = []
  while q:
    r = q.pop()
    print("******* Trying:   %s" % (r,))
    tmpfile = os.path.join(tmpdir, str(r[0]) + "-" + str(r[1]))
    ret = subprocess.call([exe, str(r[0]), str(r[1]), inp, tmpfile])
    if ret and r[1] - r[0] >= 2:
      # Floor division keeps the bounds integral on both Python 2 and 3; they
      # are passed to the tracer as text and embedded in the temp file name.
      mid = (r[1] + r[0]) // 2
      q.append([r[0], mid])
      q.append([mid, r[1]])
    else:
      # NOTE: a range that is too small to split is recorded here even when
      # the tracer returned non-zero.
      outputs.append(tmpfile)
      print("******* Success:  %s" % (r,))
  merge = os.path.join(os.path.dirname(argv[0]), "merge_data_flow.py")
  if len(argv) >= 4:
    with open(argv[3], "w") as f:
      subprocess.call([merge] + outputs, stdout=f)
  else:
    # Flush our buffered progress lines first so they precede the merged
    # trace that the child process writes directly to the same stdout.
    sys.stdout.flush()
    subprocess.call([merge] + outputs, stdout=sys.stdout)

# Run main() with the process command line only when this file is executed as
# a script, not when it is imported as a module.
if __name__ == '__main__':
  main(sys.argv)