diff options
author | Stephen Dolan <mu@netsoc.tcd.ie> | 2016-12-06 16:18:04 +0000 |
---|---|---|
committer | Damien Doligez <damien.doligez@gmail.com> | 2016-12-06 17:18:04 +0100 |
commit | a35c6117e6a2c957d5dc3ba950375718f09a5cb5 (patch) | |
tree | 2b878a68717766c9ab65ed3df1acdef6a55a8932 | |
parent | 59380cd23e7c94e4970d7484bd8742c235893c11 (diff) | |
download | ocaml-a35c6117e6a2c957d5dc3ba950375718f09a5cb5.tar.gz |
Instrumentation for american fuzzy lop (afl-fuzz) (#504)
-rw-r--r-- | .depend | 25 | ||||
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | Changes | 4 | ||||
-rw-r--r-- | Makefile | 1 | ||||
-rw-r--r-- | Makefile.nt | 1 | ||||
-rw-r--r-- | Makefile.shared | 1 | ||||
-rw-r--r-- | asmcomp/afl_instrument.ml | 79 | ||||
-rw-r--r-- | asmcomp/afl_instrument.mli | 4 | ||||
-rw-r--r-- | asmcomp/cmmgen.ml | 13 | ||||
-rw-r--r-- | asmrun/Makefile.shared | 5 | ||||
-rw-r--r-- | byterun/Makefile.shared | 4 | ||||
-rw-r--r-- | byterun/afl.c | 121 | ||||
-rw-r--r-- | byterun/misc.c | 2 | ||||
-rw-r--r-- | byterun/printexc.c | 10 | ||||
-rw-r--r-- | config/Makefile.mingw | 1 | ||||
-rw-r--r-- | config/Makefile.mingw64 | 1 | ||||
-rw-r--r-- | config/Makefile.msvc | 1 | ||||
-rw-r--r-- | config/Makefile.msvc64 | 1 | ||||
-rwxr-xr-x | configure | 9 | ||||
-rw-r--r-- | driver/compenv.ml | 3 | ||||
-rw-r--r-- | driver/main_args.ml | 13 | ||||
-rw-r--r-- | driver/main_args.mli | 2 | ||||
-rw-r--r-- | driver/optmain.ml | 2 | ||||
-rw-r--r-- | manual/README.md | 1 | ||||
-rw-r--r-- | manual/manual/allfiles.etex | 1 | ||||
-rw-r--r-- | manual/manual/cmds/Makefile | 6 | ||||
-rw-r--r-- | manual/manual/cmds/afl-fuzz.etex | 74 | ||||
-rw-r--r-- | tools/ocamloptp.ml | 2 | ||||
-rw-r--r-- | utils/clflags.ml | 3 | ||||
-rw-r--r-- | utils/clflags.mli | 2 | ||||
-rw-r--r-- | utils/config.mli | 2 | ||||
-rw-r--r-- | utils/config.mlp | 2 |
32 files changed, 378 insertions, 19 deletions
@@ -706,6 +706,13 @@ asmcomp/CSEgen.cmo : asmcomp/reg.cmi asmcomp/proc.cmi asmcomp/mach.cmi \ asmcomp/CSEgen.cmx : asmcomp/reg.cmx asmcomp/proc.cmx asmcomp/mach.cmx \ asmcomp/cmm.cmx asmcomp/CSEgen.cmi asmcomp/CSEgen.cmi : asmcomp/mach.cmi +asmcomp/afl_instrument.cmo : bytecomp/lambda.cmi typing/ident.cmi \ + middle_end/debuginfo.cmi asmcomp/cmm.cmi utils/clflags.cmi \ + asmcomp/afl_instrument.cmi +asmcomp/afl_instrument.cmx : bytecomp/lambda.cmx typing/ident.cmx \ + middle_end/debuginfo.cmx asmcomp/cmm.cmx utils/clflags.cmx \ + asmcomp/afl_instrument.cmi +asmcomp/afl_instrument.cmi : asmcomp/cmm.cmi asmcomp/arch.cmo : utils/clflags.cmi asmcomp/arch.cmx : utils/clflags.cmx asmcomp/asmgen.cmo : asmcomp/un_anf.cmi bytecomp/translmod.cmi \ @@ -852,14 +859,14 @@ asmcomp/cmmgen.cmo : asmcomp/un_anf.cmi typing/types.cmi bytecomp/switch.cmi \ middle_end/debuginfo.cmi utils/config.cmi asmcomp/compilenv.cmi \ asmcomp/cmx_format.cmi asmcomp/cmm.cmi utils/clflags.cmi \ asmcomp/clambda.cmi parsing/asttypes.cmi asmcomp/arch.cmo \ - asmcomp/cmmgen.cmi + asmcomp/afl_instrument.cmi asmcomp/cmmgen.cmi asmcomp/cmmgen.cmx : asmcomp/un_anf.cmx typing/types.cmx bytecomp/switch.cmx \ asmcomp/strmatch.cmx asmcomp/proc.cmx bytecomp/printlambda.cmx \ typing/primitive.cmx utils/misc.cmx bytecomp/lambda.cmx typing/ident.cmx \ middle_end/debuginfo.cmx utils/config.cmx asmcomp/compilenv.cmx \ asmcomp/cmx_format.cmi asmcomp/cmm.cmx utils/clflags.cmx \ asmcomp/clambda.cmx parsing/asttypes.cmi asmcomp/arch.cmx \ - asmcomp/cmmgen.cmi + asmcomp/afl_instrument.cmx asmcomp/cmmgen.cmi asmcomp/cmmgen.cmi : asmcomp/cmx_format.cmi asmcomp/cmm.cmi \ asmcomp/clambda.cmi asmcomp/cmx_format.cmi : asmcomp/export_info.cmi asmcomp/clambda.cmi @@ -1041,7 +1048,7 @@ asmcomp/printcmm.cmo : bytecomp/lambda.cmi typing/ident.cmi \ middle_end/debuginfo.cmi asmcomp/cmm.cmi asmcomp/printcmm.cmi asmcomp/printcmm.cmx : bytecomp/lambda.cmx typing/ident.cmx \ middle_end/debuginfo.cmx asmcomp/cmm.cmx asmcomp/printcmm.cmi -asmcomp/printcmm.cmi : asmcomp/cmm.cmi +asmcomp/printcmm.cmi : middle_end/debuginfo.cmi asmcomp/cmm.cmi asmcomp/printlinear.cmo : asmcomp/printmach.cmi asmcomp/printcmm.cmi \ asmcomp/mach.cmi asmcomp/linearize.cmi middle_end/debuginfo.cmi \ asmcomp/printlinear.cmi @@ -1123,11 +1130,13 @@ asmcomp/split.cmo : asmcomp/reg.cmi utils/misc.cmi asmcomp/mach.cmi \ asmcomp/split.cmx : asmcomp/reg.cmx utils/misc.cmx asmcomp/mach.cmx \ asmcomp/split.cmi asmcomp/split.cmi : asmcomp/mach.cmi -asmcomp/strmatch.cmo : bytecomp/lambda.cmi typing/ident.cmi asmcomp/cmm.cmi \ - asmcomp/arch.cmo asmcomp/strmatch.cmi -asmcomp/strmatch.cmx : bytecomp/lambda.cmx typing/ident.cmx asmcomp/cmm.cmx \ - asmcomp/arch.cmx asmcomp/strmatch.cmi -asmcomp/strmatch.cmi : asmcomp/cmm.cmi +asmcomp/strmatch.cmo : bytecomp/lambda.cmi typing/ident.cmi \ + middle_end/debuginfo.cmi asmcomp/cmm.cmi asmcomp/arch.cmo \ + asmcomp/strmatch.cmi +asmcomp/strmatch.cmx : bytecomp/lambda.cmx typing/ident.cmx \ + middle_end/debuginfo.cmx asmcomp/cmm.cmx asmcomp/arch.cmx \ + asmcomp/strmatch.cmi +asmcomp/strmatch.cmi : middle_end/debuginfo.cmi asmcomp/cmm.cmi asmcomp/un_anf.cmo : middle_end/semantics_of_primitives.cmi \ asmcomp/printclambda.cmi utils/misc.cmi bytecomp/lambda.cmi \ typing/ident.cmi middle_end/debuginfo.cmi utils/clflags.cmi \ diff --git a/.gitignore b/.gitignore index 8df29666e9..5ad2d2f5b9 100644 --- a/.gitignore +++ b/.gitignore @@ -58,6 +58,7 @@ /asmrun/*.p.c /asmrun/*.d.c /asmrun/alloc.c +/asmrun/afl.c /asmrun/array.c /asmrun/backtrace.c /asmrun/callback.c @@ -7,6 +7,10 @@ Next version (4.05.0): ### Code generation and optimizations: +- GPR#504: Instrumentation support for fuzzing with afl-fuzz. + (Stephen Dolan, review by Alain Frisch, Pierre Chambart, Mark + Shinwell, Gabriel Scherer and Damien Doligez) + ### Runtime system: ### Type system: @@ -444,6 +444,7 @@ utils/config.ml: utils/config.mlp config/Makefile -e 's|%%FLAMBDA%%|$(FLAMBDA)|' \ -e 's|%%PROFILING%%|$(PROFILING)|' \ -e 's|%%SAFE_STRING%%|$(SAFE_STRING)|' \ + -e 's|%%AFL_INSTRUMENT%%|$(AFL_INSTRUMENT)|' \ utils/config.mlp > utils/config.ml partialclean:: diff --git a/Makefile.nt b/Makefile.nt index 1e930c17dc..dc674e63fc 100644 --- a/Makefile.nt +++ b/Makefile.nt @@ -409,6 +409,7 @@ utils/config.ml: utils/config.mlp config/Makefile -e 's|%%PROFILING%%|$(PROFILING)|' \ -e 's|%%FLAMBDA%%|$(FLAMBDA)|' \ -e 's|%%SAFE_STRING%%|$(SAFE_STRING)|' \ + -e 's|%%AFL_INSTRUMENT%%|$(AFL_INSTRUMENT)|' \ -e 's|%%FLEXLINK_FLAGS%%|$(FLEXLINK_FLAGS)|' \ utils/config.mlp > utils/config.ml diff --git a/Makefile.shared b/Makefile.shared index 177ba0bb8f..1326b06ef4 100644 --- a/Makefile.shared +++ b/Makefile.shared @@ -121,6 +121,7 @@ ASMCOMP=\ asmcomp/flambda_to_clambda.cmo \ asmcomp/import_approx.cmo \ asmcomp/un_anf.cmo \ + asmcomp/afl_instrument.cmo \ asmcomp/strmatch.cmo asmcomp/cmmgen.cmo \ asmcomp/printmach.cmo asmcomp/selectgen.cmo \ asmcomp/spacetime_profiling.cmo asmcomp/selection.cmo \ diff --git a/asmcomp/afl_instrument.ml b/asmcomp/afl_instrument.ml new file mode 100644 index 0000000000..329ca029d0 --- /dev/null +++ b/asmcomp/afl_instrument.ml @@ -0,0 +1,79 @@ +(* Insert instrumentation for afl-fuzz *) + +open Lambda +open Cmm + +let afl_area_ptr = Cconst_symbol "caml_afl_area_ptr" +let afl_prev_loc = Cconst_symbol "caml_afl_prev_loc" +let afl_map_size = 1 lsl 16 + +let rec with_afl_logging b = + if !Clflags.afl_inst_ratio < 100 && + Random.int 100 >= !Clflags.afl_inst_ratio then instrument b else + let instrumentation = + (* The instrumentation that afl-fuzz requires is: + + cur_location = <COMPILE_TIME_RANDOM>; + shared_mem[cur_location ^ prev_location]++; + prev_location = cur_location >> 1; + + See http://lcamtuf.coredump.cx/afl/technical_details.txt or + docs/technical_details.txt in afl-fuzz source for for a full + description of what's going on. *) + let cur_location = Random.int afl_map_size in + let cur_pos = Ident.create "pos" in + let afl_area = Ident.create "shared_mem" in + let op oper args = Cop (oper, args, Debuginfo.none) in + Clet(afl_area, op (Cload Word_int) [afl_area_ptr], + Clet(cur_pos, op Cxor [op (Cload Word_int) [afl_prev_loc]; + Cconst_int cur_location], + Csequence( + op (Cstore(Byte_unsigned, Assignment)) + [op Cadda [Cvar afl_area; Cvar cur_pos]; + op Cadda [op (Cload Byte_unsigned) + [op Cadda [Cvar afl_area; Cvar cur_pos]]; + Cconst_int 1]], + op (Cstore(Word_int, Assignment)) + [afl_prev_loc; Cconst_int (cur_location lsr 1)]))) in + Csequence(instrumentation, instrument b) + +and instrument = function + (* these cases add logging, as they may be targets of conditional branches *) + | Cifthenelse (cond, t, f) -> + Cifthenelse (instrument cond, with_afl_logging t, with_afl_logging f) + | Cloop e -> + Cloop (with_afl_logging e) + | Ctrywith (e, ex, handler) -> + Ctrywith (instrument e, ex, with_afl_logging handler) + | Cswitch (e, cases, handlers, dbg) -> + Cswitch (instrument e, cases, Array.map with_afl_logging handlers, dbg) + + (* these cases add no logging, but instrument subexpressions *) + | Clet (v, e, body) -> Clet (v, instrument e, instrument body) + | Cassign (v, e) -> Cassign (v, instrument e) + | Ctuple es -> Ctuple (List.map instrument es) + | Cop (op, es, dbg) -> Cop (op, List.map instrument es, dbg) + | Csequence (e1, e2) -> Csequence (instrument e1, instrument e2) + | Ccatch (isrec, cases, body) -> + Ccatch (isrec, + List.map (fun (nfail, ids, e) -> nfail, ids, instrument e) cases, + instrument body) + | Cexit (ex, args) -> Cexit (ex, List.map instrument args) + + (* these are base cases and have no logging *) + | Cconst_int _ | Cconst_natint _ | Cconst_float _ + | Cconst_symbol _ | Cconst_pointer _ | Cconst_natpointer _ + | Cblockheader _ | Cvar _ as c -> c + +let instrument_function c = + with_afl_logging c + +let instrument_initialiser c = + (* Each instrumented module calls caml_setup_afl at + initialisation, which is a no-op on the second and subsequent + calls *) + with_afl_logging (Csequence + (Cop (Cextcall ("caml_setup_afl", typ_int, + false, None), + [Cconst_int 0], Debuginfo.none), + c)) diff --git a/asmcomp/afl_instrument.mli b/asmcomp/afl_instrument.mli new file mode 100644 index 0000000000..1eb439b27a --- /dev/null +++ b/asmcomp/afl_instrument.mli @@ -0,0 +1,4 @@ +(* Instrumentation for afl-fuzz *) + +val instrument_function : Cmm.expression -> Cmm.expression +val instrument_initialiser : Cmm.expression -> Cmm.expression diff --git a/asmcomp/cmmgen.ml b/asmcomp/cmmgen.ml index e9e69949d8..fbd3f92dc5 100644 --- a/asmcomp/cmmgen.ml +++ b/asmcomp/cmmgen.ml @@ -2665,9 +2665,14 @@ let transl_function f = else f.body in + let cmm_body = + if !Clflags.afl_instrument then + Afl_instrument.instrument_function (transl empty_env body) + else + transl empty_env body in Cfunction {fun_name = f.label; fun_args = List.map (fun id -> (id, typ_val)) f.params; - fun_body = transl empty_env body; + fun_body = cmm_body; fun_fast = !Clflags.optimize_for_speed; fun_dbg = f.dbg; } @@ -2905,7 +2910,11 @@ let emit_preallocated_blocks preallocated_blocks cont = (* Translate a compilation unit *) let compunit (ulam, preallocated_blocks, constants) = - let init_code = transl empty_env ulam in + let init_code = + if !Clflags.afl_instrument then + Afl_instrument.instrument_initialiser (transl empty_env ulam) + else + transl empty_env ulam in let c1 = [Cfunction {fun_name = Compilenv.make_symbol (Some "entry"); fun_args = []; fun_body = init_code; fun_fast = false; diff --git a/asmrun/Makefile.shared b/asmrun/Makefile.shared index ffa491095a..625cb0ced0 100644 --- a/asmrun/Makefile.shared +++ b/asmrun/Makefile.shared @@ -23,10 +23,11 @@ COBJS=startup_aux.$(O) startup.$(O) main.$(O) fail.$(O) \ custom.$(O) globroots.$(O) backtrace_prim.$(O) backtrace.$(O) \ natdynlink.$(O) debugger.$(O) meta.$(O) dynlink.$(O) \ clambda_checks.$(O) spacetime.$(O) spacetime_snapshot.$(O) \ - spacetime_offline.$(O) + spacetime_offline.$(O) afl.$(O) LINKEDFILES=misc.c freelist.c major_gc.c minor_gc.c memory.c alloc.c array.c \ compare.c ints.c floats.c str.c io.c extern.c intern.c hash.c sys.c \ parsing.c gc_ctrl.c terminfo.c md5.c obj.c lexing.c printexc.c callback.c \ weak.c compact.c finalise.c meta.c custom.c main.c globroots.c \ - $(UNIX_OR_WIN32).c dynlink.c signals.c debugger.c startup_aux.c backtrace.c + $(UNIX_OR_WIN32).c dynlink.c signals.c debugger.c startup_aux.c backtrace.c \ + afl.c diff --git a/byterun/Makefile.shared b/byterun/Makefile.shared index 10eb4eace1..9cd28574b6 100644 --- a/byterun/Makefile.shared +++ b/byterun/Makefile.shared @@ -26,13 +26,13 @@ COMMONOBJS=\ compare.o ints.o floats.o str.o array.o io.o extern.o intern.o \ hash.o sys.o meta.o parsing.o gc_ctrl.o terminfo.o md5.o obj.o \ lexing.o callback.o debugger.o weak.o compact.o finalise.o custom.o \ - dynlink.o spacetime.o + dynlink.o spacetime.o afl.o PRIMS=\ alloc.c array.c compare.c extern.c floats.c gc_ctrl.c hash.c \ intern.c interp.c ints.c io.c lexing.c md5.c meta.c obj.c parsing.c \ signals.c str.c sys.c terminfo.c callback.c weak.c finalise.c stacks.c \ - dynlink.c backtrace_prim.c backtrace.c spacetime.c + dynlink.c backtrace_prim.c backtrace.c spacetime.c afl.c all:: ocamlrun$(EXE) ld.conf libcamlrun.$(A) all-$(RUNTIMED) primitives .PHONY: all diff --git a/byterun/afl.c b/byterun/afl.c new file mode 100644 index 0000000000..e33a5a2031 --- /dev/null +++ b/byterun/afl.c @@ -0,0 +1,121 @@ +/* Runtime support for afl-fuzz */ + +#ifdef _WIN32 + +#include "caml/mlvalues.h" + +CAMLprim value caml_setup_afl (value unit) +{ + return Val_unit; +} + +#else + +#include <unistd.h> +#include <sys/types.h> +#include <sys/shm.h> +#include <sys/wait.h> +#include <stdio.h> +#include "caml/misc.h" +#include "caml/mlvalues.h" + +static int afl_initialised = 0; + +/* afl uses abnormal termination (SIGABRT) to check whether + to count a testcase as "crashing" */ +extern int caml_abort_on_uncaught_exn; + +/* Values used by the instrumentation logic (see cmmgen.ml) */ +static unsigned char afl_area_initial[1 << 16]; +unsigned char* caml_afl_area_ptr = afl_area_initial; +uintnat caml_afl_prev_loc; + +/* File descriptors used to synchronise with afl-fuzz */ +#define FORKSRV_FD_READ 198 +#define FORKSRV_FD_WRITE 199 + +static void afl_write(uint32_t msg) +{ + if (write(FORKSRV_FD_WRITE, &msg, 4) != 4) + caml_fatal_error("writing to afl-fuzz"); +} + +static uint32_t afl_read() +{ + uint32_t msg; + if (read(FORKSRV_FD_READ, &msg, 4) != 4) + caml_fatal_error("reading from afl-fuzz"); + return msg; +} + +CAMLprim value caml_setup_afl(value unit) +{ + if (afl_initialised) return Val_unit; + afl_initialised = 1; + + char* shm_id_str = getenv("__AFL_SHM_ID"); + if (shm_id_str == NULL) { + /* Not running under afl-fuzz, continue as normal */ + return Val_unit; + } + + /* if afl-fuzz is attached, we want it to know about uncaught exceptions */ + caml_abort_on_uncaught_exn = 1; + + char* shm_id_end; + long int shm_id = strtol(shm_id_str, &shm_id_end, 10); + if (!(*shm_id_str != '\0' && *shm_id_end == '\0')) + caml_fatal_error("afl-fuzz: bad shm id"); + + caml_afl_area_ptr = shmat((int)shm_id, NULL, 0); + if (caml_afl_area_ptr == (void*)-1) + caml_fatal_error("afl-fuzz: could not attach shm area"); + + /* poke the bitmap so that afl-fuzz knows we exist, even if the + application has sparse instrumentation */ + caml_afl_area_ptr[0] = 1; + + /* synchronise with afl-fuzz */ + afl_write(0); + afl_read(); + + while (1) { + int child_pid = fork(); + if (child_pid < 0) caml_fatal_error("afl-fuzz: could not fork"); + else if (child_pid == 0) { + /* Run the program */ + close(FORKSRV_FD_READ); + close(FORKSRV_FD_WRITE); + return Val_unit; + } + + /* As long as the child keeps raising SIGSTOP, we re-use the same process */ + while (1) { + afl_write((uint32_t)child_pid); + + int status; + /* WUNTRACED means wait until termination or SIGSTOP */ + if (waitpid(child_pid, &status, WUNTRACED) < 0) + caml_fatal_error("afl-fuzz: waitpid failed"); + afl_write((uint32_t)status); + + uint32_t was_killed = afl_read(); + if (WIFSTOPPED(status)) { + /* child stopped, waiting for another test case */ + if (was_killed) { + /* we saw the child stop, but since then afl-fuzz killed it. + we should wait for it before forking another child */ + if (waitpid(child_pid, &status, 0) < 0) + caml_fatal_error("afl-fuzz: waitpid failed"); + } else { + kill(child_pid, SIGCONT); + } + } else { + /* child died */ + break; + } + } + } +} + +#endif /* _WIN32 */ diff --git a/byterun/misc.c b/byterun/misc.c index 447b933fc7..2e587da0b0 100644 --- a/byterun/misc.c +++ b/byterun/misc.c @@ -37,7 +37,7 @@ int caml_failed_assert (char * expr, char * file, int line) fprintf (stderr, "file %s; line %d ### Assertion failed: %s\n", file, line, expr); fflush (stderr); - exit (100); + abort(); } void caml_set_fields (value v, unsigned long start, unsigned long filler) diff --git a/byterun/printexc.c b/byterun/printexc.c index 971f172485..cb32e61b7f 100644 --- a/byterun/printexc.c +++ b/byterun/printexc.c @@ -131,6 +131,8 @@ static void default_fatal_uncaught_exception(value exn) caml_print_exception_backtrace(); } +int caml_abort_on_uncaught_exn = 0; /* see afl.c */ + void caml_fatal_uncaught_exception(value exn) { value *handle_uncaught_exception; @@ -143,6 +145,10 @@ void caml_fatal_uncaught_exception(value exn) else default_fatal_uncaught_exception(exn); /* Terminate the process */ - CAML_SYS_EXIT(2); - exit(2); /* Second exit needed for the Noreturn flag */ + if (caml_abort_on_uncaught_exn) { + abort(); + } else { + CAML_SYS_EXIT(2); + exit(2); /* Second exit needed for the Noreturn flag */ + } } diff --git a/config/Makefile.mingw b/config/Makefile.mingw index 1918a4c9ff..3134d69fa2 100644 --- a/config/Makefile.mingw +++ b/config/Makefile.mingw @@ -94,6 +94,7 @@ LIBUNWIND_AVAILABLE=false LIBUNWIND_LINK_FLAGS= PROFINFO_WIDTH=26 SAFE_STRING=false +AFL_INSTRUMENT=false ########## Configuration for the bytecode compiler diff --git a/config/Makefile.mingw64 b/config/Makefile.mingw64 index 795f80983d..e25c2b0d01 100644 --- a/config/Makefile.mingw64 +++ b/config/Makefile.mingw64 @@ -94,6 +94,7 @@ LIBUNWIND_AVAILABLE=false LIBUNWIND_LINK_FLAGS= PROFINFO_WIDTH=26 SAFE_STRING=false +AFL_INSTRUMENT=false ########## Configuration for the bytecode compiler diff --git a/config/Makefile.msvc b/config/Makefile.msvc index 4d98fb1764..f94d009e45 100644 --- a/config/Makefile.msvc +++ b/config/Makefile.msvc @@ -88,6 +88,7 @@ LIBUNWIND_AVAILABLE=false LIBUNWIND_LINK_FLAGS= PROFINFO_WIDTH=26 SAFE_STRING=false +AFL_INSTRUMENT=false ########## Configuration for the bytecode compiler diff --git a/config/Makefile.msvc64 b/config/Makefile.msvc64 index 893bbc4876..23ed503644 100644 --- a/config/Makefile.msvc64 +++ b/config/Makefile.msvc64 @@ -87,6 +87,7 @@ LIBUNWIND_AVAILABLE=false LIBUNWIND_LINK_FLAGS= PROFINFO_WIDTH=26 SAFE_STRING=false +AFL_INSTRUMENT=false ########## Configuration for the bytecode compiler @@ -61,6 +61,7 @@ TOOLPREF="" with_cfi=true flambda=false safe_string=false +afl_instrument=false max_testsuite_dir_retries=0 with_cplugins=true with_fpic=false @@ -205,6 +206,8 @@ while : ; do with_fpic=true;; -safe-string|--safe-string) safe_string=true;; + -afl-instrument) + afl_instrument=true;; *) if echo "$1" | grep -q -e '^--\?[a-zA-Z0-9-]\+='; then err "configure expects arguments of the form '-prefix /foo/bar'," \ "not '-prefix=/foo/bar' (note the '=')." @@ -2049,6 +2052,7 @@ if [ "$ostype" = Cygwin ]; then fi echo "FLAMBDA=$flambda" >> Makefile echo "SAFE_STRING=$safe_string" >> Makefile +echo "AFL_INSTRUMENT=$afl_instrument" >> Makefile echo "MAX_TESTSUITE_DIR_RETRIES=$max_testsuite_dir_retries" >> Makefile @@ -2165,6 +2169,11 @@ else else inf " safe strings ............. no" fi + if test "$afl_instrument" = "true"; then + inf " afl-fuzz always enabled .. yes" + else + inf " afl-fuzz always enabled .. no" + fi fi if test "$with_debugger" = "ocamldebugger"; then diff --git a/driver/compenv.ml b/driver/compenv.ml index e3f7812163..d4c5fdc009 100644 --- a/driver/compenv.ml +++ b/driver/compenv.ml @@ -191,6 +191,9 @@ let read_one_param ppf position name v = | "g" -> set "g" [ Clflags.debug ] v | "p" -> set "p" [ Clflags.gprofile ] v | "bin-annot" -> set "bin-annot" [ Clflags.binary_annotations ] v + | "afl-instrument" -> set "afl-instrument" [ Clflags.afl_instrument ] v + | "afl-inst-ratio" -> + int_setter ppf "afl-inst-ratio" afl_inst_ratio v | "annot" -> set "annot" [ Clflags.annotations ] v | "absname" -> set "absname" [ Location.absname ] v | "compat-32" -> set "compat-32" [ bytecode_compatible_32 ] v diff --git a/driver/main_args.ml b/driver/main_args.ml index c18b951034..bf1fb8ef1d 100644 --- a/driver/main_args.ml +++ b/driver/main_args.ml @@ -736,6 +736,15 @@ let mk_args0 f = from <file>" ;; +let mk_afl_instrument f = + "-afl-instrument", Arg.Unit f, "Enable instrumentation for afl-fuzz" +;; + +let mk_afl_inst_ratio f = + "-afl-inst-ratio", Arg.Int f, + "Configure percentage of branches instrumented\n\ + \ (advanced, see afl-fuzz docs for AFL_INST_RATIO)" +;; let mk__ f = "-", Arg.String f, @@ -928,6 +937,8 @@ module type Optcomp_options = sig val _pp : string -> unit val _S : unit -> unit val _shared : unit -> unit + val _afl_instrument : unit -> unit + val _afl_inst_ratio : int -> unit end;; module type Opttop_options = sig @@ -1116,6 +1127,8 @@ struct let list = [ mk_a F._a; mk_absname F._absname; + mk_afl_instrument F._afl_instrument; + mk_afl_inst_ratio F._afl_inst_ratio; mk_annot F._annot; mk_binannot F._binannot; mk_inline_branch_factor F._inline_branch_factor; diff --git a/driver/main_args.mli b/driver/main_args.mli index fe1b99bd76..dfe90c000f 100644 --- a/driver/main_args.mli +++ b/driver/main_args.mli @@ -202,6 +202,8 @@ module type Optcomp_options = sig val _pp : string -> unit val _S : unit -> unit val _shared : unit -> unit + val _afl_instrument : unit -> unit + val _afl_inst_ratio : int -> unit end;; module type Opttop_options = sig diff --git a/driver/optmain.ml b/driver/optmain.ml index c069cb4f8e..f28be9aec4 100644 --- a/driver/optmain.ml +++ b/driver/optmain.ml @@ -105,6 +105,8 @@ module Options = Main_args.Make_optcomp_options (struct let _a = set make_archive let _absname = set Location.absname + let _afl_instrument = set afl_instrument + let _afl_inst_ratio n = afl_inst_ratio := n let _annot = set annotations let _binannot = set binary_annotations let _c = set compile_only diff --git a/manual/README.md b/manual/README.md index ca7301fd8c..e640f7df00 100644 --- a/manual/README.md +++ b/manual/README.md @@ -89,6 +89,7 @@ chapters (or sometimes sections) are mapped to a distinct `.etex` file: - The ocamlbuild compilation manager: `ocamlbuild.etex` - Interfacing C with OCaml: `intf-c.etex` - Optimisation with Flambda: `flambda.etex` + - Fuzzing with afl-fuzz: `afl-fuzz.etex` Note that ocamlc,ocamlopt and the toplevel options overlap a lot. Consequently, these options are described together in the file diff --git a/manual/manual/allfiles.etex b/manual/manual/allfiles.etex index db342365be..00b87b776c 100644 --- a/manual/manual/allfiles.etex +++ b/manual/manual/allfiles.etex @@ -73,6 +73,7 @@ and as a % \input emacs.tex \input{intf-c.tex} \input{flambda.tex} +\input{afl-fuzz.tex} \part{The OCaml library} \label{p:library} diff --git a/manual/manual/cmds/Makefile b/manual/manual/cmds/Makefile index 6d7e5805c0..e5048def98 100644 --- a/manual/manual/cmds/Makefile +++ b/manual/manual/cmds/Makefile @@ -1,6 +1,7 @@ FILES=comp.tex top.tex runtime.tex native.tex lexyacc.tex intf-c.tex \ depend.tex profil.tex debugger.tex browser.tex ocamldoc.tex \ - warnings-help.tex ocamlbuild.tex flambda.tex unified-options.tex + warnings-help.tex ocamlbuild.tex flambda.tex afl-fuzz.tex \ + unified-options.tex TOPDIR=../../.. include $(TOPDIR)/Makefile.tools @@ -9,7 +10,8 @@ TRANSF=$(OCAMLRUN) ../../tools/transf TEXQUOTE=../../tools/texquote2 FORMAT=../../tools/format-intf -WITH_TRANSF= ocamldoc.tex top.tex intf-c.tex flambda.tex lexyacc.tex debugger.tex +WITH_TRANSF= ocamldoc.tex top.tex intf-c.tex flambda.tex afl-fuzz.tex\ + lexyacc.tex debugger.tex etex-files: $(FILES) diff --git a/manual/manual/cmds/afl-fuzz.etex b/manual/manual/cmds/afl-fuzz.etex new file mode 100644 index 0000000000..5e8a4beb3f --- /dev/null +++ b/manual/manual/cmds/afl-fuzz.etex @@ -0,0 +1,74 @@ +\chapter{Fuzzing with afl-fuzz} +\pdfchapterfold{-9}{Fuzzing with afl-fuzz} +%HEVEA\cutname{afl-fuzz.html} + +\section{Overview} + +American fuzzy lop (``afl-fuzz'') is a {\em fuzzer}, a tool for +testing software by providing randomly-generated inputs, searching for +those inputs which cause the program to crash. + +Unlike most fuzzers, afl-fuzz observes the internal behaviour of the +program being tested, and adjusts the test cases it generates to +trigger unexplored execution paths. As a result, test cases generated +by afl-fuzz cover more of the possible behaviours of the tested +program than other fuzzers. + +This requires that programs to be tested are instrumented to +communicate with afl-fuzz. The native-code compiler ``ocamlopt'' can +generate such instrumentation, allowing afl-fuzz to be used against +programs written in OCaml. + +For more information on afl-fuzz, see the website at +\ifouthtml +\ahref{http://lcamtuf.coredump.cx/afl/}{http://lcamtuf.coredump.cx/afl/}. +\else +{\tt http://lcamtuf.coredump.cx/afl/} +\fi + +\section{Generating instrumentation} + +The instrumentation that afl-fuzz requires is not generated by +default, and must be explicitly enabled, by passing the {\tt + -afl-instrument} option to {\tt ocamlopt}. + +To fuzz a large system without modifying build tools, OCaml's {\tt + configure} script also accepts the {\tt afl-instrument} option. If +OCaml is configured with {\tt afl-instrument}, then all programs +compiled by {\tt ocamlopt} will be instrumented. + +\subsection{Advanced options} + +In rare cases, it is useful to control the amount of instrumentation +generated. By passing the {\tt -afl-inst-ratio N} argument to {\tt + ocamlopt} with {\tt N} less than 100, instrumentation can be +generated for only N\% of branches. (See the afl-fuzz documentation on +the parameter {\tt AFL\_INST\_RATIO} for the precise effect of this). + +\section{Example} + +As an example, we fuzz-test the following program, {\tt readline.ml}: + +\begin{verbatim} +let _ = + let s = read_line () in + match Array.to_list (Array.init (String.length s) (String.get s)) with + ['s'; 'e'; 'c'; 'r'; 'e'; 't'; ' '; 'c'; 'o'; 'd'; 'e'] -> failwith "uh oh" + | _ -> () +\end{verbatim} + +There is a single input (the string ``secret code'') which causes this +program to crash, but finding it by blind random search is infeasible. + +Instead, we compile with afl-fuzz instrumentation enabled: +\begin{verbatim} +ocamlopt -afl-instrument readline.ml -o readline +\end{verbatim} +Next, we run the program under afl-fuzz: +\begin{verbatim} +mkdir input +echo asdf > input/testcase +mkdir output +afl-fuzz -i input -o output ./readline +\end{verbatim} +By inspecting instrumentation output, the fuzzer finds the crashing input quickly. diff --git a/tools/ocamloptp.ml b/tools/ocamloptp.ml index 41d18e40f7..33147ea743 100644 --- a/tools/ocamloptp.ml +++ b/tools/ocamloptp.ml @@ -51,6 +51,8 @@ let incompatible o = module Options = Main_args.Make_optcomp_options (struct let _a () = make_archive := true; option "-a" () let _absname = option "-absname" + let _afl_instrument = option "-afl-instrument" + let _afl_inst_ratio n = option_with_int "-afl-inst-ratio" n let _annot = option "-annot" let _binannot = option "-bin-annot" let _c = option "-c" diff --git a/utils/clflags.ml b/utils/clflags.ml index bd884872bc..62ced4578f 100644 --- a/utils/clflags.ml +++ b/utils/clflags.ml @@ -159,6 +159,9 @@ let unsafe_string = ref (not Config.safe_string) let classic_inlining = ref false (* -Oclassic *) let inlining_report = ref false (* -inlining-report *) +let afl_instrument = ref Config.afl_instrument (* -afl-instrument *) +let afl_inst_ratio = ref 100 (* -afl-inst-ratio *) + let simplify_rounds = ref None (* -rounds *) let default_simplify_rounds = ref 1 (* -rounds *) let rounds () = diff --git a/utils/clflags.mli b/utils/clflags.mli index f7939eb6e9..7efc4f8ef1 100644 --- a/utils/clflags.mli +++ b/utils/clflags.mli @@ -193,6 +193,8 @@ val inline_max_depth : Int_arg_helper.parsed ref val remove_unused_arguments : bool ref val dump_flambda_verbose : bool ref val classic_inlining : bool ref +val afl_instrument : bool ref +val afl_inst_ratio : int ref val all_passes : string list ref val dumped_pass : string -> bool diff --git a/utils/config.mli b/utils/config.mli index 28a9850d77..ff1eab6933 100644 --- a/utils/config.mli +++ b/utils/config.mli @@ -156,3 +156,5 @@ val libunwind_link_flags : string val safe_string: bool (* Whether the compiler was configured with -safe-string *) +val afl_instrument : bool + (* Whether afl-fuzz instrumentation is generated by default *) diff --git a/utils/config.mlp b/utils/config.mlp index 36cde144cc..626b096f49 100644 --- a/utils/config.mlp +++ b/utils/config.mlp @@ -61,6 +61,8 @@ let profiling = %%PROFILING%% let flambda = %%FLAMBDA%% let safe_string = %%SAFE_STRING%% +let afl_instrument = %%AFL_INSTRUMENT%% + let exec_magic_number = "Caml1999X011" and cmi_magic_number = "Caml1999I021" and cmo_magic_number = "Caml1999O011" |