Diffstat (limited to 'lib')
118 files changed, 2664 insertions, 1733 deletions
diff --git a/lib/asan/CMakeLists.txt b/lib/asan/CMakeLists.txt index 2ae5c85ec..3ec9c500c 100644 --- a/lib/asan/CMakeLists.txt +++ b/lib/asan/CMakeLists.txt @@ -91,6 +91,8 @@ append_list_if(COMPILER_RT_HAS_LIBRT rt ASAN_DYNAMIC_LIBS) append_list_if(COMPILER_RT_HAS_LIBM m ASAN_DYNAMIC_LIBS) append_list_if(COMPILER_RT_HAS_LIBPTHREAD pthread ASAN_DYNAMIC_LIBS) append_list_if(COMPILER_RT_HAS_LIBLOG log ASAN_DYNAMIC_LIBS) +append_list_if(MINGW "${MINGW_LIBRARIES}" ASAN_DYNAMIC_LIBS) +append_list_if(MINGW psapi ASAN_DYNAMIC_LIBS) # Compile ASan sources into an object library. @@ -138,6 +140,7 @@ if(APPLE) add_weak_symbols("lsan" WEAK_SYMBOL_LINK_FLAGS) add_weak_symbols("ubsan" WEAK_SYMBOL_LINK_FLAGS) add_weak_symbols("sanitizer_common" WEAK_SYMBOL_LINK_FLAGS) + add_weak_symbols("xray" WEAK_SYMBOL_LINK_FLAGS) add_compiler_rt_runtime(clang_rt.asan SHARED @@ -215,7 +218,7 @@ else() endif() set(ASAN_DYNAMIC_WEAK_INTERCEPTION) - if (MSVC) + if (WIN32) add_compiler_rt_object_libraries(AsanWeakInterception ${SANITIZER_COMMON_SUPPORTED_OS} ARCHS ${arch} diff --git a/lib/asan/asan_globals_win.cc b/lib/asan/asan_globals_win.cc index 29ab5ebf1..0e75992bf 100644 --- a/lib/asan/asan_globals_win.cc +++ b/lib/asan/asan_globals_win.cc @@ -29,7 +29,7 @@ static void call_on_globals(void (*hook)(__asan_global *, uptr)) { __asan_global *end = &__asan_globals_end; uptr bytediff = (uptr)end - (uptr)start; if (bytediff % sizeof(__asan_global) != 0) { -#ifdef SANITIZER_DLL_THUNK +#if defined(SANITIZER_DLL_THUNK) || defined(SANITIZER_DYNAMIC_RUNTIME_THUNK) __debugbreak(); #else CHECK("corrupt asan global array"); diff --git a/lib/asan/asan_malloc_win.cc b/lib/asan/asan_malloc_win.cc index efa058243..a094e051c 100644 --- a/lib/asan/asan_malloc_win.cc +++ b/lib/asan/asan_malloc_win.cc @@ -14,8 +14,17 @@ #include "sanitizer_common/sanitizer_platform.h" #if SANITIZER_WINDOWS -#define WIN32_LEAN_AND_MEAN -#include <windows.h> +// Intentionally not including windows.h here, to avoid the risk of +// pulling in conflicting declarations of these functions. (With mingw-w64, +// there's a risk of windows.h pulling in stdint.h.) +typedef int BOOL; +typedef void *HANDLE; +typedef const void *LPCVOID; +typedef void *LPVOID; + +#define HEAP_ZERO_MEMORY 0x00000008 +#define HEAP_REALLOC_IN_PLACE_ONLY 0x00000010 + #include "asan_allocator.h" #include "asan_interceptors.h" @@ -125,7 +134,7 @@ void *_recalloc_base(void *p, size_t n, size_t elem_size) { } ALLOCATION_FUNCTION_ATTRIBUTE -size_t _msize(const void *ptr) { +size_t _msize(void *ptr) { GET_CURRENT_PC_BP_SP; (void)sp; return asan_malloc_usable_size(ptr, pc, bp); diff --git a/lib/asan/asan_new_delete.cc b/lib/asan/asan_new_delete.cc index 30efd61a9..e6053c1fe 100644 --- a/lib/asan/asan_new_delete.cc +++ b/lib/asan/asan_new_delete.cc @@ -26,7 +26,7 @@ // anyway by passing extra -export flags to the linker, which is exactly that // dllexport would normally do. We need to export them in order to make the // VS2015 dynamic CRT (MD) work. 
-#if SANITIZER_WINDOWS +#if SANITIZER_WINDOWS && defined(_MSC_VER) #define CXX_OPERATOR_ATTRIBUTE #define COMMENT_EXPORT(sym) __pragma(comment(linker, "/export:" sym)) #ifdef _WIN64 diff --git a/lib/asan/asan_win.cc b/lib/asan/asan_win.cc index 67125d38a..5661d911c 100644 --- a/lib/asan/asan_win.cc +++ b/lib/asan/asan_win.cc @@ -159,6 +159,14 @@ INTERCEPTOR_WINAPI(DWORD, CreateThread, namespace __asan { void InitializePlatformInterceptors() { + // The interceptors were not designed to be removable, so we have to keep this + // module alive for the life of the process. + HMODULE pinned; + CHECK(GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | + GET_MODULE_HANDLE_EX_FLAG_PIN, + (LPCWSTR)&InitializePlatformInterceptors, + &pinned)); + ASAN_INTERCEPT_FUNC(CreateThread); ASAN_INTERCEPT_FUNC(SetUnhandledExceptionFilter); diff --git a/lib/builtins/arm/addsf3.S b/lib/builtins/arm/addsf3.S index 362b5c147..74723cbef 100644 --- a/lib/builtins/arm/addsf3.S +++ b/lib/builtins/arm/addsf3.S @@ -178,7 +178,7 @@ LOCAL_LABEL(do_substraction): push {r0, r1, r2, r3} movs r0, r4 - bl __clzsi2 + bl SYMBOL_NAME(__clzsi2) movs r5, r0 pop {r0, r1, r2, r3} // shift = rep_clz(aSignificand) - rep_clz(implicitBit << 3); diff --git a/lib/builtins/clzdi2.c b/lib/builtins/clzdi2.c index b56d98f5c..1819e6be4 100644 --- a/lib/builtins/clzdi2.c +++ b/lib/builtins/clzdi2.c @@ -16,8 +16,13 @@ /* Returns: the number of leading 0-bits */ -#if !defined(__clang__) && (defined(__sparc64__) || defined(__mips64) || defined(__riscv__)) -/* gcc resolves __builtin_clz -> __clzdi2 leading to infinite recursion */ +#if !defined(__clang__) && \ + ((defined(__sparc__) && defined(__arch64__)) || \ + defined(__mips64) || \ + (defined(__riscv) && __SIZEOF_POINTER__ >= 8)) +/* On 64-bit architectures with neither a native clz instruction nor a native + * ctz instruction, gcc resolves __builtin_clz to __clzdi2 rather than + * __clzsi2, leading to infinite recursion. */ #define __builtin_clz(a) __clzsi2(a) extern si_int __clzsi2(si_int); #endif diff --git a/lib/builtins/ctzdi2.c b/lib/builtins/ctzdi2.c index eecde2971..ef6d7fea1 100644 --- a/lib/builtins/ctzdi2.c +++ b/lib/builtins/ctzdi2.c @@ -16,8 +16,13 @@ /* Returns: the number of trailing 0-bits */ -#if !defined(__clang__) && (defined(__sparc64__) || defined(__mips64) || defined(__riscv__)) -/* gcc resolves __builtin_ctz -> __ctzdi2 leading to infinite recursion */ +#if !defined(__clang__) && \ + ((defined(__sparc__) && defined(__arch64__)) || \ + defined(__mips64) || \ + (defined(__riscv) && __SIZEOF_POINTER__ >= 8)) +/* On 64-bit architectures with neither a native clz instruction nor a native + * ctz instruction, gcc resolves __builtin_ctz to __ctzdi2 rather than + * __ctzsi2, leading to infinite recursion. 
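The GetModuleHandleExW call added to InitializePlatformInterceptors in the asan_win.cc hunk above is the standard Win32 idiom for pinning a module in memory. A minimal standalone sketch of the same idiom (PinCurrentModule is a hypothetical name; the runtime passes the address of InitializePlatformInterceptors itself and CHECKs the result):

#include <windows.h>

static void PinCurrentModule() {
  HMODULE pinned;
  // FROM_ADDRESS resolves the module that contains the given address; PIN
  // keeps that module mapped until the process exits, even across
  // FreeLibrary, so the installed interceptors can never dangle.
  if (!GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS |
                              GET_MODULE_HANDLE_EX_FLAG_PIN,
                          (LPCWSTR)&PinCurrentModule, &pinned)) {
    // Pinning failed; unloading this module would leave dangling hooks.
  }
}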
*/ #define __builtin_ctz(a) __ctzsi2(a) extern si_int __ctzsi2(si_int); #endif diff --git a/lib/builtins/divdc3.c b/lib/builtins/divdc3.c index 3c88390b5..392d6ecac 100644 --- a/lib/builtins/divdc3.c +++ b/lib/builtins/divdc3.c @@ -12,6 +12,8 @@ * ===----------------------------------------------------------------------=== */ +#define DOUBLE_PRECISION +#include "fp_lib.h" #include "int_lib.h" #include "int_math.h" @@ -21,7 +23,7 @@ COMPILER_RT_ABI Dcomplex __divdc3(double __a, double __b, double __c, double __d) { int __ilogbw = 0; - double __logbw = crt_logb(crt_fmax(crt_fabs(__c), crt_fabs(__d))); + double __logbw = __compiler_rt_logb(crt_fmax(crt_fabs(__c), crt_fabs(__d))); if (crt_isfinite(__logbw)) { __ilogbw = (int)__logbw; diff --git a/lib/builtins/divsc3.c b/lib/builtins/divsc3.c index 42a48315e..0d18a256c 100644 --- a/lib/builtins/divsc3.c +++ b/lib/builtins/divsc3.c @@ -12,6 +12,8 @@ *===----------------------------------------------------------------------=== */ +#define SINGLE_PRECISION +#include "fp_lib.h" #include "int_lib.h" #include "int_math.h" @@ -21,7 +23,8 @@ COMPILER_RT_ABI Fcomplex __divsc3(float __a, float __b, float __c, float __d) { int __ilogbw = 0; - float __logbw = crt_logbf(crt_fmaxf(crt_fabsf(__c), crt_fabsf(__d))); + float __logbw = + __compiler_rt_logbf(crt_fmaxf(crt_fabsf(__c), crt_fabsf(__d))); if (crt_isfinite(__logbw)) { __ilogbw = (int)__logbw; diff --git a/lib/builtins/divtc3.c b/lib/builtins/divtc3.c index 16e538ba4..e5ea00d84 100644 --- a/lib/builtins/divtc3.c +++ b/lib/builtins/divtc3.c @@ -12,6 +12,8 @@ *===----------------------------------------------------------------------=== */ +#define QUAD_PRECISION +#include "fp_lib.h" #include "int_lib.h" #include "int_math.h" @@ -21,7 +23,8 @@ COMPILER_RT_ABI Lcomplex __divtc3(long double __a, long double __b, long double __c, long double __d) { int __ilogbw = 0; - long double __logbw = crt_logbl(crt_fmaxl(crt_fabsl(__c), crt_fabsl(__d))); + long double __logbw = + __compiler_rt_logbl(crt_fmaxl(crt_fabsl(__c), crt_fabsl(__d))); if (crt_isfinite(__logbw)) { __ilogbw = (int)__logbw; diff --git a/lib/builtins/emutls.c b/lib/builtins/emutls.c index 07d436e26..ef95a1c26 100644 --- a/lib/builtins/emutls.c +++ b/lib/builtins/emutls.c @@ -42,6 +42,7 @@ static void emutls_shutdown(emutls_address_array *array); static pthread_mutex_t emutls_mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_key_t emutls_pthread_key; +static bool emutls_key_created = false; typedef unsigned int gcc_word __attribute__((mode(word))); typedef unsigned int gcc_pointer __attribute__((mode(pointer))); @@ -109,6 +110,7 @@ static void emutls_key_destructor(void* ptr) { static __inline void emutls_init(void) { if (pthread_key_create(&emutls_pthread_key, emutls_key_destructor) != 0) abort(); + emutls_key_created = true; } static __inline void emutls_init_once(void) { @@ -390,3 +392,14 @@ void* __emutls_get_address(__emutls_control* control) { array->data[index] = emutls_allocate_object(control); return array->data[index]; } + +#ifdef __BIONIC__ +/* Called by Bionic on dlclose to delete the emutls pthread key. 
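To see why the clzdi2.c/ctzdi2.c guards above are needed, here is a sketch of the recursion gcc can create on 64-bit targets that lack native clz/ctz instructions (naive_clzdi2 is hypothetical, not compiler-rt's actual implementation):

typedef int si_int;        // stands in for compiler-rt's 32-bit int
typedef long long di_int;  // stands in for compiler-rt's 64-bit int

si_int naive_clzdi2(di_int a) {
  si_int hi = (si_int)((unsigned long long)a >> 32);
  unsigned lo = (unsigned)a;
  // On such targets gcc may lower each __builtin_clz below to a call to
  // __clzdi2 itself, so this function would recurse forever.
  return hi ? __builtin_clz((unsigned)hi)
            : 32 + __builtin_clz(lo);  // zero input left aside in this sketch
}

Redirecting __builtin_clz to __clzsi2, as the #define above does, breaks the cycle because __clzsi2 is written without the builtin.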
*/ +__attribute__((visibility("hidden"))) +void __emutls_unregister_key(void) { + if (emutls_key_created) { + pthread_key_delete(emutls_pthread_key); + emutls_key_created = false; + } +} +#endif diff --git a/lib/builtins/fp_lib.h b/lib/builtins/fp_lib.h index 223fb980a..a0e19ab6a 100644 --- a/lib/builtins/fp_lib.h +++ b/lib/builtins/fp_lib.h @@ -25,6 +25,7 @@ #include <stdbool.h> #include <limits.h> #include "int_lib.h" +#include "int_math.h" // x86_64 FreeBSD prior v9.3 define fixed-width types incorrectly in // 32-bit mode. @@ -265,6 +266,62 @@ static __inline void wideRightShiftWithSticky(rep_t *hi, rep_t *lo, unsigned int *hi = 0; } } + +// Implements logb methods (logb, logbf, logbl) for IEEE-754. This avoids +// pulling in a libm dependency from compiler-rt, but is not meant to replace +// it (i.e. code calling logb() should get the one from libm, not this), hence +// the __compiler_rt prefix. +static __inline fp_t __compiler_rt_logbX(fp_t x) { + rep_t rep = toRep(x); + int exp = (rep & exponentMask) >> significandBits; + + // Abnormal cases: + // 1) +/- inf returns +inf; NaN returns NaN + // 2) 0.0 returns -inf + if (exp == maxExponent) { + if (((rep & signBit) == 0) || (x != x)) { + return x; // NaN or +inf: return x + } else { + return -x; // -inf: return -x + } + } else if (x == 0.0) { + // 0.0: return -inf + return fromRep(infRep | signBit); + } + + if (exp != 0) { + // Normal number + return exp - exponentBias; // Unbias exponent + } else { + // Subnormal number; normalize and repeat + rep &= absMask; + const int shift = 1 - normalize(&rep); + exp = (rep & exponentMask) >> significandBits; + return exp - exponentBias - shift; // Unbias exponent + } +} +#endif + +#if defined(SINGLE_PRECISION) +static __inline fp_t __compiler_rt_logbf(fp_t x) { + return __compiler_rt_logbX(x); +} +#elif defined(DOUBLE_PRECISION) +static __inline fp_t __compiler_rt_logb(fp_t x) { + return __compiler_rt_logbX(x); +} +#elif defined(QUAD_PRECISION) + #if defined(CRT_LDBL_128BIT) +static __inline fp_t __compiler_rt_logbl(fp_t x) { + return __compiler_rt_logbX(x); +} + #else +// The generic implementation only works for ieee754 floating point. For other +// floating point types, continue to rely on the libm implementation for now. 
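As a worked example of what __compiler_rt_logbX computes, this is the normal-value case for an IEEE-754 double, written standalone (the helper above additionally handles zero, subnormals, infinities, and NaN):

#include <cstdint>
#include <cstring>

static double normal_logb(double x) {
  uint64_t rep;
  std::memcpy(&rep, &x, sizeof rep);     // the equivalent of toRep(x)
  int exp = (int)((rep >> 52) & 0x7ff);  // 11-bit biased exponent field
  return exp - 1023;                     // unbias
}

// normal_logb(8.0) == 3.0 (8 = 1.0 * 2^3) and normal_logb(0.75) == -1.0
// (0.75 = 1.5 * 2^-1). For 0.0 the real helper returns -inf, for +/-inf it
// returns +inf, and NaN propagates unchanged.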
+static __inline long double __compiler_rt_logbl(long double x) { + return crt_logbl(x); +} + #endif #endif #endif // FP_LIB_HEADER diff --git a/lib/builtins/int_math.h b/lib/builtins/int_math.h index fc81fb7f0..aa3d0721a 100644 --- a/lib/builtins/int_math.h +++ b/lib/builtins/int_math.h @@ -92,12 +92,8 @@ #endif #if defined(_MSC_VER) && !defined(__clang__) -#define crt_logb(x) logb((x)) -#define crt_logbf(x) logbf((x)) #define crt_logbl(x) logbl((x)) #else -#define crt_logb(x) __builtin_logb((x)) -#define crt_logbf(x) __builtin_logbf((x)) #define crt_logbl(x) __builtin_logbl((x)) #endif diff --git a/lib/builtins/int_util.c b/lib/builtins/int_util.c index de87410db..752f20155 100644 --- a/lib/builtins/int_util.c +++ b/lib/builtins/int_util.c @@ -27,7 +27,7 @@ NORETURN extern void panic(const char *, ...); #ifndef _WIN32 __attribute__((visibility("hidden"))) #endif -void compilerrt_abort_impl(const char *file, int line, const char *function) { +void __compilerrt_abort_impl(const char *file, int line, const char *function) { panic("%s:%d: abort in %s", file, line, function); } @@ -41,7 +41,7 @@ NORETURN extern void __assert_rtn(const char *func, const char *file, int line, __attribute__((weak)) __attribute__((visibility("hidden"))) #endif -void compilerrt_abort_impl(const char *file, int line, const char *function) { +void __compilerrt_abort_impl(const char *file, int line, const char *function) { __assert_rtn(function, file, line, "libcompiler_rt abort"); } @@ -51,7 +51,7 @@ void compilerrt_abort_impl(const char *file, int line, const char *function) { __attribute__((weak)) __attribute__((visibility("hidden"))) #endif -void compilerrt_abort_impl(const char *file, int line, const char *function) { +void __compilerrt_abort_impl(const char *file, int line, const char *function) { __builtin_trap(); } @@ -64,7 +64,7 @@ void compilerrt_abort_impl(const char *file, int line, const char *function) { __attribute__((weak)) __attribute__((visibility("hidden"))) #endif -void compilerrt_abort_impl(const char *file, int line, const char *function) { +void __compilerrt_abort_impl(const char *file, int line, const char *function) { abort(); } diff --git a/lib/builtins/int_util.h b/lib/builtins/int_util.h index a7b20ed66..c3c87381a 100644 --- a/lib/builtins/int_util.h +++ b/lib/builtins/int_util.h @@ -20,10 +20,10 @@ #define INT_UTIL_H /** \brief Trigger a program abort (or panic for kernel code). */ -#define compilerrt_abort() compilerrt_abort_impl(__FILE__, __LINE__, __func__) +#define compilerrt_abort() __compilerrt_abort_impl(__FILE__, __LINE__, __func__) -NORETURN void compilerrt_abort_impl(const char *file, int line, - const char *function); +NORETURN void __compilerrt_abort_impl(const char *file, int line, + const char *function); #define COMPILE_TIME_ASSERT(expr) COMPILE_TIME_ASSERT1(expr, __COUNTER__) #define COMPILE_TIME_ASSERT1(expr, cnt) COMPILE_TIME_ASSERT2(expr, cnt) diff --git a/lib/builtins/ppc/divtc3.c b/lib/builtins/ppc/divtc3.c index 8ec41c528..ef532b841 100644 --- a/lib/builtins/ppc/divtc3.c +++ b/lib/builtins/ppc/divtc3.c @@ -4,6 +4,11 @@ #include "DD.h" #include "../int_math.h" +// Use DOUBLE_PRECISION because the soft-fp method we use is logb (on the upper +// half of the long doubles), even though this file defines complex division for +// 128-bit floats. 
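For context on why __divdc3 and its siblings need logb at all: they rescale the denominator before a textbook complex division so that c*c + d*d cannot overflow. A condensed sketch of that approach for doubles, written against libm rather than the __compiler_rt helpers and omitting the NaN/infinity recovery the real routines perform afterwards:

#include <cmath>

void complex_div_sketch(double a, double b, double c, double d,
                        double &re, double &im) {
  // (a+bi)/(c+di) = ((ac+bd) + (bc-ad)i) / (c*c + d*d); scale c and d by
  // 2^-ilogbw first so the denominator stays near 1...
  int ilogbw = 0;
  double logbw = std::logb(std::fmax(std::fabs(c), std::fabs(d)));
  if (std::isfinite(logbw)) {
    ilogbw = (int)logbw;
    c = std::scalbn(c, -ilogbw);
    d = std::scalbn(d, -ilogbw);
  }
  double denom = c * c + d * d;
  // ...then scale the quotient back at the end.
  re = std::scalbn((a * c + b * d) / denom, -ilogbw);
  im = std::scalbn((b * c - a * d) / denom, -ilogbw);
}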
+#define DOUBLE_PRECISION +#include "../fp_lib.h" #if !defined(CRT_INFINITY) && defined(HUGE_VAL) #define CRT_INFINITY HUGE_VAL @@ -21,9 +26,10 @@ __divtc3(long double a, long double b, long double c, long double d) DD dDD = { .ld = d }; int ilogbw = 0; - const double logbw = crt_logb(crt_fmax(crt_fabs(cDD.s.hi), crt_fabs(dDD.s.hi) )); - - if (crt_isfinite(logbw)) + const double logbw = __compiler_rt_logb( + crt_fmax(crt_fabs(cDD.s.hi), crt_fabs(dDD.s.hi))); + + if (crt_isfinite(logbw)) { ilogbw = (int)logbw; diff --git a/lib/fuzzer/CMakeLists.txt b/lib/fuzzer/CMakeLists.txt index 679318e46..aae3df8db 100644 --- a/lib/fuzzer/CMakeLists.txt +++ b/lib/fuzzer/CMakeLists.txt @@ -3,7 +3,7 @@ set(LIBFUZZER_SOURCES FuzzerDataFlowTrace.cpp FuzzerDriver.cpp FuzzerExtFunctionsDlsym.cpp - FuzzerExtFunctionsDlsymWin.cpp + FuzzerExtFunctionsWeakAlias.cpp FuzzerExtFunctionsWeak.cpp FuzzerExtraCounters.cpp FuzzerIO.cpp diff --git a/lib/fuzzer/FuzzerDefs.h b/lib/fuzzer/FuzzerDefs.h index a35c7a181..31655d562 100644 --- a/lib/fuzzer/FuzzerDefs.h +++ b/lib/fuzzer/FuzzerDefs.h @@ -129,8 +129,15 @@ #if LIBFUZZER_WINDOWS #define ATTRIBUTE_INTERFACE __declspec(dllexport) +// This is used for __sancov_lowest_stack which is needed for +// -fsanitize-coverage=stack-depth. That feature is not yet available on +// Windows, so make the symbol static to avoid linking errors. +#define ATTRIBUTES_INTERFACE_TLS_INITIAL_EXEC \ + __attribute__((tls_model("initial-exec"))) thread_local static #else #define ATTRIBUTE_INTERFACE __attribute__((visibility("default"))) +#define ATTRIBUTES_INTERFACE_TLS_INITIAL_EXEC \ + ATTRIBUTE_INTERFACE __attribute__((tls_model("initial-exec"))) thread_local #endif namespace fuzzer { diff --git a/lib/fuzzer/FuzzerDriver.cpp b/lib/fuzzer/FuzzerDriver.cpp index d11f9a606..918a972ff 100644 --- a/lib/fuzzer/FuzzerDriver.cpp +++ b/lib/fuzzer/FuzzerDriver.cpp @@ -615,8 +615,6 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { Options.PrintNewCovPcs = Flags.print_pcs; Options.PrintNewCovFuncs = Flags.print_funcs; Options.PrintFinalStats = Flags.print_final_stats; - Options.PrintMutationStats = Flags.print_mutation_stats; - Options.UseWeightedMutations = Flags.use_weighted_mutations; Options.PrintCorpusStats = Flags.print_corpus_stats; Options.PrintCoverage = Flags.print_coverage; Options.PrintUnstableStats = Flags.print_unstable_stats; diff --git a/lib/fuzzer/FuzzerExtFunctionsDlsymWin.cpp b/lib/fuzzer/FuzzerExtFunctionsDlsymWin.cpp deleted file mode 100644 index 321b3ec5d..000000000 --- a/lib/fuzzer/FuzzerExtFunctionsDlsymWin.cpp +++ /dev/null @@ -1,62 +0,0 @@ -//===- FuzzerExtFunctionsDlsymWin.cpp - Interface to external functions ---===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Implementation using dynamic loading for Windows. -//===----------------------------------------------------------------------===// -#include "FuzzerDefs.h" -#if LIBFUZZER_WINDOWS - -#include "FuzzerExtFunctions.h" -#include "FuzzerIO.h" -#include "Windows.h" - -// This must be included after Windows.h. 
-#include "Psapi.h" - -namespace fuzzer { - -ExternalFunctions::ExternalFunctions() { - HMODULE Modules[1024]; - DWORD BytesNeeded; - HANDLE CurrentProcess = GetCurrentProcess(); - - if (!EnumProcessModules(CurrentProcess, Modules, sizeof(Modules), - &BytesNeeded)) { - Printf("EnumProcessModules failed (error: %d).\n", GetLastError()); - exit(1); - } - - if (sizeof(Modules) < BytesNeeded) { - Printf("Error: the array is not big enough to hold all loaded modules.\n"); - exit(1); - } - - for (size_t i = 0; i < (BytesNeeded / sizeof(HMODULE)); i++) - { - FARPROC Fn; -#define EXT_FUNC(NAME, RETURN_TYPE, FUNC_SIG, WARN) \ - if (this->NAME == nullptr) { \ - Fn = GetProcAddress(Modules[i], #NAME); \ - if (Fn == nullptr) \ - Fn = GetProcAddress(Modules[i], #NAME "__dll"); \ - this->NAME = (decltype(ExternalFunctions::NAME)) Fn; \ - } -#include "FuzzerExtFunctions.def" -#undef EXT_FUNC - } - -#define EXT_FUNC(NAME, RETURN_TYPE, FUNC_SIG, WARN) \ - if (this->NAME == nullptr && WARN) \ - Printf("WARNING: Failed to find function \"%s\".\n", #NAME); -#include "FuzzerExtFunctions.def" -#undef EXT_FUNC -} - -} // namespace fuzzer - -#endif // LIBFUZZER_WINDOWS diff --git a/lib/fuzzer/FuzzerExtFunctionsWeak.cpp b/lib/fuzzer/FuzzerExtFunctionsWeak.cpp index a4e56fc27..6a6ef4932 100644 --- a/lib/fuzzer/FuzzerExtFunctionsWeak.cpp +++ b/lib/fuzzer/FuzzerExtFunctionsWeak.cpp @@ -22,7 +22,7 @@ extern "C" { // Declare these symbols as weak to allow them to be optionally defined. #define EXT_FUNC(NAME, RETURN_TYPE, FUNC_SIG, WARN) \ - __attribute__((weak)) RETURN_TYPE NAME FUNC_SIG + __attribute__((weak, visibility("default"))) RETURN_TYPE NAME FUNC_SIG #include "FuzzerExtFunctions.def" diff --git a/lib/fuzzer/FuzzerFlags.def b/lib/fuzzer/FuzzerFlags.def index 258427c3f..0417dda5b 100644 --- a/lib/fuzzer/FuzzerFlags.def +++ b/lib/fuzzer/FuzzerFlags.def @@ -162,6 +162,3 @@ FUZZER_DEPRECATED_FLAG(use_equivalence_server) FUZZER_FLAG_INT(analyze_dict, 0, "Experimental") FUZZER_DEPRECATED_FLAG(use_clang_coverage) FUZZER_FLAG_STRING(data_flow_trace, "Experimental: use the data flow trace") -FUZZER_FLAG_INT(print_mutation_stats, 0, "Experimental") -FUZZER_FLAG_INT(use_weighted_mutations, 0, "Experimental: If 1, fuzzing will " - "favor mutations that perform better during runtime.") diff --git a/lib/fuzzer/FuzzerIO.cpp b/lib/fuzzer/FuzzerIO.cpp index f3ead0ec5..dac5ec658 100644 --- a/lib/fuzzer/FuzzerIO.cpp +++ b/lib/fuzzer/FuzzerIO.cpp @@ -100,14 +100,6 @@ std::string DirPlusFile(const std::string &DirPath, return DirPath + GetSeparator() + FileName; } -std::string Basename(const std::string &Path, char Separator) { - size_t Pos = Path.rfind(Separator); - if (Pos == std::string::npos) - return Path; - assert(Pos < Path.size()); - return Path.substr(Pos + 1); -} - void DupAndCloseStderr() { int OutputFd = DuplicateFile(2); if (OutputFd > 0) { diff --git a/lib/fuzzer/FuzzerIO.h b/lib/fuzzer/FuzzerIO.h index 6d7757435..b4a68190e 100644 --- a/lib/fuzzer/FuzzerIO.h +++ b/lib/fuzzer/FuzzerIO.h @@ -68,7 +68,7 @@ void GetSizedFilesFromDir(const std::string &Dir, Vector<SizedFile> *V); char GetSeparator(); // Similar to the basename utility: returns the file name w/o the dir prefix. 
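The FuzzerExtFunctionsWeak.cpp hunk above adds default visibility to the weak declarations that replace the deleted dlsym/EnumProcessModules lookup. A sketch of the underlying pattern for one hook (HasCustomMutator is a hypothetical helper; the declaration mirrors the entry in FuzzerExtFunctions.def):

#include <cstddef>
#include <cstdint>

extern "C" {
// Weak: the address is null unless the user links in a definition. Default
// visibility keeps the reference resolvable when the library is built with
// -fvisibility=hidden.
__attribute__((weak, visibility("default"))) size_t
LLVMFuzzerCustomMutator(uint8_t *Data, size_t Size, size_t MaxSize,
                        unsigned Seed);
}

bool HasCustomMutator() { return &LLVMFuzzerCustomMutator != nullptr; }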
-std::string Basename(const std::string &Path, char Separator = GetSeparator()); +std::string Basename(const std::string &Path); FILE* OpenFile(int Fd, const char *Mode); diff --git a/lib/fuzzer/FuzzerIOPosix.cpp b/lib/fuzzer/FuzzerIOPosix.cpp index 17e884d3c..401b4cbbf 100644 --- a/lib/fuzzer/FuzzerIOPosix.cpp +++ b/lib/fuzzer/FuzzerIOPosix.cpp @@ -46,6 +46,13 @@ size_t FileSize(const std::string &Path) { return St.st_size; } +std::string Basename(const std::string &Path) { + size_t Pos = Path.rfind(GetSeparator()); + if (Pos == std::string::npos) return Path; + assert(Pos < Path.size()); + return Path.substr(Pos + 1); +} + void ListFilesInDirRecursive(const std::string &Dir, long *Epoch, Vector<std::string> *V, bool TopDir) { auto E = GetEpoch(Dir); diff --git a/lib/fuzzer/FuzzerIOWindows.cpp b/lib/fuzzer/FuzzerIOWindows.cpp index 74853646b..75dcaf72a 100644 --- a/lib/fuzzer/FuzzerIOWindows.cpp +++ b/lib/fuzzer/FuzzerIOWindows.cpp @@ -72,6 +72,26 @@ bool IsFile(const std::string &Path) { return IsFile(Path, Att); } +std::string Basename(const std::string &Path) { + size_t Pos = Path.find_last_of("/\\"); + if (Pos == std::string::npos) return Path; + assert(Pos < Path.size()); + return Path.substr(Pos + 1); +} + +size_t FileSize(const std::string &Path) { + WIN32_FILE_ATTRIBUTE_DATA attr; + if (!GetFileAttributesExA(Path.c_str(), GetFileExInfoStandard, &attr)) { + Printf("GetFileAttributesExA() failed for \"%s\" (Error code: %lu).\n", + Path.c_str(), GetLastError()); + return 0; + } + ULARGE_INTEGER size; + size.HighPart = attr.nFileSizeHigh; + size.LowPart = attr.nFileSizeLow; + return size.QuadPart; +} + void ListFilesInDirRecursive(const std::string &Dir, long *Epoch, Vector<std::string> *V, bool TopDir) { auto E = GetEpoch(Dir); @@ -91,7 +111,7 @@ void ListFilesInDirRecursive(const std::string &Dir, long *Epoch, { if (GetLastError() == ERROR_FILE_NOT_FOUND) return; - Printf("No such directory: %s; exiting\n", Dir.c_str()); + Printf("No such file or directory: %s; exiting\n", Dir.c_str()); exit(1); } diff --git a/lib/fuzzer/FuzzerLoop.cpp b/lib/fuzzer/FuzzerLoop.cpp index c7b13d1e5..7b98f55ae 100644 --- a/lib/fuzzer/FuzzerLoop.cpp +++ b/lib/fuzzer/FuzzerLoop.cpp @@ -38,7 +38,6 @@ namespace fuzzer { static const size_t kMaxUnitSizeToPrint = 256; -static const size_t kUpdateMutationWeightRuns = 10000; thread_local bool Fuzzer::IsMyThread; @@ -361,7 +360,6 @@ void Fuzzer::PrintFinalStats() { TPC.DumpCoverage(); if (Options.PrintCorpusStats) Corpus.PrintStats(); - if (Options.PrintMutationStats) MD.PrintMutationStats(); if (!Options.PrintFinalStats) return; size_t ExecPerSec = execPerSec(); @@ -550,9 +548,6 @@ static bool LooseMemeq(const uint8_t *A, const uint8_t *B, size_t Size) { void Fuzzer::ExecuteCallback(const uint8_t *Data, size_t Size) { TPC.RecordInitialStack(); - if (Options.UseWeightedMutations && - TotalNumberOfRuns % kUpdateMutationWeightRuns == 0) - MD.UpdateDistribution(); TotalNumberOfRuns++; assert(InFuzzingThread()); if (SMR.IsClient()) diff --git a/lib/fuzzer/FuzzerMutate.cpp b/lib/fuzzer/FuzzerMutate.cpp index fac3c7afb..142b2b0b0 100644 --- a/lib/fuzzer/FuzzerMutate.cpp +++ b/lib/fuzzer/FuzzerMutate.cpp @@ -30,41 +30,34 @@ MutationDispatcher::MutationDispatcher(Random &Rand, DefaultMutators.insert( DefaultMutators.begin(), { - // Initialize useful and total mutation counts as 1 in order to - // have mutation stats (i.e. weights) with equal non-zero values. 
- {&MutationDispatcher::Mutate_EraseBytes, "EraseBytes", 1, 1}, - {&MutationDispatcher::Mutate_InsertByte, "InsertByte", 1, 1}, + {&MutationDispatcher::Mutate_EraseBytes, "EraseBytes"}, + {&MutationDispatcher::Mutate_InsertByte, "InsertByte"}, {&MutationDispatcher::Mutate_InsertRepeatedBytes, - "InsertRepeatedBytes", 1, 1}, - {&MutationDispatcher::Mutate_ChangeByte, "ChangeByte", 1, 1}, - {&MutationDispatcher::Mutate_ChangeBit, "ChangeBit", 1, 1}, - {&MutationDispatcher::Mutate_ShuffleBytes, "ShuffleBytes", 1, 1}, - {&MutationDispatcher::Mutate_ChangeASCIIInteger, "ChangeASCIIInt", 1, - 1}, - {&MutationDispatcher::Mutate_ChangeBinaryInteger, "ChangeBinInt", 1, - 1}, - {&MutationDispatcher::Mutate_CopyPart, "CopyPart", 1, 1}, - {&MutationDispatcher::Mutate_CrossOver, "CrossOver", 1, 1}, + "InsertRepeatedBytes"}, + {&MutationDispatcher::Mutate_ChangeByte, "ChangeByte"}, + {&MutationDispatcher::Mutate_ChangeBit, "ChangeBit"}, + {&MutationDispatcher::Mutate_ShuffleBytes, "ShuffleBytes"}, + {&MutationDispatcher::Mutate_ChangeASCIIInteger, "ChangeASCIIInt"}, + {&MutationDispatcher::Mutate_ChangeBinaryInteger, "ChangeBinInt"}, + {&MutationDispatcher::Mutate_CopyPart, "CopyPart"}, + {&MutationDispatcher::Mutate_CrossOver, "CrossOver"}, {&MutationDispatcher::Mutate_AddWordFromManualDictionary, - "ManualDict", 1, 1}, + "ManualDict"}, {&MutationDispatcher::Mutate_AddWordFromPersistentAutoDictionary, - "PersAutoDict", 1, 1}, + "PersAutoDict"}, }); if(Options.UseCmp) DefaultMutators.push_back( - {&MutationDispatcher::Mutate_AddWordFromTORC, "CMP", 1, 1}); + {&MutationDispatcher::Mutate_AddWordFromTORC, "CMP"}); if (EF->LLVMFuzzerCustomMutator) - Mutators.push_back({&MutationDispatcher::Mutate_Custom, "Custom", 1, 1}); + Mutators.push_back({&MutationDispatcher::Mutate_Custom, "Custom"}); else Mutators = DefaultMutators; if (EF->LLVMFuzzerCustomCrossOver) Mutators.push_back( - {&MutationDispatcher::Mutate_CustomCrossOver, "CustomCrossOver", 1, 1}); - - // For weighted mutation selection, init with uniform weights distribution. - Stats.resize(Mutators.size()); + {&MutationDispatcher::Mutate_CustomCrossOver, "CustomCrossOver"}); } static char RandCh(Random &Rand) { @@ -471,7 +464,6 @@ void MutationDispatcher::RecordSuccessfulMutationSequence() { if (!PersistentAutoDictionary.ContainsWord(DE->GetW())) PersistentAutoDictionary.push_back({DE->GetW(), 1}); } - RecordUsefulMutations(); } void MutationDispatcher::PrintRecommendedDictionary() { @@ -492,7 +484,8 @@ void MutationDispatcher::PrintRecommendedDictionary() { void MutationDispatcher::PrintMutationSequence() { Printf("MS: %zd ", CurrentMutatorSequence.size()); - for (auto M : CurrentMutatorSequence) Printf("%s-", M->Name); + for (auto M : CurrentMutatorSequence) + Printf("%s-", M.Name); if (!CurrentDictionaryEntrySequence.empty()) { Printf(" DE: "); for (auto DE : CurrentDictionaryEntrySequence) { @@ -519,20 +512,13 @@ size_t MutationDispatcher::MutateImpl(uint8_t *Data, size_t Size, // Some mutations may fail (e.g. can't insert more bytes if Size == MaxSize), // in which case they will return 0. // Try several times before returning un-mutated data. - Mutator *M = nullptr; for (int Iter = 0; Iter < 100; Iter++) { - // Even when using weighted mutations, fallback to the default selection in - // 20% of cases. 
- if (Options.UseWeightedMutations && Rand(5)) - M = &Mutators[WeightedIndex()]; - else - M = &Mutators[Rand(Mutators.size())]; - size_t NewSize = (this->*(M->Fn))(Data, Size, MaxSize); + auto M = Mutators[Rand(Mutators.size())]; + size_t NewSize = (this->*(M.Fn))(Data, Size, MaxSize); if (NewSize && NewSize <= MaxSize) { if (Options.OnlyASCII) ToASCII(Data, NewSize); CurrentMutatorSequence.push_back(M); - M->TotalCount++; return NewSize; } } @@ -573,34 +559,4 @@ void MutationDispatcher::AddWordToManualDictionary(const Word &W) { {W, std::numeric_limits<size_t>::max()}); } -void MutationDispatcher::RecordUsefulMutations() { - for (auto M : CurrentMutatorSequence) M->UsefulCount++; -} - -void MutationDispatcher::PrintMutationStats() { - Printf("\nstat::mutation_usefulness: "); - UpdateMutationStats(); - for (size_t i = 0; i < Stats.size(); i++) { - Printf("%.3f", 100 * Stats[i]); - if (i < Stats.size() - 1) - Printf(","); - else - Printf("\n"); - } -} - -void MutationDispatcher::UpdateMutationStats() { - // Calculate usefulness statistic for each mutation - for (size_t i = 0; i < Stats.size(); i++) - Stats[i] = - static_cast<double>(Mutators[i].UsefulCount) / Mutators[i].TotalCount; -} - -void MutationDispatcher::UpdateDistribution() { - UpdateMutationStats(); - Distribution = std::discrete_distribution<size_t>(Stats.begin(), Stats.end()); -} - -size_t MutationDispatcher::WeightedIndex() { return Distribution(GetRand()); } - } // namespace fuzzer diff --git a/lib/fuzzer/FuzzerMutate.h b/lib/fuzzer/FuzzerMutate.h index d89667cc4..a51c7fb44 100644 --- a/lib/fuzzer/FuzzerMutate.h +++ b/lib/fuzzer/FuzzerMutate.h @@ -93,29 +93,10 @@ public: Random &GetRand() { return Rand; } - /// Records tally of mutations resulting in new coverage, for usefulness - /// metric. - void RecordUsefulMutations(); - - /// Outputs usefulness stats on command line if option is enabled. - void PrintMutationStats(); - - /// Recalculates mutation stats based on latest run data. - void UpdateMutationStats(); - - /// Sets weights based on mutation performance during fuzzer run. - void UpdateDistribution(); - - /// Returns the index of a mutation based on how useful it has been. - /// Favors mutations with higher usefulness ratios but can return any index. - size_t WeightedIndex(); - private: struct Mutator { size_t (MutationDispatcher::*Fn)(uint8_t *Data, size_t Size, size_t Max); const char *Name; - uint64_t UsefulCount; - uint64_t TotalCount; }; size_t AddWordFromDictionary(Dictionary &D, uint8_t *Data, size_t Size, @@ -154,7 +135,6 @@ public: Dictionary PersistentAutoDictionary; Vector<DictionaryEntry *> CurrentDictionaryEntrySequence; - Vector<Mutator *> CurrentMutatorSequence; static const size_t kCmpDictionaryEntriesDequeSize = 16; DictionaryEntry CmpDictionaryEntriesDeque[kCmpDictionaryEntriesDequeSize]; @@ -169,10 +149,7 @@ public: Vector<Mutator> Mutators; Vector<Mutator> DefaultMutators; - - // Used to weight mutations based on usefulness. 
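For the record, the weighted-selection experiment being deleted above chose a mutator index by drawing from a std::discrete_distribution over per-mutator usefulness ratios (UsefulCount / TotalCount). A sketch of that selection step, with hypothetical names:

#include <random>
#include <vector>

size_t WeightedIndexSketch(const std::vector<double> &Stats,
                           std::mt19937 &Rng) {
  // Weights need not sum to 1; indices with larger Stats values are
  // returned proportionally more often.
  std::discrete_distribution<size_t> Dist(Stats.begin(), Stats.end());
  return Dist(Rng);
}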
- Vector<double> Stats; - std::discrete_distribution<size_t> Distribution; + Vector<Mutator> CurrentMutatorSequence; }; } // namespace fuzzer diff --git a/lib/fuzzer/FuzzerOptions.h b/lib/fuzzer/FuzzerOptions.h index 82855ce0b..bb642f1e2 100644 --- a/lib/fuzzer/FuzzerOptions.h +++ b/lib/fuzzer/FuzzerOptions.h @@ -52,8 +52,6 @@ struct FuzzingOptions { bool PrintNewCovPcs = false; int PrintNewCovFuncs = 0; bool PrintFinalStats = false; - bool PrintMutationStats = false; - bool UseWeightedMutations = false; bool PrintCorpusStats = false; bool PrintCoverage = false; bool PrintUnstableStats = false; diff --git a/lib/fuzzer/FuzzerTracePC.cpp b/lib/fuzzer/FuzzerTracePC.cpp index 1aba816e8..75130840c 100644 --- a/lib/fuzzer/FuzzerTracePC.cpp +++ b/lib/fuzzer/FuzzerTracePC.cpp @@ -32,8 +32,7 @@ ATTRIBUTE_INTERFACE uintptr_t __sancov_trace_pc_pcs[fuzzer::TracePC::kNumPCs]; // Used by -fsanitize-coverage=stack-depth to track stack depth -ATTRIBUTE_INTERFACE __attribute__((tls_model("initial-exec"))) -thread_local uintptr_t __sancov_lowest_stack; +ATTRIBUTES_INTERFACE_TLS_INITIAL_EXEC uintptr_t __sancov_lowest_stack; namespace fuzzer { diff --git a/lib/fuzzer/FuzzerUtilFuchsia.cpp b/lib/fuzzer/FuzzerUtilFuchsia.cpp index cd2bb7438..b9c70e461 100644 --- a/lib/fuzzer/FuzzerUtilFuchsia.cpp +++ b/lib/fuzzer/FuzzerUtilFuchsia.cpp @@ -49,9 +49,6 @@ void CrashTrampolineAsm() __asm__("CrashTrampolineAsm"); namespace { -// TODO(phosek): remove this and replace it with ZX_TIME_INFINITE -#define ZX_TIME_INFINITE_OLD INT64_MAX - // A magic value for the Zircon exception port, chosen to spell 'FUZZING' // when interpreted as a byte sequence on little-endian platforms. const uint64_t kFuzzingCrash = 0x474e495a5a5546; @@ -237,7 +234,7 @@ void CrashHandler(zx_handle_t *Event) { "_zx_object_signal"); zx_port_packet_t Packet; - ExitOnErr(_zx_port_wait(Port.Handle, ZX_TIME_INFINITE_OLD, &Packet), + ExitOnErr(_zx_port_wait(Port.Handle, ZX_TIME_INFINITE, &Packet), "_zx_port_wait"); // At this point, we want to get the state of the crashing thread, but @@ -315,8 +312,8 @@ void SetSignalHandler(const FuzzingOptions &Options) { ExitOnErr(_zx_event_create(0, &Event), "_zx_event_create"); std::thread T(CrashHandler, &Event); - zx_status_t Status = _zx_object_wait_one(Event, ZX_USER_SIGNAL_0, - ZX_TIME_INFINITE_OLD, nullptr); + zx_status_t Status = + _zx_object_wait_one(Event, ZX_USER_SIGNAL_0, ZX_TIME_INFINITE, nullptr); _zx_handle_close(Event); ExitOnErr(Status, "_zx_object_wait_one"); @@ -440,7 +437,7 @@ int ExecuteCommand(const Command &Cmd) { // Now join the process and return the exit status. if ((rc = _zx_object_wait_one(ProcessHandle, ZX_PROCESS_TERMINATED, - ZX_TIME_INFINITE_OLD, nullptr)) != ZX_OK) { + ZX_TIME_INFINITE, nullptr)) != ZX_OK) { Printf("libFuzzer: failed to join '%s': %s\n", Argv[0], _zx_status_get_string(rc)); return rc; diff --git a/lib/fuzzer/FuzzerUtilWindows.cpp b/lib/fuzzer/FuzzerUtilWindows.cpp index 8227e778e..393b4768b 100644 --- a/lib/fuzzer/FuzzerUtilWindows.cpp +++ b/lib/fuzzer/FuzzerUtilWindows.cpp @@ -24,7 +24,7 @@ #include <windows.h> // This must be included after windows.h. 
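Spelled out, the ATTRIBUTES_INTERFACE_TLS_INITIAL_EXEC macro introduced in FuzzerDefs.h is equivalent to the following two declarations of __sancov_lowest_stack (attributes merged for brevity), as used in the FuzzerTracePC.cpp hunk above:

#include <cstdint>

#if defined(_WIN32)
// -fsanitize-coverage=stack-depth is not yet available on Windows and the
// TLS variable cannot be dllexported, so it stays static to avoid link
// errors.
__attribute__((tls_model("initial-exec"))) thread_local static
    uintptr_t __sancov_lowest_stack;
#else
__attribute__((visibility("default"), tls_model("initial-exec")))
thread_local uintptr_t __sancov_lowest_stack;
#endif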
-#include <Psapi.h> +#include <psapi.h> namespace fuzzer { @@ -179,7 +179,9 @@ const void *SearchMemory(const void *Data, size_t DataLen, const void *Patt, } std::string DisassembleCmd(const std::string &FileName) { - if (ExecuteCommand("dumpbin /summary > nul") == 0) + Vector<std::string> command_vector; + command_vector.push_back("dumpbin /summary > nul"); + if (ExecuteCommand(Command(command_vector)) == 0) return "dumpbin /disasm " + FileName; Printf("libFuzzer: couldn't find tool to disassemble (dumpbin)\n"); exit(1); diff --git a/lib/fuzzer/tests/CMakeLists.txt b/lib/fuzzer/tests/CMakeLists.txt index ed5807168..0b561c170 100644 --- a/lib/fuzzer/tests/CMakeLists.txt +++ b/lib/fuzzer/tests/CMakeLists.txt @@ -17,6 +17,8 @@ list(APPEND LIBFUZZER_UNITTEST_LINK_FLAGS --driver-mode=g++) if(APPLE OR CMAKE_SYSTEM_NAME STREQUAL "FreeBSD") list(APPEND LIBFUZZER_UNITTEST_LINK_FLAGS -lc++ -lpthread) +elseif(WIN32) + list(APPEND LIBFUZZER_UNITTEST_LINK_FLAGS -lstdc++) else() list(APPEND LIBFUZZER_UNITTEST_LINK_FLAGS -lstdc++ -lpthread) endif() diff --git a/lib/fuzzer/tests/FuzzerUnittest.cpp b/lib/fuzzer/tests/FuzzerUnittest.cpp index e3b067026..7cdd44582 100644 --- a/lib/fuzzer/tests/FuzzerUnittest.cpp +++ b/lib/fuzzer/tests/FuzzerUnittest.cpp @@ -34,6 +34,13 @@ TEST(Fuzzer, Basename) { EXPECT_EQ(Basename("/bar"), "bar"); EXPECT_EQ(Basename("foo/x"), "x"); EXPECT_EQ(Basename("foo/"), ""); +#if LIBFUZZER_WINDOWS + EXPECT_EQ(Basename("foo\\bar"), "bar"); + EXPECT_EQ(Basename("foo\\bar/baz"), "baz"); + EXPECT_EQ(Basename("\\bar"), "bar"); + EXPECT_EQ(Basename("foo\\x"), "x"); + EXPECT_EQ(Basename("foo\\"), ""); +#endif } TEST(Fuzzer, CrossOver) { diff --git a/lib/hwasan/CMakeLists.txt b/lib/hwasan/CMakeLists.txt index 6d1682bbf..3c00eddfb 100644 --- a/lib/hwasan/CMakeLists.txt +++ b/lib/hwasan/CMakeLists.txt @@ -10,6 +10,7 @@ set(HWASAN_RTL_SOURCES hwasan_poisoning.cc hwasan_report.cc hwasan_thread.cc + hwasan_thread_list.cc ) set(HWASAN_RTL_CXX_SOURCES @@ -25,8 +26,12 @@ set(HWASAN_RTL_HEADERS hwasan_mapping.h hwasan_poisoning.h hwasan_report.h - hwasan_thread.h) + hwasan_thread.h + hwasan_thread_list.h + ) +set(HWASAN_DEFINITIONS) +append_list_if(COMPILER_RT_HWASAN_WITH_INTERCEPTORS HWASAN_WITH_INTERCEPTORS=1 HWASAN_DEFINITIONS) set(HWASAN_RTL_CFLAGS ${SANITIZER_COMMON_CFLAGS}) append_rtti_flag(OFF HWASAN_RTL_CFLAGS) @@ -55,23 +60,27 @@ add_compiler_rt_object_libraries(RTHwasan ARCHS ${HWASAN_SUPPORTED_ARCH} SOURCES ${HWASAN_RTL_SOURCES} ADDITIONAL_HEADERS ${HWASAN_RTL_HEADERS} - CFLAGS ${HWASAN_RTL_CFLAGS}) + CFLAGS ${HWASAN_RTL_CFLAGS} + DEFS ${HWASAN_DEFINITIONS}) add_compiler_rt_object_libraries(RTHwasan_cxx ARCHS ${HWASAN_SUPPORTED_ARCH} SOURCES ${HWASAN_RTL_CXX_SOURCES} ADDITIONAL_HEADERS ${HWASAN_RTL_HEADERS} - CFLAGS ${HWASAN_RTL_CFLAGS}) + CFLAGS ${HWASAN_RTL_CFLAGS} + DEFS ${HWASAN_DEFINITIONS}) add_compiler_rt_object_libraries(RTHwasan_dynamic ARCHS ${HWASAN_SUPPORTED_ARCH} SOURCES ${HWASAN_RTL_SOURCES} ${HWASAN_RTL_CXX_SOURCES} ADDITIONAL_HEADERS ${HWASAN_RTL_HEADERS} - CFLAGS ${HWASAN_DYNAMIC_CFLAGS}) + CFLAGS ${HWASAN_DYNAMIC_CFLAGS} + DEFS ${HWASAN_DEFINITIONS}) file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/dummy.cc "") add_compiler_rt_object_libraries(RTHwasan_dynamic_version_script_dummy ARCHS ${HWASAN_SUPPORTED_ARCH} SOURCES ${CMAKE_CURRENT_BINARY_DIR}/dummy.cc - CFLAGS ${HWASAN_DYNAMIC_CFLAGS}) + CFLAGS ${HWASAN_DYNAMIC_CFLAGS} + DEFS ${HWASAN_DEFINITIONS}) foreach(arch ${HWASAN_SUPPORTED_ARCH}) add_compiler_rt_runtime(clang_rt.hwasan diff --git a/lib/hwasan/hwasan.cc 
b/lib/hwasan/hwasan.cc index 3d8a8e888..02aee4d61 100644 --- a/lib/hwasan/hwasan.cc +++ b/lib/hwasan/hwasan.cc @@ -17,6 +17,7 @@ #include "hwasan_poisoning.h" #include "hwasan_report.h" #include "hwasan_thread.h" +#include "hwasan_thread_list.h" #include "sanitizer_common/sanitizer_atomic.h" #include "sanitizer_common/sanitizer_common.h" #include "sanitizer_common/sanitizer_flags.h" @@ -36,17 +37,17 @@ using namespace __sanitizer; namespace __hwasan { void EnterSymbolizer() { - HwasanThread *t = GetCurrentThread(); + Thread *t = GetCurrentThread(); CHECK(t); t->EnterSymbolizer(); } void ExitSymbolizer() { - HwasanThread *t = GetCurrentThread(); + Thread *t = GetCurrentThread(); CHECK(t); t->LeaveSymbolizer(); } bool IsInSymbolizer() { - HwasanThread *t = GetCurrentThread(); + Thread *t = GetCurrentThread(); return t && t->InSymbolizer(); } @@ -87,7 +88,18 @@ static void InitializeFlags() { cf.check_printf = false; cf.intercept_tls_get_addr = true; cf.exitcode = 99; + // Sigtrap is used in error reporting. cf.handle_sigtrap = kHandleSignalExclusive; + +#if SANITIZER_ANDROID + // Let platform handle other signals. It is better at reporting them then we + // are. + cf.handle_segv = kHandleSignalNo; + cf.handle_sigbus = kHandleSignalNo; + cf.handle_abort = kHandleSignalNo; + cf.handle_sigill = kHandleSignalNo; + cf.handle_sigfpe = kHandleSignalNo; +#endif OverrideCommonFlags(cf); } @@ -120,7 +132,8 @@ static void InitializeFlags() { #if HWASAN_CONTAINS_UBSAN ubsan_parser.ParseString(GetEnv("UBSAN_OPTIONS")); #endif - VPrintf(1, "HWASAN_OPTIONS: %s\n", hwasan_options ? hwasan_options : "<empty>"); + VPrintf(1, "HWASAN_OPTIONS: %s\n", + hwasan_options ? hwasan_options : "<empty>"); InitializeCommonFlags(); @@ -131,8 +144,13 @@ static void InitializeFlags() { void GetStackTrace(BufferedStackTrace *stack, uptr max_s, uptr pc, uptr bp, void *context, bool request_fast_unwind) { - HwasanThread *t = GetCurrentThread(); - if (!t || !StackTrace::WillUseFastUnwind(request_fast_unwind)) { + Thread *t = GetCurrentThread(); + if (!t) { + // the thread is still being created. + stack->size = 0; + return; + } + if (!StackTrace::WillUseFastUnwind(request_fast_unwind)) { // Block reports from our interceptors during _Unwind_Backtrace. 
SymbolizerScope sym_scope; return stack->Unwind(max_s, pc, bp, context, 0, 0, request_fast_unwind); @@ -154,6 +172,54 @@ static void HWAsanCheckFailed(const char *file, int line, const char *cond, Die(); } +static constexpr uptr kMemoryUsageBufferSize = 4096; + +static void HwasanFormatMemoryUsage(InternalScopedString &s) { + HwasanThreadList &thread_list = hwasanThreadList(); + auto thread_stats = thread_list.GetThreadStats(); + auto *sds = StackDepotGetStats(); + AllocatorStatCounters asc; + GetAllocatorStats(asc); + s.append( + "HWASAN pid: %d rss: %zd threads: %zd stacks: %zd" + " thr_aux: %zd stack_depot: %zd uniq_stacks: %zd" + " heap: %zd", + internal_getpid(), GetRSS(), thread_stats.n_live_threads, + thread_stats.total_stack_size, + thread_stats.n_live_threads * thread_list.MemoryUsedPerThread(), + sds->allocated, sds->n_uniq_ids, asc[AllocatorStatMapped]); +} + +#if SANITIZER_ANDROID +static char *memory_usage_buffer = nullptr; + +#define PR_SET_VMA 0x53564d41 +#define PR_SET_VMA_ANON_NAME 0 + +static void InitMemoryUsage() { + memory_usage_buffer = + (char *)MmapOrDie(kMemoryUsageBufferSize, "memory usage string"); + CHECK(memory_usage_buffer); + memory_usage_buffer[0] = '\0'; + CHECK(internal_prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, + (uptr)memory_usage_buffer, kMemoryUsageBufferSize, + (uptr)memory_usage_buffer) == 0); +} + +void UpdateMemoryUsage() { + if (!flags()->export_memory_stats) + return; + if (!memory_usage_buffer) + InitMemoryUsage(); + InternalScopedString s(kMemoryUsageBufferSize); + HwasanFormatMemoryUsage(s); + internal_strncpy(memory_usage_buffer, s.data(), kMemoryUsageBufferSize - 1); + memory_usage_buffer[kMemoryUsageBufferSize - 1] = '\0'; +} +#else +void UpdateMemoryUsage() {} +#endif + } // namespace __hwasan // Interface. @@ -166,12 +232,6 @@ void __hwasan_shadow_init() { if (hwasan_shadow_inited) return; if (!InitShadow()) { Printf("FATAL: HWAddressSanitizer cannot mmap the shadow memory.\n"); - if (HWASAN_FIXED_MAPPING) { - Printf("FATAL: Make sure to compile with -fPIE and to link with -pie.\n"); - Printf("FATAL: Disabling ASLR is known to cause this error.\n"); - Printf("FATAL: If running under GDB, try " - "'set disable-randomization off'.\n"); - } DumpProcessMap(); Die(); } @@ -195,7 +255,12 @@ void __hwasan_init() { __sanitizer_set_report_path(common_flags()->log_path); DisableCoreDumperIfNecessary(); + __hwasan_shadow_init(); + + InitThreads(); + hwasanThreadList().CreateCurrentThread(); + MadviseShadow(); // This may call libc -> needs initialized shadow. @@ -209,14 +274,11 @@ void __hwasan_init() { InitializeCoverage(common_flags()->coverage, common_flags()->coverage_dir); - HwasanTSDInit(HwasanTSDDtor); + HwasanTSDInit(); + HwasanTSDThreadInit(); HwasanAllocatorInit(); - HwasanThread *main_thread = HwasanThread::Create(nullptr, nullptr); - SetCurrentThread(main_thread); - main_thread->ThreadStart(); - #if HWASAN_CONTAINS_UBSAN __ubsan::InitAsPlugin(); #endif @@ -228,13 +290,13 @@ void __hwasan_init() { } void __hwasan_print_shadow(const void *p, uptr sz) { - uptr ptr_raw = GetAddressFromPointer((uptr)p); - uptr shadow_first = MEM_TO_SHADOW(ptr_raw); - uptr shadow_last = MEM_TO_SHADOW(ptr_raw + sz - 1); + uptr ptr_raw = UntagAddr(reinterpret_cast<uptr>(p)); + uptr shadow_first = MemToShadow(ptr_raw); + uptr shadow_last = MemToShadow(ptr_raw + sz - 1); Printf("HWASan shadow map for %zx .. 
%zx (pointer tag %x)\n", ptr_raw, ptr_raw + sz, GetTagFromPointer((uptr)p)); for (uptr s = shadow_first; s <= shadow_last; ++s) - Printf(" %zx: %x\n", SHADOW_TO_MEM(s), *(tag_t *)s); + Printf(" %zx: %x\n", ShadowToMem(s), *(tag_t *)s); } sptr __hwasan_test_shadow(const void *p, uptr sz) { @@ -243,12 +305,12 @@ sptr __hwasan_test_shadow(const void *p, uptr sz) { tag_t ptr_tag = GetTagFromPointer((uptr)p); if (ptr_tag == 0) return -1; - uptr ptr_raw = GetAddressFromPointer((uptr)p); - uptr shadow_first = MEM_TO_SHADOW(ptr_raw); - uptr shadow_last = MEM_TO_SHADOW(ptr_raw + sz - 1); + uptr ptr_raw = UntagAddr(reinterpret_cast<uptr>(p)); + uptr shadow_first = MemToShadow(ptr_raw); + uptr shadow_last = MemToShadow(ptr_raw + sz - 1); for (uptr s = shadow_first; s <= shadow_last; ++s) if (*(tag_t*)s != ptr_tag) - return SHADOW_TO_MEM(s) - ptr_raw; + return ShadowToMem(s) - ptr_raw; return -1; } @@ -304,7 +366,7 @@ template <ErrorAction EA, AccessType AT, unsigned LogSize> __attribute__((always_inline, nodebug)) static void CheckAddress(uptr p) { tag_t ptr_tag = GetTagFromPointer(p); uptr ptr_raw = p & ~kAddressTagMask; - tag_t mem_tag = *(tag_t *)MEM_TO_SHADOW(ptr_raw); + tag_t mem_tag = *(tag_t *)MemToShadow(ptr_raw); if (UNLIKELY(ptr_tag != mem_tag)) { SigTrap<0x20 * (EA == ErrorAction::Recover) + 0x10 * (AT == AccessType::Store) + LogSize>(p); @@ -318,8 +380,8 @@ __attribute__((always_inline, nodebug)) static void CheckAddressSized(uptr p, CHECK_NE(0, sz); tag_t ptr_tag = GetTagFromPointer(p); uptr ptr_raw = p & ~kAddressTagMask; - tag_t *shadow_first = (tag_t *)MEM_TO_SHADOW(ptr_raw); - tag_t *shadow_last = (tag_t *)MEM_TO_SHADOW(ptr_raw + sz - 1); + tag_t *shadow_first = (tag_t *)MemToShadow(ptr_raw); + tag_t *shadow_last = (tag_t *)MemToShadow(ptr_raw + sz - 1); for (tag_t *t = shadow_first; t <= shadow_last; ++t) if (UNLIKELY(ptr_tag != *t)) { SigTrap<0x20 * (EA == ErrorAction::Recover) + @@ -430,10 +492,16 @@ void __hwasan_handle_longjmp(const void *sp_dst) { TagMemory(sp, dst - sp, 0); } +void __hwasan_print_memory_usage() { + InternalScopedString s(kMemoryUsageBufferSize); + HwasanFormatMemoryUsage(s); + Printf("%s\n", s.data()); +} + static const u8 kFallbackTag = 0xBB; u8 __hwasan_generate_tag() { - HwasanThread *t = GetCurrentThread(); + Thread *t = GetCurrentThread(); if (!t) return kFallbackTag; return t->GenerateRandomTag(); } diff --git a/lib/hwasan/hwasan.h b/lib/hwasan/hwasan.h index d7dc83b2f..a3da09e88 100644 --- a/lib/hwasan/hwasan.h +++ b/lib/hwasan/hwasan.h @@ -30,6 +30,10 @@ # define HWASAN_CONTAINS_UBSAN CAN_SANITIZE_UB #endif +#ifndef HWASAN_WITH_INTERCEPTORS +#define HWASAN_WITH_INTERCEPTORS 0 +#endif + typedef u8 tag_t; // TBI (Top Byte Ignore) feature of AArch64: bits [63:56] are ignored in address @@ -37,16 +41,21 @@ typedef u8 tag_t; const unsigned kAddressTagShift = 56; const uptr kAddressTagMask = 0xFFUL << kAddressTagShift; +// Minimal alignment of the shadow base address. Determines the space available +// for threads and stack histories. This is an ABI constant. 
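The CheckAddress/CheckAddressSized hunks above are the heart of the hwasan check: the pointer's top-byte tag must equal the one-byte shadow tag covering its granule. A standalone sketch, assuming the usual 16-byte granule (kShadowScale == 4) and taking the shadow base as a parameter in place of the runtime's real MemToShadow mapping:

#include <cstdint>

constexpr unsigned kAddressTagShift = 56;
constexpr uint64_t kAddressTagMask = 0xFFULL << kAddressTagShift;

inline bool TagsMatch(uint64_t p, uint64_t shadow_base) {
  uint8_t ptr_tag = (uint8_t)(p >> kAddressTagShift);  // GetTagFromPointer
  uint64_t ptr_raw = p & ~kAddressTagMask;             // UntagAddr
  uint8_t mem_tag = *(uint8_t *)(shadow_base + (ptr_raw >> 4));
  return ptr_tag == mem_tag;  // a mismatch triggers SigTrap and a report
}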
+const unsigned kShadowBaseAlignment = 32; + static inline tag_t GetTagFromPointer(uptr p) { return p >> kAddressTagShift; } -static inline uptr GetAddressFromPointer(uptr p) { - return p & ~kAddressTagMask; +static inline uptr UntagAddr(uptr tagged_addr) { + return tagged_addr & ~kAddressTagMask; } -static inline void * GetAddressFromPointer(const void *p) { - return (void *)((uptr)p & ~kAddressTagMask); +static inline void *UntagPtr(const void *tagged_ptr) { + return reinterpret_cast<void *>( + UntagAddr(reinterpret_cast<uptr>(tagged_ptr))); } static inline uptr AddTagToPointer(uptr p, tag_t tag) { @@ -61,6 +70,7 @@ extern int hwasan_report_count; bool ProtectRange(uptr beg, uptr end); bool InitShadow(); +void InitThreads(); void MadviseShadow(); char *GetProcSelfMaps(); void InitializeInterceptors(); @@ -136,13 +146,13 @@ class ScopedThreadLocalStateBackup { u64 va_arg_overflow_size_tls; }; -void HwasanTSDInit(void (*destructor)(void *tsd)); -void *HwasanTSDGet(); -void HwasanTSDSet(void *tsd); -void HwasanTSDDtor(void *tsd); +void HwasanTSDInit(); +void HwasanTSDThreadInit(); void HwasanOnDeadlySignal(int signo, void *info, void *context); +void UpdateMemoryUsage(); + } // namespace __hwasan #define HWASAN_MALLOC_HOOK(ptr, size) \ diff --git a/lib/hwasan/hwasan_allocator.cc b/lib/hwasan/hwasan_allocator.cc index 23e919e79..b9c379ea4 100644 --- a/lib/hwasan/hwasan_allocator.cc +++ b/lib/hwasan/hwasan_allocator.cc @@ -12,10 +12,6 @@ // HWAddressSanitizer allocator. //===----------------------------------------------------------------------===// -#include "sanitizer_common/sanitizer_allocator.h" -#include "sanitizer_common/sanitizer_allocator_checks.h" -#include "sanitizer_common/sanitizer_allocator_interface.h" -#include "sanitizer_common/sanitizer_allocator_report.h" #include "sanitizer_common/sanitizer_atomic.h" #include "sanitizer_common/sanitizer_errno.h" #include "sanitizer_common/sanitizer_stackdepot.h" @@ -23,29 +19,14 @@ #include "hwasan_allocator.h" #include "hwasan_mapping.h" #include "hwasan_thread.h" -#include "hwasan_poisoning.h" +#include "hwasan_report.h" namespace __hwasan { -enum { - CHUNK_INVALID = 0, - CHUNK_FREE = 1, - CHUNK_ALLOCATED = 2 -}; - -struct Metadata { - u64 state : 2; - u64 requested_size : 62; - u32 alloc_context_id; - u32 free_context_id; -}; - -bool HwasanChunkView::IsValid() const { - return metadata_ && metadata_->state != CHUNK_INVALID; -} bool HwasanChunkView::IsAllocated() const { - return metadata_ && metadata_->state == CHUNK_ALLOCATED; + return metadata_ && metadata_->alloc_context_id && metadata_->requested_size; } + uptr HwasanChunkView::Beg() const { return block_; } @@ -58,44 +39,6 @@ uptr HwasanChunkView::UsedSize() const { u32 HwasanChunkView::GetAllocStackId() const { return metadata_->alloc_context_id; } -u32 HwasanChunkView::GetFreeStackId() const { - return metadata_->free_context_id; -} - -struct HwasanMapUnmapCallback { - void OnMap(uptr p, uptr size) const {} - void OnUnmap(uptr p, uptr size) const { - // We are about to unmap a chunk of user memory. - // It can return as user-requested mmap() or another thread stack. - // Make it accessible with zero-tagged pointer. 
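UpdateMemoryUsage, declared above and implemented in hwasan.cc, relies on an Android-only prctl to give the stats buffer a name that shows up in /proc/<pid>/maps. A standalone sketch of that trick (MapNamedBuffer is hypothetical; PR_SET_VMA_ANON_NAME is a bionic/Android kernel extension, hence the local #defines):

#include <sys/mman.h>
#include <sys/prctl.h>

#define PR_SET_VMA 0x53564d41
#define PR_SET_VMA_ANON_NAME 0

static char *MapNamedBuffer(unsigned long size, const char *name) {
  void *p = mmap(nullptr, size, PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (p == MAP_FAILED) return nullptr;
  // Best-effort: non-Android kernels reject this option with EINVAL.
  prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, (unsigned long)p, size,
        (unsigned long)name);
  return (char *)p;
}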
- TagMemory(p, size, 0); - } -}; - -#if !defined(__aarch64__) && !defined(__x86_64__) -#error Unsupported platform -#endif - -static const uptr kMaxAllowedMallocSize = 2UL << 30; // 2G -static const uptr kRegionSizeLog = 20; -static const uptr kNumRegions = SANITIZER_MMAP_RANGE_SIZE >> kRegionSizeLog; -typedef TwoLevelByteMap<(kNumRegions >> 12), 1 << 12> ByteMap; - -struct AP32 { - static const uptr kSpaceBeg = 0; - static const u64 kSpaceSize = SANITIZER_MMAP_RANGE_SIZE; - static const uptr kMetadataSize = sizeof(Metadata); - typedef __sanitizer::CompactSizeClassMap SizeClassMap; - static const uptr kRegionSizeLog = __hwasan::kRegionSizeLog; - typedef __hwasan::ByteMap ByteMap; - typedef HwasanMapUnmapCallback MapUnmapCallback; - static const uptr kFlags = 0; -}; -typedef SizeClassAllocator32<AP32> PrimaryAllocator; -typedef SizeClassAllocatorLocalCache<PrimaryAllocator> AllocatorCache; -typedef LargeMmapAllocator<HwasanMapUnmapCallback> SecondaryAllocator; -typedef CombinedAllocator<PrimaryAllocator, AllocatorCache, - SecondaryAllocator> Allocator; static Allocator allocator; static AllocatorCache fallback_allocator_cache; @@ -105,6 +48,10 @@ static atomic_uint8_t hwasan_allocator_tagging_enabled; static const tag_t kFallbackAllocTag = 0xBB; static const tag_t kFallbackFreeTag = 0xBC; +void GetAllocatorStats(AllocatorStatCounters s) { + allocator.GetStats(s); +} + void HwasanAllocatorInit() { atomic_store_relaxed(&hwasan_allocator_tagging_enabled, !flags()->disable_allocator_tagging); @@ -112,34 +59,34 @@ void HwasanAllocatorInit() { allocator.Init(common_flags()->allocator_release_to_os_interval_ms); } -AllocatorCache *GetAllocatorCache(HwasanThreadLocalMallocStorage *ms) { - CHECK(ms); - CHECK_LE(sizeof(AllocatorCache), sizeof(ms->allocator_cache)); - return reinterpret_cast<AllocatorCache *>(ms->allocator_cache); +void AllocatorSwallowThreadLocalCache(AllocatorCache *cache) { + allocator.SwallowCache(cache); } -void HwasanThreadLocalMallocStorage::CommitBack() { - allocator.SwallowCache(GetAllocatorCache(this)); +static uptr TaggedSize(uptr size) { + if (!size) size = 1; + uptr new_size = RoundUpTo(size, kShadowAlignment); + CHECK_GE(new_size, size); + return new_size; } -static void *HwasanAllocate(StackTrace *stack, uptr size, uptr alignment, - bool zeroise) { - alignment = Max(alignment, kShadowAlignment); - size = RoundUpTo(size, kShadowAlignment); - - if (size > kMaxAllowedMallocSize) { +static void *HwasanAllocate(StackTrace *stack, uptr orig_size, uptr alignment, + bool zeroise) { + if (orig_size > kMaxAllowedMallocSize) { if (AllocatorMayReturnNull()) { Report("WARNING: HWAddressSanitizer failed to allocate 0x%zx bytes\n", - size); + orig_size); return nullptr; } - ReportAllocationSizeTooBig(size, kMaxAllowedMallocSize, stack); + ReportAllocationSizeTooBig(orig_size, kMaxAllowedMallocSize, stack); } - HwasanThread *t = GetCurrentThread(); + + alignment = Max(alignment, kShadowAlignment); + uptr size = TaggedSize(orig_size); + Thread *t = GetCurrentThread(); void *allocated; if (t) { - AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage()); - allocated = allocator.Allocate(cache, size, alignment); + allocated = allocator.Allocate(t->allocator_cache(), size, alignment); } else { SpinMutexLock l(&fallback_mutex); AllocatorCache *cache = &fallback_allocator_cache; @@ -153,8 +100,7 @@ static void *HwasanAllocate(StackTrace *stack, uptr size, uptr alignment, } Metadata *meta = reinterpret_cast<Metadata *>(allocator.GetMetaData(allocated)); - meta->state = CHUNK_ALLOCATED; 
- meta->requested_size = size; + meta->requested_size = static_cast<u32>(orig_size); meta->alloc_context_id = StackDepotPut(*stack); if (zeroise) { internal_memset(allocated, 0, size); @@ -173,71 +119,68 @@ static void *HwasanAllocate(StackTrace *stack, uptr size, uptr alignment, return user_ptr; } -void HwasanDeallocate(StackTrace *stack, void *user_ptr) { - CHECK(user_ptr); - HWASAN_FREE_HOOK(user_ptr); +static bool PointerAndMemoryTagsMatch(void *tagged_ptr) { + CHECK(tagged_ptr); + tag_t ptr_tag = GetTagFromPointer(reinterpret_cast<uptr>(tagged_ptr)); + tag_t mem_tag = *reinterpret_cast<tag_t *>( + MemToShadow(reinterpret_cast<uptr>(UntagPtr(tagged_ptr)))); + return ptr_tag == mem_tag; +} + +void HwasanDeallocate(StackTrace *stack, void *tagged_ptr) { + CHECK(tagged_ptr); + HWASAN_FREE_HOOK(tagged_ptr); + + if (!PointerAndMemoryTagsMatch(tagged_ptr)) + ReportInvalidFree(stack, reinterpret_cast<uptr>(tagged_ptr)); - void *p = GetAddressFromPointer(user_ptr); - Metadata *meta = reinterpret_cast<Metadata *>(allocator.GetMetaData(p)); - uptr size = meta->requested_size; - meta->state = CHUNK_FREE; + void *untagged_ptr = UntagPtr(tagged_ptr); + Metadata *meta = + reinterpret_cast<Metadata *>(allocator.GetMetaData(untagged_ptr)); + uptr orig_size = meta->requested_size; + u32 free_context_id = StackDepotPut(*stack); + u32 alloc_context_id = meta->alloc_context_id; meta->requested_size = 0; - meta->free_context_id = StackDepotPut(*stack); + meta->alloc_context_id = 0; // This memory will not be reused by anyone else, so we are free to keep it // poisoned. - HwasanThread *t = GetCurrentThread(); + Thread *t = GetCurrentThread(); if (flags()->max_free_fill_size > 0) { - uptr fill_size = Min(size, (uptr)flags()->max_free_fill_size); - internal_memset(p, flags()->free_fill_byte, fill_size); + uptr fill_size = Min(orig_size, (uptr)flags()->max_free_fill_size); + internal_memset(untagged_ptr, flags()->free_fill_byte, fill_size); } if (flags()->tag_in_free && atomic_load_relaxed(&hwasan_allocator_tagging_enabled)) - TagMemoryAligned((uptr)p, size, + TagMemoryAligned((uptr)untagged_ptr, TaggedSize(orig_size), t ? t->GenerateRandomTag() : kFallbackFreeTag); if (t) { - AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage()); - allocator.Deallocate(cache, p); + allocator.Deallocate(t->allocator_cache(), untagged_ptr); + if (auto *ha = t->heap_allocations()) + ha->push({reinterpret_cast<uptr>(tagged_ptr), alloc_context_id, + free_context_id, static_cast<u32>(orig_size)}); } else { SpinMutexLock l(&fallback_mutex); AllocatorCache *cache = &fallback_allocator_cache; - allocator.Deallocate(cache, p); + allocator.Deallocate(cache, untagged_ptr); } } -void *HwasanReallocate(StackTrace *stack, void *user_old_p, uptr new_size, +void *HwasanReallocate(StackTrace *stack, void *tagged_ptr_old, uptr new_size, uptr alignment) { - alignment = Max(alignment, kShadowAlignment); - new_size = RoundUpTo(new_size, kShadowAlignment); - - void *old_p = GetAddressFromPointer(user_old_p); - Metadata *meta = reinterpret_cast<Metadata*>(allocator.GetMetaData(old_p)); - uptr old_size = meta->requested_size; - uptr actually_allocated_size = allocator.GetActuallyAllocatedSize(old_p); - if (new_size <= actually_allocated_size) { - // We are not reallocating here. - // FIXME: update stack trace for the allocation? 
- meta->requested_size = new_size; - if (!atomic_load_relaxed(&hwasan_allocator_tagging_enabled)) - return user_old_p; - if (flags()->retag_in_realloc) { - HwasanThread *t = GetCurrentThread(); - return (void *)TagMemoryAligned( - (uptr)old_p, new_size, - t ? t->GenerateRandomTag() : kFallbackAllocTag); - } - if (new_size > old_size) { - tag_t tag = GetTagFromPointer((uptr)user_old_p); - TagMemoryAligned((uptr)old_p + old_size, new_size - old_size, tag); - } - return user_old_p; - } - uptr memcpy_size = Min(new_size, old_size); - void *new_p = HwasanAllocate(stack, new_size, alignment, false /*zeroise*/); - if (new_p) { - internal_memcpy(new_p, old_p, memcpy_size); - HwasanDeallocate(stack, old_p); + if (!PointerAndMemoryTagsMatch(tagged_ptr_old)) + ReportInvalidFree(stack, reinterpret_cast<uptr>(tagged_ptr_old)); + + void *tagged_ptr_new = + HwasanAllocate(stack, new_size, alignment, false /*zeroise*/); + if (tagged_ptr_old && tagged_ptr_new) { + void *untagged_ptr_old = UntagPtr(tagged_ptr_old); + Metadata *meta = + reinterpret_cast<Metadata *>(allocator.GetMetaData(untagged_ptr_old)); + internal_memcpy(UntagPtr(tagged_ptr_new), untagged_ptr_old, + Min(new_size, static_cast<uptr>(meta->requested_size))); + HwasanDeallocate(stack, tagged_ptr_old); } - return new_p; + return tagged_ptr_new; } void *HwasanCalloc(StackTrace *stack, uptr nmemb, uptr size) { @@ -258,12 +201,12 @@ HwasanChunkView FindHeapChunkByAddress(uptr address) { return HwasanChunkView(reinterpret_cast<uptr>(block), metadata); } -static uptr AllocationSize(const void *user_ptr) { - const void *p = GetAddressFromPointer(user_ptr); - if (!p) return 0; - const void *beg = allocator.GetBlockBegin(p); - if (beg != p) return 0; - Metadata *b = (Metadata *)allocator.GetMetaData(p); +static uptr AllocationSize(const void *tagged_ptr) { + const void *untagged_ptr = UntagPtr(tagged_ptr); + if (!untagged_ptr) return 0; + const void *beg = allocator.GetBlockBegin(untagged_ptr); + if (beg != untagged_ptr) return 0; + Metadata *b = (Metadata *)allocator.GetMetaData(untagged_ptr); return b->requested_size; } diff --git a/lib/hwasan/hwasan_allocator.h b/lib/hwasan/hwasan_allocator.h index d025112e9..3589212d8 100644 --- a/lib/hwasan/hwasan_allocator.h +++ b/lib/hwasan/hwasan_allocator.h @@ -1,4 +1,4 @@ -//===-- hwasan_allocator.h ----------------------------------------*- C++ -*-===// +//===-- hwasan_allocator.h --------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -14,35 +14,69 @@ #ifndef HWASAN_ALLOCATOR_H #define HWASAN_ALLOCATOR_H +#include "sanitizer_common/sanitizer_allocator.h" +#include "sanitizer_common/sanitizer_allocator_checks.h" +#include "sanitizer_common/sanitizer_allocator_interface.h" +#include "sanitizer_common/sanitizer_allocator_report.h" #include "sanitizer_common/sanitizer_common.h" +#include "sanitizer_common/sanitizer_ring_buffer.h" +#include "hwasan_poisoning.h" + +#if !defined(__aarch64__) && !defined(__x86_64__) +#error Unsupported platform +#endif namespace __hwasan { -struct HwasanThreadLocalMallocStorage { - uptr quarantine_cache[16]; - // Allocator cache contains atomic_uint64_t which must be 8-byte aligned. - ALIGNED(8) uptr allocator_cache[96 * (512 * 8 + 16)]; // Opaque. - void CommitBack(); +struct Metadata { + u32 requested_size; // sizes are < 4G. + u32 alloc_context_id; +}; - private: - // These objects are allocated via mmap() and are zero-initialized. 
-  HwasanThreadLocalMallocStorage() {}
+struct HwasanMapUnmapCallback {
+  void OnMap(uptr p, uptr size) const { UpdateMemoryUsage(); }
+  void OnUnmap(uptr p, uptr size) const {
+    // We are about to unmap a chunk of user memory.
+    // It can be returned later as a user-requested mmap() or as another
+    // thread's stack. Make it accessible again with a zero-tagged pointer.
+    TagMemory(p, size, 0);
+  }
+};
+
+static const uptr kMaxAllowedMallocSize = 2UL << 30;  // 2G
+static const uptr kRegionSizeLog = 20;
+static const uptr kNumRegions = SANITIZER_MMAP_RANGE_SIZE >> kRegionSizeLog;
+typedef TwoLevelByteMap<(kNumRegions >> 12), 1 << 12> ByteMap;
+
+struct AP32 {
+  static const uptr kSpaceBeg = 0;
+  static const u64 kSpaceSize = SANITIZER_MMAP_RANGE_SIZE;
+  static const uptr kMetadataSize = sizeof(Metadata);
+  typedef __sanitizer::CompactSizeClassMap SizeClassMap;
+  static const uptr kRegionSizeLog = __hwasan::kRegionSizeLog;
+  typedef __hwasan::ByteMap ByteMap;
+  typedef HwasanMapUnmapCallback MapUnmapCallback;
+  static const uptr kFlags = 0;
 };
+typedef SizeClassAllocator32<AP32> PrimaryAllocator;
+typedef SizeClassAllocatorLocalCache<PrimaryAllocator> AllocatorCache;
+typedef LargeMmapAllocator<HwasanMapUnmapCallback> SecondaryAllocator;
+typedef CombinedAllocator<PrimaryAllocator, AllocatorCache,
+                          SecondaryAllocator> Allocator;
 
-struct Metadata;
+
+void AllocatorSwallowThreadLocalCache(AllocatorCache *cache);
 
 class HwasanChunkView {
  public:
   HwasanChunkView() : block_(0), metadata_(nullptr) {}
   HwasanChunkView(uptr block, Metadata *metadata)
       : block_(block), metadata_(metadata) {}
-  bool IsValid() const;        // Checks if it points to a valid allocated chunk
   bool IsAllocated() const;    // Checks if the memory is currently allocated
   uptr Beg() const;            // First byte of user memory
   uptr End() const;            // Last byte of user memory
   uptr UsedSize() const;       // Size requested by the user
   u32 GetAllocStackId() const;
-  u32 GetFreeStackId() const;
  private:
   uptr block_;
   Metadata *const metadata_;
@@ -50,6 +84,21 @@ class HwasanChunkView {
 
 HwasanChunkView FindHeapChunkByAddress(uptr address);
 
+// Information about one (de)allocation that happened in the past.
+// These are recorded in a thread-local ring buffer.
+// TODO: this is currently 24 bytes (20 bytes + alignment).
+// Compress it to 16 bytes or extend it to be more useful.
+struct HeapAllocationRecord {
+  uptr tagged_addr;
+  u32 alloc_context_id;
+  u32 free_context_id;
+  u32 requested_size;
+};
+
+typedef RingBuffer<HeapAllocationRecord> HeapAllocationsRingBuffer;
+
+void GetAllocatorStats(AllocatorStatCounters s);
+
 }  // namespace __hwasan
 
 #endif  // HWASAN_ALLOCATOR_H
diff --git a/lib/hwasan/hwasan_dynamic_shadow.cc b/lib/hwasan/hwasan_dynamic_shadow.cc
index 17338003a..87670f508 100644
--- a/lib/hwasan/hwasan_dynamic_shadow.cc
+++ b/lib/hwasan/hwasan_dynamic_shadow.cc
@@ -13,6 +13,7 @@
 ///
 //===----------------------------------------------------------------------===//
 
+#include "hwasan.h"
 #include "hwasan_dynamic_shadow.h"
 #include "hwasan_mapping.h"
 #include "sanitizer_common/sanitizer_common.h"
@@ -35,12 +36,16 @@ static void UnmapFromTo(uptr from, uptr to) {
   }
 }
 
-// Returns an address aligned to 8 pages, such that one page on the left and
-// shadow_size_bytes bytes on the right of it are mapped r/o.
+// Returns an address aligned to kShadowBaseAlignment, such that
+// 2**kShadowBaseAlignment bytes on the left and shadow_size_bytes bytes on the
+// right of it are mapped no access.
 static uptr MapDynamicShadow(uptr shadow_size_bytes) {
   const uptr granularity = GetMmapGranularity();
-  const uptr alignment = granularity * SHADOW_GRANULARITY;
-  const uptr left_padding = granularity;
+  const uptr min_alignment = granularity << kShadowScale;
+  const uptr alignment = 1ULL << kShadowBaseAlignment;
+  CHECK_GE(alignment, min_alignment);
+
+  const uptr left_padding = 1ULL << kShadowBaseAlignment;
   const uptr shadow_size = RoundUpTo(shadow_size_bytes, granularity);
   const uptr map_size = shadow_size + left_padding + alignment;
 
@@ -58,8 +63,7 @@ static uptr MapDynamicShadow(uptr shadow_size_bytes) {
 
 }  // namespace __hwasan
 
-#if HWASAN_PREMAP_SHADOW
-
+#if SANITIZER_ANDROID
 extern "C" {
 
 INTERFACE_ATTRIBUTE void __hwasan_shadow();
@@ -117,16 +121,22 @@ void __hwasan_shadow();
 
 }  // extern "C"
 
-#endif  // HWASAN_PREMAP_SHADOW
-
 namespace __hwasan {
 
 uptr FindDynamicShadowStart(uptr shadow_size_bytes) {
-#if HWASAN_PREMAP_SHADOW
   if (IsPremapShadowAvailable())
     return FindPremappedShadowStart(shadow_size_bytes);
-#endif
   return MapDynamicShadow(shadow_size_bytes);
 }
 
 }  // namespace __hwasan
+#else
+namespace __hwasan {
+
+uptr FindDynamicShadowStart(uptr shadow_size_bytes) {
+  return MapDynamicShadow(shadow_size_bytes);
+}
+
+}  // namespace __hwasan
+
+#endif  // SANITIZER_ANDROID
diff --git a/lib/hwasan/hwasan_flags.h b/lib/hwasan/hwasan_flags.h
index 16d60c4d8..492d5bb98 100644
--- a/lib/hwasan/hwasan_flags.h
+++ b/lib/hwasan/hwasan_flags.h
@@ -1,4 +1,4 @@
-//===-- hwasan_flags.h --------------------------------------------*- C++ -*-===//
+//===-- hwasan_flags.h ------------------------------------------*- C++ -*-===//
 //
 // The LLVM Compiler Infrastructure
 //
diff --git a/lib/hwasan/hwasan_flags.inc b/lib/hwasan/hwasan_flags.inc
index 28122dd42..f1b416d15 100644
--- a/lib/hwasan/hwasan_flags.inc
+++ b/lib/hwasan/hwasan_flags.inc
@@ -17,9 +17,10 @@
 // HWASAN_FLAG(Type, Name, DefaultValue, Description)
 // See COMMON_FLAG in sanitizer_flags.inc for more details.
 
+HWASAN_FLAG(bool, verbose_threads, false,
+            "inform on thread creation/destruction")
 HWASAN_FLAG(bool, tag_in_malloc, true, "")
 HWASAN_FLAG(bool, tag_in_free, true, "")
-HWASAN_FLAG(bool, retag_in_realloc, true, "")
 HWASAN_FLAG(bool, print_stats, false, "")
 HWASAN_FLAG(bool, halt_on_error, true, "")
 HWASAN_FLAG(bool, atexit, false, "")
@@ -44,3 +45,13 @@ HWASAN_FLAG(int, malloc_fill_byte, 0xbe,
            "Value used to fill the newly allocated memory.")
 HWASAN_FLAG(int, free_fill_byte, 0x55,
            "Value used to fill deallocated memory.")
+HWASAN_FLAG(int, heap_history_size, 1023,
+            "The number of heap (de)allocations remembered per thread. "
+            "Affects the quality of heap-related reports, but not the ability "
+            "to find bugs.")
+HWASAN_FLAG(bool, export_memory_stats, true,
+            "Export up-to-date memory stats through /proc")
+HWASAN_FLAG(int, stack_history_size, 1024,
+            "The number of stack frames remembered per thread. 
" + "Affects the quality of stack-related reports, but not the ability " + "to find bugs.") diff --git a/lib/hwasan/hwasan_interceptors.cc b/lib/hwasan/hwasan_interceptors.cc index 08041079e..9a0770f56 100644 --- a/lib/hwasan/hwasan_interceptors.cc +++ b/lib/hwasan/hwasan_interceptors.cc @@ -1,4 +1,4 @@ -//===-- hwasan_interceptors.cc ----------------------------------------------===// +//===-- hwasan_interceptors.cc --------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -50,18 +50,18 @@ DECLARE_REAL(void *, memcpy, void *dest, const void *src, uptr n) DECLARE_REAL(void *, memset, void *dest, int c, uptr n) bool IsInInterceptorScope() { - HwasanThread *t = GetCurrentThread(); + Thread *t = GetCurrentThread(); return t && t->InInterceptorScope(); } struct InterceptorScope { InterceptorScope() { - HwasanThread *t = GetCurrentThread(); + Thread *t = GetCurrentThread(); if (t) t->EnterInterceptorScope(); } ~InterceptorScope() { - HwasanThread *t = GetCurrentThread(); + Thread *t = GetCurrentThread(); if (t) t->LeaveInterceptorScope(); } @@ -128,49 +128,24 @@ static void *AllocateFromLocalPool(uptr size_in_bytes) { CHECK_UNPOISONED((x), \ common_flags()->strict_string_checks ? (len) + 1 : (n) ) -#define SANITIZER_ALIAS(RET, FN, ARGS...) \ - extern "C" SANITIZER_INTERFACE_ATTRIBUTE RET __sanitizer_##FN(ARGS) \ - ALIAS(WRAPPER_NAME(FN)); - -SANITIZER_ALIAS(int, posix_memalign, void **memptr, SIZE_T alignment, SIZE_T size); -SANITIZER_ALIAS(void *, memalign, SIZE_T alignment, SIZE_T size); -SANITIZER_ALIAS(void *, aligned_alloc, SIZE_T alignment, SIZE_T size); -SANITIZER_ALIAS(void *, __libc_memalign, SIZE_T alignment, SIZE_T size); -SANITIZER_ALIAS(void *, valloc, SIZE_T size); -SANITIZER_ALIAS(void *, pvalloc, SIZE_T size); -SANITIZER_ALIAS(void, free, void *ptr); -SANITIZER_ALIAS(void, cfree, void *ptr); -SANITIZER_ALIAS(uptr, malloc_usable_size, const void *ptr); -SANITIZER_ALIAS(void, mallinfo, __sanitizer_struct_mallinfo *sret); -SANITIZER_ALIAS(int, mallopt, int cmd, int value); -SANITIZER_ALIAS(void, malloc_stats, void); -SANITIZER_ALIAS(void *, calloc, SIZE_T nmemb, SIZE_T size); -SANITIZER_ALIAS(void *, realloc, void *ptr, SIZE_T size); -SANITIZER_ALIAS(void *, malloc, SIZE_T size); - -INTERCEPTOR(int, posix_memalign, void **memptr, SIZE_T alignment, SIZE_T size) { +int __sanitizer_posix_memalign(void **memptr, uptr alignment, uptr size) { GET_MALLOC_STACK_TRACE; CHECK_NE(memptr, 0); int res = hwasan_posix_memalign(memptr, alignment, size, &stack); return res; } -#if !SANITIZER_FREEBSD && !SANITIZER_NETBSD -INTERCEPTOR(void *, memalign, SIZE_T alignment, SIZE_T size) { +void * __sanitizer_memalign(uptr alignment, uptr size) { GET_MALLOC_STACK_TRACE; return hwasan_memalign(alignment, size, &stack); } -#define HWASAN_MAYBE_INTERCEPT_MEMALIGN INTERCEPT_FUNCTION(memalign) -#else -#define HWASAN_MAYBE_INTERCEPT_MEMALIGN -#endif -INTERCEPTOR(void *, aligned_alloc, SIZE_T alignment, SIZE_T size) { +void * __sanitizer_aligned_alloc(uptr alignment, uptr size) { GET_MALLOC_STACK_TRACE; return hwasan_aligned_alloc(alignment, size, &stack); } -INTERCEPTOR(void *, __libc_memalign, SIZE_T alignment, SIZE_T size) { +void * __sanitizer___libc_memalign(uptr alignment, uptr size) { GET_MALLOC_STACK_TRACE; void *ptr = hwasan_memalign(alignment, size, &stack); if (ptr) @@ -178,80 +153,47 @@ INTERCEPTOR(void *, __libc_memalign, SIZE_T alignment, SIZE_T size) { return ptr; } -INTERCEPTOR(void *, valloc, SIZE_T size) { +void * __sanitizer_valloc(uptr size) { 
GET_MALLOC_STACK_TRACE; return hwasan_valloc(size, &stack); } -#if !SANITIZER_FREEBSD && !SANITIZER_NETBSD -INTERCEPTOR(void *, pvalloc, SIZE_T size) { +void * __sanitizer_pvalloc(uptr size) { GET_MALLOC_STACK_TRACE; return hwasan_pvalloc(size, &stack); } -#define HWASAN_MAYBE_INTERCEPT_PVALLOC INTERCEPT_FUNCTION(pvalloc) -#else -#define HWASAN_MAYBE_INTERCEPT_PVALLOC -#endif -INTERCEPTOR(void, free, void *ptr) { +void __sanitizer_free(void *ptr) { GET_MALLOC_STACK_TRACE; if (!ptr || UNLIKELY(IsInDlsymAllocPool(ptr))) return; HwasanDeallocate(&stack, ptr); } -#if !SANITIZER_FREEBSD && !SANITIZER_NETBSD -INTERCEPTOR(void, cfree, void *ptr) { +void __sanitizer_cfree(void *ptr) { GET_MALLOC_STACK_TRACE; if (!ptr || UNLIKELY(IsInDlsymAllocPool(ptr))) return; HwasanDeallocate(&stack, ptr); } -#define HWASAN_MAYBE_INTERCEPT_CFREE INTERCEPT_FUNCTION(cfree) -#else -#define HWASAN_MAYBE_INTERCEPT_CFREE -#endif -INTERCEPTOR(uptr, malloc_usable_size, void *ptr) { +uptr __sanitizer_malloc_usable_size(const void *ptr) { return __sanitizer_get_allocated_size(ptr); } -#if !SANITIZER_FREEBSD && !SANITIZER_NETBSD -// This function actually returns a struct by value, but we can't unpoison a -// temporary! The following is equivalent on all supported platforms but -// aarch64 (which uses a different register for sret value). We have a test -// to confirm that. -INTERCEPTOR(void, mallinfo, __sanitizer_struct_mallinfo *sret) { -#ifdef __aarch64__ - uptr r8; - asm volatile("mov %0,x8" : "=r" (r8)); - sret = reinterpret_cast<__sanitizer_struct_mallinfo*>(r8); -#endif - REAL(memset)(sret, 0, sizeof(*sret)); +struct __sanitizer_struct_mallinfo __sanitizer_mallinfo() { + __sanitizer_struct_mallinfo sret; + internal_memset(&sret, 0, sizeof(sret)); + return sret; } -#define HWASAN_MAYBE_INTERCEPT_MALLINFO INTERCEPT_FUNCTION(mallinfo) -#else -#define HWASAN_MAYBE_INTERCEPT_MALLINFO -#endif -#if !SANITIZER_FREEBSD && !SANITIZER_NETBSD -INTERCEPTOR(int, mallopt, int cmd, int value) { +int __sanitizer_mallopt(int cmd, int value) { return -1; } -#define HWASAN_MAYBE_INTERCEPT_MALLOPT INTERCEPT_FUNCTION(mallopt) -#else -#define HWASAN_MAYBE_INTERCEPT_MALLOPT -#endif -#if !SANITIZER_FREEBSD && !SANITIZER_NETBSD -INTERCEPTOR(void, malloc_stats, void) { +void __sanitizer_malloc_stats(void) { // FIXME: implement, but don't call REAL(malloc_stats)! } -#define HWASAN_MAYBE_INTERCEPT_MALLOC_STATS INTERCEPT_FUNCTION(malloc_stats) -#else -#define HWASAN_MAYBE_INTERCEPT_MALLOC_STATS -#endif - -INTERCEPTOR(void *, calloc, SIZE_T nmemb, SIZE_T size) { +void * __sanitizer_calloc(uptr nmemb, uptr size) { GET_MALLOC_STACK_TRACE; if (UNLIKELY(!hwasan_inited)) // Hack: dlsym calls calloc before REAL(calloc) is retrieved from dlsym. 
@@ -259,7 +201,7 @@ INTERCEPTOR(void *, calloc, SIZE_T nmemb, SIZE_T size) { return hwasan_calloc(nmemb, size, &stack); } -INTERCEPTOR(void *, realloc, void *ptr, SIZE_T size) { +void * __sanitizer_realloc(void *ptr, uptr size) { GET_MALLOC_STACK_TRACE; if (UNLIKELY(IsInDlsymAllocPool(ptr))) { uptr offset = (uptr)ptr - (uptr)alloc_memory_for_dlsym; @@ -277,7 +219,7 @@ INTERCEPTOR(void *, realloc, void *ptr, SIZE_T size) { return hwasan_realloc(ptr, size, &stack); } -INTERCEPTOR(void *, malloc, SIZE_T size) { +void * __sanitizer_malloc(uptr size) { GET_MALLOC_STACK_TRACE; if (UNLIKELY(!hwasan_init_is_running)) ENSURE_HWASAN_INITED(); @@ -287,48 +229,62 @@ INTERCEPTOR(void *, malloc, SIZE_T size) { return hwasan_malloc(size, &stack); } -template <class Mmap> -static void *mmap_interceptor(Mmap real_mmap, void *addr, SIZE_T sz, int prot, - int flags, int fd, OFF64_T off) { - if (addr && !MEM_IS_APP(addr)) { - if (flags & map_fixed) { - errno = errno_EINVAL; - return (void *)-1; - } else { - addr = nullptr; - } - } - return real_mmap(addr, sz, prot, flags, fd, off); -} +#if HWASAN_WITH_INTERCEPTORS +#define INTERCEPTOR_ALIAS(RET, FN, ARGS...) \ + extern "C" SANITIZER_INTERFACE_ATTRIBUTE RET WRAP(FN)(ARGS) \ + ALIAS("__sanitizer_" #FN); \ + extern "C" SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE RET FN( \ + ARGS) ALIAS("__sanitizer_" #FN) + +INTERCEPTOR_ALIAS(int, posix_memalign, void **memptr, SIZE_T alignment, + SIZE_T size); +INTERCEPTOR_ALIAS(void *, aligned_alloc, SIZE_T alignment, SIZE_T size); +INTERCEPTOR_ALIAS(void *, __libc_memalign, SIZE_T alignment, SIZE_T size); +INTERCEPTOR_ALIAS(void *, valloc, SIZE_T size); +INTERCEPTOR_ALIAS(void, free, void *ptr); +INTERCEPTOR_ALIAS(uptr, malloc_usable_size, const void *ptr); +INTERCEPTOR_ALIAS(void *, calloc, SIZE_T nmemb, SIZE_T size); +INTERCEPTOR_ALIAS(void *, realloc, void *ptr, SIZE_T size); +INTERCEPTOR_ALIAS(void *, malloc, SIZE_T size); + +#if !SANITIZER_FREEBSD && !SANITIZER_NETBSD +INTERCEPTOR_ALIAS(void *, memalign, SIZE_T alignment, SIZE_T size); +INTERCEPTOR_ALIAS(void *, pvalloc, SIZE_T size); +INTERCEPTOR_ALIAS(void, cfree, void *ptr); +INTERCEPTOR_ALIAS(__sanitizer_struct_mallinfo, mallinfo); +INTERCEPTOR_ALIAS(int, mallopt, int cmd, int value); +INTERCEPTOR_ALIAS(void, malloc_stats, void); +#endif +#endif // HWASAN_WITH_INTERCEPTORS + +#if HWASAN_WITH_INTERCEPTORS extern "C" int pthread_attr_init(void *attr); extern "C" int pthread_attr_destroy(void *attr); +struct ThreadStartArg { + thread_callback_t callback; + void *param; +}; + static void *HwasanThreadStartFunc(void *arg) { - HwasanThread *t = (HwasanThread *)arg; - SetCurrentThread(t); - return t->ThreadStart(); + __hwasan_thread_enter(); + ThreadStartArg A = *reinterpret_cast<ThreadStartArg*>(arg); + UnmapOrDie(arg, GetPageSizeCached()); + return A.callback(A.param); } INTERCEPTOR(int, pthread_create, void *th, void *attr, void *(*callback)(void*), void * param) { - ENSURE_HWASAN_INITED(); // for GetTlsSize() - __sanitizer_pthread_attr_t myattr; - if (!attr) { - pthread_attr_init(&myattr); - attr = &myattr; - } - - AdjustStackSize(attr); - - HwasanThread *t = HwasanThread::Create(callback, param); - - int res = REAL(pthread_create)(th, attr, HwasanThreadStartFunc, t); - - if (attr == &myattr) - pthread_attr_destroy(&myattr); + ScopedTaggingDisabler disabler; + ThreadStartArg *A = reinterpret_cast<ThreadStartArg *> (MmapOrDie( + GetPageSizeCached(), "pthread_create")); + *A = {callback, param}; + int res = REAL(pthread_create)(UntagPtr(th), UntagPtr(attr), + 
&HwasanThreadStartFunc, A); return res; } +#endif // HWASAN_WITH_INTERCEPTORS static void BeforeFork() { StackDepotLockAll(); @@ -360,151 +316,18 @@ int OnExit() { } // namespace __hwasan -// A version of CHECK_UNPOISONED using a saved scope value. Used in common -// interceptors. -#define CHECK_UNPOISONED_CTX(ctx, x, n) \ - do { \ - if (!((HwasanInterceptorContext *)ctx)->in_interceptor_scope) \ - CHECK_UNPOISONED_0(x, n); \ - } while (0) - -#define HWASAN_INTERCEPT_FUNC(name) \ - do { \ - if ((!INTERCEPT_FUNCTION(name) || !REAL(name))) \ - VReport(1, "HWAddressSanitizer: failed to intercept '" #name "'\n"); \ - } while (0) - -#define HWASAN_INTERCEPT_FUNC_VER(name, ver) \ - do { \ - if ((!INTERCEPT_FUNCTION_VER(name, ver) || !REAL(name))) \ - VReport( \ - 1, "HWAddressSanitizer: failed to intercept '" #name "@@" #ver "'\n"); \ - } while (0) - -#define COMMON_INTERCEPT_FUNCTION(name) HWASAN_INTERCEPT_FUNC(name) -#define COMMON_INTERCEPT_FUNCTION_VER(name, ver) \ - HWASAN_INTERCEPT_FUNC_VER(name, ver) -#define COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, size) \ - CHECK_UNPOISONED_CTX(ctx, ptr, size) -#define COMMON_INTERCEPTOR_READ_RANGE(ctx, ptr, size) \ - CHECK_UNPOISONED_CTX(ctx, ptr, size) -#define COMMON_INTERCEPTOR_INITIALIZE_RANGE(ptr, size) \ - HWASAN_WRITE_RANGE(ctx, ptr, size) -#define COMMON_INTERCEPTOR_ENTER(ctx, func, ...) \ - if (hwasan_init_is_running) return REAL(func)(__VA_ARGS__); \ - ENSURE_HWASAN_INITED(); \ - HwasanInterceptorContext hwasan_ctx = {IsInInterceptorScope()}; \ - ctx = (void *)&hwasan_ctx; \ - (void)ctx; \ - InterceptorScope interceptor_scope; -#define COMMON_INTERCEPTOR_DIR_ACQUIRE(ctx, path) \ - do { \ - } while (false) -#define COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd) \ - do { \ - } while (false) -#define COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd) \ - do { \ - } while (false) -#define COMMON_INTERCEPTOR_FD_SOCKET_ACCEPT(ctx, fd, newfd) \ - do { \ - } while (false) -#define COMMON_INTERCEPTOR_SET_THREAD_NAME(ctx, name) \ - do { \ - } while (false) // FIXME -#define COMMON_INTERCEPTOR_SET_PTHREAD_NAME(ctx, thread, name) \ - do { \ - } while (false) // FIXME -#define COMMON_INTERCEPTOR_BLOCK_REAL(name) REAL(name) -#define COMMON_INTERCEPTOR_ON_EXIT(ctx) OnExit() - -#define COMMON_INTERCEPTOR_GET_TLS_RANGE(begin, end) \ - if (HwasanThread *t = GetCurrentThread()) { \ - *begin = t->tls_begin(); \ - *end = t->tls_end(); \ - } else { \ - *begin = *end = 0; \ - } - -// AArch64 has TBI and can (and must!) pass the pointer to system memset as-is. -// Other platforms need to remove the tag. 
-#if defined(__aarch64__) -#define COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, dst, v, size) \ - { \ - COMMON_INTERCEPTOR_ENTER(ctx, memset, dst, v, size); \ - if (common_flags()->intercept_intrin && \ - MEM_IS_APP(GetAddressFromPointer(dst))) \ - COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, size); \ - return REAL(memset)(dst, v, size); \ - } -#else -#define COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, dst, v, size) \ - { \ - COMMON_INTERCEPTOR_ENTER(ctx, memset, dst, v, size); \ - if (common_flags()->intercept_intrin && \ - MEM_IS_APP(GetAddressFromPointer(dst))) \ - COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, size); \ - return REAL(memset)(GetAddressFromPointer(dst), v, size); \ - } -#endif - -#define COMMON_INTERCEPTOR_MMAP_IMPL(ctx, mmap, addr, length, prot, flags, fd, \ - offset) \ - do { \ - return mmap_interceptor(REAL(mmap), addr, length, prot, flags, fd, \ - offset); \ - } while (false) - -#include "sanitizer_common/sanitizer_platform_interceptors.h" -#include "sanitizer_common/sanitizer_common_interceptors.inc" -#include "sanitizer_common/sanitizer_signal_interceptors.inc" - -#define COMMON_SYSCALL_PRE_READ_RANGE(p, s) CHECK_UNPOISONED(p, s) -#define COMMON_SYSCALL_PRE_WRITE_RANGE(p, s) \ - do { \ - (void)(p); \ - (void)(s); \ - } while (false) -#define COMMON_SYSCALL_POST_READ_RANGE(p, s) \ - do { \ - (void)(p); \ - (void)(s); \ - } while (false) -#define COMMON_SYSCALL_POST_WRITE_RANGE(p, s) \ - do { \ - (void)(p); \ - (void)(s); \ - } while (false) -#include "sanitizer_common/sanitizer_common_syscalls.inc" -#include "sanitizer_common/sanitizer_syscalls_netbsd.inc" - - - namespace __hwasan { void InitializeInterceptors() { static int inited = 0; CHECK_EQ(inited, 0); - InitializeCommonInterceptors(); - InitializeSignalInterceptors(); - - INTERCEPT_FUNCTION(posix_memalign); - HWASAN_MAYBE_INTERCEPT_MEMALIGN; - INTERCEPT_FUNCTION(__libc_memalign); - INTERCEPT_FUNCTION(valloc); - HWASAN_MAYBE_INTERCEPT_PVALLOC; - INTERCEPT_FUNCTION(malloc); - INTERCEPT_FUNCTION(calloc); - INTERCEPT_FUNCTION(realloc); - INTERCEPT_FUNCTION(free); - HWASAN_MAYBE_INTERCEPT_CFREE; - INTERCEPT_FUNCTION(malloc_usable_size); - HWASAN_MAYBE_INTERCEPT_MALLINFO; - HWASAN_MAYBE_INTERCEPT_MALLOPT; - HWASAN_MAYBE_INTERCEPT_MALLOC_STATS; - INTERCEPT_FUNCTION(pthread_create); + INTERCEPT_FUNCTION(fork); +#if HWASAN_WITH_INTERCEPTORS + INTERCEPT_FUNCTION(pthread_create); +#endif + inited = 1; } } // namespace __hwasan diff --git a/lib/hwasan/hwasan_interface_internal.h b/lib/hwasan/hwasan_interface_internal.h index 12c445409..448997e5e 100644 --- a/lib/hwasan/hwasan_interface_internal.h +++ b/lib/hwasan/hwasan_interface_internal.h @@ -1,4 +1,4 @@ -//===-- hwasan_interface_internal.h -------------------------------*- C++ -*-===// +//===-- hwasan_interface_internal.h -----------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -16,6 +16,7 @@ #define HWASAN_INTERFACE_INTERNAL_H #include "sanitizer_common/sanitizer_internal_defs.h" +#include "sanitizer_common/sanitizer_platform_limits_posix.h" extern "C" { @@ -137,6 +138,59 @@ void __hwasan_enable_allocator_tagging(); SANITIZER_INTERFACE_ATTRIBUTE void __hwasan_disable_allocator_tagging(); +SANITIZER_INTERFACE_ATTRIBUTE +void __hwasan_thread_enter(); + +SANITIZER_INTERFACE_ATTRIBUTE +void __hwasan_thread_exit(); + +SANITIZER_INTERFACE_ATTRIBUTE +void __hwasan_print_memory_usage(); + +SANITIZER_INTERFACE_ATTRIBUTE +int __sanitizer_posix_memalign(void **memptr, uptr alignment, uptr size); + +SANITIZER_INTERFACE_ATTRIBUTE +void * __sanitizer_memalign(uptr alignment, 
uptr size); + +SANITIZER_INTERFACE_ATTRIBUTE +void * __sanitizer_aligned_alloc(uptr alignment, uptr size); + +SANITIZER_INTERFACE_ATTRIBUTE +void * __sanitizer___libc_memalign(uptr alignment, uptr size); + +SANITIZER_INTERFACE_ATTRIBUTE +void * __sanitizer_valloc(uptr size); + +SANITIZER_INTERFACE_ATTRIBUTE +void * __sanitizer_pvalloc(uptr size); + +SANITIZER_INTERFACE_ATTRIBUTE +void __sanitizer_free(void *ptr); + +SANITIZER_INTERFACE_ATTRIBUTE +void __sanitizer_cfree(void *ptr); + +SANITIZER_INTERFACE_ATTRIBUTE +uptr __sanitizer_malloc_usable_size(const void *ptr); + +SANITIZER_INTERFACE_ATTRIBUTE +__hwasan::__sanitizer_struct_mallinfo __sanitizer_mallinfo(); + +SANITIZER_INTERFACE_ATTRIBUTE +int __sanitizer_mallopt(int cmd, int value); + +SANITIZER_INTERFACE_ATTRIBUTE +void __sanitizer_malloc_stats(void); + +SANITIZER_INTERFACE_ATTRIBUTE +void * __sanitizer_calloc(uptr nmemb, uptr size); + +SANITIZER_INTERFACE_ATTRIBUTE +void * __sanitizer_realloc(void *ptr, uptr size); + +SANITIZER_INTERFACE_ATTRIBUTE +void * __sanitizer_malloc(uptr size); } // extern "C" #endif // HWASAN_INTERFACE_INTERNAL_H diff --git a/lib/hwasan/hwasan_linux.cc b/lib/hwasan/hwasan_linux.cc index 0ddcc808e..f8e83ff03 100644 --- a/lib/hwasan/hwasan_linux.cc +++ b/lib/hwasan/hwasan_linux.cc @@ -22,6 +22,7 @@ #include "hwasan_mapping.h" #include "hwasan_report.h" #include "hwasan_thread.h" +#include "hwasan_thread_list.h" #include <elf.h> #include <link.h> @@ -37,6 +38,10 @@ #include "sanitizer_common/sanitizer_common.h" #include "sanitizer_common/sanitizer_procmaps.h" +#if HWASAN_WITH_INTERCEPTORS && !SANITIZER_ANDROID +THREADLOCAL uptr __hwasan_tls; +#endif + namespace __hwasan { static void ReserveShadowMemoryRange(uptr beg, uptr end, const char *name) { @@ -101,55 +106,31 @@ static void PrintAddressSpaceLayout() { else CHECK_EQ(kHighShadowEnd + 1, kHighMemStart); PrintRange(kHighShadowStart, kHighShadowEnd, "HighShadow"); - if (SHADOW_OFFSET) { - if (kLowShadowEnd + 1 < kHighShadowStart) - PrintRange(kLowShadowEnd + 1, kHighShadowStart - 1, "ShadowGap"); - else - CHECK_EQ(kLowMemEnd + 1, kHighShadowStart); - PrintRange(kLowShadowStart, kLowShadowEnd, "LowShadow"); - if (kLowMemEnd + 1 < kLowShadowStart) - PrintRange(kLowMemEnd + 1, kLowShadowStart - 1, "ShadowGap"); - else - CHECK_EQ(kLowMemEnd + 1, kLowShadowStart); - PrintRange(kLowMemStart, kLowMemEnd, "LowMem"); - CHECK_EQ(0, kLowMemStart); - } else { - if (kLowMemEnd + 1 < kHighShadowStart) - PrintRange(kLowMemEnd + 1, kHighShadowStart - 1, "ShadowGap"); - else - CHECK_EQ(kLowMemEnd + 1, kHighShadowStart); - PrintRange(kLowMemStart, kLowMemEnd, "LowMem"); - CHECK_EQ(kLowShadowEnd + 1, kLowMemStart); - PrintRange(kLowShadowStart, kLowShadowEnd, "LowShadow"); - PrintRange(0, kLowShadowStart - 1, "ShadowGap"); - } + if (kLowShadowEnd + 1 < kHighShadowStart) + PrintRange(kLowShadowEnd + 1, kHighShadowStart - 1, "ShadowGap"); + else + CHECK_EQ(kLowMemEnd + 1, kHighShadowStart); + PrintRange(kLowShadowStart, kLowShadowEnd, "LowShadow"); + if (kLowMemEnd + 1 < kLowShadowStart) + PrintRange(kLowMemEnd + 1, kLowShadowStart - 1, "ShadowGap"); + else + CHECK_EQ(kLowMemEnd + 1, kLowShadowStart); + PrintRange(kLowMemStart, kLowMemEnd, "LowMem"); + CHECK_EQ(0, kLowMemStart); } static uptr GetHighMemEnd() { // HighMem covers the upper part of the address space. 
uptr max_address = GetMaxUserVirtualAddress(); - if (SHADOW_OFFSET) - // Adjust max address to make sure that kHighMemEnd and kHighMemStart are - // properly aligned: - max_address |= SHADOW_GRANULARITY * GetMmapGranularity() - 1; + // Adjust max address to make sure that kHighMemEnd and kHighMemStart are + // properly aligned: + max_address |= (GetMmapGranularity() << kShadowScale) - 1; return max_address; } static void InitializeShadowBaseAddress(uptr shadow_size_bytes) { - // Set the shadow memory address to uninitialized. - __hwasan_shadow_memory_dynamic_address = kDefaultShadowSentinel; - uptr shadow_start = SHADOW_OFFSET; - // Detect if a dynamic shadow address must be used and find the available - // location when necessary. When dynamic address is used, the macro - // kLowShadowBeg expands to __hwasan_shadow_memory_dynamic_address which - // was just set to kDefaultShadowSentinel. - if (shadow_start == kDefaultShadowSentinel) { - __hwasan_shadow_memory_dynamic_address = 0; - CHECK_EQ(0, SHADOW_OFFSET); - shadow_start = FindDynamicShadowStart(shadow_size_bytes); - } - // Update the shadow memory address (potentially) used by instrumentation. - __hwasan_shadow_memory_dynamic_address = shadow_start; + __hwasan_shadow_memory_dynamic_address = + FindDynamicShadowStart(shadow_size_bytes); } bool InitShadow() { @@ -157,29 +138,23 @@ bool InitShadow() { kHighMemEnd = GetHighMemEnd(); // Determine shadow memory base offset. - InitializeShadowBaseAddress(MEM_TO_SHADOW_SIZE(kHighMemEnd)); + InitializeShadowBaseAddress(MemToShadowSize(kHighMemEnd)); // Place the low memory first. - if (SHADOW_OFFSET) { - kLowMemEnd = SHADOW_OFFSET - 1; - kLowMemStart = 0; - } else { - // LowMem covers as much of the first 4GB as possible. - kLowMemEnd = (1UL << 32) - 1; - kLowMemStart = MEM_TO_SHADOW(kLowMemEnd) + 1; - } + kLowMemEnd = __hwasan_shadow_memory_dynamic_address - 1; + kLowMemStart = 0; // Define the low shadow based on the already placed low memory. - kLowShadowEnd = MEM_TO_SHADOW(kLowMemEnd); - kLowShadowStart = SHADOW_OFFSET ? SHADOW_OFFSET : MEM_TO_SHADOW(kLowMemStart); + kLowShadowEnd = MemToShadow(kLowMemEnd); + kLowShadowStart = __hwasan_shadow_memory_dynamic_address; // High shadow takes whatever memory is left up there (making sure it is not // interfering with low memory in the fixed case). - kHighShadowEnd = MEM_TO_SHADOW(kHighMemEnd); - kHighShadowStart = Max(kLowMemEnd, MEM_TO_SHADOW(kHighShadowEnd)) + 1; + kHighShadowEnd = MemToShadow(kHighMemEnd); + kHighShadowStart = Max(kLowMemEnd, MemToShadow(kHighShadowEnd)) + 1; // High memory starts where allocated shadow allows. - kHighMemStart = SHADOW_TO_MEM(kHighShadowStart); + kHighMemStart = ShadowToMem(kHighShadowStart); // Check the sanity of the defined memory ranges (there might be gaps). CHECK_EQ(kHighMemStart % GetMmapGranularity(), 0); @@ -188,10 +163,7 @@ bool InitShadow() { CHECK_GT(kHighShadowStart, kLowMemEnd); CHECK_GT(kLowMemEnd, kLowMemStart); CHECK_GT(kLowShadowEnd, kLowShadowStart); - if (SHADOW_OFFSET) - CHECK_GT(kLowShadowStart, kLowMemEnd); - else - CHECK_GT(kLowMemEnd, kLowShadowStart); + CHECK_GT(kLowShadowStart, kLowMemEnd); if (Verbosity()) PrintAddressSpaceLayout(); @@ -202,21 +174,30 @@ bool InitShadow() { // Protect all the gaps. 
ProtectGap(0, Min(kLowMemStart, kLowShadowStart)); - if (SHADOW_OFFSET) { - if (kLowMemEnd + 1 < kLowShadowStart) - ProtectGap(kLowMemEnd + 1, kLowShadowStart - kLowMemEnd - 1); - if (kLowShadowEnd + 1 < kHighShadowStart) - ProtectGap(kLowShadowEnd + 1, kHighShadowStart - kLowShadowEnd - 1); - } else { - if (kLowMemEnd + 1 < kHighShadowStart) - ProtectGap(kLowMemEnd + 1, kHighShadowStart - kLowMemEnd - 1); - } + if (kLowMemEnd + 1 < kLowShadowStart) + ProtectGap(kLowMemEnd + 1, kLowShadowStart - kLowMemEnd - 1); + if (kLowShadowEnd + 1 < kHighShadowStart) + ProtectGap(kLowShadowEnd + 1, kHighShadowStart - kLowShadowEnd - 1); if (kHighShadowEnd + 1 < kHighMemStart) ProtectGap(kHighShadowEnd + 1, kHighMemStart - kHighShadowEnd - 1); return true; } +void InitThreads() { + CHECK(__hwasan_shadow_memory_dynamic_address); + uptr guard_page_size = GetMmapGranularity(); + uptr thread_space_start = + __hwasan_shadow_memory_dynamic_address - (1ULL << kShadowBaseAlignment); + uptr thread_space_end = + __hwasan_shadow_memory_dynamic_address - guard_page_size; + ReserveShadowMemoryRange(thread_space_start, thread_space_end - 1, + "hwasan threads"); + ProtectGap(thread_space_end, + __hwasan_shadow_memory_dynamic_address - thread_space_end); + InitThreadList(thread_space_start, thread_space_end - thread_space_start); +} + static void MadviseShadowRegion(uptr beg, uptr end) { uptr size = end - beg + 1; if (common_flags()->no_huge_pages_for_shadow) @@ -251,37 +232,60 @@ void InstallAtExitHandler() { // ---------------------- TSD ---------------- {{{1 +extern "C" void __hwasan_thread_enter() { + hwasanThreadList().CreateCurrentThread(); +} + +extern "C" void __hwasan_thread_exit() { + Thread *t = GetCurrentThread(); + // Make sure that signal handler can not see a stale current thread pointer. + atomic_signal_fence(memory_order_seq_cst); + if (t) + hwasanThreadList().ReleaseThread(t); +} + +#if HWASAN_WITH_INTERCEPTORS static pthread_key_t tsd_key; static bool tsd_key_inited = false; -void HwasanTSDInit(void (*destructor)(void *tsd)) { +void HwasanTSDThreadInit() { + if (tsd_key_inited) + CHECK_EQ(0, pthread_setspecific(tsd_key, + (void *)GetPthreadDestructorIterations())); +} + +void HwasanTSDDtor(void *tsd) { + uptr iterations = (uptr)tsd; + if (iterations > 1) { + CHECK_EQ(0, pthread_setspecific(tsd_key, (void *)(iterations - 1))); + return; + } + __hwasan_thread_exit(); +} + +void HwasanTSDInit() { CHECK(!tsd_key_inited); tsd_key_inited = true; - CHECK_EQ(0, pthread_key_create(&tsd_key, destructor)); + CHECK_EQ(0, pthread_key_create(&tsd_key, HwasanTSDDtor)); } +#else +void HwasanTSDInit() {} +void HwasanTSDThreadInit() {} +#endif -HwasanThread *GetCurrentThread() { - return (HwasanThread*)pthread_getspecific(tsd_key); +#if SANITIZER_ANDROID +uptr *GetCurrentThreadLongPtr() { + return (uptr *)get_android_tls_ptr(); } - -void SetCurrentThread(HwasanThread *t) { - // Make sure that HwasanTSDDtor gets called at the end. - CHECK(tsd_key_inited); - // Make sure we do not reset the current HwasanThread. - CHECK_EQ(0, pthread_getspecific(tsd_key)); - pthread_setspecific(tsd_key, (void *)t); +#else +uptr *GetCurrentThreadLongPtr() { + return &__hwasan_tls; } +#endif -void HwasanTSDDtor(void *tsd) { - HwasanThread *t = (HwasanThread*)tsd; - if (t->destructor_iterations_ > 1) { - t->destructor_iterations_--; - CHECK_EQ(0, pthread_setspecific(tsd_key, tsd)); - return; - } - // Make sure that signal handler can not see a stale current thread pointer. 
- atomic_signal_fence(memory_order_seq_cst); - HwasanThread::TSDDtor(tsd); +Thread *GetCurrentThread() { + auto *R = (StackAllocationsRingBuffer*)GetCurrentThreadLongPtr(); + return hwasanThreadList().GetThreadByBufferAddress((uptr)(R->Next())); } struct AccessInfo { diff --git a/lib/hwasan/hwasan_mapping.h b/lib/hwasan/hwasan_mapping.h index 650a5aefc..e5e23dc60 100644 --- a/lib/hwasan/hwasan_mapping.h +++ b/lib/hwasan/hwasan_mapping.h @@ -16,68 +16,41 @@ #define HWASAN_MAPPING_H #include "sanitizer_common/sanitizer_internal_defs.h" +#include "hwasan_interface_internal.h" -// Typical mapping on Linux/x86_64 with fixed shadow mapping: -// || [0x080000000000, 0x7fffffffffff] || HighMem || -// || [0x008000000000, 0x07ffffffffff] || HighShadow || -// || [0x000100000000, 0x007fffffffff] || ShadowGap || -// || [0x000010000000, 0x0000ffffffff] || LowMem || -// || [0x000001000000, 0x00000fffffff] || LowShadow || -// || [0x000000000000, 0x000000ffffff] || ShadowGap || -// -// and with dynamic shadow mapped at [0x770d59f40000, 0x7f0d59f40000]: +// Typical mapping on Linux/x86_64: +// with dynamic shadow mapped at [0x770d59f40000, 0x7f0d59f40000]: // || [0x7f0d59f40000, 0x7fffffffffff] || HighMem || // || [0x7efe2f934000, 0x7f0d59f3ffff] || HighShadow || // || [0x7e7e2f934000, 0x7efe2f933fff] || ShadowGap || // || [0x770d59f40000, 0x7e7e2f933fff] || LowShadow || // || [0x000000000000, 0x770d59f3ffff] || LowMem || -// Typical mapping on Android/AArch64 (39-bit VMA): -// || [0x001000000000, 0x007fffffffff] || HighMem || -// || [0x000800000000, 0x000fffffffff] || ShadowGap || -// || [0x000100000000, 0x0007ffffffff] || HighShadow || -// || [0x000010000000, 0x0000ffffffff] || LowMem || -// || [0x000001000000, 0x00000fffffff] || LowShadow || -// || [0x000000000000, 0x000000ffffff] || ShadowGap || -// -// and with dynamic shadow mapped: [0x007477480000, 0x007c77480000]: +// Typical mapping on Android/AArch64 +// with dynamic shadow mapped: [0x007477480000, 0x007c77480000]: // || [0x007c77480000, 0x007fffffffff] || HighMem || // || [0x007c3ebc8000, 0x007c7747ffff] || HighShadow || // || [0x007bbebc8000, 0x007c3ebc7fff] || ShadowGap || // || [0x007477480000, 0x007bbebc7fff] || LowShadow || // || [0x000000000000, 0x00747747ffff] || LowMem || -static constexpr __sanitizer::u64 kDefaultShadowSentinel = ~(__sanitizer::u64)0; - // Reasonable values are 4 (for 1/16th shadow) and 6 (for 1/64th). 
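The macros below give way to the inline helpers defined further down in this file; their arithmetic can be sanity-checked against the x86_64 table above. A minimal standalone sketch (plain C++, not part of the patch; kShadowScale = 4 and the dynamic base value are taken from this file):

    #include <cassert>
    #include <cstdint>

    int main() {
      const unsigned kScale = 4;  // kShadowScale: one shadow byte per 16 bytes
      const uint64_t base = 0x770d59f40000;  // __hwasan_shadow_memory_dynamic_address
      const uint64_t high_mem_start = 0x7f0d59f40000;  // HighMem start, per the table

      // MemToShadow: shift out the granule bits, then rebase on the dynamic base.
      uint64_t shadow = (high_mem_start >> kScale) + base;
      assert(shadow == 0x7efe2f934000);  // the HighShadow start shown above

      // ShadowToMem: the exact inverse.
      uint64_t mem = (shadow - base) << kScale;
      assert(mem == high_mem_start);
      return 0;
    }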
-constexpr __sanitizer::uptr kShadowScale = 4; -constexpr __sanitizer::uptr kShadowAlignment = 1ULL << kShadowScale; - -#if SANITIZER_ANDROID -# define HWASAN_FIXED_MAPPING 0 -#else -# define HWASAN_FIXED_MAPPING 1 -#endif - -#if HWASAN_FIXED_MAPPING -# define SHADOW_OFFSET (0) -# define HWASAN_PREMAP_SHADOW 0 -#else -# define SHADOW_OFFSET (__hwasan_shadow_memory_dynamic_address) -# define HWASAN_PREMAP_SHADOW 1 -#endif - -#define SHADOW_GRANULARITY (1ULL << kShadowScale) - -#define MEM_TO_SHADOW(mem) (((uptr)(mem) >> kShadowScale) + SHADOW_OFFSET) -#define SHADOW_TO_MEM(shadow) (((uptr)(shadow) - SHADOW_OFFSET) << kShadowScale) - -#define MEM_TO_SHADOW_SIZE(size) ((uptr)(size) >> kShadowScale) - -#define MEM_IS_APP(mem) MemIsApp((uptr)(mem)) +constexpr uptr kShadowScale = 4; +constexpr uptr kShadowAlignment = 1ULL << kShadowScale; namespace __hwasan { +inline uptr MemToShadow(uptr untagged_addr) { + return (untagged_addr >> kShadowScale) + + __hwasan_shadow_memory_dynamic_address; +} +inline uptr ShadowToMem(uptr shadow_addr) { + return (shadow_addr - __hwasan_shadow_memory_dynamic_address) << kShadowScale; +} +inline uptr MemToShadowSize(uptr size) { + return size >> kShadowScale; +} + bool MemIsApp(uptr p); } // namespace __hwasan diff --git a/lib/hwasan/hwasan_poisoning.cc b/lib/hwasan/hwasan_poisoning.cc index b99d8ed0b..9c8e16b12 100644 --- a/lib/hwasan/hwasan_poisoning.cc +++ b/lib/hwasan/hwasan_poisoning.cc @@ -1,4 +1,4 @@ -//===-- hwasan_poisoning.cc ---------------------------------------*- C++ -*-===// +//===-- hwasan_poisoning.cc -------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -22,8 +22,8 @@ namespace __hwasan { uptr TagMemoryAligned(uptr p, uptr size, tag_t tag) { CHECK(IsAligned(p, kShadowAlignment)); CHECK(IsAligned(size, kShadowAlignment)); - uptr shadow_start = MEM_TO_SHADOW(p); - uptr shadow_size = MEM_TO_SHADOW_SIZE(size); + uptr shadow_start = MemToShadow(p); + uptr shadow_size = MemToShadowSize(size); internal_memset((void *)shadow_start, tag, shadow_size); return AddTagToPointer(p, tag); } diff --git a/lib/hwasan/hwasan_poisoning.h b/lib/hwasan/hwasan_poisoning.h index b44a91f97..0dbf9d8ed 100644 --- a/lib/hwasan/hwasan_poisoning.h +++ b/lib/hwasan/hwasan_poisoning.h @@ -1,4 +1,4 @@ -//===-- hwasan_poisoning.h ----------------------------------------*- C++ -*-===// +//===-- hwasan_poisoning.h --------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/hwasan/hwasan_report.cc b/lib/hwasan/hwasan_report.cc index 16e9016ea..cf5f468c4 100644 --- a/lib/hwasan/hwasan_report.cc +++ b/lib/hwasan/hwasan_report.cc @@ -15,6 +15,8 @@ #include "hwasan.h" #include "hwasan_allocator.h" #include "hwasan_mapping.h" +#include "hwasan_thread.h" +#include "hwasan_thread_list.h" #include "sanitizer_common/sanitizer_allocator_internal.h" #include "sanitizer_common/sanitizer_common.h" #include "sanitizer_common/sanitizer_flags.h" @@ -34,51 +36,169 @@ static StackTrace GetStackTraceFromId(u32 id) { return res; } +// A RAII object that holds a copy of the current thread stack ring buffer. +// The actual stack buffer may change while we are iterating over it (for +// example, Printf may call syslog() which can itself be built with hwasan). 
+class SavedStackAllocations {
+ public:
+  SavedStackAllocations(StackAllocationsRingBuffer *rb) {
+    uptr size = rb->size() * sizeof(uptr);
+    void *storage =
+        MmapAlignedOrDieOnFatalError(size, size * 2, "saved stack allocations");
+    new (&rb_) StackAllocationsRingBuffer(*rb, storage);
+  }
+
+  ~SavedStackAllocations() {
+    StackAllocationsRingBuffer *rb = get();
+    UnmapOrDie(rb->StartOfStorage(), rb->size() * sizeof(uptr));
+  }
+
+  StackAllocationsRingBuffer *get() {
+    return (StackAllocationsRingBuffer *)&rb_;
+  }
+
+ private:
+  uptr rb_;
+};
+
 class Decorator: public __sanitizer::SanitizerCommonDecorator {
  public:
   Decorator() : SanitizerCommonDecorator() { }
+  const char *Access() { return Blue(); }
   const char *Allocation() const { return Magenta(); }
   const char *Origin() const { return Magenta(); }
   const char *Name() const { return Green(); }
+  const char *Location() { return Green(); }
+  const char *Thread() { return Green(); }
 };
 
-struct HeapAddressDescription {
-  uptr addr;
-  u32 alloc_stack_id;
-  u32 free_stack_id;
-
-  void Print() const {
-    Decorator d;
-    if (free_stack_id) {
-      Printf("%sfreed here:%s\n", d.Allocation(), d.Default());
-      GetStackTraceFromId(free_stack_id).Print();
-      Printf("%spreviously allocated here:%s\n", d.Allocation(), d.Default());
-    } else {
-      Printf("%sallocated here:%s\n", d.Allocation(), d.Default());
+// Returns the index of the rb element that matches tagged_addr (plus one),
+// or zero if nothing was found.
+uptr FindHeapAllocation(HeapAllocationsRingBuffer *rb,
+                        uptr tagged_addr,
+                        HeapAllocationRecord *har) {
+  if (!rb) return 0;
+  for (uptr i = 0, size = rb->size(); i < size; i++) {
+    auto h = (*rb)[i];
+    if (h.tagged_addr <= tagged_addr &&
+        h.tagged_addr + h.requested_size > tagged_addr) {
+      *har = h;
+      return i + 1;
     }
-    GetStackTraceFromId(alloc_stack_id).Print();
   }
-};
-
-bool GetHeapAddressInformation(uptr addr, uptr access_size,
-                               HeapAddressDescription *description) {
-  HwasanChunkView chunk = FindHeapChunkByAddress(addr);
-  if (!chunk.IsValid())
-    return false;
-  description->addr = addr;
-  description->alloc_stack_id = chunk.GetAllocStackId();
-  description->free_stack_id = chunk.GetFreeStackId();
-  return true;
+  return 0;
 }
 
-void PrintAddressDescription(uptr addr, uptr access_size) {
-  HeapAddressDescription heap_description;
-  if (GetHeapAddressInformation(addr, access_size, &heap_description)) {
-    heap_description.Print();
-    return;
+void PrintAddressDescription(
+    uptr tagged_addr, uptr access_size,
+    StackAllocationsRingBuffer *current_stack_allocations) {
+  Decorator d;
+  int num_descriptions_printed = 0;
+  uptr untagged_addr = UntagAddr(tagged_addr);
+  // Check if this looks like a heap buffer overflow by scanning
+  // the shadow left and right and looking for the first adjacent
+  // object with a different memory tag. If that tag matches addr_tag,
+  // ask the allocator whether it has a live chunk there.
+  tag_t addr_tag = GetTagFromPointer(tagged_addr);
+  tag_t *tag_ptr = reinterpret_cast<tag_t*>(MemToShadow(untagged_addr));
+  if (*tag_ptr != addr_tag) { // this is usually the case.
+    tag_t *left = tag_ptr, *right = tag_ptr;
+    // scan left.
+    for (int i = 0; i < 1000 && *left == *tag_ptr; i++, left--){}
+    // scan right.
+    for (int i = 0; i < 1000 && *right == *tag_ptr; i++, right++){}
+    // Choose the object that has addr_tag and that is closer to addr.
+    tag_t *candidate = nullptr;
+    if (*right == addr_tag && *left == addr_tag)
+      candidate = right - tag_ptr < tag_ptr - left ? right : left;
+    else if (*right == addr_tag)
+      candidate = right;
+    else if (*left == addr_tag)
+      candidate = left;
+
+    if (candidate) {
+      uptr mem = ShadowToMem(reinterpret_cast<uptr>(candidate));
+      HwasanChunkView chunk = FindHeapChunkByAddress(mem);
+      if (chunk.IsAllocated()) {
+        Printf("%s", d.Location());
+        Printf(
+            "%p is located %zd bytes to the %s of %zd-byte region [%p,%p)\n",
+            untagged_addr,
+            candidate == left ? untagged_addr - chunk.End()
+                              : chunk.Beg() - untagged_addr,
+            candidate == right ? "left" : "right", chunk.UsedSize(),
+            chunk.Beg(), chunk.End());
+        Printf("%s", d.Allocation());
+        Printf("allocated here:\n");
+        Printf("%s", d.Default());
+        GetStackTraceFromId(chunk.GetAllocStackId()).Print();
+        num_descriptions_printed++;
+      }
+    }
   }
-  // We exhausted our possibilities. Bail out.
-  Printf("HWAddressSanitizer can not describe address in more detail.\n");
+
+  hwasanThreadList().VisitAllLiveThreads([&](Thread *t) {
+    // Scan all threads' ring buffers to find if it's a heap-use-after-free.
+    HeapAllocationRecord har;
+    if (uptr D = FindHeapAllocation(t->heap_allocations(), tagged_addr, &har)) {
+      Printf("%s", d.Location());
+      Printf("%p is located %zd bytes inside of %zd-byte region [%p,%p)\n",
+             untagged_addr, untagged_addr - UntagAddr(har.tagged_addr),
+             har.requested_size, UntagAddr(har.tagged_addr),
+             UntagAddr(har.tagged_addr) + har.requested_size);
+      Printf("%s", d.Allocation());
+      Printf("freed by thread T%zd here:\n", t->unique_id());
+      Printf("%s", d.Default());
+      GetStackTraceFromId(har.free_context_id).Print();
+
+      Printf("%s", d.Allocation());
+      Printf("previously allocated here:\n");
+      Printf("%s", d.Default());
+      GetStackTraceFromId(har.alloc_context_id).Print();
+      t->Announce();
+
+      // Print a developer note: the index of this heap object
+      // in the thread's deallocation ring buffer.
+      Printf("hwasan_dev_note_heap_rb_distance: %zd %zd\n", D,
+             (uptr)flags()->heap_history_size);
+
+      num_descriptions_printed++;
+    }
+
+    // Very basic check for stack memory.
+    if (t->AddrIsInStack(untagged_addr)) {
+      Printf("%s", d.Location());
+      Printf("Address %p is located in stack of thread T%zd\n", untagged_addr,
+             t->unique_id());
+      Printf("%s", d.Default());
+      t->Announce();
+
+      // Temporary report section, needs to be improved.
+      Printf("Previously allocated frames:\n");
+      auto *sa = (t == GetCurrentThread() && current_stack_allocations)
+                     ? current_stack_allocations
+                     : t->stack_allocations();
+      uptr frames = Min((uptr)flags()->stack_history_size, sa->size());
+      for (uptr i = 0; i < frames; i++) {
+        uptr record = (*sa)[i];
+        if (!record)
+          break;
+        // Each record packs the frame's PC into the low 48 bits and bits
+        // 4..19 of the frame's SP into the high 16 bits.
+        uptr sp = (record >> 48) << 4;
+        uptr pc_mask = (1ULL << 48) - 1;
+        uptr pc = record & pc_mask;
+        uptr fixed_pc = StackTrace::GetNextInstructionPc(pc);
+        StackTrace stack(&fixed_pc, 1);
+        Printf("record: %p pc: %p sp: %p", record, pc, sp);
+        stack.Print();
+      }
+
+      num_descriptions_printed++;
+    }
+  });
+
+  if (!num_descriptions_printed)
+    // We exhausted our possibilities. Bail out.
+ Printf("HWAddressSanitizer can not describe address in more detail.\n"); } void ReportInvalidAccess(StackTrace *stack, u32 origin) { @@ -97,37 +217,106 @@ void ReportStats() {} void ReportInvalidAccessInsideAddressRange(const char *what, const void *start, uptr size, uptr offset) { ScopedErrorReportLock l; + SavedStackAllocations current_stack_allocations( + GetCurrentThread()->stack_allocations()); Decorator d; Printf("%s", d.Warning()); Printf("%sTag mismatch in %s%s%s at offset %zu inside [%p, %zu)%s\n", d.Warning(), d.Name(), what, d.Warning(), offset, start, size, d.Default()); - PrintAddressDescription((uptr)start + offset, 1); + PrintAddressDescription((uptr)start + offset, 1, + current_stack_allocations.get()); // if (__sanitizer::Verbosity()) // DescribeMemoryRange(start, size); } -void ReportTagMismatch(StackTrace *stack, uptr addr, uptr access_size, +static void PrintTagsAroundAddr(tag_t *tag_ptr) { + Printf( + "Memory tags around the buggy address (one tag corresponds to %zd " + "bytes):\n", kShadowAlignment); + + const uptr row_len = 16; // better be power of two. + const uptr num_rows = 11; + tag_t *center_row_beg = reinterpret_cast<tag_t *>( + RoundDownTo(reinterpret_cast<uptr>(tag_ptr), row_len)); + tag_t *beg_row = center_row_beg - row_len * (num_rows / 2); + tag_t *end_row = center_row_beg + row_len * (num_rows / 2); + InternalScopedString s(GetPageSizeCached()); + for (tag_t *row = beg_row; row < end_row; row += row_len) { + s.append("%s", row == center_row_beg ? "=>" : " "); + for (uptr i = 0; i < row_len; i++) { + s.append("%s", row + i == tag_ptr ? "[" : " "); + s.append("%02x", row[i]); + s.append("%s", row + i == tag_ptr ? "]" : " "); + } + s.append("%s\n", row == center_row_beg ? "<=" : " "); + Printf("%s", s.data()); + s.clear(); + } +} + +void ReportInvalidFree(StackTrace *stack, uptr tagged_addr) { + ScopedErrorReportLock l; + uptr untagged_addr = UntagAddr(tagged_addr); + tag_t ptr_tag = GetTagFromPointer(tagged_addr); + tag_t *tag_ptr = reinterpret_cast<tag_t*>(MemToShadow(untagged_addr)); + tag_t mem_tag = *tag_ptr; + Decorator d; + Printf("%s", d.Error()); + uptr pc = stack->size ? stack->trace[0] : 0; + const char *bug_type = "invalid-free"; + Report("ERROR: %s: %s on address %p at pc %p\n", SanitizerToolName, bug_type, + untagged_addr, pc); + Printf("%s", d.Access()); + Printf("tags: %02x/%02x (ptr/mem)\n", ptr_tag, mem_tag); + Printf("%s", d.Default()); + + stack->Print(); + + PrintAddressDescription(tagged_addr, 0, nullptr); + + PrintTagsAroundAddr(tag_ptr); + + ReportErrorSummary(bug_type, stack); + Die(); +} + +void ReportTagMismatch(StackTrace *stack, uptr tagged_addr, uptr access_size, bool is_store) { ScopedErrorReportLock l; + SavedStackAllocations current_stack_allocations( + GetCurrentThread()->stack_allocations()); Decorator d; - Printf("%s", d.Warning()); - uptr address = GetAddressFromPointer(addr); - Printf("%s of size %zu at %p\n", is_store ? "WRITE" : "READ", access_size, - address); + Printf("%s", d.Error()); + uptr untagged_addr = UntagAddr(tagged_addr); + // TODO: when possible, try to print heap-use-after-free, etc. + const char *bug_type = "tag-mismatch"; + uptr pc = stack->size ? 
stack->trace[0] : 0; + Report("ERROR: %s: %s on address %p at pc %p\n", SanitizerToolName, bug_type, + untagged_addr, pc); - tag_t ptr_tag = GetTagFromPointer(addr); - tag_t mem_tag = *(tag_t *)MEM_TO_SHADOW(address); - Printf("pointer tag 0x%x\nmemory tag 0x%x\n", ptr_tag, mem_tag); + Thread *t = GetCurrentThread(); + + tag_t ptr_tag = GetTagFromPointer(tagged_addr); + tag_t *tag_ptr = reinterpret_cast<tag_t*>(MemToShadow(untagged_addr)); + tag_t mem_tag = *tag_ptr; + Printf("%s", d.Access()); + Printf("%s of size %zu at %p tags: %02x/%02x (ptr/mem) in thread T%zd\n", + is_store ? "WRITE" : "READ", access_size, untagged_addr, ptr_tag, + mem_tag, t->unique_id()); Printf("%s", d.Default()); stack->Print(); - PrintAddressDescription(address, access_size); + PrintAddressDescription(tagged_addr, access_size, + current_stack_allocations.get()); + t->Announce(); + + PrintTagsAroundAddr(tag_ptr); - ReportErrorSummary("tag-mismatch", stack); + ReportErrorSummary(bug_type, stack); } } // namespace __hwasan diff --git a/lib/hwasan/hwasan_report.h b/lib/hwasan/hwasan_report.h index bb33f1a87..7a9eec834 100644 --- a/lib/hwasan/hwasan_report.h +++ b/lib/hwasan/hwasan_report.h @@ -27,6 +27,7 @@ void ReportInvalidAccessInsideAddressRange(const char *what, const void *start, uptr size, uptr offset); void ReportTagMismatch(StackTrace *stack, uptr addr, uptr access_size, bool is_store); +void ReportInvalidFree(StackTrace *stack, uptr addr); void ReportAtExitStatistics(); diff --git a/lib/hwasan/hwasan_thread.cc b/lib/hwasan/hwasan_thread.cc index 8d9103087..0d15c7e46 100644 --- a/lib/hwasan/hwasan_thread.cc +++ b/lib/hwasan/hwasan_thread.cc @@ -6,8 +6,10 @@ #include "hwasan_interface_internal.h" #include "sanitizer_common/sanitizer_file.h" +#include "sanitizer_common/sanitizer_placement_new.h" #include "sanitizer_common/sanitizer_tls_get_addr.h" + namespace __hwasan { static u32 RandomSeed() { @@ -23,81 +25,79 @@ static u32 RandomSeed() { return seed; } -HwasanThread *HwasanThread::Create(thread_callback_t start_routine, - void *arg) { - uptr PageSize = GetPageSizeCached(); - uptr size = RoundUpTo(sizeof(HwasanThread), PageSize); - HwasanThread *thread = (HwasanThread*)MmapOrDie(size, __func__); - thread->start_routine_ = start_routine; - thread->arg_ = arg; - thread->destructor_iterations_ = GetPthreadDestructorIterations(); - thread->random_state_ = flags()->random_tags ? RandomSeed() : 0; - - return thread; -} +void Thread::Init(uptr stack_buffer_start, uptr stack_buffer_size) { + static u64 unique_id; + unique_id_ = unique_id++; + random_state_ = flags()->random_tags ? RandomSeed() : unique_id_; + if (auto sz = flags()->heap_history_size) + heap_allocations_ = HeapAllocationsRingBuffer::New(sz); + + HwasanTSDThreadInit(); // Only needed with interceptors. + uptr *ThreadLong = GetCurrentThreadLongPtr(); + // The following implicitly sets (this) as the current thread. + stack_allocations_ = new (ThreadLong) + StackAllocationsRingBuffer((void *)stack_buffer_start, stack_buffer_size); + // Check that it worked. + CHECK_EQ(GetCurrentThread(), this); + + // ScopedTaggingDisable needs GetCurrentThread to be set up. + ScopedTaggingDisabler disabler; -void HwasanThread::SetThreadStackAndTls() { // If this process is "init" (pid 1), /proc may not be mounted yet. 
if (IsMainThread() && !FileExists("/proc/self/maps")) { stack_top_ = stack_bottom_ = 0; tls_begin_ = tls_end_ = 0; - return; + } else { + uptr tls_size; + uptr stack_size; + GetThreadStackAndTls(IsMainThread(), &stack_bottom_, &stack_size, + &tls_begin_, &tls_size); + stack_top_ = stack_bottom_ + stack_size; + tls_end_ = tls_begin_ + tls_size; + + int local; + CHECK(AddrIsInStack((uptr)&local)); + CHECK(MemIsApp(stack_bottom_)); + CHECK(MemIsApp(stack_top_ - 1)); + + if (stack_bottom_) { + CHECK(MemIsApp(stack_bottom_)); + CHECK(MemIsApp(stack_top_ - 1)); + } } - uptr tls_size; - uptr stack_size; - GetThreadStackAndTls(IsMainThread(), &stack_bottom_, &stack_size, &tls_begin_, - &tls_size); - stack_top_ = stack_bottom_ + stack_size; - tls_end_ = tls_begin_ + tls_size; - - int local; - CHECK(AddrIsInStack((uptr)&local)); - CHECK(MEM_IS_APP(stack_bottom_)); - CHECK(MEM_IS_APP(stack_top_ - 1)); -} - -void HwasanThread::Init() { - SetThreadStackAndTls(); - if (stack_bottom_) { - CHECK(MEM_IS_APP(stack_bottom_)); - CHECK(MEM_IS_APP(stack_top_ - 1)); + if (flags()->verbose_threads) { + if (IsMainThread()) { + Printf("sizeof(Thread): %zd sizeof(HeapRB): %zd sizeof(StackRB): %zd\n", + sizeof(Thread), heap_allocations_->SizeInBytes(), + stack_allocations_->size() * sizeof(uptr)); + } + Print("Creating : "); } } -void HwasanThread::TSDDtor(void *tsd) { - HwasanThread *t = (HwasanThread*)tsd; - t->Destroy(); -} - -void HwasanThread::ClearShadowForThreadStackAndTLS() { +void Thread::ClearShadowForThreadStackAndTLS() { if (stack_top_ != stack_bottom_) TagMemory(stack_bottom_, stack_top_ - stack_bottom_, 0); if (tls_begin_ != tls_end_) TagMemory(tls_begin_, tls_end_ - tls_begin_, 0); } -void HwasanThread::Destroy() { - malloc_storage().CommitBack(); +void Thread::Destroy() { + if (flags()->verbose_threads) + Print("Destroying: "); + AllocatorSwallowThreadLocalCache(allocator_cache()); ClearShadowForThreadStackAndTLS(); - uptr size = RoundUpTo(sizeof(HwasanThread), GetPageSizeCached()); - UnmapOrDie(this, size); + if (heap_allocations_) + heap_allocations_->Delete(); DTLS_Destroy(); } -thread_return_t HwasanThread::ThreadStart() { - Init(); - - if (!start_routine_) { - // start_routine_ == 0 if we're on the main thread or on one of the - // OS X libdispatch worker threads. But nobody is supposed to call - // ThreadStart() for the worker threads. - return 0; - } - - thread_return_t res = start_routine_(arg_); - - return res; +void Thread::Print(const char *Prefix) { + Printf("%sT%zd %p stack: [%p,%p) sz: %zd tls: [%p,%p)\n", Prefix, + unique_id_, this, stack_bottom(), stack_top(), + stack_top() - stack_bottom(), + tls_begin(), tls_end()); } static u32 xorshift(u32 state) { @@ -108,7 +108,8 @@ static u32 xorshift(u32 state) { } // Generate a (pseudo-)random non-zero tag. 
-tag_t HwasanThread::GenerateRandomTag() { +tag_t Thread::GenerateRandomTag() { + if (tagging_disabled_) return 0; tag_t tag; do { if (flags()->random_tags) { diff --git a/lib/hwasan/hwasan_thread.h b/lib/hwasan/hwasan_thread.h index 1e482adea..4830473f4 100644 --- a/lib/hwasan/hwasan_thread.h +++ b/lib/hwasan/hwasan_thread.h @@ -1,4 +1,4 @@ -//===-- hwasan_thread.h -------------------------------------------*- C++ -*-===// +//===-- hwasan_thread.h -----------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -16,23 +16,23 @@ #include "hwasan_allocator.h" #include "sanitizer_common/sanitizer_common.h" +#include "sanitizer_common/sanitizer_ring_buffer.h" namespace __hwasan { -class HwasanThread { +typedef __sanitizer::CompactRingBuffer<uptr> StackAllocationsRingBuffer; + +class Thread { public: - static HwasanThread *Create(thread_callback_t start_routine, void *arg); - static void TSDDtor(void *tsd); + void Init(uptr stack_buffer_start, uptr stack_buffer_size); // Must be called from the thread itself. void Destroy(); - void Init(); // Should be called from the thread itself. - thread_return_t ThreadStart(); - uptr stack_top() { return stack_top_; } uptr stack_bottom() { return stack_bottom_; } + uptr stack_size() { return stack_top() - stack_bottom(); } uptr tls_begin() { return tls_begin_; } uptr tls_end() { return tls_end_; } - bool IsMainThread() { return start_routine_ == nullptr; } + bool IsMainThread() { return unique_id_ == 0; } bool AddrIsInStack(uptr addr) { return addr >= stack_bottom_ && addr < stack_top_; @@ -50,19 +50,28 @@ class HwasanThread { void EnterInterceptorScope() { in_interceptor_scope_++; } void LeaveInterceptorScope() { in_interceptor_scope_--; } - HwasanThreadLocalMallocStorage &malloc_storage() { return malloc_storage_; } + AllocatorCache *allocator_cache() { return &allocator_cache_; } + HeapAllocationsRingBuffer *heap_allocations() { return heap_allocations_; } + StackAllocationsRingBuffer *stack_allocations() { return stack_allocations_; } tag_t GenerateRandomTag(); - int destructor_iterations_; + void DisableTagging() { tagging_disabled_++; } + void EnableTagging() { tagging_disabled_--; } + bool TaggingIsDisabled() const { return tagging_disabled_; } + + u64 unique_id() const { return unique_id_; } + void Announce() { + if (announced_) return; + announced_ = true; + Print("Thread: "); + } private: - // NOTE: There is no HwasanThread constructor. It is allocated + // NOTE: There is no Thread constructor. It is allocated // via mmap() and *must* be valid in zero-initialized state. - void SetThreadStackAndTls(); void ClearShadowForThreadStackAndTLS(); - thread_callback_t start_routine_; - void *arg_; + void Print(const char *prefix); uptr stack_top_; uptr stack_bottom_; uptr tls_begin_; @@ -75,11 +84,30 @@ class HwasanThread { u32 random_state_; u32 random_buffer_; - HwasanThreadLocalMallocStorage malloc_storage_; + AllocatorCache allocator_cache_; + HeapAllocationsRingBuffer *heap_allocations_; + StackAllocationsRingBuffer *stack_allocations_; + + static void InsertIntoThreadList(Thread *t); + static void RemoveFromThreadList(Thread *t); + Thread *next_; // All live threads form a linked list. + + u64 unique_id_; // counting from zero. + + u32 tagging_disabled_; // if non-zero, malloc uses zero tag in this thread. 
+ + bool announced_; + + friend struct ThreadListHead; }; -HwasanThread *GetCurrentThread(); -void SetCurrentThread(HwasanThread *t); +Thread *GetCurrentThread(); +uptr *GetCurrentThreadLongPtr(); + +struct ScopedTaggingDisabler { + ScopedTaggingDisabler() { GetCurrentThread()->DisableTagging(); } + ~ScopedTaggingDisabler() { GetCurrentThread()->EnableTagging(); } +}; } // namespace __hwasan diff --git a/lib/hwasan/hwasan_thread_list.cc b/lib/hwasan/hwasan_thread_list.cc new file mode 100644 index 000000000..a31eee84e --- /dev/null +++ b/lib/hwasan/hwasan_thread_list.cc @@ -0,0 +1,15 @@ +#include "hwasan_thread_list.h" + +namespace __hwasan { +static ALIGNED(16) char thread_list_placeholder[sizeof(HwasanThreadList)]; +static HwasanThreadList *hwasan_thread_list; + +HwasanThreadList &hwasanThreadList() { return *hwasan_thread_list; } + +void InitThreadList(uptr storage, uptr size) { + CHECK(hwasan_thread_list == nullptr); + hwasan_thread_list = + new (thread_list_placeholder) HwasanThreadList(storage, size); +} + +} // namespace diff --git a/lib/hwasan/hwasan_thread_list.h b/lib/hwasan/hwasan_thread_list.h new file mode 100644 index 000000000..53747b51f --- /dev/null +++ b/lib/hwasan/hwasan_thread_list.h @@ -0,0 +1,200 @@ +//===-- hwasan_thread_list.h ------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of HWAddressSanitizer. +// +//===----------------------------------------------------------------------===// + +// HwasanThreadList is a registry for live threads, as well as an allocator for +// HwasanThread objects and their stack history ring buffers. There are +// constraints on memory layout of the shadow region and CompactRingBuffer that +// are part of the ABI contract between compiler-rt and llvm. +// +// * Start of the shadow memory region is aligned to 2**kShadowBaseAlignment. +// * All stack ring buffers are located within (2**kShadowBaseAlignment) +// sized region below and adjacent to the shadow region. +// * Each ring buffer has a size of (2**N)*4096 where N is in [0, 8), and is +// aligned to twice its size. The value of N can be different for each buffer. +// +// These constraints guarantee that, given an address A of any element of the +// ring buffer, +// A_next = (A + sizeof(uptr)) & ~(1 << (N + 12)) +// is the address of the next element of that ring buffer (with wrap-around). +// And, with K = kShadowBaseAlignment, +// S = (A | ((1 << K) - 1)) + 1 +// (align up to kShadowBaseAlignment) is the start of the shadow region. +// +// These calculations are used in compiler instrumentation to update the ring +// buffer and obtain the base address of shadow using only two inputs: address +// of the current element of the ring buffer, and N (i.e. size of the ring +// buffer). Since the value of N is very limited, we pack both inputs into a +// single thread-local word as +// (1 << (N + 56)) | A +// See the implementation of class CompactRingBuffer, which is what is stored in +// said thread-local word. +// +// Note the unusual way of aligning up the address of the shadow: +// (A | ((1 << K) - 1)) + 1 +// It is only correct if A is not already equal to the shadow base address, but +// it saves 2 instructions on AArch64.
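To make the pointer arithmetic above concrete, here is a small editorial sketch (not part of the patch; the addresses and the K value are assumed for illustration). Take N = 0, i.e. a 4096-byte ring buffer, which the constraints above require to be aligned to 8192:

// Assumed for this sketch: storage base B == 0x2000 (8192-aligned).
uptr A = 0x2ff8;                                      // last uptr slot, B + 4096 - 8
uptr A_next = (A + sizeof(uptr)) & ~((uptr)1 << 12);  // bit N + 12 == 12
// A + 8 == 0x3000 has bit 12 set precisely because the pointer stepped past
// the end of the storage, so the mask wraps A_next back to B == 0x2000. For
// any interior slot, bit 12 is clear and the mask is a no-op.
// Shadow base: with K == 32, an element address such as 0x3ffff2ff8 gives
// S == (0x3ffff2ff8 | 0xffffffff) + 1 == 0x400000000.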
+ +#include "hwasan.h" +#include "hwasan_allocator.h" +#include "hwasan_flags.h" +#include "hwasan_thread.h" + +#include "sanitizer_common/sanitizer_placement_new.h" + +namespace __hwasan { + +static uptr RingBufferSize() { + uptr desired_bytes = flags()->stack_history_size * sizeof(uptr); + // FIXME: increase the limit to 8 once this bug is fixed: + // https://bugs.llvm.org/show_bug.cgi?id=39030 + for (int shift = 1; shift < 7; ++shift) { + uptr size = 4096 * (1ULL << shift); + if (size >= desired_bytes) + return size; + } + Printf("stack history size too large: %d\n", flags()->stack_history_size); + CHECK(0); + return 0; +} + +struct ThreadListHead { + Thread *list_; + + ThreadListHead() : list_(nullptr) {} + + void Push(Thread *t) { + t->next_ = list_; + list_ = t; + } + + Thread *Pop() { + Thread *t = list_; + if (t) + list_ = t->next_; + return t; + } + + void Remove(Thread *t) { + Thread **cur = &list_; + while (*cur != t) cur = &(*cur)->next_; + CHECK(*cur && "thread not found"); + *cur = (*cur)->next_; + } + + template <class CB> + void ForEach(CB cb) { + Thread *t = list_; + while (t) { + cb(t); + t = t->next_; + } + } +}; + +struct ThreadStats { + uptr n_live_threads; + uptr total_stack_size; +}; + +class HwasanThreadList { + public: + HwasanThreadList(uptr storage, uptr size) + : free_space_(storage), + free_space_end_(storage + size), + ring_buffer_size_(RingBufferSize()) {} + + Thread *CreateCurrentThread() { + Thread *t; + { + SpinMutexLock l(&list_mutex_); + t = free_list_.Pop(); + if (t) + internal_memset((void *)t, 0, sizeof(Thread) + ring_buffer_size_); + else + t = AllocThread(); + live_list_.Push(t); + } + t->Init((uptr)(t + 1), ring_buffer_size_); + AddThreadStats(t); + return t; + } + + void ReleaseThread(Thread *t) { + // FIXME: madvise away the ring buffer? 
+ RemoveThreadStats(t); + t->Destroy(); + SpinMutexLock l(&list_mutex_); + live_list_.Remove(t); + free_list_.Push(t); + } + + Thread *GetThreadByBufferAddress(uptr p) { + uptr align = ring_buffer_size_ * 2; + return (Thread *)(RoundDownTo(p, align) - sizeof(Thread)); + } + + uptr MemoryUsedPerThread() { + uptr res = sizeof(Thread) + ring_buffer_size_; + if (auto sz = flags()->heap_history_size) + res += HeapAllocationsRingBuffer::SizeInBytes(sz); + return res; + } + + template <class CB> + void VisitAllLiveThreads(CB cb) { + SpinMutexLock l(&list_mutex_); + live_list_.ForEach(cb); + } + + void AddThreadStats(Thread *t) { + SpinMutexLock l(&stats_mutex_); + stats_.n_live_threads++; + stats_.total_stack_size += t->stack_size(); + } + + void RemoveThreadStats(Thread *t) { + SpinMutexLock l(&stats_mutex_); + stats_.n_live_threads--; + stats_.total_stack_size -= t->stack_size(); + } + + ThreadStats GetThreadStats() { + SpinMutexLock l(&stats_mutex_); + return stats_; + } + + private: + Thread *AllocThread() { + uptr align = ring_buffer_size_ * 2; + uptr ring_buffer_start = RoundUpTo(free_space_ + sizeof(Thread), align); + free_space_ = ring_buffer_start + ring_buffer_size_; + CHECK(free_space_ <= free_space_end_ && "out of thread memory"); + return (Thread *)(ring_buffer_start - sizeof(Thread)); + } + + uptr free_space_; + uptr free_space_end_; + uptr ring_buffer_size_; + + ThreadListHead free_list_; + ThreadListHead live_list_; + SpinMutex list_mutex_; + + ThreadStats stats_; + SpinMutex stats_mutex_; +}; + +void InitThreadList(uptr storage, uptr size); +HwasanThreadList &hwasanThreadList(); + +} // namespace diff --git a/lib/interception/interception.h b/lib/interception/interception.h index ddd6ec209..07142f17b 100644 --- a/lib/interception/interception.h +++ b/lib/interception/interception.h @@ -29,6 +29,7 @@ typedef __sanitizer::uptr SIZE_T; typedef __sanitizer::sptr SSIZE_T; typedef __sanitizer::sptr PTRDIFF_T; typedef __sanitizer::s64 INTMAX_T; +typedef __sanitizer::u64 UINTMAX_T; typedef __sanitizer::OFF_T OFF_T; typedef __sanitizer::OFF64_T OFF64_T; diff --git a/lib/interception/interception_win.cc b/lib/interception/interception_win.cc index bd4ad7274..cd13827e5 100644 --- a/lib/interception/interception_win.cc +++ b/lib/interception/interception_win.cc @@ -223,8 +223,8 @@ static bool IsMemoryPadding(uptr address, uptr size) { return true; } -static const u8 kHintNop9Bytes[] = { - 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 +static const u8 kHintNop8Bytes[] = { + 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 }; template<class T> @@ -239,8 +239,8 @@ static bool FunctionHasPrefix(uptr address, const T &pattern) { static bool FunctionHasPadding(uptr address, uptr size) { if (IsMemoryPadding(address - size, size)) return true; - if (size <= sizeof(kHintNop9Bytes) && - FunctionHasPrefix(address, kHintNop9Bytes)) + if (size <= sizeof(kHintNop8Bytes) && + FunctionHasPrefix(address, kHintNop8Bytes)) return true; return false; } diff --git a/lib/lsan/lsan_common.cc b/lib/lsan/lsan_common.cc index 012a673c3..eaa5cadc8 100644 --- a/lib/lsan/lsan_common.cc +++ b/lib/lsan/lsan_common.cc @@ -100,8 +100,6 @@ static SuppressionContext *GetSuppressionContext() { static InternalMmapVector<RootRegion> *root_regions; -static uptr initialized_for_pid; - InternalMmapVector<RootRegion> const *GetRootRegions() { return root_regions; } void InitializeRootRegions() { @@ -115,7 +113,6 @@ const char *MaybeCallLsanDefaultOptions() { } void InitCommonLsan() { - initialized_for_pid = internal_getpid(); 
InitializeRootRegions(); if (common_flags()->detect_leaks) { // Initialization which can fail or print warnings should only be done if @@ -571,12 +568,6 @@ static void CheckForLeaksCallback(const SuspendedThreadsList &suspended_threads, static bool CheckForLeaks() { if (&__lsan_is_turned_off && __lsan_is_turned_off()) return false; - if (initialized_for_pid != internal_getpid()) { - // If process was forked and it had threads we fail to detect references - // from other threads. - Report("WARNING: LeakSanitizer is disabled in forked process.\n"); - return false; - } EnsureMainThreadIDIsCorrect(); CheckForLeaksParam param; param.success = false; diff --git a/lib/lsan/lsan_common_mac.cc b/lib/lsan/lsan_common_mac.cc index 2508c1dbd..8dd247ca5 100644 --- a/lib/lsan/lsan_common_mac.cc +++ b/lib/lsan/lsan_common_mac.cc @@ -142,12 +142,6 @@ void ProcessGlobalRegions(Frontier *frontier) { } void ProcessPlatformSpecificAllocations(Frontier *frontier) { - mach_port_name_t port; - if (task_for_pid(mach_task_self(), internal_getpid(), &port) - != KERN_SUCCESS) { - return; - } - unsigned depth = 1; vm_size_t size = 0; vm_address_t address = 0; @@ -158,7 +152,7 @@ void ProcessPlatformSpecificAllocations(Frontier *frontier) { while (err == KERN_SUCCESS) { struct vm_region_submap_info_64 info; - err = vm_region_recurse_64(port, &address, &size, &depth, + err = vm_region_recurse_64(mach_task_self(), &address, &size, &depth, (vm_region_info_t)&info, &count); uptr end_address = address + size; diff --git a/lib/msan/msan.cc b/lib/msan/msan.cc index 06bcbdf88..ba2d5d593 100644 --- a/lib/msan/msan.cc +++ b/lib/msan/msan.cc @@ -59,6 +59,10 @@ SANITIZER_INTERFACE_ATTRIBUTE ALIGNED(16) THREADLOCAL u64 __msan_va_arg_tls[kMsanParamTlsSize / sizeof(u64)]; SANITIZER_INTERFACE_ATTRIBUTE +ALIGNED(16) +THREADLOCAL u32 __msan_va_arg_origin_tls[kMsanParamTlsSize / sizeof(u32)]; + +SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL u64 __msan_va_arg_overflow_size_tls; SANITIZER_INTERFACE_ATTRIBUTE @@ -277,6 +281,8 @@ void ScopedThreadLocalStateBackup::Restore() { internal_memset(__msan_param_tls, 0, sizeof(__msan_param_tls)); internal_memset(__msan_retval_tls, 0, sizeof(__msan_retval_tls)); internal_memset(__msan_va_arg_tls, 0, sizeof(__msan_va_arg_tls)); + internal_memset(__msan_va_arg_origin_tls, 0, + sizeof(__msan_va_arg_origin_tls)); if (__msan_get_track_origins()) { internal_memset(&__msan_retval_origin_tls, 0, diff --git a/lib/profile/WindowsMMap.h b/lib/profile/WindowsMMap.h index 271619aea..ac2c911c8 100644 --- a/lib/profile/WindowsMMap.h +++ b/lib/profile/WindowsMMap.h @@ -12,7 +12,7 @@ #if defined(_WIN32) -#include <BaseTsd.h> +#include <basetsd.h> #include <io.h> #include <sys/types.h> diff --git a/lib/safestack/safestack.cc b/lib/safestack/safestack.cc index 920b89b5e..e68208015 100644 --- a/lib/safestack/safestack.cc +++ b/lib/safestack/safestack.cc @@ -152,6 +152,7 @@ static void *thread_start(void *arg) { struct thread_stack_ll { struct thread_stack_ll *next; void *stack_base; + size_t size; pid_t pid; tid_t tid; }; @@ -182,8 +183,11 @@ static void thread_cleanup_handler(void *_iter) { thread_stack_ll **stackp = &temp_stacks; while (*stackp) { thread_stack_ll *stack = *stackp; - if (stack->pid != pid || TgKill(stack->pid, stack->tid, 0) == -ESRCH) { - UnmapOrDie(stack->stack_base, unsafe_stack_size + unsafe_stack_guard); + int error; + if (stack->pid != pid || + (internal_iserror(TgKill(stack->pid, stack->tid, 0), &error) && + error == ESRCH)) { + UnmapOrDie(stack->stack_base, stack->size); *stackp = stack->next; 
free(stack); } else @@ -193,6 +197,7 @@ static void thread_cleanup_handler(void *_iter) { thread_stack_ll *cur_stack = (thread_stack_ll *)malloc(sizeof(thread_stack_ll)); cur_stack->stack_base = (char *)unsafe_stack_start - unsafe_stack_guard; + cur_stack->size = unsafe_stack_size + unsafe_stack_guard; cur_stack->pid = pid; cur_stack->tid = tid; diff --git a/lib/sanitizer_common/CMakeLists.txt b/lib/sanitizer_common/CMakeLists.txt index 1be99616e..10b857ef8 100644 --- a/lib/sanitizer_common/CMakeLists.txt +++ b/lib/sanitizer_common/CMakeLists.txt @@ -158,6 +158,7 @@ set(SANITIZER_IMPL_HEADERS sanitizer_procmaps.h sanitizer_quarantine.h sanitizer_report_decorator.h + sanitizer_ring_buffer.h sanitizer_rtems.h sanitizer_signal_interceptors.inc sanitizer_stackdepot.h diff --git a/lib/sanitizer_common/sanitizer_allocator_size_class_map.h b/lib/sanitizer_common/sanitizer_allocator_size_class_map.h index 77ab4fb54..07958424c 100644 --- a/lib/sanitizer_common/sanitizer_allocator_size_class_map.h +++ b/lib/sanitizer_common/sanitizer_allocator_size_class_map.h @@ -232,3 +232,8 @@ class SizeClassMap { typedef SizeClassMap<3, 4, 8, 17, 128, 16> DefaultSizeClassMap; typedef SizeClassMap<3, 4, 8, 17, 64, 14> CompactSizeClassMap; typedef SizeClassMap<2, 5, 9, 16, 64, 14> VeryCompactSizeClassMap; + +// The following SizeClassMap only holds a very small number of cached entries, +// allowing for denser per-class arrays, a smaller memory footprint, and +// usually better performance in threaded environments. +typedef SizeClassMap<3, 4, 8, 17, 8, 10> DenseSizeClassMap; diff --git a/lib/sanitizer_common/sanitizer_common.cc b/lib/sanitizer_common/sanitizer_common.cc index 7d72b0cfe..686896190 100644 --- a/lib/sanitizer_common/sanitizer_common.cc +++ b/lib/sanitizer_common/sanitizer_common.cc @@ -339,11 +339,6 @@ int __sanitizer_acquire_crash_state() { } SANITIZER_INTERFACE_ATTRIBUTE -void __sanitizer_set_death_callback(void (*callback)(void)) { - SetUserDieCallback(callback); -} - -SANITIZER_INTERFACE_ATTRIBUTE int __sanitizer_install_malloc_and_free_hooks(void (*malloc_hook)(const void *, uptr), void (*free_hook)(const void *)) { diff --git a/lib/sanitizer_common/sanitizer_common_interceptors.inc b/lib/sanitizer_common/sanitizer_common_interceptors.inc index 801d6a0bd..530469997 100644 --- a/lib/sanitizer_common/sanitizer_common_interceptors.inc +++ b/lib/sanitizer_common/sanitizer_common_interceptors.inc @@ -3345,14 +3345,14 @@ INTERCEPTOR(INTMAX_T, strtoimax, const char *nptr, char **endptr, int base) { return res; } -INTERCEPTOR(INTMAX_T, strtoumax, const char *nptr, char **endptr, int base) { +INTERCEPTOR(UINTMAX_T, strtoumax, const char *nptr, char **endptr, int base) { void *ctx; COMMON_INTERCEPTOR_ENTER(ctx, strtoumax, nptr, endptr, base); // FIXME: under ASan the call below may write to freed memory and corrupt // its metadata. See // https://github.com/google/sanitizers/issues/321.
char *real_endptr; - INTMAX_T res = REAL(strtoumax)(nptr, &real_endptr, base); + UINTMAX_T res = REAL(strtoumax)(nptr, &real_endptr, base); StrtolFixAndCheck(ctx, nptr, endptr, real_endptr, base); return res; } diff --git a/lib/sanitizer_common/sanitizer_coverage_fuchsia.cc b/lib/sanitizer_common/sanitizer_coverage_fuchsia.cc index b4ffcca5c..f27b95fa9 100644 --- a/lib/sanitizer_common/sanitizer_coverage_fuchsia.cc +++ b/lib/sanitizer_common/sanitizer_coverage_fuchsia.cc @@ -146,9 +146,9 @@ class TracePcGuardController final { // indices, but we'll never move the mapping address so we don't have // any multi-thread synchronization issues with that. uintptr_t mapping; - status = _zx_vmar_map_old(_zx_vmar_root_self(), 0, vmo_, 0, MappingSize, - ZX_VM_FLAG_PERM_READ | ZX_VM_FLAG_PERM_WRITE, - &mapping); + status = + _zx_vmar_map(_zx_vmar_root_self(), ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, + 0, vmo_, 0, MappingSize, &mapping); CHECK_EQ(status, ZX_OK); // Hereafter other threads are free to start storing into diff --git a/lib/sanitizer_common/sanitizer_coverage_win_sections.cc b/lib/sanitizer_common/sanitizer_coverage_win_sections.cc index 4b0bbf1ed..108f76eff 100644 --- a/lib/sanitizer_common/sanitizer_coverage_win_sections.cc +++ b/lib/sanitizer_common/sanitizer_coverage_win_sections.cc @@ -7,16 +7,57 @@ // //===----------------------------------------------------------------------===// // -// This file defines delimiters for Sanitizer Coverage's section. +// This file defines delimiters for Sanitizer Coverage's section. It contains +// Windows-specific tricks to coax the linker into giving us the start and stop +// addresses of a section, as ELF linkers can do, to get the size of certain +// arrays. According to https://msdn.microsoft.com/en-us/library/7977wcck.aspx +// sections with the same name before "$" are sorted alphabetically by the +// string that comes after "$" and merged into one section. We take advantage +// of this by putting data we want the size of into the middle (M) of a section, +// by using the letter "M" after "$". We get the start of this data (i.e. +// __start_section_name) by making the start variable come at the start of the +// section (using the letter A after "$"). We do the same to get the end of the +// data by using the letter "Z" after "$" to make the end variable come after +// the data. Note that because of our technique the address of the start +// variable is actually the address of data that comes before our middle +// section. We also need to prevent the linker from adding any padding. Each +// technique we use for this is explained in the comments below. //===----------------------------------------------------------------------===// #include "sanitizer_platform.h" #if SANITIZER_WINDOWS #include <stdint.h> -#pragma section(".SCOV$A", read, write) // NOLINT -#pragma section(".SCOV$Z", read, write) // NOLINT extern "C" { -__declspec(allocate(".SCOV$A")) uint32_t __start___sancov_guards = 0; -__declspec(allocate(".SCOV$Z")) uint32_t __stop___sancov_guards = 0; +// The Guard array and counter array should both be merged into the .data +// section to reduce the number of PE sections. However, because PCTable is +// constant, it should be merged with the .rdata section. +#pragma section(".SCOV$GA", read, write) // NOLINT +// Use align(1) to avoid adding any padding that will mess up clients trying to +// determine the start and end of the array.
+__declspec(allocate(".SCOV$GA")) __declspec(align(1)) uint64_t + __start___sancov_guards = 0; +#pragma section(".SCOV$GZ", read, write) // NOLINT +__declspec(allocate(".SCOV$GZ")) __declspec(align(1)) uint64_t + __stop___sancov_guards = 0; + +#pragma section(".SCOV$CA", read, write) // NOLINT +__declspec(allocate(".SCOV$CA")) __declspec(align(1)) uint64_t + __start___sancov_cntrs = 0; +#pragma section(".SCOV$CZ", read, write) // NOLINT +__declspec(allocate(".SCOV$CZ")) __declspec(align(1)) uint64_t + __stop___sancov_cntrs = 0; + +#pragma comment(linker, "/MERGE:.SCOV=.data") + +// Use uint64_t so there won't be any issues if the linker tries to word align +// the pc array. +#pragma section(".SCOVP$A", read) // NOLINT +__declspec(allocate(".SCOVP$A")) __declspec(align(1)) uint64_t + __start___sancov_pcs = 0; +#pragma section(".SCOVP$Z", read) // NOLINT +__declspec(allocate(".SCOVP$Z")) __declspec(align(1)) uint64_t + __stop___sancov_pcs = 0; + +#pragma comment(linker, "/MERGE:.SCOVP=.rdata") } -#endif // SANITIZER_WINDOWS +#endif // SANITIZER_WINDOWS diff --git a/lib/sanitizer_common/sanitizer_fuchsia.cc b/lib/sanitizer_common/sanitizer_fuchsia.cc index 391620690..9c54e1ed3 100644 --- a/lib/sanitizer_common/sanitizer_fuchsia.cc +++ b/lib/sanitizer_common/sanitizer_fuchsia.cc @@ -29,9 +29,6 @@ namespace __sanitizer { -// TODO(phosek): remove this and replace it with ZX_TIME_INFINITE -#define ZX_TIME_INFINITE_OLD INT64_MAX - void NORETURN internal__exit(int exitcode) { _zx_process_exit(exitcode); } uptr internal_sched_yield() { @@ -123,7 +120,7 @@ void BlockingMutex::Lock() { return; while (atomic_exchange(m, MtxSleeping, memory_order_acquire) != MtxUnlocked) { zx_status_t status = _zx_futex_wait(reinterpret_cast<zx_futex_t *>(m), - MtxSleeping, ZX_TIME_INFINITE_OLD); + MtxSleeping, ZX_TIME_INFINITE); if (status != ZX_ERR_BAD_STATE) // Normal race. CHECK_EQ(status, ZX_OK); } @@ -175,8 +172,8 @@ static void *DoAnonymousMmapOrDie(uptr size, const char *mem_type, // TODO(mcgrathr): Maybe allocate a VMAR for all sanitizer heap and use that? uintptr_t addr; status = - _zx_vmar_map_old(_zx_vmar_root_self(), 0, vmo, 0, size, - ZX_VM_FLAG_PERM_READ | ZX_VM_FLAG_PERM_WRITE, &addr); + _zx_vmar_map(_zx_vmar_root_self(), ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, 0, + vmo, 0, size, &addr); _zx_handle_close(vmo); if (status != ZX_OK) { @@ -239,10 +236,9 @@ static uptr DoMmapFixedOrDie(zx_handle_t vmar, uptr fixed_addr, uptr map_size, DCHECK_GE(base + size_, map_size + offset); uintptr_t addr; - status = _zx_vmar_map_old( - vmar, offset, vmo, 0, map_size, - ZX_VM_FLAG_PERM_READ | ZX_VM_FLAG_PERM_WRITE | ZX_VM_FLAG_SPECIFIC, - &addr); + status = + _zx_vmar_map(vmar, ZX_VM_PERM_READ | ZX_VM_PERM_WRITE | ZX_VM_SPECIFIC, + offset, vmo, 0, map_size, &addr); _zx_handle_close(vmo); if (status != ZX_OK) { if (status != ZX_ERR_NO_MEMORY || die_for_nomem) { @@ -281,14 +277,22 @@ void UnmapOrDieVmar(void *addr, uptr size, zx_handle_t target_vmar) { void ReservedAddressRange::Unmap(uptr addr, uptr size) { CHECK_LE(size, size_); - if (addr == reinterpret_cast<uptr>(base_)) - // If we unmap the whole range, just null out the base. - base_ = (size == size_) ? nullptr : reinterpret_cast<void*>(addr + size); - else + const zx_handle_t vmar = static_cast<zx_handle_t>(os_handle_); + if (addr == reinterpret_cast<uptr>(base_)) { + if (size == size_) { + // Destroying the vmar effectively unmaps the whole mapping. 
+ _zx_vmar_destroy(vmar); + _zx_handle_close(vmar); + os_handle_ = static_cast<uptr>(ZX_HANDLE_INVALID); + DecreaseTotalMmap(size); + return; + } + } else { CHECK_EQ(addr + size, reinterpret_cast<uptr>(base_) + size_); - size_ -= size; - UnmapOrDieVmar(reinterpret_cast<void *>(addr), size, - static_cast<zx_handle_t>(os_handle_)); + } + // Partial unmapping does not affect the fact that the initial range is still + // reserved, and the resulting unmapped memory can't be reused. + UnmapOrDieVmar(reinterpret_cast<void *>(addr), size, vmar); } // This should never be called. @@ -321,8 +325,8 @@ void *MmapAlignedOrDieOnFatalError(uptr size, uptr alignment, size_t map_size = size + alignment; uintptr_t addr; status = - _zx_vmar_map_old(_zx_vmar_root_self(), 0, vmo, 0, map_size, - ZX_VM_FLAG_PERM_READ | ZX_VM_FLAG_PERM_WRITE, &addr); + _zx_vmar_map(_zx_vmar_root_self(), ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, 0, + vmo, 0, map_size, &addr); if (status == ZX_OK) { uintptr_t map_addr = addr; uintptr_t map_end = map_addr + map_size; @@ -334,11 +338,10 @@ void *MmapAlignedOrDieOnFatalError(uptr size, uptr alignment, sizeof(info), NULL, NULL); if (status == ZX_OK) { uintptr_t new_addr; - status = _zx_vmar_map_old(_zx_vmar_root_self(), addr - info.base, vmo, - 0, size, - ZX_VM_FLAG_PERM_READ | ZX_VM_FLAG_PERM_WRITE | - ZX_VM_FLAG_SPECIFIC_OVERWRITE, - &new_addr); + status = _zx_vmar_map( + _zx_vmar_root_self(), + ZX_VM_PERM_READ | ZX_VM_PERM_WRITE | ZX_VM_SPECIFIC_OVERWRITE, + addr - info.base, vmo, 0, size, &new_addr); if (status == ZX_OK) CHECK_EQ(new_addr, addr); } } @@ -398,8 +401,8 @@ bool ReadFileToBuffer(const char *file_name, char **buff, uptr *buff_size, if (vmo_size < max_len) max_len = vmo_size; size_t map_size = RoundUpTo(max_len, PAGE_SIZE); uintptr_t addr; - status = _zx_vmar_map_old(_zx_vmar_root_self(), 0, vmo, 0, map_size, - ZX_VM_FLAG_PERM_READ, &addr); + status = _zx_vmar_map(_zx_vmar_root_self(), ZX_VM_PERM_READ, 0, vmo, 0, + map_size, &addr); if (status == ZX_OK) { *buff = reinterpret_cast<char *>(addr); *buff_size = map_size; diff --git a/lib/sanitizer_common/sanitizer_internal_defs.h b/lib/sanitizer_common/sanitizer_internal_defs.h index f8a405ba6..34022430d 100644 --- a/lib/sanitizer_common/sanitizer_internal_defs.h +++ b/lib/sanitizer_common/sanitizer_internal_defs.h @@ -105,8 +105,8 @@ // // FIXME: do we have anything like this on Mac? #ifndef SANITIZER_CAN_USE_PREINIT_ARRAY -#if ((SANITIZER_LINUX && !SANITIZER_ANDROID) || SANITIZER_OPENBSD) && \ - !defined(PIC) +#if ((SANITIZER_LINUX && !SANITIZER_ANDROID) || SANITIZER_OPENBSD || \ + SANITIZER_FUCHSIA) && !defined(PIC) #define SANITIZER_CAN_USE_PREINIT_ARRAY 1 // Before Solaris 11.4, .preinit_array is fully supported only with GNU ld. // FIXME: Check for those conditions. @@ -275,8 +275,6 @@ typedef thread_return_t (THREAD_CALLING_CONV *thread_callback_t)(void* arg); // NOTE: Functions below must be defined in each run-time. void NORETURN Die(); -// FIXME: No, this shouldn't be in the sanitizer interface. 
-SANITIZER_INTERFACE_ATTRIBUTE void NORETURN CheckFailed(const char *file, int line, const char *cond, u64 v1, u64 v2); @@ -431,6 +429,7 @@ namespace __scudo { using namespace __sanitizer; } // NOLINT namespace __ubsan { using namespace __sanitizer; } // NOLINT namespace __xray { using namespace __sanitizer; } // NOLINT namespace __interception { using namespace __sanitizer; } // NOLINT +namespace __hwasan { using namespace __sanitizer; } // NOLINT #endif // SANITIZER_DEFS_H diff --git a/lib/sanitizer_common/sanitizer_libc.cc b/lib/sanitizer_common/sanitizer_libc.cc index 4b462bfe9..4032cb104 100644 --- a/lib/sanitizer_common/sanitizer_libc.cc +++ b/lib/sanitizer_common/sanitizer_libc.cc @@ -73,6 +73,18 @@ void *internal_memmove(void *dest, const void *src, uptr n) { } void *internal_memset(void* s, int c, uptr n) { + // Optimize for the most performance-critical case: + if ((reinterpret_cast<uptr>(s) % 16) == 0 && (n % 16) == 0) { + u64 *p = reinterpret_cast<u64*>(s); + u64 *e = p + n / 8; + u64 v = c; + v |= v << 8; + v |= v << 16; + v |= v << 32; + for (; p < e; p += 2) + p[0] = p[1] = v; + return s; + } // The next line prevents Clang from making a call to memset() instead of the // loop below. // FIXME: building the runtime with -ffreestanding is a better idea. However diff --git a/lib/sanitizer_common/sanitizer_linux.cc b/lib/sanitizer_common/sanitizer_linux.cc index 9dbdd21ac..a2aa77869 100644 --- a/lib/sanitizer_common/sanitizer_linux.cc +++ b/lib/sanitizer_common/sanitizer_linux.cc @@ -69,6 +69,7 @@ #endif #if SANITIZER_OPENBSD #include <sys/futex.h> +#include <sys/sysctl.h> #endif #include <unistd.h> @@ -648,10 +649,10 @@ void ReExec() { CTL_KERN, KERN_PROC_ARGS, -1, KERN_PROC_PATHNAME, }; char path[400]; - size_t len; + uptr len; len = sizeof(path); - if (sysctl(name, ARRAY_SIZE(name), path, &len, NULL, 0) != -1) + if (internal_sysctl(name, ARRAY_SIZE(name), path, &len, NULL, 0) != -1) pathname = path; #elif SANITIZER_SOLARIS pathname = getexecname(); @@ -815,6 +816,18 @@ int internal_fork() { #endif } +#if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD +int internal_sysctl(const int *name, unsigned int namelen, void *oldp, + uptr *oldlenp, const void *newp, uptr newlen) { +#if SANITIZER_OPENBSD + return sysctl(name, namelen, oldp, (size_t *)oldlenp, (void *)newp, + (size_t)newlen); +#else + return sysctl(name, namelen, oldp, (size_t *)oldlenp, newp, (size_t)newlen); +#endif +} +#endif + #if SANITIZER_LINUX #define SA_RESTORER 0x04000000 // Doesn't set sa_restorer if the caller did not set it, so use with caution @@ -1114,8 +1127,9 @@ uptr ReadBinaryName(/*out*/char *buf, uptr buf_len) { const int Mib[4] = {CTL_KERN, KERN_PROC_ARGS, -1, KERN_PROC_PATHNAME}; #endif const char *default_module_name = "kern.proc.pathname"; - size_t Size = buf_len; - bool IsErr = (sysctl(Mib, ARRAY_SIZE(Mib), buf, &Size, NULL, 0) != 0); + uptr Size = buf_len; + bool IsErr = + (internal_sysctl(Mib, ARRAY_SIZE(Mib), buf, &Size, NULL, 0) != 0); int readlink_error = IsErr ? 
errno : 0; uptr module_name_len = Size; #else @@ -1657,6 +1671,16 @@ static int dl_iterate_phdr_test_cb(struct dl_phdr_info *info, size_t size, static atomic_uint32_t android_api_level; +static AndroidApiLevel AndroidDetectApiLevelStatic() { +#if __ANDROID_API__ <= 19 + return ANDROID_KITKAT; +#elif __ANDROID_API__ <= 22 + return ANDROID_LOLLIPOP_MR1; +#else + return ANDROID_POST_LOLLIPOP; +#endif +} + static AndroidApiLevel AndroidDetectApiLevel() { if (!&dl_iterate_phdr) return ANDROID_KITKAT; // K or lower @@ -1669,11 +1693,14 @@ static AndroidApiLevel AndroidDetectApiLevel() { // interesting to detect. } +extern "C" __attribute__((weak)) void* _DYNAMIC; + AndroidApiLevel AndroidGetApiLevel() { AndroidApiLevel level = (AndroidApiLevel)atomic_load(&android_api_level, memory_order_relaxed); if (level) return level; - level = AndroidDetectApiLevel(); + level = &_DYNAMIC == nullptr ? AndroidDetectApiLevelStatic() + : AndroidDetectApiLevel(); atomic_store(&android_api_level, level, memory_order_relaxed); return level; } @@ -1980,13 +2007,13 @@ void CheckASLR() { #if SANITIZER_NETBSD int mib[3]; int paxflags; - size_t len = sizeof(paxflags); + uptr len = sizeof(paxflags); mib[0] = CTL_PROC; mib[1] = internal_getpid(); mib[2] = PROC_PID_PAXFLAGS; - if (UNLIKELY(sysctl(mib, 3, &paxflags, &len, NULL, 0) == -1)) { + if (UNLIKELY(internal_sysctl(mib, 3, &paxflags, &len, NULL, 0) == -1)) { Printf("sysctl failed\n"); Die(); } diff --git a/lib/sanitizer_common/sanitizer_linux_libcdep.cc b/lib/sanitizer_common/sanitizer_linux_libcdep.cc index 4962ff832..7859557c8 100644 --- a/lib/sanitizer_common/sanitizer_linux_libcdep.cc +++ b/lib/sanitizer_common/sanitizer_linux_libcdep.cc @@ -652,10 +652,10 @@ u32 GetNumberOfCPUs() { #if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD u32 ncpu; int req[2]; - size_t len = sizeof(ncpu); + uptr len = sizeof(ncpu); req[0] = CTL_HW; req[1] = HW_NCPU; - CHECK_EQ(sysctl(req, 2, &ncpu, &len, NULL, 0), 0); + CHECK_EQ(internal_sysctl(req, 2, &ncpu, &len, NULL, 0), 0); return ncpu; #elif SANITIZER_ANDROID && !defined(CPU_COUNT) && !defined(__aarch64__) // Fall back to /sys/devices/system/cpu on Android when cpu_set_t doesn't diff --git a/lib/sanitizer_common/sanitizer_mac.cc b/lib/sanitizer_common/sanitizer_mac.cc index 48747bc83..4861558e9 100644 --- a/lib/sanitizer_common/sanitizer_mac.cc +++ b/lib/sanitizer_common/sanitizer_mac.cc @@ -213,6 +213,12 @@ int internal_fork() { return fork(); } +int internal_sysctl(const int *name, unsigned int namelen, void *oldp, + uptr *oldlenp, const void *newp, uptr newlen) { + return sysctl((int *)name, namelen, oldp, (size_t *)oldlenp, (void *)newp, + (size_t)newlen); +} + int internal_forkpty(int *amaster) { int master, slave; if (openpty(&master, &slave, nullptr, nullptr, nullptr) == -1) return -1; @@ -499,9 +505,9 @@ MacosVersion GetMacosVersionInternal() { uptr len = 0, maxlen = sizeof(version) / sizeof(version[0]); for (uptr i = 0; i < maxlen; i++) version[i] = '\0'; // Get the version length. 
- CHECK_NE(sysctl(mib, 2, 0, &len, 0, 0), -1); + CHECK_NE(internal_sysctl(mib, 2, 0, &len, 0, 0), -1); CHECK_LT(len, maxlen); - CHECK_NE(sysctl(mib, 2, version, &len, 0, 0), -1); + CHECK_NE(internal_sysctl(mib, 2, version, &len, 0, 0), -1); switch (version[0]) { case '9': return MACOS_VERSION_LEOPARD; case '1': { @@ -511,6 +517,10 @@ MacosVersion GetMacosVersionInternal() { case '2': return MACOS_VERSION_MOUNTAIN_LION; case '3': return MACOS_VERSION_MAVERICKS; case '4': return MACOS_VERSION_YOSEMITE; + case '5': return MACOS_VERSION_EL_CAPITAN; + case '6': return MACOS_VERSION_SIERRA; + case '7': return MACOS_VERSION_HIGH_SIERRA; + case '8': return MACOS_VERSION_MOJAVE; default: if (IsDigit(version[1])) return MACOS_VERSION_UNKNOWN_NEWER; @@ -1060,14 +1070,16 @@ void CheckNoDeepBind(const char *filename, int flag) { // Do nothing. } -// FIXME: implement on this platform. bool GetRandom(void *buffer, uptr length, bool blocking) { - UNIMPLEMENTED(); + if (!buffer || !length || length > 256) + return false; + // arc4random never fails. + arc4random_buf(buffer, length); + return true; } -// FIXME: implement on this platform. u32 GetNumberOfCPUs() { - UNIMPLEMENTED(); + return (u32)sysconf(_SC_NPROCESSORS_ONLN); } } // namespace __sanitizer diff --git a/lib/sanitizer_common/sanitizer_mac.h b/lib/sanitizer_common/sanitizer_mac.h index e022a2c03..58423a774 100644 --- a/lib/sanitizer_common/sanitizer_mac.h +++ b/lib/sanitizer_common/sanitizer_mac.h @@ -40,6 +40,10 @@ enum MacosVersion { MACOS_VERSION_MOUNTAIN_LION, MACOS_VERSION_MAVERICKS, MACOS_VERSION_YOSEMITE, + MACOS_VERSION_EL_CAPITAN, + MACOS_VERSION_SIERRA, + MACOS_VERSION_HIGH_SIERRA, + MACOS_VERSION_MOJAVE, MACOS_VERSION_UNKNOWN_NEWER }; diff --git a/lib/sanitizer_common/sanitizer_openbsd.cc b/lib/sanitizer_common/sanitizer_openbsd.cc index 2aea7cb14..dc955109a 100644 --- a/lib/sanitizer_common/sanitizer_openbsd.cc +++ b/lib/sanitizer_common/sanitizer_openbsd.cc @@ -54,9 +54,9 @@ int internal_mprotect(void *addr, uptr length, int prot) { uptr ReadBinaryName(/*out*/char *buf, uptr buf_len) { // On OpenBSD we cannot get the full path struct kinfo_proc kp; - size_t kl; + uptr kl; const int Mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PID, getpid()}; - if (sysctl(Mib, ARRAY_SIZE(Mib), &kp, &kl, NULL, 0) != -1) + if (internal_sysctl(Mib, ARRAY_SIZE(Mib), &kp, &kl, NULL, 0) != -1) return internal_snprintf(buf, (KI_MAXCOMLEN < buf_len ? 
KI_MAXCOMLEN : buf_len), "%s", kp.p_comm); @@ -64,23 +64,23 @@ uptr ReadBinaryName(/*out*/char *buf, uptr buf_len) { } static void GetArgsAndEnv(char ***argv, char ***envp) { - size_t nargv; - size_t nenv; + uptr nargv; + uptr nenv; int argvmib[4] = {CTL_KERN, KERN_PROC_ARGS, getpid(), KERN_PROC_ARGV}; int envmib[4] = {CTL_KERN, KERN_PROC_ARGS, getpid(), KERN_PROC_ENV}; - if (sysctl(argvmib, 4, NULL, &nargv, NULL, 0) == -1) { + if (internal_sysctl(argvmib, 4, NULL, &nargv, NULL, 0) == -1) { Printf("sysctl KERN_PROC_NARGV failed\n"); Die(); } - if (sysctl(envmib, 4, NULL, &nenv, NULL, 0) == -1) { + if (internal_sysctl(envmib, 4, NULL, &nenv, NULL, 0) == -1) { Printf("sysctl KERN_PROC_NENV failed\n"); Die(); } - if (sysctl(argvmib, 4, &argv, &nargv, NULL, 0) == -1) { + if (internal_sysctl(argvmib, 4, &argv, &nargv, NULL, 0) == -1) { Printf("sysctl KERN_PROC_ARGV failed\n"); Die(); } - if (sysctl(envmib, 4, &envp, &nenv, NULL, 0) == -1) { + if (internal_sysctl(envmib, 4, &envp, &nenv, NULL, 0) == -1) { Printf("sysctl KERN_PROC_ENV failed\n"); Die(); } diff --git a/lib/sanitizer_common/sanitizer_platform.h b/lib/sanitizer_common/sanitizer_platform.h index d81e25580..106a147e5 100644 --- a/lib/sanitizer_common/sanitizer_platform.h +++ b/lib/sanitizer_common/sanitizer_platform.h @@ -235,7 +235,12 @@ #if defined(__mips__) # define SANITIZER_MMAP_RANGE_SIZE FIRST_32_SECOND_64(1ULL << 32, 1ULL << 40) #elif defined(__aarch64__) -# define SANITIZER_MMAP_RANGE_SIZE FIRST_32_SECOND_64(1ULL << 32, 1ULL << 48) +# if SANITIZER_MAC +// Darwin iOS/ARM64 has a 36-bit VMA, 64GiB VM +# define SANITIZER_MMAP_RANGE_SIZE FIRST_32_SECOND_64(1ULL << 32, 1ULL << 36) +# else +# define SANITIZER_MMAP_RANGE_SIZE FIRST_32_SECOND_64(1ULL << 32, 1ULL << 48) +# endif #else # define SANITIZER_MMAP_RANGE_SIZE FIRST_32_SECOND_64(1ULL << 32, 1ULL << 47) #endif diff --git a/lib/sanitizer_common/sanitizer_platform_limits_posix.cc b/lib/sanitizer_common/sanitizer_platform_limits_posix.cc index edc744a80..cd1b73d58 100644 --- a/lib/sanitizer_common/sanitizer_platform_limits_posix.cc +++ b/lib/sanitizer_common/sanitizer_platform_limits_posix.cc @@ -1037,7 +1037,11 @@ CHECK_SIZE_AND_OFFSET(cmsghdr, cmsg_len); CHECK_SIZE_AND_OFFSET(cmsghdr, cmsg_level); CHECK_SIZE_AND_OFFSET(cmsghdr, cmsg_type); -#if SANITIZER_LINUX && (!defined(__ANDROID__) || __ANDROID_API__ >= 21) +#ifndef __GLIBC_PREREQ +#define __GLIBC_PREREQ(x, y) 0 +#endif + +#if SANITIZER_LINUX && (__ANDROID_API__ >= 21 || __GLIBC_PREREQ (2, 14)) CHECK_TYPE_SIZE(mmsghdr); CHECK_SIZE_AND_OFFSET(mmsghdr, msg_hdr); CHECK_SIZE_AND_OFFSET(mmsghdr, msg_len); @@ -1078,9 +1082,6 @@ COMPILER_CHECK(sizeof(__sanitizer_sigaction) == sizeof(struct sigaction)); // Can't write checks for sa_handler and sa_sigaction due to them being // preprocessor macros. CHECK_STRUCT_SIZE_AND_OFFSET(sigaction, sa_mask); -#ifndef __GLIBC_PREREQ -#define __GLIBC_PREREQ(x, y) 0 -#endif #if !defined(__s390x__) || __GLIBC_PREREQ (2, 20) // On s390x glibc 2.19 and earlier sa_flags was unsigned long, and sa_resv // didn't exist. 
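The sysctl hunks above all follow one mechanical pattern: callers stop invoking libc sysctl() with size_t lengths and instead use the new internal_sysctl() wrapper (declared in sanitizer_posix.h in the next hunk) with uptr lengths, so the size_t casting happens in exactly one place per platform. A minimal sketch of the calling convention, modeled on the GetNumberOfCPUs() hunk earlier in this patch (editorial illustration only):

int req[2] = {CTL_HW, HW_NCPU};  // query the CPU count, as in the patch
u32 ncpu;
uptr len = sizeof(ncpu);         // uptr on the sanitizer side, not size_t
CHECK_EQ(internal_sysctl(req, 2, &ncpu, &len, NULL, 0), 0);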
diff --git a/lib/sanitizer_common/sanitizer_posix.h b/lib/sanitizer_common/sanitizer_posix.h index da447002b..c81eda61c 100644 --- a/lib/sanitizer_common/sanitizer_posix.h +++ b/lib/sanitizer_common/sanitizer_posix.h @@ -60,6 +60,9 @@ uptr internal_waitpid(int pid, int *status, int options); int internal_fork(); int internal_forkpty(int *amaster); +int internal_sysctl(const int *name, unsigned int namelen, void *oldp, + uptr *oldlenp, const void *newp, uptr newlen); + // These functions call appropriate pthread_ functions directly, bypassing // the interceptor. They are weak and may not be present in some tools. SANITIZER_WEAK_ATTRIBUTE diff --git a/lib/sanitizer_common/sanitizer_procmaps_mac.cc b/lib/sanitizer_common/sanitizer_procmaps_mac.cc index 0167ab18b..d90e4b993 100644 --- a/lib/sanitizer_common/sanitizer_procmaps_mac.cc +++ b/lib/sanitizer_common/sanitizer_procmaps_mac.cc @@ -140,12 +140,6 @@ void MemoryMappingLayout::LoadFromCache() { // early in the process, when dyld is one of the only images loaded, // so it will be hit after only a few iterations. static mach_header *get_dyld_image_header() { - mach_port_name_t port; - if (task_for_pid(mach_task_self(), internal_getpid(), &port) != - KERN_SUCCESS) { - return nullptr; - } - unsigned depth = 1; vm_size_t size = 0; vm_address_t address = 0; @@ -154,7 +148,7 @@ static mach_header *get_dyld_image_header() { while (true) { struct vm_region_submap_info_64 info; - err = vm_region_recurse_64(port, &address, &size, &depth, + err = vm_region_recurse_64(mach_task_self(), &address, &size, &depth, (vm_region_info_t)&info, &count); if (err != KERN_SUCCESS) return nullptr; diff --git a/lib/sanitizer_common/sanitizer_ring_buffer.h b/lib/sanitizer_common/sanitizer_ring_buffer.h new file mode 100644 index 000000000..d15f27fd4 --- /dev/null +++ b/lib/sanitizer_common/sanitizer_ring_buffer.h @@ -0,0 +1,162 @@ +//===-- sanitizer_ring_buffer.h ---------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Simple ring buffer. +// +//===----------------------------------------------------------------------===// +#ifndef SANITIZER_RING_BUFFER_H +#define SANITIZER_RING_BUFFER_H + +#include "sanitizer_common.h" + +namespace __sanitizer { +// RingBuffer<T>: fixed-size ring buffer optimized for speed of push(). +// T should be a POD type and sizeof(T) should be divisible by sizeof(void*). +// At creation, all elements are zero. +template<class T> +class RingBuffer { + public: + COMPILER_CHECK(sizeof(T) % sizeof(void *) == 0); + static RingBuffer *New(uptr Size) { + void *Ptr = MmapOrDie(SizeInBytes(Size), "RingBuffer"); + RingBuffer *RB = reinterpret_cast<RingBuffer*>(Ptr); + uptr End = reinterpret_cast<uptr>(Ptr) + SizeInBytes(Size); + RB->last_ = RB->next_ = reinterpret_cast<T*>(End - sizeof(T)); + return RB; + } + void Delete() { + UnmapOrDie(this, SizeInBytes(size())); + } + uptr size() const { + return last_ + 1 - + reinterpret_cast<T *>(reinterpret_cast<uptr>(this) + + 2 * sizeof(T *)); + } + + static uptr SizeInBytes(uptr Size) { + return Size * sizeof(T) + 2 * sizeof(T*); + } + + uptr SizeInBytes() { return SizeInBytes(size()); } + + void push(T t) { + *next_ = t; + next_--; + // The condition below works only if sizeof(T) is divisible by sizeof(T*). 
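// (Editorial note, not part of the patch: data_ begins right after the
// last_ and next_ fields in the same mmap'ed block, and next_ walks backward
// from the end of that block. Once it decrements past data_[0] it lands on
// or before the next_ field itself, which is what the comparison below
// detects before resetting next_ to last_, the highest slot.)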
+ if (next_ <= reinterpret_cast<T*>(&next_)) + next_ = last_; + } + + T operator[](uptr Idx) const { + CHECK_LT(Idx, size()); + sptr IdxNext = Idx + 1; + if (IdxNext > last_ - next_) + IdxNext -= size(); + return next_[IdxNext]; + } + + private: + RingBuffer() {} + ~RingBuffer() {} + RingBuffer(const RingBuffer&) = delete; + + // Data layout: + // LNDDDDDDDD + // D: data elements. + // L: last_, always points to the last data element. + // N: next_, initially equal to last_, is decremented on every push and + // wraps around when it is less than or equal to its own address. + T *last_; + T *next_; + T data_[1]; // flexible array. +}; + +// A ring buffer with externally provided storage that encodes its state in 8 +// bytes. Has significant constraints on size and alignment of storage. +// See a comment in hwasan/hwasan_thread_list.h for the motivation behind this. +#if SANITIZER_WORDSIZE == 64 +template <class T> +class CompactRingBuffer { + // Top byte of long_ stores the buffer size in pages. + // Lower bytes store the address of the next buffer element. + static constexpr int kPageSizeBits = 12; + static constexpr int kSizeShift = 56; + static constexpr uptr kNextMask = (1ULL << kSizeShift) - 1; + + uptr GetStorageSize() const { return (long_ >> kSizeShift) << kPageSizeBits; } + + void Init(void *storage, uptr size) { + CHECK_EQ(sizeof(CompactRingBuffer<T>), sizeof(void *)); + CHECK(IsPowerOfTwo(size)); + CHECK_GE(size, 1 << kPageSizeBits); + CHECK_LE(size, 128 << kPageSizeBits); + CHECK_EQ(size % 4096, 0); + CHECK_EQ(size % sizeof(T), 0); + CHECK_EQ((uptr)storage % (size * 2), 0); + long_ = (uptr)storage | ((size >> kPageSizeBits) << kSizeShift); + } + + void SetNext(const T *next) { + long_ = (long_ & ~kNextMask) | (uptr)next; + } + + public: + CompactRingBuffer(void *storage, uptr size) { + Init(storage, size); + } + + // A copy constructor of sorts.
+ CompactRingBuffer(const CompactRingBuffer &other, void *storage) { + uptr size = other.GetStorageSize(); + internal_memcpy(storage, other.StartOfStorage(), size); + Init(storage, size); + uptr Idx = other.Next() - (const T *)other.StartOfStorage(); + SetNext((const T *)storage + Idx); + } + + T *Next() const { return (T *)(long_ & kNextMask); } + + void *StartOfStorage() const { + return (void *)((uptr)Next() & ~(GetStorageSize() - 1)); + } + + void *EndOfStorage() const { + return (void *)((uptr)StartOfStorage() + GetStorageSize()); + } + + uptr size() const { return GetStorageSize() / sizeof(T); } + + void push(T t) { + T *next = Next(); + *next = t; + next++; + next = (T *)((uptr)next & ~GetStorageSize()); + SetNext(next); + } + + T operator[](uptr Idx) const { + CHECK_LT(Idx, size()); + const T *Begin = (const T *)StartOfStorage(); + sptr StorageIdx = Next() - Begin; + StorageIdx -= (sptr)(Idx + 1); + if (StorageIdx < 0) + StorageIdx += size(); + return Begin[StorageIdx]; + } + + public: + ~CompactRingBuffer() {} + CompactRingBuffer(const CompactRingBuffer &) = delete; + + uptr long_; +}; +#endif +} // namespace __sanitizer + +#endif // SANITIZER_RING_BUFFER_H diff --git a/lib/sanitizer_common/sanitizer_termination.cc b/lib/sanitizer_common/sanitizer_termination.cc index 8243fc05d..35e4403ad 100644 --- a/lib/sanitizer_common/sanitizer_termination.cc +++ b/lib/sanitizer_common/sanitizer_termination.cc @@ -84,3 +84,12 @@ void NORETURN CheckFailed(const char *file, int line, const char *cond, } } // namespace __sanitizer + +using namespace __sanitizer; // NOLINT + +extern "C" { +SANITIZER_INTERFACE_ATTRIBUTE +void __sanitizer_set_death_callback(void (*callback)(void)) { + SetUserDieCallback(callback); +} +} // extern "C" diff --git a/lib/sanitizer_common/sanitizer_win_defs.h b/lib/sanitizer_common/sanitizer_win_defs.h index 077ff9ccc..10fc2d021 100644 --- a/lib/sanitizer_common/sanitizer_win_defs.h +++ b/lib/sanitizer_common/sanitizer_win_defs.h @@ -17,17 +17,27 @@ #if SANITIZER_WINDOWS #ifndef WINAPI -#ifdef _M_IX86 +#if defined(_M_IX86) || defined(__i386__) #define WINAPI __stdcall #else #define WINAPI #endif #endif -#if defined(_WIN64) +#if defined(_M_IX86) || defined(__i386__) +#define WIN_SYM_PREFIX "_" +#else #define WIN_SYM_PREFIX +#endif + +// For MinGW, the /export: directives contain undecorated symbols, contrary to +// link/lld-link. The GNU linker doesn't support /alternatename and /include +// though, thus lld-link in MinGW mode interprets them in the same way as +// in the default mode. +#ifdef __MINGW32__ +#define WIN_EXPORT_PREFIX #else -#define WIN_SYM_PREFIX "_" +#define WIN_EXPORT_PREFIX WIN_SYM_PREFIX #endif // Intermediate macro to ensure the parameter is expanded before stringified. 
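To illustrate what the WIN_EXPORT change in the next hunk produces (editorial sketch; foo and bar are placeholder names): WIN_EXPORT(foo, bar) in a 32-bit MSVC build, where WIN_EXPORT_PREFIX is "_", expands to the equivalent of

#pragma comment(linker, "/export:_foo=_bar")

while in a MinGW build, where WIN_EXPORT_PREFIX is empty, it emits the undecorated form that GNU ld and lld-link in MinGW mode expect:

#pragma comment(linker, "/export:foo=bar")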
@@ -62,8 +72,8 @@ __pragma(comment(linker, "/include:" WIN_SYM_PREFIX STRINGIFY(Name))) #define WIN_EXPORT(ExportedName, Name) \ - __pragma(comment(linker, "/export:" WIN_SYM_PREFIX STRINGIFY(ExportedName) \ - "=" WIN_SYM_PREFIX STRINGIFY(Name))) + __pragma(comment(linker, "/export:" WIN_EXPORT_PREFIX STRINGIFY(ExportedName)\ + "=" WIN_EXPORT_PREFIX STRINGIFY(Name))) // We cannot define weak functions on Windows, but we can use WIN_WEAK_ALIAS() // which defines an alias to a default implementation, and only works when diff --git a/lib/sanitizer_common/tests/CMakeLists.txt b/lib/sanitizer_common/tests/CMakeLists.txt index 401682baa..4642c59ec 100644 --- a/lib/sanitizer_common/tests/CMakeLists.txt +++ b/lib/sanitizer_common/tests/CMakeLists.txt @@ -26,6 +26,7 @@ set(SANITIZER_UNITTESTS sanitizer_posix_test.cc sanitizer_printf_test.cc sanitizer_procmaps_test.cc + sanitizer_ring_buffer_test.cc sanitizer_quarantine_test.cc sanitizer_stackdepot_test.cc sanitizer_stacktrace_printer_test.cc diff --git a/lib/sanitizer_common/tests/sanitizer_allocator_test.cc b/lib/sanitizer_common/tests/sanitizer_allocator_test.cc index ef4c10b8d..05fef252b 100644 --- a/lib/sanitizer_common/tests/sanitizer_allocator_test.cc +++ b/lib/sanitizer_common/tests/sanitizer_allocator_test.cc @@ -89,11 +89,20 @@ struct AP64VeryCompact { static const uptr kFlags = 0; }; +struct AP64Dense { + static const uptr kSpaceBeg = kAllocatorSpace; + static const uptr kSpaceSize = kAllocatorSize; + static const uptr kMetadataSize = 16; + typedef DenseSizeClassMap SizeClassMap; + typedef NoOpMapUnmapCallback MapUnmapCallback; + static const uptr kFlags = 0; +}; typedef SizeClassAllocator64<AP64> Allocator64; typedef SizeClassAllocator64<AP64Dyn> Allocator64Dynamic; typedef SizeClassAllocator64<AP64Compact> Allocator64Compact; typedef SizeClassAllocator64<AP64VeryCompact> Allocator64VeryCompact; +typedef SizeClassAllocator64<AP64Dense> Allocator64Dense; #elif defined(__mips64) static const u64 kAddressSpaceSize = 1ULL << 40; #elif defined(__aarch64__) @@ -144,6 +153,10 @@ TEST(SanitizerCommon, InternalSizeClassMap) { TestSizeClassMap<InternalSizeClassMap>(); } +TEST(SanitizerCommon, DenseSizeClassMap) { + TestSizeClassMap<DenseSizeClassMap>(); +} + template <class Allocator> void TestSizeClassAllocator() { Allocator *a = new Allocator; @@ -226,9 +239,14 @@ TEST(SanitizerCommon, SizeClassAllocator64Dynamic) { } #if !SANITIZER_ANDROID +// FIXME(kostyak): find values so that those work on Android as well.
TEST(SanitizerCommon, SizeClassAllocator64Compact) { TestSizeClassAllocator<Allocator64Compact>(); } + +TEST(SanitizerCommon, SizeClassAllocator64Dense) { + TestSizeClassAllocator<Allocator64Dense>(); +} #endif TEST(SanitizerCommon, SizeClassAllocator64VeryCompact) { diff --git a/lib/sanitizer_common/tests/sanitizer_common_test.cc b/lib/sanitizer_common/tests/sanitizer_common_test.cc index 0177484a5..6b091de60 100644 --- a/lib/sanitizer_common/tests/sanitizer_common_test.cc +++ b/lib/sanitizer_common/tests/sanitizer_common_test.cc @@ -354,7 +354,8 @@ TEST(SanitizerCommon, InternalScopedString) { EXPECT_STREQ("012345678", str.data()); } -#if SANITIZER_LINUX +#if SANITIZER_LINUX || SANITIZER_FREEBSD || \ + SANITIZER_OPENBSD || SANITIZER_MAC || SANITIZER_IOS TEST(SanitizerCommon, GetRandom) { u8 buffer_1[32], buffer_2[32]; for (bool blocking : { false, true }) { diff --git a/lib/sanitizer_common/tests/sanitizer_ring_buffer_test.cc b/lib/sanitizer_common/tests/sanitizer_ring_buffer_test.cc new file mode 100644 index 000000000..80aa57c52 --- /dev/null +++ b/lib/sanitizer_common/tests/sanitizer_ring_buffer_test.cc @@ -0,0 +1,99 @@ +//===-- sanitizer_ring_buffer_test.cc -------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of *Sanitizer runtime. +// +//===----------------------------------------------------------------------===// +#include "sanitizer_common/sanitizer_ring_buffer.h" +#include "gtest/gtest.h" + +namespace __sanitizer { + +struct LargeStruct { + int64_t v; + int64_t extra[3]; + + explicit LargeStruct(int64_t v) : v(v) {} + operator int64_t() { return v; } +}; + +struct Struct10Bytes { + short t[3]; +}; + +TEST(RingBuffer, Construct) { + RingBuffer<int64_t> *RBlong = RingBuffer<int64_t>::New(20); + EXPECT_EQ(RBlong->size(), 20U); + RBlong->Delete(); +} + +template <class T> void TestRB() { + RingBuffer<T> *RB; + const size_t Sizes[] = {1, 2, 3, 5, 8, 16, 20, 40, 10000}; + for (size_t Size : Sizes) { + RB = RingBuffer<T>::New(Size); + EXPECT_EQ(RB->size(), Size); + RB->Delete(); + } + + RB = RingBuffer<T>::New(4); + EXPECT_EQ(RB->size(), 4U); +#define EXPECT_RING_BUFFER(a0, a1, a2, a3) \ + EXPECT_EQ((int64_t)(*RB)[0], (int64_t)a0); \ + EXPECT_EQ((int64_t)(*RB)[1], (int64_t)a1); \ + EXPECT_EQ((int64_t)(*RB)[2], (int64_t)a2); \ + EXPECT_EQ((int64_t)(*RB)[3], (int64_t)a3); + + RB->push(T(1)); EXPECT_RING_BUFFER(1, 0, 0, 0); + RB->push(T(2)); EXPECT_RING_BUFFER(2, 1, 0, 0); + RB->push(T(3)); EXPECT_RING_BUFFER(3, 2, 1, 0); + RB->push(T(4)); EXPECT_RING_BUFFER(4, 3, 2, 1); + RB->push(T(5)); EXPECT_RING_BUFFER(5, 4, 3, 2); + RB->push(T(6)); EXPECT_RING_BUFFER(6, 5, 4, 3); + RB->push(T(7)); EXPECT_RING_BUFFER(7, 6, 5, 4); + RB->push(T(8)); EXPECT_RING_BUFFER(8, 7, 6, 5); + RB->push(T(9)); EXPECT_RING_BUFFER(9, 8, 7, 6); + RB->push(T(10)); EXPECT_RING_BUFFER(10, 9, 8, 7); + RB->push(T(11)); EXPECT_RING_BUFFER(11, 10, 9, 8); + RB->push(T(12)); EXPECT_RING_BUFFER(12, 11, 10, 9); + +#undef EXPECT_RING_BUFFER +} + +#if SANITIZER_WORDSIZE == 64 +TEST(RingBuffer, int64) { + TestRB<int64_t>(); +} + +TEST(RingBuffer, LargeStruct) { + TestRB<LargeStruct>(); +} + +template<typename T> +CompactRingBuffer<T> *AllocCompactRingBuffer(size_t count) { + size_t sz = sizeof(T) * count; + EXPECT_EQ(0ULL, sz % 4096); + void *p =
MmapAlignedOrDieOnFatalError(sz, sz * 2, "CompactRingBuffer"); + return new CompactRingBuffer<T>(p, sz); +} + +TEST(CompactRingBuffer, int64) { + const size_t page_sizes[] = {1, 2, 4, 128}; + + for (size_t pages : page_sizes) { + size_t count = 4096 * pages / sizeof(int64_t); + auto R = AllocCompactRingBuffer<int64_t>(count); + int64_t top = count * 3 + 13; + for (int64_t i = 0; i < top; ++i) R->push(i); + for (int64_t i = 0; i < (int64_t)count; ++i) + EXPECT_EQ(top - i - 1, (*R)[i]); + } +} +#endif +} // namespace __sanitizer diff --git a/lib/scudo/scudo_allocator.cpp b/lib/scudo/scudo_allocator.cpp index df91fdc80..fb04fb281 100644 --- a/lib/scudo/scudo_allocator.cpp +++ b/lib/scudo/scudo_allocator.cpp @@ -129,16 +129,9 @@ namespace Chunk { computeChecksum(Ptr, &NewUnpackedHeader)); } - // Nulls out a chunk header. When returning the chunk to the backend, there - // is no need to store a valid ChunkAvailable header, as this would be - // computationally expensive. Zeroing out serves the same purpose by making - // the header invalid. In the extremely rare event where 0 would be a valid - // checksum for the chunk, the state of the chunk is ChunkAvailable anyway. + // Ensure that ChunkAvailable is 0, so that if a 0 checksum is ever valid + // for a fully nulled out header, its state will be available anyway. COMPILER_CHECK(ChunkAvailable == 0); - static INLINE void eraseHeader(void *Ptr) { - const PackedHeader NullPackedHeader = 0; - atomic_store_relaxed(getAtomicHeader(Ptr), NullPackedHeader); - } // Loads and unpacks the header, verifying the checksum in the process. static INLINE @@ -185,7 +178,9 @@ struct QuarantineCallback { Chunk::loadHeader(Ptr, &Header); if (UNLIKELY(Header.State != ChunkQuarantine)) dieWithMessage("invalid chunk state when recycling address %p\n", Ptr); - Chunk::eraseHeader(Ptr); + UnpackedHeader NewHeader = Header; + NewHeader.State = ChunkAvailable; + Chunk::compareExchangeHeader(Ptr, &NewHeader, &Header); void *BackendPtr = Chunk::getBackendPtr(Ptr, &Header); if (Header.ClassId) getBackend().deallocatePrimary(Cache_, BackendPtr, Header.ClassId); diff --git a/lib/tsan/CMakeLists.txt b/lib/tsan/CMakeLists.txt index 4a2ea3f4a..d501d0cba 100644 --- a/lib/tsan/CMakeLists.txt +++ b/lib/tsan/CMakeLists.txt @@ -158,6 +158,15 @@ else() VERBATIM) elseif(arch STREQUAL "aarch64") add_asm_sources(TSAN_ASM_SOURCES rtl/tsan_rtl_aarch64.S) + # Sanity check for Go runtime. + set(BUILDGO_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/go/buildgo.sh) + add_custom_target(GotsanRuntimeCheck + COMMAND env "CC=${CMAKE_C_COMPILER} ${CMAKE_C_COMPILER_ARG1}" + IN_TMPDIR=1 SILENT=1 ${BUILDGO_SCRIPT} + DEPENDS clang_rt.tsan-${arch} ${BUILDGO_SCRIPT} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/go + COMMENT "Checking TSan Go runtime..." + VERBATIM) elseif(arch MATCHES "powerpc64|powerpc64le") add_asm_sources(TSAN_ASM_SOURCES rtl/tsan_rtl_ppc64.S) # Sanity check for Go runtime. 
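The GotsanRuntimeCheck target added for aarch64 above mirrors the existing powerpc64 one. Judging from its custom command, the same check can be run by hand roughly like this (editorial sketch; the compiler choice is an assumption):

cd lib/tsan/go
env CC=clang IN_TMPDIR=1 SILENT=1 ./buildgo.sh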
diff --git a/lib/tsan/check_analyze.sh b/lib/tsan/check_analyze.sh index 9b5abc317..b2beb8578 100755 --- a/lib/tsan/check_analyze.sh +++ b/lib/tsan/check_analyze.sh @@ -34,16 +34,16 @@ check() { fi } -for f in write1 write2 write4 write8; do +for f in write1 write2 write4 write8 read2 read4 read8; do check $f rsp 1 - check $f push 2 - check $f pop 12 + check $f push 1 + check $f pop 6 done -for f in read1 read2 read4 read8; do +for f in read1; do check $f rsp 1 - check $f push 3 - check $f pop 18 + check $f push 2 + check $f pop 12 done for f in func_entry func_exit; do diff --git a/lib/tsan/go/buildgo.sh b/lib/tsan/go/buildgo.sh index 7f570ca81..6f6183b8b 100755 --- a/lib/tsan/go/buildgo.sh +++ b/lib/tsan/go/buildgo.sh @@ -55,13 +55,19 @@ if [ "`uname -a | grep Linux`" != "" ]; then " if [ "`uname -a | grep ppc64le`" != "" ]; then SUFFIX="linux_ppc64le" + ARCHCFLAGS="-m64" elif [ "`uname -a | grep x86_64`" != "" ]; then SUFFIX="linux_amd64" + ARCHCFLAGS="-m64" OSCFLAGS="$OSCFLAGS -ffreestanding -Wno-unused-const-variable -Werror -Wno-unknown-warning-option" + elif [ "`uname -a | grep aarch64`" != "" ]; then + SUFFIX="linux_arm64" + ARCHCFLAGS="" fi elif [ "`uname -a | grep FreeBSD`" != "" ]; then SUFFIX="freebsd_amd64" OSCFLAGS="-fno-strict-aliasing -fPIC -Werror" + ARCHCFLAGS="-m64" OSLDFLAGS="-lpthread -fPIC -fpie" SRCS=" $SRCS @@ -77,6 +83,7 @@ elif [ "`uname -a | grep FreeBSD`" != "" ]; then elif [ "`uname -a | grep NetBSD`" != "" ]; then SUFFIX="netbsd_amd64" OSCFLAGS="-fno-strict-aliasing -fPIC -Werror" + ARCHCFLAGS="-m64" OSLDFLAGS="-lpthread -fPIC -fpie" SRCS=" $SRCS @@ -92,6 +99,7 @@ elif [ "`uname -a | grep NetBSD`" != "" ]; then elif [ "`uname -a | grep Darwin`" != "" ]; then SUFFIX="darwin_amd64" OSCFLAGS="-fPIC -Wno-unused-const-variable -Wno-unknown-warning-option -mmacosx-version-min=10.7" + ARCHCFLAGS="-m64" OSLDFLAGS="-lpthread -fPIC -fpie -mmacosx-version-min=10.7" SRCS=" $SRCS @@ -104,6 +112,7 @@ elif [ "`uname -a | grep Darwin`" != "" ]; then elif [ "`uname -a | grep MINGW`" != "" ]; then SUFFIX="windows_amd64" OSCFLAGS="-Wno-error=attributes -Wno-attributes -Wno-unused-const-variable -Wno-unknown-warning-option" + ARCHCFLAGS="-m64" OSLDFLAGS="" SRCS=" $SRCS @@ -136,7 +145,7 @@ for F in $SRCS; do cat $F >> $DIR/gotsan.cc done -FLAGS=" -I../rtl -I../.. -I../../sanitizer_common -I../../../include -std=c++11 -m64 -Wall -fno-exceptions -fno-rtti -DSANITIZER_GO=1 -DSANITIZER_DEADLOCK_DETECTOR_VERSION=2 $OSCFLAGS" +FLAGS=" -I../rtl -I../.. 
-I../../sanitizer_common -I../../../include -std=c++11 -Wall -fno-exceptions -fno-rtti -DSANITIZER_GO=1 -DSANITIZER_DEADLOCK_DETECTOR_VERSION=2 $OSCFLAGS $ARCHCFLAGS" if [ "$DEBUG" = "" ]; then FLAGS="$FLAGS -DSANITIZER_DEBUG=0 -O3 -fomit-frame-pointer" if [ "$SUFFIX" = "linux_ppc64le" ]; then @@ -153,7 +162,7 @@ if [ "$SILENT" != "1" ]; then fi $CC $DIR/gotsan.cc -c -o $DIR/race_$SUFFIX.syso $FLAGS $CFLAGS -$CC $OSCFLAGS test.c $DIR/race_$SUFFIX.syso -m64 -g -o $DIR/test $OSLDFLAGS $LDFLAGS +$CC $OSCFLAGS $ARCHCFLAGS test.c $DIR/race_$SUFFIX.syso -g -o $DIR/test $OSLDFLAGS $LDFLAGS export GORACE="exitcode=0 atexit_sleep_ms=0" if [ "$SILENT" != "1" ]; then diff --git a/lib/tsan/rtl/tsan_interceptors.cc b/lib/tsan/rtl/tsan_interceptors.cc index 901997b3e..5e64d11f3 100644 --- a/lib/tsan/rtl/tsan_interceptors.cc +++ b/lib/tsan/rtl/tsan_interceptors.cc @@ -508,7 +508,8 @@ static void LongJmp(ThreadState *thr, uptr *env) { uptr mangled_sp = env[6]; #elif SANITIZER_MAC # ifdef __aarch64__ - uptr mangled_sp = env[13]; + uptr mangled_sp = + (GetMacosVersion() >= MACOS_VERSION_MOJAVE) ? env[12] : env[13]; # else uptr mangled_sp = env[2]; # endif diff --git a/lib/tsan/rtl/tsan_libdispatch_mac.cc b/lib/tsan/rtl/tsan_libdispatch_mac.cc index d6c1ca662..df22888b3 100644 --- a/lib/tsan/rtl/tsan_libdispatch_mac.cc +++ b/lib/tsan/rtl/tsan_libdispatch_mac.cc @@ -185,11 +185,8 @@ static void invoke_and_release_block(void *param) { TSAN_INTERCEPTOR(void, name, dispatch_queue_t q, \ DISPATCH_NOESCAPE dispatch_block_t block) { \ SCOPED_TSAN_INTERCEPTOR(name, q, block); \ - SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_START(); \ - dispatch_block_t heap_block = Block_copy(block); \ - SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_END(); \ tsan_block_context_t new_context = { \ - q, heap_block, &invoke_and_release_block, false, true, barrier, 0}; \ + q, block, &invoke_block, false, true, barrier, 0}; \ Release(thr, pc, (uptr)&new_context); \ SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_START(); \ REAL(name##_f)(q, &new_context, dispatch_callback_wrap); \ diff --git a/lib/tsan/rtl/tsan_platform.h b/lib/tsan/rtl/tsan_platform.h index 6b3e6bac2..70ae6170a 100644 --- a/lib/tsan/rtl/tsan_platform.h +++ b/lib/tsan/rtl/tsan_platform.h @@ -458,6 +458,32 @@ struct Mapping47 { static const uptr kAppMemEnd = 0x00e000000000ull; }; +#elif SANITIZER_GO && defined(__aarch64__) + +/* Go on linux/aarch64 (48-bit VMA) +0000 0000 1000 - 0000 1000 0000: executable +0000 1000 0000 - 00c0 0000 0000: - +00c0 0000 0000 - 00e0 0000 0000: heap +00e0 0000 0000 - 2000 0000 0000: - +2000 0000 0000 - 3000 0000 0000: shadow +3000 0000 0000 - 4000 0000 0000: metainfo (memory blocks and sync objects) +4000 0000 0000 - 6000 0000 0000: - +6000 0000 0000 - 6200 0000 0000: traces +6200 0000 0000 - 8000 0000 0000: - +*/ + +struct Mapping48 { + static const uptr kMetaShadowBeg = 0x300000000000ull; + static const uptr kMetaShadowEnd = 0x400000000000ull; + static const uptr kTraceMemBeg = 0x600000000000ull; + static const uptr kTraceMemEnd = 0x620000000000ull; + static const uptr kShadowBeg = 0x200000000000ull; + static const uptr kShadowEnd = 0x300000000000ull; + static const uptr kAppMemBeg = 0x000000001000ull; + static const uptr kAppMemEnd = 0x00e000000000ull; +}; + // Indicates the runtime will define the memory regions at runtime.
#define TSAN_RUNTIME_VMA 1 @@ -525,8 +551,10 @@ template<int Type> uptr MappingArchImpl(void) { #if defined(__aarch64__) && !defined(__APPLE__) switch (vmaSize) { +#if !SANITIZER_GO case 39: return MappingImpl<Mapping39, Type>(); case 42: return MappingImpl<Mapping42, Type>(); +#endif case 48: return MappingImpl<Mapping48, Type>(); } DCHECK(0); @@ -682,8 +710,10 @@ ALWAYS_INLINE bool IsAppMem(uptr mem) { #if defined(__aarch64__) && !defined(__APPLE__) switch (vmaSize) { +#if !SANITIZER_GO case 39: return IsAppMemImpl<Mapping39>(mem); case 42: return IsAppMemImpl<Mapping42>(mem); +#endif case 48: return IsAppMemImpl<Mapping48>(mem); } DCHECK(0); @@ -713,8 +743,10 @@ ALWAYS_INLINE bool IsShadowMem(uptr mem) { #if defined(__aarch64__) && !defined(__APPLE__) switch (vmaSize) { +#if !SANITIZER_GO case 39: return IsShadowMemImpl<Mapping39>(mem); case 42: return IsShadowMemImpl<Mapping42>(mem); +#endif case 48: return IsShadowMemImpl<Mapping48>(mem); } DCHECK(0); @@ -744,8 +776,10 @@ ALWAYS_INLINE bool IsMetaMem(uptr mem) { #if defined(__aarch64__) && !defined(__APPLE__) switch (vmaSize) { +#if !SANITIZER_GO case 39: return IsMetaMemImpl<Mapping39>(mem); case 42: return IsMetaMemImpl<Mapping42>(mem); +#endif case 48: return IsMetaMemImpl<Mapping48>(mem); } DCHECK(0); @@ -785,8 +819,10 @@ ALWAYS_INLINE uptr MemToShadow(uptr x) { #if defined(__aarch64__) && !defined(__APPLE__) switch (vmaSize) { +#if !SANITIZER_GO case 39: return MemToShadowImpl<Mapping39>(x); case 42: return MemToShadowImpl<Mapping42>(x); +#endif case 48: return MemToShadowImpl<Mapping48>(x); } DCHECK(0); @@ -828,8 +864,10 @@ ALWAYS_INLINE u32 *MemToMeta(uptr x) { #if defined(__aarch64__) && !defined(__APPLE__) switch (vmaSize) { +#if !SANITIZER_GO case 39: return MemToMetaImpl<Mapping39>(x); case 42: return MemToMetaImpl<Mapping42>(x); +#endif case 48: return MemToMetaImpl<Mapping48>(x); } DCHECK(0); @@ -884,8 +922,10 @@ ALWAYS_INLINE uptr ShadowToMem(uptr s) { #if defined(__aarch64__) && !defined(__APPLE__) switch (vmaSize) { +#if !SANITIZER_GO case 39: return ShadowToMemImpl<Mapping39>(s); case 42: return ShadowToMemImpl<Mapping42>(s); +#endif case 48: return ShadowToMemImpl<Mapping48>(s); } DCHECK(0); @@ -923,8 +963,10 @@ ALWAYS_INLINE uptr GetThreadTrace(int tid) { #if defined(__aarch64__) && !defined(__APPLE__) switch (vmaSize) { +#if !SANITIZER_GO case 39: return GetThreadTraceImpl<Mapping39>(tid); case 42: return GetThreadTraceImpl<Mapping42>(tid); +#endif case 48: return GetThreadTraceImpl<Mapping48>(tid); } DCHECK(0); @@ -957,8 +999,10 @@ ALWAYS_INLINE uptr GetThreadTraceHeader(int tid) { #if defined(__aarch64__) && !defined(__APPLE__) switch (vmaSize) { +#if !SANITIZER_GO case 39: return GetThreadTraceHeaderImpl<Mapping39>(tid); case 42: return GetThreadTraceHeaderImpl<Mapping42>(tid); +#endif case 48: return GetThreadTraceHeaderImpl<Mapping48>(tid); } DCHECK(0); diff --git a/lib/tsan/rtl/tsan_platform_linux.cc b/lib/tsan/rtl/tsan_platform_linux.cc index de989b780..d2ce60709 100644 --- a/lib/tsan/rtl/tsan_platform_linux.cc +++ b/lib/tsan/rtl/tsan_platform_linux.cc @@ -212,11 +212,19 @@ void InitializePlatformEarly() { vmaSize = (MostSignificantSetBitIndex(GET_CURRENT_FRAME()) + 1); #if defined(__aarch64__) +# if !SANITIZER_GO if (vmaSize != 39 && vmaSize != 42 && vmaSize != 48) { Printf("FATAL: ThreadSanitizer: unsupported VMA range\n"); Printf("FATAL: Found %zd - Supported 39, 42 and 48\n", vmaSize); Die(); } +#else + if (vmaSize != 48) { + Printf("FATAL: ThreadSanitizer: unsupported VMA range\n"); + Printf("FATAL: 
Found %zd - Supported 48\n", vmaSize); + Die(); + } +#endif #elif defined(__powerpc64__) # if !SANITIZER_GO if (vmaSize != 44 && vmaSize != 46 && vmaSize != 47) { diff --git a/lib/tsan/rtl/tsan_platform_mac.cc b/lib/tsan/rtl/tsan_platform_mac.cc index f8d7324b7..7e3a47387 100644 --- a/lib/tsan/rtl/tsan_platform_mac.cc +++ b/lib/tsan/rtl/tsan_platform_mac.cc @@ -240,6 +240,9 @@ void InitializePlatformEarly() { #endif } +static const uptr kPthreadSetjmpXorKeySlot = 0x7; +extern "C" uptr __tsan_darwin_setjmp_xor_key = 0; + void InitializePlatform() { DisableCoreDumperIfNecessary(); #if !SANITIZER_GO @@ -251,6 +254,11 @@ void InitializePlatform() { prev_pthread_introspection_hook = pthread_introspection_hook_install(&my_pthread_introspection_hook); #endif + + if (GetMacosVersion() >= MACOS_VERSION_MOJAVE) { + __tsan_darwin_setjmp_xor_key = + (uptr)pthread_getspecific(kPthreadSetjmpXorKeySlot); + } } #if !SANITIZER_GO diff --git a/lib/tsan/rtl/tsan_rtl_aarch64.S b/lib/tsan/rtl/tsan_rtl_aarch64.S index 844c2e23d..3d02bf22f 100644 --- a/lib/tsan/rtl/tsan_rtl_aarch64.S +++ b/lib/tsan/rtl/tsan_rtl_aarch64.S @@ -120,8 +120,10 @@ ASM_SYMBOL_INTERCEPTOR(setjmp): add x0, x29, 32 eor x1, x2, x0 #else + adrp x2, ___tsan_darwin_setjmp_xor_key@page + ldr x2, [x2, ___tsan_darwin_setjmp_xor_key@pageoff] add x0, x29, 32 - mov x1, x0 + eor x1, x2, x0 #endif // call tsan interceptor @@ -178,8 +180,10 @@ ASM_SYMBOL_INTERCEPTOR(_setjmp): add x0, x29, 32 eor x1, x2, x0 #else + adrp x2, ___tsan_darwin_setjmp_xor_key@page + ldr x2, [x2, ___tsan_darwin_setjmp_xor_key@pageoff] add x0, x29, 32 - mov x1, x0 + eor x1, x2, x0 #endif // call tsan interceptor @@ -238,8 +242,10 @@ ASM_SYMBOL_INTERCEPTOR(sigsetjmp): add x0, x29, 32 eor x1, x2, x0 #else + adrp x2, ___tsan_darwin_setjmp_xor_key@page + ldr x2, [x2, ___tsan_darwin_setjmp_xor_key@pageoff] add x0, x29, 32 - mov x1, x0 + eor x1, x2, x0 #endif // call tsan interceptor diff --git a/lib/tsan/rtl/tsan_rtl_amd64.S b/lib/tsan/rtl/tsan_rtl_amd64.S index 8af61bf0e..34ef51c2a 100644 --- a/lib/tsan/rtl/tsan_rtl_amd64.S +++ b/lib/tsan/rtl/tsan_rtl_amd64.S @@ -196,6 +196,7 @@ ASM_SYMBOL_INTERCEPTOR(setjmp): #elif defined(__APPLE__) lea 16(%rsp), %rdi mov %rdi, %rsi + xorq ___tsan_darwin_setjmp_xor_key(%rip), %rsi #elif defined(__linux__) lea 16(%rsp), %rdi mov %rdi, %rsi @@ -244,6 +245,7 @@ ASM_SYMBOL_INTERCEPTOR(_setjmp): #elif defined(__APPLE__) lea 16(%rsp), %rdi mov %rdi, %rsi + xorq ___tsan_darwin_setjmp_xor_key(%rip), %rsi #elif defined(__linux__) lea 16(%rsp), %rdi mov %rdi, %rsi @@ -299,6 +301,7 @@ ASM_SYMBOL_INTERCEPTOR(sigsetjmp): #elif defined(__APPLE__) lea 32(%rsp), %rdi mov %rdi, %rsi + xorq ___tsan_darwin_setjmp_xor_key(%rip), %rsi #elif defined(__linux__) lea 32(%rsp), %rdi mov %rdi, %rsi diff --git a/lib/xray/CMakeLists.txt b/lib/xray/CMakeLists.txt index 8e18f5565..4bb25d454 100644 --- a/lib/xray/CMakeLists.txt +++ b/lib/xray/CMakeLists.txt @@ -147,7 +147,7 @@ if (APPLE) add_compiler_rt_object_libraries(RTXray OS ${XRAY_SUPPORTED_OS} ARCHS ${XRAY_SUPPORTED_ARCH} - SOURCES ${x86_64_SOURCES} + SOURCES ${XRAY_SOURCES} ${x86_64_SOURCES} ADDITIONAL_HEADERS ${XRAY_IMPL_HEADERS} CFLAGS ${XRAY_CFLAGS} DEFS ${XRAY_COMMON_DEFINITIONS} diff --git a/lib/xray/tests/CMakeLists.txt b/lib/xray/tests/CMakeLists.txt index 11f373167..2f167e3ae 100644 --- a/lib/xray/tests/CMakeLists.txt +++ b/lib/xray/tests/CMakeLists.txt @@ -19,9 +19,16 @@ set(XRAY_UNITTEST_CFLAGS ${XRAY_CFLAGS} ${COMPILER_RT_UNITTEST_CFLAGS} ${COMPILER_RT_GTEST_CFLAGS} + ${COMPILER_RT_GMOCK_CFLAGS} 
  -I${COMPILER_RT_SOURCE_DIR}/include
  -I${COMPILER_RT_SOURCE_DIR}/lib/xray
-  -I${COMPILER_RT_SOURCE_DIR}/lib)
+  -I${COMPILER_RT_SOURCE_DIR}/lib
+  )
+
+# We add the include directories one at a time in our CFLAGS.
+foreach (DIR ${LLVM_INCLUDE_DIR} ${LLVM_MAIN_INCLUDE_DIR})
+  list(APPEND XRAY_UNITTEST_CFLAGS -I${DIR})
+endforeach()

function(add_xray_lib library)
  add_library(${library} STATIC ${ARGN})
@@ -42,10 +49,27 @@ endfunction()

set(XRAY_TEST_ARCH ${XRAY_SUPPORTED_ARCH})
set(XRAY_UNITTEST_LINK_FLAGS
  ${CMAKE_THREAD_LIBS_INIT}
-  -l${SANITIZER_CXX_ABI_LIBRARY}
-  -fxray-instrument
-  )
+  -l${SANITIZER_CXX_ABI_LIBRARY})
+if (NOT APPLE)
+  # Needed by LLVMSupport.
+  append_list_if(
+    COMPILER_RT_HAS_TERMINFO
+    -l${COMPILER_RT_TERMINFO_LIB} XRAY_UNITTEST_LINK_FLAGS)
+
+  if (COMPILER_RT_STANDALONE_BUILD)
+    append_list_if(COMPILER_RT_HAS_LLVMXRAY ${LLVM_XRAY_LDFLAGS} XRAY_UNITTEST_LINK_FLAGS)
+    append_list_if(COMPILER_RT_HAS_LLVMXRAY ${LLVM_XRAY_LIBLIST} XRAY_UNITTEST_LINK_FLAGS)
+  else()
+    # We add the library directories one at a time in our link flags.
+    foreach (DIR ${LLVM_LIBRARY_DIR})
+      list(APPEND XRAY_UNITTEST_LINK_FLAGS -L${DIR})
+    endforeach()
+
+    # We also add the actual libraries to link as dependencies.
+    list(APPEND XRAY_UNITTEST_LINK_FLAGS -lLLVMXRay -lLLVMSupport -lLLVMTestingSupport)
+  endif()
+  append_list_if(COMPILER_RT_HAS_LIBM -lm XRAY_UNITTEST_LINK_FLAGS)
append_list_if(COMPILER_RT_HAS_LIBRT -lrt XRAY_UNITTEST_LINK_FLAGS)
append_list_if(COMPILER_RT_HAS_LIBDL -ldl XRAY_UNITTEST_LINK_FLAGS)
@@ -62,17 +86,21 @@ macro(add_xray_unittest testname)
      generate_compiler_rt_tests(TEST_OBJECTS
        XRayUnitTests "${testname}-${arch}-Test" "${arch}"
        SOURCES ${TEST_SOURCES} ${COMPILER_RT_GTEST_SOURCE}
+                ${COMPILER_RT_GMOCK_SOURCE}
+
        # Note that any change in the implementations will cause all the unit
        # tests to be re-built. This is by design, but may be cumbersome during
        # the build/test cycle.
COMPILE_DEPS ${TEST_SOURCES} ${COMPILER_RT_GTEST_SOURCE} ${XRAY_HEADERS} ${XRAY_ALL_SOURCE_FILES_ABS_PATHS} RUNTIME "${XRAY_RUNTIME_LIBS}" - DEPS gtest xray llvm-xray + DEPS gtest xray llvm-xray LLVMXRay LLVMTestingSupport CFLAGS ${XRAY_UNITTEST_CFLAGS} - LINK_FLAGS ${TARGET_LINK_FLAGS} ${XRAY_UNITTEST_LINK_FLAGS}) + LINK_FLAGS ${TARGET_LINK_FLAGS} ${XRAY_UNITTEST_LINK_FLAGS} + ) set_target_properties(XRayUnitTests - PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) endforeach() endif() endmacro() diff --git a/lib/xray/tests/unit/CMakeLists.txt b/lib/xray/tests/unit/CMakeLists.txt index b42eb50d0..d0ead947d 100644 --- a/lib/xray/tests/unit/CMakeLists.txt +++ b/lib/xray/tests/unit/CMakeLists.txt @@ -1,12 +1,8 @@ -add_xray_unittest(XRayBufferQueueTest SOURCES - buffer_queue_test.cc xray_unit_test_main.cc) -add_xray_unittest(XRayFDRLoggingTest SOURCES - fdr_logging_test.cc xray_unit_test_main.cc) -add_xray_unittest(XRayAllocatorTest SOURCES - allocator_test.cc xray_unit_test_main.cc) -add_xray_unittest(XRaySegmentedArrayTest SOURCES - segmented_array_test.cc xray_unit_test_main.cc) -add_xray_unittest(XRayFunctionCallTrieTest SOURCES - function_call_trie_test.cc xray_unit_test_main.cc) -add_xray_unittest(XRayProfileCollectorTest SOURCES - profile_collector_test.cc xray_unit_test_main.cc) +add_xray_unittest(XRayTest SOURCES + buffer_queue_test.cc + allocator_test.cc + segmented_array_test.cc + function_call_trie_test.cc + profile_collector_test.cc + fdr_log_writer_test.cc + xray_unit_test_main.cc) diff --git a/lib/xray/tests/unit/fdr_log_writer_test.cc b/lib/xray/tests/unit/fdr_log_writer_test.cc new file mode 100644 index 000000000..3a2138cd8 --- /dev/null +++ b/lib/xray/tests/unit/fdr_log_writer_test.cc @@ -0,0 +1,92 @@ +//===-- fdr_log_writer_test.cc --------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a function call tracing system. +// +//===----------------------------------------------------------------------===// +#include <time.h> + +#include "xray/xray_records.h" +#include "xray_fdr_log_writer.h" +#include "llvm/Support/DataExtractor.h" +#include "llvm/Testing/Support/Error.h" +#include "llvm/XRay/Trace.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace __xray { +namespace { + +static constexpr size_t kSize = 4096; + +using ::llvm::HasValue; +using ::testing::Eq; +using ::testing::SizeIs; + +// Exercise the common code path where we initialize a buffer and are able to +// write some records successfully. 
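+// The three-record preamble assembled below mirrors what the runtime's
+// writeNewBufferPreamble emits: a NewBuffer record naming the thread, a
+// WalltimeMarker carrying 64-bit seconds plus 32-bit microseconds, and a
+// Pid record, all written before any function records.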
+TEST(FdrLogWriterTest, WriteSomeRecords) { + bool Success = false; + BufferQueue Buffers(kSize, 1, Success); + BufferQueue::Buffer B; + ASSERT_EQ(Buffers.getBuffer(B), BufferQueue::ErrorCode::Ok); + + FDRLogWriter Writer(B); + MetadataRecord Preamble[] = { + createMetadataRecord<MetadataRecord::RecordKinds::NewBuffer>(int32_t{1}), + createMetadataRecord<MetadataRecord::RecordKinds::WalltimeMarker>( + int64_t{1}, int32_t{2}), + createMetadataRecord<MetadataRecord::RecordKinds::Pid>(int32_t{1}), + }; + ASSERT_THAT(Writer.writeMetadataRecords(Preamble), + Eq(sizeof(MetadataRecord) * 3)); + ASSERT_TRUE(Writer.writeMetadata<MetadataRecord::RecordKinds::NewCPUId>(1)); + ASSERT_TRUE( + Writer.writeFunction(FDRLogWriter::FunctionRecordKind::Enter, 1, 1)); + ASSERT_TRUE( + Writer.writeFunction(FDRLogWriter::FunctionRecordKind::Exit, 1, 1)); + ASSERT_EQ(Buffers.releaseBuffer(B), BufferQueue::ErrorCode::Ok); + ASSERT_EQ(B.Data, nullptr); + ASSERT_EQ(Buffers.finalize(), BufferQueue::ErrorCode::Ok); + + // We then need to go through each element of the Buffers, and re-create a + // flat buffer that we would see if they were laid out in a file. This also + // means we need to write out the header manually. + // TODO: Isolate the file header writing. + std::string Serialized; + std::aligned_storage<sizeof(XRayFileHeader), alignof(XRayFileHeader)>::type + HeaderStorage; + auto *Header = reinterpret_cast<XRayFileHeader *>(&HeaderStorage); + new (Header) XRayFileHeader(); + Header->Version = 3; + Header->Type = FileTypes::FDR_LOG; + Header->CycleFrequency = 3e9; + Header->ConstantTSC = 1; + Header->NonstopTSC = 1; + Serialized.append(reinterpret_cast<const char *>(&HeaderStorage), + sizeof(XRayFileHeader)); + size_t BufferCount = 0; + Buffers.apply([&](const BufferQueue::Buffer &B) { + ++BufferCount; + auto Size = atomic_load_relaxed(&B.Extents); + auto Extents = + createMetadataRecord<MetadataRecord::RecordKinds::BufferExtents>(Size); + Serialized.append(reinterpret_cast<const char *>(&Extents), + sizeof(Extents)); + Serialized.append(reinterpret_cast<const char *>(B.Data), Size); + }); + ASSERT_EQ(BufferCount, 1u); + + llvm::DataExtractor DE(Serialized, true, 8); + auto TraceOrErr = llvm::xray::loadTrace(DE); + EXPECT_THAT_EXPECTED(TraceOrErr, HasValue(SizeIs(2))); +} + +} // namespace +} // namespace __xray diff --git a/lib/xray/tests/unit/fdr_logging_test.cc b/lib/xray/tests/unit/fdr_logging_test.cc deleted file mode 100644 index b6961efbc..000000000 --- a/lib/xray/tests/unit/fdr_logging_test.cc +++ /dev/null @@ -1,202 +0,0 @@ -//===-- fdr_logging_test.cc -----------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file is a part of XRay, a function call tracing system. 
-// -//===----------------------------------------------------------------------===// -#include "sanitizer_common/sanitizer_common.h" -#include "xray_fdr_logging.h" -#include "gtest/gtest.h" - -#include <array> -#include <fcntl.h> -#include <iostream> -#include <sys/mman.h> -#include <sys/stat.h> -#include <sys/syscall.h> -#include <sys/types.h> -#include <system_error> -#include <thread> -#include <unistd.h> - -#include "xray/xray_records.h" - -namespace __xray { -namespace { - -constexpr auto kBufferSize = 16384; -constexpr auto kBufferMax = 10; - -struct ScopedFileCloserAndDeleter { - explicit ScopedFileCloserAndDeleter(int Fd, const char *Filename) - : Fd(Fd), Filename(Filename) {} - - ~ScopedFileCloserAndDeleter() { - if (Map) - munmap(Map, Size); - if (Fd) { - close(Fd); - unlink(Filename); - } - } - - void registerMap(void *M, size_t S) { - Map = M; - Size = S; - } - - int Fd; - const char *Filename; - void *Map = nullptr; - size_t Size = 0; -}; - -TEST(FDRLoggingTest, Simple) { - FDRLoggingOptions Options; - Options.ReportErrors = true; - char TmpFilename[] = "fdr-logging-test.XXXXXX"; - Options.Fd = mkstemp(TmpFilename); - ASSERT_NE(Options.Fd, -1); - ASSERT_EQ(fdrLoggingInit(kBufferSize, kBufferMax, &Options, - sizeof(FDRLoggingOptions)), - XRayLogInitStatus::XRAY_LOG_INITIALIZED); - fdrLoggingHandleArg0(1, XRayEntryType::ENTRY); - fdrLoggingHandleArg0(1, XRayEntryType::EXIT); - ASSERT_EQ(fdrLoggingFinalize(), XRayLogInitStatus::XRAY_LOG_FINALIZED); - ASSERT_EQ(fdrLoggingFlush(), XRayLogFlushStatus::XRAY_LOG_FLUSHED); - - // To do this properly, we have to close the file descriptor then re-open the - // file for reading this time. - ASSERT_EQ(close(Options.Fd), 0); - int Fd = open(TmpFilename, O_RDONLY); - ASSERT_NE(-1, Fd); - ScopedFileCloserAndDeleter Guard(Fd, TmpFilename); - auto Size = lseek(Fd, 0, SEEK_END); - ASSERT_NE(Size, 0); - // Map the file contents. - void *Map = mmap(NULL, Size, PROT_READ, MAP_PRIVATE, Fd, 0); - const char *Contents = static_cast<const char *>(Map); - Guard.registerMap(Map, Size); - ASSERT_NE(Contents, nullptr); - - XRayFileHeader H; - memcpy(&H, Contents, sizeof(XRayFileHeader)); - ASSERT_EQ(H.Version, 3); - ASSERT_EQ(H.Type, FileTypes::FDR_LOG); - - // We require one buffer at least to have the "extents" metadata record, - // followed by the NewBuffer record. - MetadataRecord MDR0, MDR1; - memcpy(&MDR0, Contents + sizeof(XRayFileHeader), sizeof(MetadataRecord)); - memcpy(&MDR1, Contents + sizeof(XRayFileHeader) + sizeof(MetadataRecord), - sizeof(MetadataRecord)); - ASSERT_EQ(MDR0.RecordKind, - uint8_t(MetadataRecord::RecordKinds::BufferExtents)); - ASSERT_EQ(MDR1.RecordKind, uint8_t(MetadataRecord::RecordKinds::NewBuffer)); -} - -TEST(FDRLoggingTest, Multiple) { - FDRLoggingOptions Options; - char TmpFilename[] = "fdr-logging-test.XXXXXX"; - Options.Fd = mkstemp(TmpFilename); - ASSERT_NE(Options.Fd, -1); - ASSERT_EQ(fdrLoggingInit(kBufferSize, kBufferMax, &Options, - sizeof(FDRLoggingOptions)), - XRayLogInitStatus::XRAY_LOG_INITIALIZED); - for (uint64_t I = 0; I < 100; ++I) { - fdrLoggingHandleArg0(1, XRayEntryType::ENTRY); - fdrLoggingHandleArg0(1, XRayEntryType::EXIT); - } - ASSERT_EQ(fdrLoggingFinalize(), XRayLogInitStatus::XRAY_LOG_FINALIZED); - ASSERT_EQ(fdrLoggingFlush(), XRayLogFlushStatus::XRAY_LOG_FLUSHED); - - // To do this properly, we have to close the file descriptor then re-open the - // file for reading this time. 
- ASSERT_EQ(close(Options.Fd), 0); - int Fd = open(TmpFilename, O_RDONLY); - ASSERT_NE(-1, Fd); - ScopedFileCloserAndDeleter Guard(Fd, TmpFilename); - auto Size = lseek(Fd, 0, SEEK_END); - ASSERT_NE(Size, 0); - // Map the file contents. - void *Map = mmap(NULL, Size, PROT_READ, MAP_PRIVATE, Fd, 0); - const char *Contents = static_cast<const char *>(Map); - Guard.registerMap(Map, Size); - ASSERT_NE(Contents, nullptr); - - XRayFileHeader H; - memcpy(&H, Contents, sizeof(XRayFileHeader)); - ASSERT_EQ(H.Version, 3); - ASSERT_EQ(H.Type, FileTypes::FDR_LOG); - - MetadataRecord MDR0, MDR1; - memcpy(&MDR0, Contents + sizeof(XRayFileHeader), sizeof(MetadataRecord)); - memcpy(&MDR1, Contents + sizeof(XRayFileHeader) + sizeof(MetadataRecord), - sizeof(MetadataRecord)); - ASSERT_EQ(MDR0.RecordKind, - uint8_t(MetadataRecord::RecordKinds::BufferExtents)); - ASSERT_EQ(MDR1.RecordKind, uint8_t(MetadataRecord::RecordKinds::NewBuffer)); -} - -TEST(FDRLoggingTest, MultiThreadedCycling) { - FDRLoggingOptions Options; - char TmpFilename[] = "fdr-logging-test.XXXXXX"; - Options.Fd = mkstemp(TmpFilename); - ASSERT_NE(Options.Fd, -1); - ASSERT_EQ(fdrLoggingInit(kBufferSize, 1, &Options, sizeof(FDRLoggingOptions)), - XRayLogInitStatus::XRAY_LOG_INITIALIZED); - - // Now we want to create one thread, do some logging, then create another one, - // in succession and making sure that we're able to get thread records from - // the latest thread (effectively being able to recycle buffers). - std::array<tid_t, 2> Threads; - for (uint64_t I = 0; I < 2; ++I) { - std::thread t{[I, &Threads] { - fdrLoggingHandleArg0(I + 1, XRayEntryType::ENTRY); - fdrLoggingHandleArg0(I + 1, XRayEntryType::EXIT); - Threads[I] = GetTid(); - }}; - t.join(); - } - ASSERT_EQ(fdrLoggingFinalize(), XRayLogInitStatus::XRAY_LOG_FINALIZED); - ASSERT_EQ(fdrLoggingFlush(), XRayLogFlushStatus::XRAY_LOG_FLUSHED); - - // To do this properly, we have to close the file descriptor then re-open the - // file for reading this time. - ASSERT_EQ(close(Options.Fd), 0); - int Fd = open(TmpFilename, O_RDONLY); - ASSERT_NE(-1, Fd); - ScopedFileCloserAndDeleter Guard(Fd, TmpFilename); - auto Size = lseek(Fd, 0, SEEK_END); - ASSERT_NE(Size, 0); - // Map the file contents. 
-  void *Map = mmap(NULL, Size, PROT_READ, MAP_PRIVATE, Fd, 0);
-  const char *Contents = static_cast<const char *>(Map);
-  Guard.registerMap(Map, Size);
-  ASSERT_NE(Contents, nullptr);
-
-  XRayFileHeader H;
-  memcpy(&H, Contents, sizeof(XRayFileHeader));
-  ASSERT_EQ(H.Version, 3);
-  ASSERT_EQ(H.Type, FileTypes::FDR_LOG);
-
-  MetadataRecord MDR0, MDR1;
-  memcpy(&MDR0, Contents + sizeof(XRayFileHeader), sizeof(MetadataRecord));
-  memcpy(&MDR1, Contents + sizeof(XRayFileHeader) + sizeof(MetadataRecord),
-         sizeof(MetadataRecord));
-  ASSERT_EQ(MDR0.RecordKind,
-            uint8_t(MetadataRecord::RecordKinds::BufferExtents));
-  ASSERT_EQ(MDR1.RecordKind, uint8_t(MetadataRecord::RecordKinds::NewBuffer));
-  int32_t Latest = 0;
-  memcpy(&Latest, MDR1.Data, sizeof(int32_t));
-  ASSERT_EQ(Latest, static_cast<int32_t>(Threads[1]));
-}
-
-} // namespace
-} // namespace __xray
diff --git a/lib/xray/xray_allocator.h b/lib/xray/xray_allocator.h
index 270262118..f77bccbd9 100644
--- a/lib/xray/xray_allocator.h
+++ b/lib/xray/xray_allocator.h
@@ -20,6 +20,7 @@
 #include "sanitizer_common/sanitizer_internal_defs.h"
 #include "sanitizer_common/sanitizer_mutex.h"
 #include "sanitizer_common/sanitizer_posix.h"
+#include "xray_defs.h"
 #include "xray_utils.h"
 #include <cstddef>
 #include <cstdint>
@@ -27,6 +28,62 @@
 namespace __xray {

+// We implement our own memory allocation routine which will bypass the
+// internal allocator. This allows us to manage the memory directly, using
+// mmap'ed memory to back the allocators.
+template <class T> T *allocate() XRAY_NEVER_INSTRUMENT {
+  uptr RoundedSize = RoundUpTo(sizeof(T), GetPageSizeCached());
+  uptr B = internal_mmap(NULL, RoundedSize, PROT_READ | PROT_WRITE,
+                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+  int ErrNo;
+  if (UNLIKELY(internal_iserror(B, &ErrNo))) {
+    if (Verbosity())
+      Report(
+          "XRay Profiling: Failed to allocate memory of size %zd; Error = %d.\n",
+          RoundedSize, ErrNo);
+    return nullptr;
+  }
+  return reinterpret_cast<T *>(B);
+}
+
+template <class T> void deallocate(T *B) XRAY_NEVER_INSTRUMENT {
+  if (B == nullptr)
+    return;
+  uptr RoundedSize = RoundUpTo(sizeof(T), GetPageSizeCached());
+  internal_munmap(B, RoundedSize);
+}
+
+template <class T = uint8_t> T *allocateBuffer(size_t S) XRAY_NEVER_INSTRUMENT {
+  uptr RoundedSize = RoundUpTo(S * sizeof(T), GetPageSizeCached());
+  uptr B = internal_mmap(NULL, RoundedSize, PROT_READ | PROT_WRITE,
+                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+  int ErrNo;
+  if (UNLIKELY(internal_iserror(B, &ErrNo))) {
+    if (Verbosity())
+      Report(
+          "XRay Profiling: Failed to allocate memory of size %zd; Error = %d.\n",
+          RoundedSize, ErrNo);
+    return nullptr;
+  }
+  return reinterpret_cast<T *>(B);
+}
+
+template <class T> void deallocateBuffer(T *B, size_t S) XRAY_NEVER_INSTRUMENT {
+  if (B == nullptr)
+    return;
+  uptr RoundedSize = RoundUpTo(S * sizeof(T), GetPageSizeCached());
+  internal_munmap(B, RoundedSize);
+}
+
+template <class T, class... U>
+T *initArray(size_t N, U &&... Us) XRAY_NEVER_INSTRUMENT {
+  auto A = allocateBuffer<T>(N);
+  if (A != nullptr)
+    while (N > 0)
+      new (A + (--N)) T(std::forward<U>(Us)...);
+  return A;
+}
+
 /// The Allocator type hands out fixed-sized chunks of memory that are
 /// cache-line aligned and sized. This is useful for placement of
 /// performance-sensitive data in memory that's frequently accessed.
The @@ -54,19 +111,16 @@ template <size_t N> struct Allocator { private: const size_t MaxMemory{0}; - void *BackingStore = nullptr; - void *AlignedNextBlock = nullptr; + uint8_t *BackingStore = nullptr; + uint8_t *AlignedNextBlock = nullptr; size_t AllocatedBlocks = 0; SpinMutex Mutex{}; - void *Alloc() { + void *Alloc() XRAY_NEVER_INSTRUMENT { SpinMutexLock Lock(&Mutex); if (UNLIKELY(BackingStore == nullptr)) { - BackingStore = reinterpret_cast<void *>( - internal_mmap(NULL, MaxMemory, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, 0, 0)); - if (BackingStore == MAP_FAILED) { - BackingStore = nullptr; + BackingStore = allocateBuffer(MaxMemory); + if (BackingStore == nullptr) { if (Verbosity()) Report("XRay Profiling: Failed to allocate memory for allocator.\n"); return nullptr; @@ -79,7 +133,7 @@ private: auto AlignedNextBlockNum = nearest_boundary( reinterpret_cast<uintptr_t>(AlignedNextBlock), kCacheLineSize); if (diff(AlignedNextBlockNum, BackingStoreNum) > ptrdiff_t(MaxMemory)) { - munmap(BackingStore, MaxMemory); + deallocateBuffer(BackingStore, MaxMemory); AlignedNextBlock = BackingStore = nullptr; if (Verbosity()) Report("XRay Profiling: Cannot obtain enough memory from " @@ -87,7 +141,7 @@ private: return nullptr; } - AlignedNextBlock = reinterpret_cast<void *>(AlignedNextBlockNum); + AlignedNextBlock = reinterpret_cast<uint8_t *>(AlignedNextBlockNum); // Assert that AlignedNextBlock is cache-line aligned. DCHECK_EQ(reinterpret_cast<uintptr_t>(AlignedNextBlock) % kCacheLineSize, @@ -100,21 +154,21 @@ private: // Align the pointer we'd like to return to an appropriate alignment, then // advance the pointer from where to start allocations. void *Result = AlignedNextBlock; - AlignedNextBlock = reinterpret_cast<void *>( - reinterpret_cast<char *>(AlignedNextBlock) + N); + AlignedNextBlock = reinterpret_cast<uint8_t *>( + reinterpret_cast<uint8_t *>(AlignedNextBlock) + N); ++AllocatedBlocks; return Result; } public: - explicit Allocator(size_t M) + explicit Allocator(size_t M) XRAY_NEVER_INSTRUMENT : MaxMemory(nearest_boundary(M, kCacheLineSize)) {} - Block Allocate() { return {Alloc()}; } + Block Allocate() XRAY_NEVER_INSTRUMENT { return {Alloc()}; } - ~Allocator() NOEXCEPT { + ~Allocator() NOEXCEPT XRAY_NEVER_INSTRUMENT { if (BackingStore != nullptr) { - internal_munmap(BackingStore, MaxMemory); + deallocateBuffer(BackingStore, MaxMemory); } } }; diff --git a/lib/xray/xray_basic_logging.cc b/lib/xray/xray_basic_logging.cc index 585ca641c..ee28d598f 100644 --- a/lib/xray/xray_basic_logging.cc +++ b/lib/xray/xray_basic_logging.cc @@ -38,8 +38,9 @@ namespace __xray { -SpinMutex LogMutex; +static SpinMutex LogMutex; +namespace { // We use elements of this type to record the entry TSC of every function ID we // see as we're tracing a particular thread's execution. 
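// Each entry pairs a function id with the TSC observed on entry; the exit
// handler compares the elapsed ticks against ThresholdTicks so that calls
// shorter than the configured duration filter are dropped from the log.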
struct alignas(16) StackEntry { @@ -62,11 +63,18 @@ struct alignas(64) ThreadLocalData { int Fd = -1; }; +struct BasicLoggingOptions { + int DurationFilterMicros = 0; + size_t MaxStackDepth = 0; + size_t ThreadBufferSize = 0; +}; +} // namespace + static pthread_key_t PThreadKey; static atomic_uint8_t BasicInitialized{0}; -BasicLoggingOptions GlobalOptions; +struct BasicLoggingOptions GlobalOptions; thread_local atomic_uint8_t Guard{0}; @@ -360,8 +368,8 @@ static void TLDDestructor(void *P) XRAY_NEVER_INSTRUMENT { fsync(TLD.Fd); } -XRayLogInitStatus basicLoggingInit(size_t BufferSize, size_t BufferMax, - void *Options, +XRayLogInitStatus basicLoggingInit(UNUSED size_t BufferSize, + UNUSED size_t BufferMax, void *Options, size_t OptionsSize) XRAY_NEVER_INSTRUMENT { uint8_t Expected = 0; if (!atomic_compare_exchange_strong(&BasicInitialized, &Expected, 1, @@ -385,43 +393,32 @@ XRayLogInitStatus basicLoggingInit(size_t BufferSize, size_t BufferMax, "using emulation instead.\n"); }); - if (BufferSize == 0 && BufferMax == 0 && Options != nullptr) { - FlagParser P; - BasicFlags F; - F.setDefaults(); - registerXRayBasicFlags(&P, &F); - P.ParseString(useCompilerDefinedBasicFlags()); - auto *EnvOpts = GetEnv("XRAY_BASIC_OPTIONS"); - if (EnvOpts == nullptr) - EnvOpts = ""; - - P.ParseString(EnvOpts); - - // If XRAY_BASIC_OPTIONS was not defined, then we use the deprecated options - // set through XRAY_OPTIONS instead. - if (internal_strlen(EnvOpts) == 0) { - F.func_duration_threshold_us = - flags()->xray_naive_log_func_duration_threshold_us; - F.max_stack_depth = flags()->xray_naive_log_max_stack_depth; - F.thread_buffer_size = flags()->xray_naive_log_thread_buffer_size; - } - - P.ParseString(static_cast<const char *>(Options)); - GlobalOptions.ThreadBufferSize = F.thread_buffer_size; - GlobalOptions.DurationFilterMicros = F.func_duration_threshold_us; - GlobalOptions.MaxStackDepth = F.max_stack_depth; - *basicFlags() = F; - } else if (OptionsSize != sizeof(BasicLoggingOptions)) { - Report("Invalid options size, potential ABI mismatch; expected %d got %d", - sizeof(BasicLoggingOptions), OptionsSize); - return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; - } else { - if (Verbosity()) - Report("XRay Basic: struct-based init is deprecated, please use " - "string-based configuration instead.\n"); - GlobalOptions = *reinterpret_cast<BasicLoggingOptions *>(Options); + FlagParser P; + BasicFlags F; + F.setDefaults(); + registerXRayBasicFlags(&P, &F); + P.ParseString(useCompilerDefinedBasicFlags()); + auto *EnvOpts = GetEnv("XRAY_BASIC_OPTIONS"); + if (EnvOpts == nullptr) + EnvOpts = ""; + + P.ParseString(EnvOpts); + + // If XRAY_BASIC_OPTIONS was not defined, then we use the deprecated options + // set through XRAY_OPTIONS instead. 
+ if (internal_strlen(EnvOpts) == 0) { + F.func_duration_threshold_us = + flags()->xray_naive_log_func_duration_threshold_us; + F.max_stack_depth = flags()->xray_naive_log_max_stack_depth; + F.thread_buffer_size = flags()->xray_naive_log_thread_buffer_size; } + P.ParseString(static_cast<const char *>(Options)); + GlobalOptions.ThreadBufferSize = F.thread_buffer_size; + GlobalOptions.DurationFilterMicros = F.func_duration_threshold_us; + GlobalOptions.MaxStackDepth = F.max_stack_depth; + *basicFlags() = F; + atomic_store(&ThresholdTicks, atomic_load(&TicksPerSec, memory_order_acquire) * GlobalOptions.DurationFilterMicros / 1000000, diff --git a/lib/xray/xray_buffer_queue.cc b/lib/xray/xray_buffer_queue.cc index 5bdd9148d..5a88ecd33 100644 --- a/lib/xray/xray_buffer_queue.cc +++ b/lib/xray/xray_buffer_queue.cc @@ -16,68 +16,42 @@ #include "sanitizer_common/sanitizer_common.h" #include "sanitizer_common/sanitizer_libc.h" #include "sanitizer_common/sanitizer_posix.h" +#include "xray_allocator.h" +#include "xray_defs.h" #include <memory> #include <sys/mman.h> using namespace __xray; using namespace __sanitizer; -template <class T> static T *allocRaw(size_t N) { - // TODO: Report errors? - void *A = reinterpret_cast<void *>( - internal_mmap(NULL, N * sizeof(T), PROT_WRITE | PROT_READ, - MAP_ANONYMOUS | MAP_PRIVATE, -1, 0)); - return (A == MAP_FAILED) ? nullptr : reinterpret_cast<T *>(A); -} - -template <class T> static void deallocRaw(T *ptr, size_t N) { - // TODO: Report errors? - if (ptr != nullptr) - internal_munmap(ptr, N); -} - -template <class T> static T *initArray(size_t N) { - auto A = allocRaw<T>(N); - if (A != nullptr) - while (N > 0) - new (A + (--N)) T(); - return A; -} - -BufferQueue::BufferQueue(size_t B, size_t N, bool &Success) - : BufferSize(B), Buffers(initArray<BufferQueue::BufferRep>(N)), - BufferCount(N), Finalizing{0}, OwnedBuffers(initArray<void *>(N)), - Next(Buffers), First(Buffers), LiveBuffers(0) { - if (Buffers == nullptr) { +BufferQueue::BufferQueue(size_t B, size_t N, + bool &Success) XRAY_NEVER_INSTRUMENT + : BufferSize(B), + BufferCount(N), + Mutex(), + Finalizing{0}, + BackingStore(allocateBuffer(B *N)), + Buffers(initArray<BufferQueue::BufferRep>(N)), + Next(Buffers), + First(Buffers), + LiveBuffers(0) { + if (BackingStore == nullptr) { Success = false; return; } - if (OwnedBuffers == nullptr) { - // Clean up the buffers we've already allocated. 
- for (auto B = Buffers, E = Buffers + BufferCount; B != E; ++B) - B->~BufferRep(); - deallocRaw(Buffers, N); + if (Buffers == nullptr) { + deallocateBuffer(BackingStore, BufferSize * BufferCount); Success = false; return; - }; + } for (size_t i = 0; i < N; ++i) { auto &T = Buffers[i]; - void *Tmp = allocRaw<char>(BufferSize); - if (Tmp == nullptr) { - Success = false; - return; - } - auto *Extents = allocRaw<BufferExtents>(1); - if (Extents == nullptr) { - Success = false; - return; - } auto &Buf = T.Buff; - Buf.Data = Tmp; + Buf.Data = reinterpret_cast<char *>(BackingStore) + (BufferSize * i); Buf.Size = B; - Buf.Extents = Extents; - OwnedBuffers[i] = Tmp; + atomic_store(&Buf.Extents, 0, memory_order_release); + T.Used = false; } Success = true; } @@ -85,13 +59,17 @@ BufferQueue::BufferQueue(size_t B, size_t N, bool &Success) BufferQueue::ErrorCode BufferQueue::getBuffer(Buffer &Buf) { if (atomic_load(&Finalizing, memory_order_acquire)) return ErrorCode::QueueFinalizing; + SpinMutexLock Guard(&Mutex); if (LiveBuffers == BufferCount) return ErrorCode::NotEnoughMemory; auto &T = *Next; auto &B = T.Buff; - Buf = B; + auto Extents = atomic_load(&B.Extents, memory_order_acquire); + atomic_store(&Buf.Extents, Extents, memory_order_release); + Buf.Data = B.Data; + Buf.Size = B.Size; T.Used = true; ++LiveBuffers; @@ -102,15 +80,11 @@ BufferQueue::ErrorCode BufferQueue::getBuffer(Buffer &Buf) { } BufferQueue::ErrorCode BufferQueue::releaseBuffer(Buffer &Buf) { - // Blitz through the buffers array to find the buffer. - bool Found = false; - for (auto I = OwnedBuffers, E = OwnedBuffers + BufferCount; I != E; ++I) { - if (*I == Buf.Data) { - Found = true; - break; - } - } - if (!Found) + // Check whether the buffer being referred to is within the bounds of the + // backing store's range. + if (Buf.Data < BackingStore || + Buf.Data > + reinterpret_cast<char *>(BackingStore) + (BufferCount * BufferSize)) return ErrorCode::UnrecognizedBuffer; SpinMutexLock Guard(&Mutex); @@ -121,10 +95,14 @@ BufferQueue::ErrorCode BufferQueue::releaseBuffer(Buffer &Buf) { return ErrorCode::NotEnoughMemory; // Now that the buffer has been released, we mark it as "used". 
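  // ("Used" here means the slot again holds data worth visiting: apply() and
  // the flush path only walk buffers whose Used flag is set.)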
- First->Buff = Buf; + auto Extents = atomic_load(&Buf.Extents, memory_order_acquire); + atomic_store(&First->Buff.Extents, Extents, memory_order_release); + First->Buff.Data = Buf.Data; + First->Buff.Size = Buf.Size; First->Used = true; Buf.Data = nullptr; Buf.Size = 0; + atomic_store(&Buf.Extents, 0, memory_order_release); --LiveBuffers; if (++First == (Buffers + BufferCount)) First = Buffers; @@ -139,14 +117,8 @@ BufferQueue::ErrorCode BufferQueue::finalize() { } BufferQueue::~BufferQueue() { - for (auto I = Buffers, E = Buffers + BufferCount; I != E; ++I) { - auto &T = *I; - auto &Buf = T.Buff; - deallocRaw(Buf.Data, Buf.Size); - deallocRaw(Buf.Extents, 1); - } for (auto B = Buffers, E = Buffers + BufferCount; B != E; ++B) B->~BufferRep(); - deallocRaw(Buffers, BufferCount); - deallocRaw(OwnedBuffers, BufferCount); + deallocateBuffer(Buffers, BufferCount); + deallocateBuffer(BackingStore, BufferSize * BufferCount); } diff --git a/lib/xray/xray_buffer_queue.h b/lib/xray/xray_buffer_queue.h index e76fa7983..c1fa9fab7 100644 --- a/lib/xray/xray_buffer_queue.h +++ b/lib/xray/xray_buffer_queue.h @@ -18,7 +18,9 @@ #include "sanitizer_common/sanitizer_atomic.h" #include "sanitizer_common/sanitizer_common.h" #include "sanitizer_common/sanitizer_mutex.h" +#include "xray_defs.h" #include <cstddef> +#include <cstdint> namespace __xray { @@ -29,14 +31,10 @@ namespace __xray { /// trace collection. class BufferQueue { public: - struct alignas(64) BufferExtents { - atomic_uint64_t Size; - }; - struct Buffer { + atomic_uint64_t Extents{0}; void *Data = nullptr; size_t Size = 0; - BufferExtents *Extents; }; struct BufferRep { @@ -76,8 +74,10 @@ private: T *operator->() const { return &(Buffers[Offset].Buff); } - Iterator(BufferRep *Root, size_t O, size_t M) - : Buffers(Root), Offset(O), Max(M) { + Iterator(BufferRep *Root, size_t O, size_t M) XRAY_NEVER_INSTRUMENT + : Buffers(Root), + Offset(O), + Max(M) { // We want to advance to the first Offset where the 'Used' property is // true, or to the end of the list/queue. while (!Buffers[Offset].Used && Offset != Max) { @@ -107,16 +107,18 @@ private: // Size of each individual Buffer. size_t BufferSize; - BufferRep *Buffers; - // Amount of pre-allocated buffers. size_t BufferCount; SpinMutex Mutex; atomic_uint8_t Finalizing; - // Pointers to buffers managed/owned by the BufferQueue. - void **OwnedBuffers; + // A pointer to a contiguous block of memory to serve as the backing store for + // all the individual buffers handed out. + uint8_t *BackingStore; + + // A dynamically allocated array of BufferRep instances. + BufferRep *Buffers; // Pointer to the next buffer to be handed out. BufferRep *Next; @@ -198,7 +200,7 @@ public: /// Applies the provided function F to each Buffer in the queue, only if the /// Buffer is marked 'used' (i.e. has been the result of getBuffer(...) and a /// releaseBuffer(...) operation). 
- template <class F> void apply(F Fn) { + template <class F> void apply(F Fn) XRAY_NEVER_INSTRUMENT { SpinMutexLock G(&Mutex); for (auto I = begin(), E = end(); I != E; ++I) Fn(*I); diff --git a/lib/xray/xray_fdr_log_records.h b/lib/xray/xray_fdr_log_records.h index 87096d4fc..e7b1ee562 100644 --- a/lib/xray/xray_fdr_log_records.h +++ b/lib/xray/xray_fdr_log_records.h @@ -12,6 +12,9 @@ //===----------------------------------------------------------------------===// #ifndef XRAY_XRAY_FDR_LOG_RECORDS_H #define XRAY_XRAY_FDR_LOG_RECORDS_H +#include <cstdint> + +namespace __xray { enum class RecordType : uint8_t { Function, Metadata }; @@ -68,4 +71,6 @@ struct alignas(8) FunctionRecord { static_assert(sizeof(FunctionRecord) == 8, "Wrong size for FunctionRecord."); +} // namespace __xray + #endif // XRAY_XRAY_FDR_LOG_RECORDS_H diff --git a/lib/xray/xray_fdr_log_writer.h b/lib/xray/xray_fdr_log_writer.h new file mode 100644 index 000000000..28af356e7 --- /dev/null +++ b/lib/xray/xray_fdr_log_writer.h @@ -0,0 +1,119 @@ +//===-- xray_fdr_log_writer.h ---------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a function call tracing system. +// +//===----------------------------------------------------------------------===// +#ifndef COMPILER_RT_LIB_XRAY_XRAY_FDR_LOG_WRITER_H_ +#define COMPILER_RT_LIB_XRAY_XRAY_FDR_LOG_WRITER_H_ + +#include "xray_buffer_queue.h" +#include "xray_fdr_log_records.h" +#include <functional> +#include <tuple> +#include <type_traits> +#include <utility> + +namespace __xray { + +template <size_t Index> struct SerializerImpl { + template <class Tuple, + typename std::enable_if< + Index<std::tuple_size< + typename std::remove_reference<Tuple>::type>::value, + int>::type = 0> static void serializeTo(char *Buffer, + Tuple &&T) { + auto P = reinterpret_cast<const char *>(&std::get<Index>(T)); + constexpr auto Size = sizeof(std::get<Index>(T)); + internal_memcpy(Buffer, P, Size); + SerializerImpl<Index + 1>::serializeTo(Buffer + Size, + std::forward<Tuple>(T)); + } + + template <class Tuple, + typename std::enable_if< + Index >= std::tuple_size<typename std::remove_reference< + Tuple>::type>::value, + int>::type = 0> + static void serializeTo(char *, Tuple &&){}; +}; + +using Serializer = SerializerImpl<0>; + +template <MetadataRecord::RecordKinds Kind, class... DataTypes> +MetadataRecord createMetadataRecord(DataTypes &&... Ds) { + MetadataRecord R; + R.Type = 1; + R.RecordKind = static_cast<uint8_t>(Kind); + Serializer::serializeTo(R.Data, + std::make_tuple(std::forward<DataTypes>(Ds)...)); + return R; +} + +class FDRLogWriter { + BufferQueue::Buffer &Buffer; + char *NextRecord = nullptr; + + template <class T> void writeRecord(const T &R) { + internal_memcpy(NextRecord, reinterpret_cast<const char *>(&R), sizeof(T)); + NextRecord += sizeof(T); + atomic_fetch_add(&Buffer.Extents, sizeof(T), memory_order_acq_rel); + } + +public: + explicit FDRLogWriter(BufferQueue::Buffer &B, char *P) + : Buffer(B), NextRecord(P) { + DCHECK_NE(Buffer.Data, nullptr); + DCHECK_NE(NextRecord, nullptr); + } + + explicit FDRLogWriter(BufferQueue::Buffer &B) + : FDRLogWriter(B, static_cast<char *>(B.Data)) {} + + template <MetadataRecord::RecordKinds Kind, class... Data> + bool writeMetadata(Data &&... 
Ds) {
+    // TODO: Check boundary conditions:
+    //   1) Buffer is full, and cannot handle one metadata record.
+    //   2) Buffer queue is finalising.
+    writeRecord(createMetadataRecord<Kind>(std::forward<Data>(Ds)...));
+    return true;
+  }
+
+  template <size_t N> size_t writeMetadataRecords(MetadataRecord (&Recs)[N]) {
+    constexpr auto Size = sizeof(MetadataRecord) * N;
+    internal_memcpy(NextRecord, reinterpret_cast<const char *>(Recs), Size);
+    NextRecord += Size;
+    atomic_fetch_add(&Buffer.Extents, Size, memory_order_acq_rel);
+    return Size;
+  }
+
+  enum class FunctionRecordKind : uint8_t {
+    Enter = 0x00,
+    Exit = 0x01,
+    TailExit = 0x02,
+    EnterArg = 0x03,
+  };
+
+  bool writeFunction(FunctionRecordKind Kind, int32_t FuncId, int32_t Delta) {
+    FunctionRecord R;
+    R.Type = 0;
+    R.RecordKind = uint8_t(Kind);
+    R.FuncId = FuncId;
+    R.TSCDelta = Delta;
+    writeRecord(R);
+    return true;
+  }
+
+  char *getNextRecord() const { return NextRecord; }
+
+}; // class FDRLogWriter
+
+} // namespace __xray
+
+#endif // COMPILER_RT_LIB_XRAY_XRAY_FDR_LOG_WRITER_H_
diff --git a/lib/xray/xray_fdr_logging.cc b/lib/xray/xray_fdr_logging.cc
index 6cb2dfa0c..2d6af443d 100644
--- a/lib/xray/xray_fdr_logging.cc
+++ b/lib/xray/xray_fdr_logging.cc
@@ -30,9 +30,11 @@
 #include "sanitizer_common/sanitizer_common.h"
 #include "xray/xray_interface.h"
 #include "xray/xray_records.h"
+#include "xray_allocator.h"
 #include "xray_buffer_queue.h"
 #include "xray_defs.h"
 #include "xray_fdr_flags.h"
+#include "xray_fdr_log_writer.h"
 #include "xray_flags.h"
 #include "xray_recursion_guard.h"
 #include "xray_tsc.h"
@@ -40,14 +42,16 @@

 namespace __xray {

-atomic_sint32_t LoggingStatus = {XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};
+static atomic_sint32_t LoggingStatus = {
+    XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};

+namespace {
 // Group together thread-local-data in a struct, then hide it behind a function
 // call so that it can be initialized on first use instead of as a global. We
 // force the alignment to 64-bytes for x86 cache line alignment, as this
 // structure is used in the hot path of implementation.
 struct alignas(64) ThreadLocalData {
-  BufferQueue::Buffer Buffer;
+  BufferQueue::Buffer Buffer{};
   char *RecordPtr = nullptr;
   // The number of FunctionEntry records immediately preceding RecordPtr.
   uint8_t NumConsecutiveFnEnters = 0;
@@ -70,6 +74,7 @@ struct alignas(64) ThreadLocalData {
   // FDRLogging, and that we're going to clean it up when the thread exits.
   BufferQueue *BQ = nullptr;
 };
+} // namespace

 static_assert(std::is_trivially_destructible<ThreadLocalData>::value,
               "ThreadLocalData must be trivially destructible");
@@ -81,15 +86,12 @@
 static constexpr auto FunctionRecSize = sizeof(FunctionRecord);
 static pthread_key_t Key;

 // Global BufferQueue.
+static std::aligned_storage<sizeof(BufferQueue)>::type BufferQueueStorage;
 static BufferQueue *BQ = nullptr;

 static atomic_sint32_t LogFlushStatus = {
     XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};

-static FDRLoggingOptions FDROptions;
-
-static SpinMutex FDROptionsMutex;
-
 // This function will initialize the thread-local data structure used by the FDR
 // logging implementation and return a reference to it. The implementation
 // details require a bit of care to maintain.
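The tuple-based SerializerImpl above is the subtle piece of the new header: createMetadataRecord() memcpy's each argument into the record's 15-byte payload back to back, so a WalltimeMarker can carry a 64-bit seconds field immediately followed by a 32-bit microseconds field. Below is a minimal standalone sketch of the same technique, not part of the patch; it uses C++17 if constexpr where the header itself sticks to C++11 enable_if, and a hypothetical Record struct stands in for MetadataRecord.

// sketch_serializer.cc -- illustrative only; build with -std=c++17.
#include <cassert>
#include <cstdint>
#include <cstring>
#include <tuple>

// Hypothetical stand-in for MetadataRecord: one byte of type/kind bits
// followed by a 15-byte free-form payload.
struct Record {
  uint8_t TypeAndKind;
  char Data[15];
};

// Copy each tuple element into Buffer back to back, the same job that
// SerializerImpl<Index>::serializeTo performs via enable_if recursion.
template <size_t I = 0, class... Ts>
void serializeTo(char *Buffer, const std::tuple<Ts...> &T) {
  if constexpr (I < sizeof...(Ts)) {
    const auto &Field = std::get<I>(T);
    std::memcpy(Buffer, &Field, sizeof(Field));
    serializeTo<I + 1>(Buffer + sizeof(Field), T);
  }
}

int main() {
  // A WalltimeMarker-shaped payload: 64-bit seconds, then 32-bit microseconds.
  Record R{0x41, {}};
  serializeTo(R.Data, std::make_tuple(int64_t{123}, int32_t{456}));

  int64_t Seconds = 0;
  int32_t Micros = 0;
  std::memcpy(&Seconds, R.Data, sizeof(Seconds));
  std::memcpy(&Micros, R.Data + sizeof(Seconds), sizeof(Micros));
  assert(Seconds == 123 && Micros == 456);
  return 0;
}

Declaring the layout at the call site (the RecordKinds template parameter plus the argument list) replaces the explicit internal_memcpy offsets that the old writeNewBufferPreamble maintained by hand.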
@@ -140,60 +142,35 @@ static ThreadLocalData &getThreadLocalData() { static void writeNewBufferPreamble(tid_t Tid, timespec TS, pid_t Pid) XRAY_NEVER_INSTRUMENT { - static constexpr int InitRecordsCount = 3; + static_assert(sizeof(time_t) <= 8, "time_t needs to be at most 8 bytes"); auto &TLD = getThreadLocalData(); - MetadataRecord Metadata[InitRecordsCount]; - { - // Write out a MetadataRecord to signify that this is the start of a new - // buffer, associated with a particular thread, with a new CPU. For the - // data, we have 15 bytes to squeeze as much information as we can. At this - // point we only write down the following bytes: - // - Thread ID (tid_t, cast to 4 bytes type due to Darwin being 8 bytes) - auto &NewBuffer = Metadata[0]; - NewBuffer.Type = uint8_t(RecordType::Metadata); - NewBuffer.RecordKind = uint8_t(MetadataRecord::RecordKinds::NewBuffer); - int32_t tid = static_cast<int32_t>(Tid); - internal_memcpy(&NewBuffer.Data, &tid, sizeof(tid)); - } - - // Also write the WalltimeMarker record. - { - static_assert(sizeof(time_t) <= 8, "time_t needs to be at most 8 bytes"); - auto &WalltimeMarker = Metadata[1]; - WalltimeMarker.Type = uint8_t(RecordType::Metadata); - WalltimeMarker.RecordKind = - uint8_t(MetadataRecord::RecordKinds::WalltimeMarker); - - // We only really need microsecond precision here, and enforce across - // platforms that we need 64-bit seconds and 32-bit microseconds encoded in - // the Metadata record. - int32_t Micros = TS.tv_nsec / 1000; - int64_t Seconds = TS.tv_sec; - internal_memcpy(WalltimeMarker.Data, &Seconds, sizeof(Seconds)); - internal_memcpy(WalltimeMarker.Data + sizeof(Seconds), &Micros, - sizeof(Micros)); - } - - // Also write the Pid record. - { - // Write out a MetadataRecord that contains the current pid - auto &PidMetadata = Metadata[2]; - PidMetadata.Type = uint8_t(RecordType::Metadata); - PidMetadata.RecordKind = uint8_t(MetadataRecord::RecordKinds::Pid); - int32_t pid = static_cast<int32_t>(Pid); - internal_memcpy(&PidMetadata.Data, &pid, sizeof(pid)); - } + MetadataRecord Metadata[] = { + // Write out a MetadataRecord to signify that this is the start of a new + // buffer, associated with a particular thread, with a new CPU. For the + // data, we have 15 bytes to squeeze as much information as we can. At + // this point we only write down the following bytes: + // - Thread ID (tid_t, cast to 4 bytes type due to Darwin being 8 bytes) + createMetadataRecord<MetadataRecord::RecordKinds::NewBuffer>( + static_cast<int32_t>(Tid)), + + // Also write the WalltimeMarker record. We only really need microsecond + // precision here, and enforce across platforms that we need 64-bit + // seconds and 32-bit microseconds encoded in the Metadata record. + createMetadataRecord<MetadataRecord::RecordKinds::WalltimeMarker>( + static_cast<int64_t>(TS.tv_sec), + static_cast<int32_t>(TS.tv_nsec / 1000)), + + // Also write the Pid record. + createMetadataRecord<MetadataRecord::RecordKinds::Pid>( + static_cast<int32_t>(Pid)), + }; TLD.NumConsecutiveFnEnters = 0; TLD.NumTailCalls = 0; if (TLD.BQ == nullptr || TLD.BQ->finalizing()) return; - internal_memcpy(TLD.RecordPtr, Metadata, sizeof(Metadata)); - TLD.RecordPtr += sizeof(Metadata); - // Since we write out the extents as the first metadata record of the - // buffer, we need to write out the extents including the extents record. 
- atomic_store(&TLD.Buffer.Extents->Size, sizeof(Metadata), - memory_order_release); + FDRLogWriter Writer(TLD.Buffer); + TLD.RecordPtr += Writer.writeMetadataRecords(Metadata); } static void setupNewBuffer(int (*wall_clock_reader)( @@ -201,6 +178,7 @@ static void setupNewBuffer(int (*wall_clock_reader)( auto &TLD = getThreadLocalData(); auto &B = TLD.Buffer; TLD.RecordPtr = static_cast<char *>(B.Data); + atomic_store(&B.Extents, 0, memory_order_release); tid_t Tid = GetTid(); timespec TS{0, 0}; pid_t Pid = internal_getpid(); @@ -213,91 +191,90 @@ static void setupNewBuffer(int (*wall_clock_reader)( static void incrementExtents(size_t Add) { auto &TLD = getThreadLocalData(); - atomic_fetch_add(&TLD.Buffer.Extents->Size, Add, memory_order_acq_rel); + atomic_fetch_add(&TLD.Buffer.Extents, Add, memory_order_acq_rel); } static void decrementExtents(size_t Subtract) { auto &TLD = getThreadLocalData(); - atomic_fetch_sub(&TLD.Buffer.Extents->Size, Subtract, memory_order_acq_rel); + atomic_fetch_sub(&TLD.Buffer.Extents, Subtract, memory_order_acq_rel); } static void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC) XRAY_NEVER_INSTRUMENT { auto &TLD = getThreadLocalData(); - MetadataRecord NewCPUId; - NewCPUId.Type = uint8_t(RecordType::Metadata); - NewCPUId.RecordKind = uint8_t(MetadataRecord::RecordKinds::NewCPUId); + FDRLogWriter W(TLD.Buffer, TLD.RecordPtr); // The data for the New CPU will contain the following bytes: // - CPU ID (uint16_t, 2 bytes) // - Full TSC (uint64_t, 8 bytes) // Total = 10 bytes. - internal_memcpy(&NewCPUId.Data, &CPU, sizeof(CPU)); - internal_memcpy(&NewCPUId.Data[sizeof(CPU)], &TSC, sizeof(TSC)); - internal_memcpy(TLD.RecordPtr, &NewCPUId, sizeof(MetadataRecord)); - TLD.RecordPtr += sizeof(MetadataRecord); + W.writeMetadata<MetadataRecord::RecordKinds::NewCPUId>(CPU, TSC); + TLD.RecordPtr = W.getNextRecord(); TLD.NumConsecutiveFnEnters = 0; TLD.NumTailCalls = 0; - incrementExtents(sizeof(MetadataRecord)); } static void writeTSCWrapMetadata(uint64_t TSC) XRAY_NEVER_INSTRUMENT { auto &TLD = getThreadLocalData(); - MetadataRecord TSCWrap; - TSCWrap.Type = uint8_t(RecordType::Metadata); - TSCWrap.RecordKind = uint8_t(MetadataRecord::RecordKinds::TSCWrap); + FDRLogWriter W(TLD.Buffer, TLD.RecordPtr); // The data for the TSCWrap record contains the following bytes: // - Full TSC (uint64_t, 8 bytes) // Total = 8 bytes. - internal_memcpy(&TSCWrap.Data, &TSC, sizeof(TSC)); - internal_memcpy(TLD.RecordPtr, &TSCWrap, sizeof(MetadataRecord)); - TLD.RecordPtr += sizeof(MetadataRecord); + W.writeMetadata<MetadataRecord::RecordKinds::TSCWrap>(TSC); + TLD.RecordPtr = W.getNextRecord(); TLD.NumConsecutiveFnEnters = 0; TLD.NumTailCalls = 0; - incrementExtents(sizeof(MetadataRecord)); } // Call Argument metadata records store the arguments to a function in the // order of their appearance; holes are not supported by the buffer format. 
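// With the writer in place, a call that logs an argument is serialized as an
// EnterArg function record followed by one CallArgument metadata record whose
// 8-byte payload carries the argument value.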
static void writeCallArgumentMetadata(uint64_t A) XRAY_NEVER_INSTRUMENT {
   auto &TLD = getThreadLocalData();
-  MetadataRecord CallArg;
-  CallArg.Type = uint8_t(RecordType::Metadata);
-  CallArg.RecordKind = uint8_t(MetadataRecord::RecordKinds::CallArgument);
-
-  internal_memcpy(CallArg.Data, &A, sizeof(A));
-  internal_memcpy(TLD.RecordPtr, &CallArg, sizeof(MetadataRecord));
-  TLD.RecordPtr += sizeof(MetadataRecord);
-  incrementExtents(sizeof(MetadataRecord));
+  FDRLogWriter W(TLD.Buffer, TLD.RecordPtr);
+  W.writeMetadata<MetadataRecord::RecordKinds::CallArgument>(A);
+  TLD.RecordPtr = W.getNextRecord();
 }

-static void writeFunctionRecord(int FuncId, uint32_t TSCDelta,
+static void writeFunctionRecord(int32_t FuncId, uint32_t TSCDelta,
                                 XRayEntryType EntryType) XRAY_NEVER_INSTRUMENT {
-  FunctionRecord FuncRecord;
-  FuncRecord.Type = uint8_t(RecordType::Function);
-  // Only take 28 bits of the function id.
-  FuncRecord.FuncId = FuncId & ~(0x0F << 28);
-  FuncRecord.TSCDelta = TSCDelta;
+  constexpr int32_t MaxFuncId = (1 << 28) - 1;
+  if (UNLIKELY(FuncId > MaxFuncId)) {
+    if (Verbosity())
+      Report("Warning: Function ID '%d' > max function id: '%d'", FuncId,
+             MaxFuncId);
+    return;
+  }

   auto &TLD = getThreadLocalData();

+  FDRLogWriter W(TLD.Buffer, TLD.RecordPtr);
+
+  // Only take 28 bits of the function id.
+  //
+  // We need to be careful about the sign bit and the bitwise operations being
+  // performed here. In effect, we want to truncate the value of the function
+  // id to the first 28 bits. To do this properly, we need to mask the
+  // function id with (2 ^ 28) - 1 == 0x0fffffff.
+  //
+  auto TruncatedId = FuncId & MaxFuncId;
+  auto Kind = FDRLogWriter::FunctionRecordKind::Enter;
+
   switch (EntryType) {
   case XRayEntryType::ENTRY:
     ++TLD.NumConsecutiveFnEnters;
-    FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionEnter);
     break;
   case XRayEntryType::LOG_ARGS_ENTRY:
     // We should not rewind functions with logged args.
     TLD.NumConsecutiveFnEnters = 0;
     TLD.NumTailCalls = 0;
-    FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionEnter);
+    Kind = FDRLogWriter::FunctionRecordKind::EnterArg;
     break;
   case XRayEntryType::EXIT:
     // If we've decided to log the function exit, we will never erase the log
     // before it.
     TLD.NumConsecutiveFnEnters = 0;
     TLD.NumTailCalls = 0;
-    FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionExit);
+    Kind = FDRLogWriter::FunctionRecordKind::Exit;
     break;
   case XRayEntryType::TAIL:
     // If we just entered the function we're tail exiting from or erased every
@@ -312,8 +289,7 @@ static void writeFunctionRecord(int FuncId, uint32_t TSCDelta,
       TLD.NumTailCalls = 0;
       TLD.NumConsecutiveFnEnters = 0;
     }
-    FuncRecord.RecordKind =
-        uint8_t(FunctionRecord::RecordKinds::FunctionTailExit);
+    Kind = FDRLogWriter::FunctionRecordKind::TailExit;
     break;
   case XRayEntryType::CUSTOM_EVENT: {
     // This is a bug in patching, so we'll report it once and move on.
@@ -334,9 +310,8 @@ static void writeFunctionRecord(int FuncId, uint32_t TSCDelta,
     }
   }

-  internal_memcpy(TLD.RecordPtr, &FuncRecord, sizeof(FunctionRecord));
-  TLD.RecordPtr += sizeof(FunctionRecord);
-  incrementExtents(sizeof(FunctionRecord));
+  W.writeFunction(Kind, TruncatedId, TSCDelta);
+  TLD.RecordPtr = W.getNextRecord();
 }

 static atomic_uint64_t TicksPerSec{0};
@@ -345,7 +320,8 @@ static atomic_uint64_t ThresholdTicks{0};

 // Re-point the thread local pointer into this thread's Buffer before the recent
 // "Function Entry" record and any "Tail Call Exit" records after that.
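// Rewinding implements the duration filter for FDR mode: when an exit shows
// up within ThresholdTicks of its matching entry, the entry record (and any
// tail exit records after it) is erased by stepping RecordPtr backwards and
// shrinking the buffer extents by the same number of bytes.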
static void rewindRecentCall(uint64_t TSC, uint64_t &LastTSC, - uint64_t &LastFunctionEntryTSC, int32_t FuncId) { + uint64_t &LastFunctionEntryTSC, + int32_t FuncId) XRAY_NEVER_INSTRUMENT { auto &TLD = getThreadLocalData(); TLD.RecordPtr -= FunctionRecSize; decrementExtents(FunctionRecSize); @@ -410,6 +386,9 @@ static void rewindRecentCall(uint64_t TSC, uint64_t &LastTSC, static bool releaseThreadLocalBuffer(BufferQueue &BQArg) { auto &TLD = getThreadLocalData(); auto EC = BQArg.releaseBuffer(TLD.Buffer); + if (TLD.Buffer.Data == nullptr) + return true; + if (EC != BufferQueue::ErrorCode::Ok) { Report("Failed to release buffer at %p; error=%s\n", TLD.Buffer.Data, BufferQueue::getErrorString(EC)); @@ -521,6 +500,7 @@ static uint32_t writeCurrentCPUTSC(ThreadLocalData &TLD, uint64_t TSC, writeNewCPUIdMetadata(CPU, TSC); return 0; } + // If the delta is greater than the range for a uint32_t, then we write out // the TSC wrap metadata entry with the full TSC, and the TSC for the // function record be 0. @@ -570,7 +550,7 @@ static void processFunctionHook(int32_t FuncId, XRayEntryType Entry, auto &TLD = getThreadLocalData(); - if (TLD.BQ == nullptr) + if (TLD.BQ == nullptr && BQ != nullptr) TLD.BQ = BQ; if (!isLogInitializedAndReady(TLD.BQ, TSC, CPU, wall_clock_reader)) @@ -596,14 +576,16 @@ static void processFunctionHook(int32_t FuncId, XRayEntryType Entry, // 1. When the delta between the TSC we get and the previous TSC for the // same CPU is outside of the uint32_t range, we end up having to // write a MetadataRecord to indicate a "tsc wrap" before the actual - // FunctionRecord. + // FunctionRecord. This means we have: 1 MetadataRecord + 1 Function + // Record. // 2. When we learn that we've moved CPUs, we need to write a // MetadataRecord to indicate a "cpu change", and thus write out the // current TSC for that CPU before writing out the actual - // FunctionRecord. - // 3. When we learn about a new CPU ID, we need to write down a "new cpu - // id" MetadataRecord before writing out the actual FunctionRecord. - // 4. The second MetadataRecord is the optional function call argument. + // FunctionRecord. This means we have: 1 MetadataRecord + 1 Function + // Record. + // 3. Given the previous two cases, in addition we can add at most one + // function argument record. This means we have: 2 MetadataRecord + 1 + // Function Record. // // So the math we need to do is to determine whether writing 40 bytes past the // current pointer exceeds the buffer's maximum size. If we don't have enough @@ -615,20 +597,21 @@ static void processFunctionHook(int32_t FuncId, XRayEntryType Entry, return; } - // By this point, we are now ready to write up to 40 bytes (explained above). - DCHECK((TLD.RecordPtr + MaxSize) - static_cast<char *>(TLD.Buffer.Data) >= - static_cast<ptrdiff_t>(MetadataRecSize) && - "Misconfigured BufferQueue provided; Buffer size not large enough."); - auto RecordTSCDelta = writeCurrentCPUTSC(TLD, TSC, CPU); TLD.LastTSC = TSC; TLD.CurrentCPU = CPU; switch (Entry) { case XRayEntryType::ENTRY: - case XRayEntryType::LOG_ARGS_ENTRY: // Update the thread local state for the next invocation. TLD.LastFunctionEntryTSC = TSC; break; + case XRayEntryType::LOG_ARGS_ENTRY: + // Update the thread local state for the next invocation, but also prevent + // rewinding when we have arguments logged. 
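+    // (NumConsecutiveFnEnters and NumTailCalls are the rewind counters;
+    // zeroing them here keeps rewindRecentCall from ever erasing the entry
+    // record that the upcoming CallArgument payload refers to.)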
+ TLD.LastFunctionEntryTSC = TSC; + TLD.NumConsecutiveFnEnters = 0; + TLD.NumTailCalls = 0; + break; case XRayEntryType::TAIL: case XRayEntryType::EXIT: // Break out and write the exit record if we can't erase any functions. @@ -741,7 +724,8 @@ XRayBuffer fdrIterator(const XRayBuffer B) { static BufferQueue::const_iterator It{}; static BufferQueue::const_iterator End{}; - static void *CurrentBuffer{nullptr}; + static uint8_t *CurrentBuffer{nullptr}; + static size_t SerializedBufferSize = 0; if (B.Data == static_cast<void *>(&Header) && B.Size == sizeof(Header)) { // From this point on, we provide raw access to the raw buffer we're getting // from the BufferQueue. We're relying on the iterators from the current @@ -751,7 +735,7 @@ XRayBuffer fdrIterator(const XRayBuffer B) { } if (CurrentBuffer != nullptr) { - InternalFree(CurrentBuffer); + deallocateBuffer(CurrentBuffer, SerializedBufferSize); CurrentBuffer = nullptr; } @@ -762,9 +746,9 @@ XRayBuffer fdrIterator(const XRayBuffer B) { // out to disk. The difference here would be that we still write "empty" // buffers, or at least go through the iterators faithfully to let the // handlers see the empty buffers in the queue. - auto BufferSize = atomic_load(&It->Extents->Size, memory_order_acquire); - auto SerializedBufferSize = BufferSize + sizeof(MetadataRecord); - CurrentBuffer = InternalAlloc(SerializedBufferSize); + auto BufferSize = atomic_load(&It->Extents, memory_order_acquire); + SerializedBufferSize = BufferSize + sizeof(MetadataRecord); + CurrentBuffer = allocateBuffer(SerializedBufferSize); if (CurrentBuffer == nullptr) return {nullptr, 0}; @@ -832,7 +816,6 @@ XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT { if (TLD.RecordPtr != nullptr && TLD.BQ != nullptr) releaseThreadLocalBuffer(*TLD.BQ); BQ->~BufferQueue(); - InternalFree(BQ); BQ = nullptr; } }); @@ -855,15 +838,7 @@ XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT { // (fixed-sized) and let the tools reading the buffers deal with the data // afterwards. // - int Fd = -1; - { - // FIXME: Remove this section of the code, when we remove the struct-based - // configuration API. - SpinMutexLock Guard(&FDROptionsMutex); - Fd = FDROptions.Fd; - } - if (Fd == -1) - Fd = getLogFD(); + int Fd = getLogFD(); if (Fd == -1) { auto Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; atomic_store(&LogFlushStatus, Result, memory_order_release); @@ -875,6 +850,13 @@ XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT { retryingWriteAll(Fd, reinterpret_cast<char *>(&Header), reinterpret_cast<char *>(&Header) + sizeof(Header)); + // Release the current thread's buffer before we attempt to write out all the + // buffers. This ensures that in case we had only a single thread going, that + // we are able to capture the data nonetheless. + auto &TLD = getThreadLocalData(); + if (TLD.RecordPtr != nullptr && TLD.BQ != nullptr) + releaseThreadLocalBuffer(*TLD.BQ); + BQ->apply([&](const BufferQueue::Buffer &B) { // Starting at version 2 of the FDR logging implementation, we only write // the records identified by the extents of the buffer. We use the Extents @@ -882,7 +864,7 @@ XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT { // still use a Metadata record, but fill in the extents instead for the // data. 
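      // (Concretely: each buffer is written out as one BufferExtents metadata
      // record followed by exactly BufferExtents bytes of log data; fdrIterator
      // above builds the same framing in memory for in-memory consumers.)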
MetadataRecord ExtentsRecord; - auto BufferExtents = atomic_load(&B.Extents->Size, memory_order_acquire); + auto BufferExtents = atomic_load(&B.Extents, memory_order_acquire); DCHECK(BufferExtents <= B.Size); ExtentsRecord.Type = uint8_t(RecordType::Metadata); ExtentsRecord.RecordKind = @@ -914,7 +896,12 @@ XRayLogInitStatus fdrLoggingFinalize() XRAY_NEVER_INSTRUMENT { // Do special things to make the log finalize itself, and not allow any more // operations to be performed until re-initialized. - BQ->finalize(); + if (BQ == nullptr) { + if (Verbosity()) + Report("Attempting to finalize an uninitialized global buffer!\n"); + } else { + BQ->finalize(); + } atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_FINALIZED, memory_order_release); @@ -986,11 +973,17 @@ void fdrLoggingHandleCustomEvent(void *Event, // - The metadata record we're going to write. (16 bytes) // - The additional data we're going to write. Currently, that's the size // of the event we're going to dump into the log as free-form bytes. - if (!prepareBuffer(TSC, CPU, clock_gettime, MetadataRecSize + EventSize)) { + if (!prepareBuffer(TSC, CPU, clock_gettime, + MetadataRecSize + ReducedEventSize)) { TLD.BQ = nullptr; return; } + // We need to reset the counts for the number of functions we're able to + // rewind. + TLD.NumConsecutiveFnEnters = 0; + TLD.NumTailCalls = 0; + // Write the custom event metadata record, which consists of the following // information: // - 8 bytes (64-bits) for the full TSC when the event started. @@ -1001,11 +994,12 @@ void fdrLoggingHandleCustomEvent(void *Event, uint8_t(MetadataRecord::RecordKinds::CustomEventMarker); constexpr auto TSCSize = sizeof(TC.TSC); internal_memcpy(&CustomEvent.Data, &ReducedEventSize, sizeof(int32_t)); - internal_memcpy(&CustomEvent.Data[sizeof(int32_t)], &TSC, TSCSize); + internal_memcpy(&CustomEvent.Data + sizeof(int32_t), &TSC, TSCSize); internal_memcpy(TLD.RecordPtr, &CustomEvent, sizeof(CustomEvent)); TLD.RecordPtr += sizeof(CustomEvent); internal_memcpy(TLD.RecordPtr, Event, ReducedEventSize); - incrementExtents(MetadataRecSize + EventSize); + TLD.RecordPtr += ReducedEventSize; + incrementExtents(MetadataRecSize + ReducedEventSize); endBufferIfFull(); } @@ -1031,7 +1025,8 @@ void fdrLoggingHandleTypedEvent( // - The metadata record we're going to write. (16 bytes) // - The additional data we're going to write. Currently, that's the size // of the event we're going to dump into the log as free-form bytes. - if (!prepareBuffer(TSC, CPU, clock_gettime, MetadataRecSize + EventSize)) { + if (!prepareBuffer(TSC, CPU, clock_gettime, + MetadataRecSize + ReducedEventSize)) { TLD.BQ = nullptr; return; } @@ -1056,12 +1051,13 @@ void fdrLoggingHandleTypedEvent( TLD.RecordPtr += sizeof(TypedEvent); internal_memcpy(TLD.RecordPtr, Event, ReducedEventSize); + TLD.RecordPtr += ReducedEventSize; incrementExtents(MetadataRecSize + EventSize); endBufferIfFull(); } -XRayLogInitStatus fdrLoggingInit(size_t BufferSize, size_t BufferMax, - void *Options, +XRayLogInitStatus fdrLoggingInit(UNUSED size_t BufferSize, + UNUSED size_t BufferMax, void *Options, size_t OptionsSize) XRAY_NEVER_INSTRUMENT { if (Options == nullptr) return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; @@ -1075,76 +1071,51 @@ XRayLogInitStatus fdrLoggingInit(size_t BufferSize, size_t BufferMax, return static_cast<XRayLogInitStatus>(CurrentStatus); } - // Because of __xray_log_init_mode(...) 
which guarantees that this will be - // called with BufferSize == 0 and BufferMax == 0 we parse the configuration - // provided in the Options pointer as a string instead. - if (BufferSize == 0 && BufferMax == 0) { - if (Verbosity()) - Report("Initializing FDR mode with options: %s\n", - static_cast<const char *>(Options)); - - // TODO: Factor out the flags specific to the FDR mode implementation. For - // now, use the global/single definition of the flags, since the FDR mode - // flags are already defined there. - FlagParser FDRParser; - FDRFlags FDRFlags; - registerXRayFDRFlags(&FDRParser, &FDRFlags); - FDRFlags.setDefaults(); - - // Override first from the general XRAY_DEFAULT_OPTIONS compiler-provided - // options until we migrate everyone to use the XRAY_FDR_OPTIONS - // compiler-provided options. - FDRParser.ParseString(useCompilerDefinedFlags()); - FDRParser.ParseString(useCompilerDefinedFDRFlags()); - auto *EnvOpts = GetEnv("XRAY_FDR_OPTIONS"); - if (EnvOpts == nullptr) - EnvOpts = ""; - FDRParser.ParseString(EnvOpts); - - // FIXME: Remove this when we fully remove the deprecated flags. - if (internal_strlen(EnvOpts) == 0) { - FDRFlags.func_duration_threshold_us = - flags()->xray_fdr_log_func_duration_threshold_us; - FDRFlags.grace_period_ms = flags()->xray_fdr_log_grace_period_ms; - } - - // The provided options should always override the compiler-provided and - // environment-variable defined options. - FDRParser.ParseString(static_cast<const char *>(Options)); - *fdrFlags() = FDRFlags; - BufferSize = FDRFlags.buffer_size; - BufferMax = FDRFlags.buffer_max; - SpinMutexLock Guard(&FDROptionsMutex); - FDROptions.Fd = -1; - FDROptions.ReportErrors = true; - } else if (OptionsSize != sizeof(FDRLoggingOptions)) { - // FIXME: This is deprecated, and should really be removed. - // At this point we use the flag parser specific to the FDR mode - // implementation. - if (Verbosity()) - Report("Cannot initialize FDR logging; wrong size for options: %d\n", - OptionsSize); - return static_cast<XRayLogInitStatus>( - atomic_load(&LoggingStatus, memory_order_acquire)); - } else { - if (Verbosity()) - Report("XRay FDR: struct-based init is deprecated, please use " - "string-based configuration instead.\n"); - SpinMutexLock Guard(&FDROptionsMutex); - internal_memcpy(&FDROptions, Options, OptionsSize); + if (Verbosity()) + Report("Initializing FDR mode with options: %s\n", + static_cast<const char *>(Options)); + + // TODO: Factor out the flags specific to the FDR mode implementation. For + // now, use the global/single definition of the flags, since the FDR mode + // flags are already defined there. + FlagParser FDRParser; + FDRFlags FDRFlags; + registerXRayFDRFlags(&FDRParser, &FDRFlags); + FDRFlags.setDefaults(); + + // Override first from the general XRAY_DEFAULT_OPTIONS compiler-provided + // options until we migrate everyone to use the XRAY_FDR_OPTIONS + // compiler-provided options. + FDRParser.ParseString(useCompilerDefinedFlags()); + FDRParser.ParseString(useCompilerDefinedFDRFlags()); + auto *EnvOpts = GetEnv("XRAY_FDR_OPTIONS"); + if (EnvOpts == nullptr) + EnvOpts = ""; + FDRParser.ParseString(EnvOpts); + + // FIXME: Remove this when we fully remove the deprecated flags. + if (internal_strlen(EnvOpts) == 0) { + FDRFlags.func_duration_threshold_us = + flags()->xray_fdr_log_func_duration_threshold_us; + FDRFlags.grace_period_ms = flags()->xray_fdr_log_grace_period_ms; } + // The provided options should always override the compiler-provided and + // environment-variable defined options. 
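The string-based initialization above applies its configuration sources in a fixed order so that later sources win: defaults, the compiler-embedded XRAY_DEFAULT_OPTIONS and XRAY_FDR_OPTIONS strings, the XRAY_FDR_OPTIONS environment variable, and finally the Options string handed to the init function. A minimal sketch of that layering, with TinyParser as a hypothetical stand-in for FlagParser:

#include <cstdlib>
#include <string>
#include <vector>

// Stand-in for FlagParser; it only records the layers in application
// order. In the real parser, later strings override earlier ones.
struct TinyParser {
  std::vector<std::string> Layers;
  void ParseString(const char *S) {
    if (S && *S)
      Layers.emplace_back(S);
  }
};

void initFromLayeredConfig(const char *CompilerDefaults,
                           const char *CompilerFdrDefaults,
                           const char *UserOptions) {
  TinyParser P;
  P.ParseString(CompilerDefaults);    // XRAY_DEFAULT_OPTIONS (broadest)
  P.ParseString(CompilerFdrDefaults); // compiler-provided XRAY_FDR_OPTIONS
  const char *Env = std::getenv("XRAY_FDR_OPTIONS");
  P.ParseString(Env ? Env : "");      // environment overrides compiled-in
  P.ParseString(UserOptions);         // the caller's Options string wins
}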
+ FDRParser.ParseString(static_cast<const char *>(Options)); + *fdrFlags() = FDRFlags; + BufferSize = FDRFlags.buffer_size; + BufferMax = FDRFlags.buffer_max; + bool Success = false; if (BQ != nullptr) { BQ->~BufferQueue(); - InternalFree(BQ); BQ = nullptr; } if (BQ == nullptr) { - BQ = reinterpret_cast<BufferQueue *>( - InternalAlloc(sizeof(BufferQueue), nullptr, 64)); + BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage); new (BQ) BufferQueue(BufferSize, BufferMax, Success); } @@ -1152,7 +1123,6 @@ XRayLogInitStatus fdrLoggingInit(size_t BufferSize, size_t BufferMax, Report("BufferQueue init failed.\n"); if (BQ != nullptr) { BQ->~BufferQueue(); - InternalFree(BQ); BQ = nullptr; } return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; @@ -1170,6 +1140,8 @@ XRayLogInitStatus fdrLoggingInit(size_t BufferSize, size_t BufferMax, auto &TLD = *reinterpret_cast<ThreadLocalData *>(TLDPtr); if (TLD.BQ == nullptr) return; + if (TLD.Buffer.Data == nullptr) + return; auto EC = TLD.BQ->releaseBuffer(TLD.Buffer); if (EC != BufferQueue::ErrorCode::Ok) Report("At thread exit, failed to release buffer at %p; error=%s\n", @@ -1209,11 +1181,22 @@ bool fdrLogDynamicInitializer() XRAY_NEVER_INSTRUMENT { }; auto RegistrationResult = __xray_log_register_mode("xray-fdr", Impl); if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK && - Verbosity()) + Verbosity()) { Report("Cannot register XRay FDR mode to 'xray-fdr'; error = %d\n", RegistrationResult); - if (flags()->xray_fdr_log || !internal_strcmp(flags()->xray_mode, "xray-fdr")) - __xray_set_log_impl(Impl); + return false; + } + + if (flags()->xray_fdr_log || + !internal_strcmp(flags()->xray_mode, "xray-fdr")) { + auto SelectResult = __xray_log_select_mode("xray-fdr"); + if (SelectResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK && + Verbosity()) { + Report("Cannot select XRay FDR mode as 'xray-fdr'; error = %d\n", + SelectResult); + return false; + } + } return true; } diff --git a/lib/xray/xray_function_call_trie.h b/lib/xray/xray_function_call_trie.h index 2acf14aa5..f4c2fc335 100644 --- a/lib/xray/xray_function_call_trie.h +++ b/lib/xray/xray_function_call_trie.h @@ -15,7 +15,7 @@ #ifndef XRAY_FUNCTION_CALL_TRIE_H #define XRAY_FUNCTION_CALL_TRIE_H -#include "sanitizer_common/sanitizer_allocator_internal.h" +#include "xray_defs.h" #include "xray_profiling_flags.h" #include "xray_segmented_array.h" #include <memory> // For placement new. @@ -120,9 +120,11 @@ public: // We add a constructor here to allow us to inplace-construct through // Array<...>'s AppendEmplace. Node(Node *P, NodeIdPairAllocatorType &A, int64_t CC, int64_t CLT, - int32_t F) - : Parent(P), Callees(A), CallCount(CC), CumulativeLocalTime(CLT), - FId(F) {} + int32_t F) XRAY_NEVER_INSTRUMENT : Parent(P), + Callees(A), + CallCount(CC), + CumulativeLocalTime(CLT), + FId(F) {} // TODO: Include the compact histogram. }; @@ -134,7 +136,8 @@ private: // We add a constructor here to allow us to inplace-construct through // Array<...>'s AppendEmplace. 
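Note the allocation change in the hunk above: the BufferQueue now lives in preallocated static storage, constructed with placement new and torn down with an explicit destructor call instead of InternalAlloc/InternalFree. A sketch of that lifecycle under the same assumption of an aligned_storage-style backing buffer; Queue and QueueStorage are illustrative names:

#include <cstddef>
#include <new>
#include <type_traits>

struct Queue { // stand-in for BufferQueue
  Queue(std::size_t, std::size_t, bool &Ok) { Ok = true; }
  ~Queue() {}
};

// Static backing storage, sized and aligned for exactly one Queue.
static std::aligned_storage<sizeof(Queue), alignof(Queue)>::type QueueStorage;
static Queue *Q = nullptr;

void initQueue(std::size_t Size, std::size_t Max, bool &Ok) {
  if (Q != nullptr) { // re-initialization: destroy the old instance first
    Q->~Queue();
    Q = nullptr;
  }
  Q = reinterpret_cast<Queue *>(&QueueStorage);
  new (Q) Queue(Size, Max, Ok); // placement new; no heap allocation
  if (!Ok) {                    // on failure, only destruction is needed,
    Q->~Queue();                // never a free
    Q = nullptr;
  }
}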
- ShadowStackEntry(uint64_t T, Node *N) : EntryTSC{T}, NodePtr{N} {} + ShadowStackEntry(uint64_t T, Node *N) XRAY_NEVER_INSTRUMENT : EntryTSC{T}, + NodePtr{N} {} }; using NodeArray = Array<Node>; @@ -158,8 +161,9 @@ public: Allocators(const Allocators &) = delete; Allocators &operator=(const Allocators &) = delete; - Allocators(Allocators &&O) - : NodeAllocator(O.NodeAllocator), RootAllocator(O.RootAllocator), + Allocators(Allocators &&O) XRAY_NEVER_INSTRUMENT + : NodeAllocator(O.NodeAllocator), + RootAllocator(O.RootAllocator), ShadowStackAllocator(O.ShadowStackAllocator), NodeIdPairAllocator(O.NodeIdPairAllocator) { O.NodeAllocator = nullptr; @@ -168,7 +172,7 @@ public: O.NodeIdPairAllocator = nullptr; } - Allocators &operator=(Allocators &&O) { + Allocators &operator=(Allocators &&O) XRAY_NEVER_INSTRUMENT { { auto Tmp = O.NodeAllocator; O.NodeAllocator = this->NodeAllocator; @@ -192,58 +196,54 @@ public: return *this; } - ~Allocators() { + ~Allocators() XRAY_NEVER_INSTRUMENT { // Note that we cannot use delete on these pointers, as they need to be // returned to the sanitizer_common library's internal memory tracking // system. if (NodeAllocator != nullptr) { NodeAllocator->~NodeAllocatorType(); - InternalFree(NodeAllocator); + deallocate(NodeAllocator); NodeAllocator = nullptr; } if (RootAllocator != nullptr) { RootAllocator->~RootAllocatorType(); - InternalFree(RootAllocator); + deallocate(RootAllocator); RootAllocator = nullptr; } if (ShadowStackAllocator != nullptr) { ShadowStackAllocator->~ShadowStackAllocatorType(); - InternalFree(ShadowStackAllocator); + deallocate(ShadowStackAllocator); ShadowStackAllocator = nullptr; } if (NodeIdPairAllocator != nullptr) { NodeIdPairAllocator->~NodeIdPairAllocatorType(); - InternalFree(NodeIdPairAllocator); + deallocate(NodeIdPairAllocator); NodeIdPairAllocator = nullptr; } } }; // TODO: Support configuration of options through the arguments. 
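The destructor and move operations above implement manual single ownership: moves transfer the pointers and null (or swap) the source, and teardown is an explicit destructor call followed by deallocate() rather than delete. A condensed sketch of the same pattern for one pointer; allocate/deallocate here are malloc/free stand-ins for the xray_allocator helpers:

#include <cstdlib>
#include <utility>

template <class T> T *allocate() {
  return static_cast<T *>(std::malloc(sizeof(T)));
}
template <class T> void deallocate(T *P) { std::free(P); }

template <class T> struct Owner {
  T *Ptr = nullptr;
  Owner() = default;
  Owner(const Owner &) = delete; // no copies: exactly one owner
  Owner &operator=(const Owner &) = delete;
  Owner(Owner &&O) : Ptr(O.Ptr) { O.Ptr = nullptr; } // steal, null source
  Owner &operator=(Owner &&O) {
    std::swap(Ptr, O.Ptr); // O's destructor cleans up what we held before
    return *this;
  }
  ~Owner() {
    if (Ptr != nullptr) {
      Ptr->~T();       // explicit destructor call...
      deallocate(Ptr); // ...then return the memory; never `delete`
      Ptr = nullptr;
    }
  }
};

The swap-based move assignment mirrors the Tmp-exchange in the real operator=: the moved-from object's destructor later releases whatever this object previously held.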
- static Allocators InitAllocators() { + static Allocators InitAllocators() XRAY_NEVER_INSTRUMENT { return InitAllocatorsCustom(profilingFlags()->per_thread_allocator_max); } - static Allocators InitAllocatorsCustom(uptr Max) { + static Allocators InitAllocatorsCustom(uptr Max) XRAY_NEVER_INSTRUMENT { Allocators A; - auto NodeAllocator = reinterpret_cast<Allocators::NodeAllocatorType *>( - InternalAlloc(sizeof(Allocators::NodeAllocatorType))); + auto NodeAllocator = allocate<Allocators::NodeAllocatorType>(); new (NodeAllocator) Allocators::NodeAllocatorType(Max); A.NodeAllocator = NodeAllocator; - auto RootAllocator = reinterpret_cast<Allocators::RootAllocatorType *>( - InternalAlloc(sizeof(Allocators::RootAllocatorType))); + auto RootAllocator = allocate<Allocators::RootAllocatorType>(); new (RootAllocator) Allocators::RootAllocatorType(Max); A.RootAllocator = RootAllocator; auto ShadowStackAllocator = - reinterpret_cast<Allocators::ShadowStackAllocatorType *>( - InternalAlloc(sizeof(Allocators::ShadowStackAllocatorType))); + allocate<Allocators::ShadowStackAllocatorType>(); new (ShadowStackAllocator) Allocators::ShadowStackAllocatorType(Max); A.ShadowStackAllocator = ShadowStackAllocator; - auto NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>( - InternalAlloc(sizeof(NodeIdPairAllocatorType))); + auto NodeIdPairAllocator = allocate<NodeIdPairAllocatorType>(); new (NodeIdPairAllocator) NodeIdPairAllocatorType(Max); A.NodeIdPairAllocator = NodeIdPairAllocator; return A; @@ -256,12 +256,13 @@ private: NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr; public: - explicit FunctionCallTrie(const Allocators &A) - : Nodes(*A.NodeAllocator), Roots(*A.RootAllocator), + explicit FunctionCallTrie(const Allocators &A) XRAY_NEVER_INSTRUMENT + : Nodes(*A.NodeAllocator), + Roots(*A.RootAllocator), ShadowStack(*A.ShadowStackAllocator), NodeIdPairAllocator(A.NodeIdPairAllocator) {} - void enterFunction(const int32_t FId, uint64_t TSC) { + void enterFunction(const int32_t FId, uint64_t TSC) XRAY_NEVER_INSTRUMENT { DCHECK_NE(FId, 0); // This function primarily deals with ensuring that the ShadowStack is // consistent and ready for when an exit event is encountered. @@ -301,7 +302,7 @@ public: return; } - void exitFunction(int32_t FId, uint64_t TSC) { + void exitFunction(int32_t FId, uint64_t TSC) XRAY_NEVER_INSTRUMENT { // When we exit a function, we look up the ShadowStack to see whether we've // entered this function before. We do as little processing here as we can, // since most of the hard work would have already been done at function @@ -323,7 +324,7 @@ public: } } - const RootArray &getRoots() const { return Roots; } + const RootArray &getRoots() const XRAY_NEVER_INSTRUMENT { return Roots; } // The deepCopyInto operation will update the provided FunctionCallTrie by // re-creating the contents of this particular FunctionCallTrie in the other @@ -338,7 +339,7 @@ public: // synchronisation of both "this" and |O|. // // This function must *not* be called with a non-empty FunctionCallTrie |O|. - void deepCopyInto(FunctionCallTrie &O) const { + void deepCopyInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT { DCHECK(O.getRoots().empty()); // We then push the root into a stack, to use as the parent marker for new @@ -394,7 +395,7 @@ public: // // This function is *not* thread-safe, and may require external // synchronisation of both "this" and |O|. 
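enterFunction and exitFunction above keep a shadow stack of (entry TSC, node) pairs so that an exit can charge elapsed cycles to the matching entry; frames whose exits were never observed (for example, tail calls) get unwound and charged along the way. A simplified sketch of that accounting, using std::vector and a plain global in place of the segmented arrays and thread-local state:

#include <cstdint>
#include <vector>

struct Node {
  int32_t FId;
  uint64_t CallCount = 0;
  uint64_t CumulativeLocalTime = 0;
};

struct Frame {
  uint64_t EntryTSC;
  Node *NodePtr;
};
static std::vector<Frame> ShadowStack; // thread_local in the real code

void enterFunction(Node *N, uint64_t TSC) {
  N->CallCount++;
  ShadowStack.push_back({TSC, N}); // remember when we entered
}

void exitFunction(int32_t FId, uint64_t TSC) {
  // Pop until the entry matching FId; intermediate frames are functions
  // whose exits we never saw, so they accumulate time here as well.
  while (!ShadowStack.empty()) {
    Frame Top = ShadowStack.back();
    ShadowStack.pop_back();
    Top.NodePtr->CumulativeLocalTime += TSC - Top.EntryTSC;
    if (Top.NodePtr->FId == FId)
      break;
  }
}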
- void mergeInto(FunctionCallTrie &O) const { + void mergeInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT { struct NodeAndTarget { FunctionCallTrie::Node *OrigNode; FunctionCallTrie::Node *TargetNode; diff --git a/lib/xray/xray_init.cc b/lib/xray/xray_init.cc index b4e069795..8886a600d 100644 --- a/lib/xray/xray_init.cc +++ b/lib/xray/xray_init.cc @@ -27,6 +27,15 @@ extern const XRaySledEntry __start_xray_instr_map[] __attribute__((weak)); extern const XRaySledEntry __stop_xray_instr_map[] __attribute__((weak)); extern const XRayFunctionSledIndex __start_xray_fn_idx[] __attribute__((weak)); extern const XRayFunctionSledIndex __stop_xray_fn_idx[] __attribute__((weak)); + +#if SANITIZER_MAC +// HACK: This is a temporary workaround to make XRay build on +// Darwin, but it will probably not work at runtime. +const XRaySledEntry __start_xray_instr_map[] = {}; +extern const XRaySledEntry __stop_xray_instr_map[] = {}; +extern const XRayFunctionSledIndex __start_xray_fn_idx[] = {}; +extern const XRayFunctionSledIndex __stop_xray_fn_idx[] = {}; +#endif } using namespace __xray; diff --git a/lib/xray/xray_profile_collector.cc b/lib/xray/xray_profile_collector.cc index 857c7f8d4..a2a8f1ffe 100644 --- a/lib/xray/xray_profile_collector.cc +++ b/lib/xray/xray_profile_collector.cc @@ -15,6 +15,7 @@ #include "xray_profile_collector.h" #include "sanitizer_common/sanitizer_common.h" #include "xray_allocator.h" +#include "xray_defs.h" #include "xray_profiling_flags.h" #include "xray_segmented_array.h" #include <memory> @@ -81,26 +82,9 @@ static ProfileBufferArray *ProfileBuffers = nullptr; static ProfileBufferArrayAllocator *ProfileBuffersAllocator = nullptr; static FunctionCallTrie::Allocators *GlobalAllocators = nullptr; -static void *allocateBuffer(size_t S) { - auto B = reinterpret_cast<void *>(internal_mmap( - NULL, S, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)); - if (B == MAP_FAILED) { - if (Verbosity()) - Report("XRay Profiling: Failed to allocate memory of size %d.\n", S); - return nullptr; - } - return B; -} - -static void deallocateBuffer(void *B, size_t S) { - if (B == nullptr) - return; - internal_munmap(B, S); -} - } // namespace -void post(const FunctionCallTrie &T, tid_t TId) { +void post(const FunctionCallTrie &T, tid_t TId) XRAY_NEVER_INSTRUMENT { static pthread_once_t Once = PTHREAD_ONCE_INIT; pthread_once(&Once, +[] { reset(); }); @@ -134,8 +118,10 @@ struct ProfileRecord { const FunctionCallTrie::Node *Node = nullptr; // Constructor for in-place construction. - ProfileRecord(PathAllocator &A, const FunctionCallTrie::Node *N) - : Path(A), Node(N) {} + ProfileRecord(PathAllocator &A, + const FunctionCallTrie::Node *N) XRAY_NEVER_INSTRUMENT + : Path(A), + Node(N) {} }; namespace { @@ -144,9 +130,9 @@ using ProfileRecordArray = Array<ProfileRecord>; // Walk a depth-first traversal of each root of the FunctionCallTrie to generate // the path(s) and the data associated with the path. 
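populateRecords, whose signature changes below, does an explicit-stack depth-first walk from each root and emits one record per node, recovering each node's path by chasing Parent pointers back to the root. A simplified version with std containers standing in for the Array/Allocator machinery:

#include <cstdint>
#include <vector>

struct TrieNode {
  TrieNode *Parent = nullptr;
  std::vector<TrieNode *> Callees;
  int32_t FId = 0;
};

struct Record {
  std::vector<int32_t> Path; // leaf-to-root function ids
  const TrieNode *Node;
};

void populateRecords(std::vector<Record> &Out,
                     const std::vector<TrieNode *> &Roots) {
  std::vector<const TrieNode *> Stack;
  for (const auto *Root : Roots) {
    Stack.push_back(Root);
    while (!Stack.empty()) {
      const TrieNode *N = Stack.back();
      Stack.pop_back();
      // Recover the path by walking Parent pointers up to the root.
      Record R{{}, N};
      for (const TrieNode *P = N; P != nullptr; P = P->Parent)
        R.Path.push_back(P->FId);
      Out.push_back(std::move(R));
      for (auto *Callee : N->Callees) // continue depth-first
        Stack.push_back(Callee);
    }
  }
}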
-static void populateRecords(ProfileRecordArray &PRs, - ProfileRecord::PathAllocator &PA, - const FunctionCallTrie &Trie) { +static void +populateRecords(ProfileRecordArray &PRs, ProfileRecord::PathAllocator &PA, + const FunctionCallTrie &Trie) XRAY_NEVER_INSTRUMENT { using StackArray = Array<const FunctionCallTrie::Node *>; using StackAllocator = typename StackArray::AllocatorType; StackAllocator StackAlloc(profilingFlags()->stack_allocator_max); @@ -174,40 +160,41 @@ static void populateRecords(ProfileRecordArray &PRs, } static void serializeRecords(ProfileBuffer *Buffer, const BlockHeader &Header, - const ProfileRecordArray &ProfileRecords) { - auto NextPtr = static_cast<char *>( + const ProfileRecordArray &ProfileRecords) + XRAY_NEVER_INSTRUMENT { + auto NextPtr = static_cast<uint8_t *>( internal_memcpy(Buffer->Data, &Header, sizeof(Header))) + sizeof(Header); for (const auto &Record : ProfileRecords) { // List of IDs follow: for (const auto FId : Record.Path) NextPtr = - static_cast<char *>(internal_memcpy(NextPtr, &FId, sizeof(FId))) + + static_cast<uint8_t *>(internal_memcpy(NextPtr, &FId, sizeof(FId))) + sizeof(FId); // Add the sentinel here. constexpr int32_t SentinelFId = 0; - NextPtr = static_cast<char *>( + NextPtr = static_cast<uint8_t *>( internal_memset(NextPtr, SentinelFId, sizeof(SentinelFId))) + sizeof(SentinelFId); // Add the node data here. NextPtr = - static_cast<char *>(internal_memcpy(NextPtr, &Record.Node->CallCount, - sizeof(Record.Node->CallCount))) + + static_cast<uint8_t *>(internal_memcpy( + NextPtr, &Record.Node->CallCount, sizeof(Record.Node->CallCount))) + sizeof(Record.Node->CallCount); - NextPtr = static_cast<char *>( + NextPtr = static_cast<uint8_t *>( internal_memcpy(NextPtr, &Record.Node->CumulativeLocalTime, sizeof(Record.Node->CumulativeLocalTime))) + sizeof(Record.Node->CumulativeLocalTime); } - DCHECK_EQ(NextPtr - static_cast<char *>(Buffer->Data), Buffer->Size); + DCHECK_EQ(NextPtr - static_cast<uint8_t *>(Buffer->Data), Buffer->Size); } } // namespace -void serialize() { +void serialize() XRAY_NEVER_INSTRUMENT { SpinMutexLock Lock(&GlobalMutex); if (GlobalAllocators == nullptr || ThreadTries == nullptr || @@ -216,7 +203,7 @@ void serialize() { // Clear out the global ProfileBuffers, if it's not empty. for (auto &B : *ProfileBuffers) - deallocateBuffer(B.Data, B.Size); + deallocateBuffer(reinterpret_cast<uint8_t *>(B.Data), B.Size); ProfileBuffers->trim(ProfileBuffers->size()); if (ThreadTries->empty()) @@ -266,13 +253,13 @@ void serialize() { } } -void reset() { +void reset() XRAY_NEVER_INSTRUMENT { SpinMutexLock Lock(&GlobalMutex); if (ProfileBuffers != nullptr) { // Clear out the profile buffers that have been serialized. 
for (auto &B : *ProfileBuffers) - deallocateBuffer(B.Data, B.Size); + deallocateBuffer(reinterpret_cast<uint8_t *>(B.Data), B.Size); ProfileBuffers->trim(ProfileBuffers->size()); } @@ -316,7 +303,7 @@ void reset() { new (ProfileBuffers) ProfileBufferArray(*ProfileBuffersAllocator); } -XRayBuffer nextBuffer(XRayBuffer B) { +XRayBuffer nextBuffer(XRayBuffer B) XRAY_NEVER_INSTRUMENT { SpinMutexLock Lock(&GlobalMutex); if (ProfileBuffers == nullptr || ProfileBuffers->size() == 0) diff --git a/lib/xray/xray_profiling.cc b/lib/xray/xray_profiling.cc index d4b4345d7..6615de1a3 100644 --- a/lib/xray/xray_profiling.cc +++ b/lib/xray/xray_profiling.cc @@ -19,7 +19,6 @@ #include "sanitizer_common/sanitizer_flags.h" #include "xray/xray_interface.h" #include "xray/xray_log_interface.h" - #include "xray_flags.h" #include "xray_profile_collector.h" #include "xray_profiling_flags.h" @@ -40,16 +39,30 @@ atomic_sint32_t ProfilerLogStatus = {XRayLogInitStatus::XRAY_LOG_UNINITIALIZED}; SpinMutex ProfilerOptionsMutex; struct alignas(64) ProfilingData { - FunctionCallTrie::Allocators *Allocators = nullptr; - FunctionCallTrie *FCT = nullptr; + FunctionCallTrie::Allocators *Allocators; + FunctionCallTrie *FCT; }; static pthread_key_t ProfilingKey; +thread_local std::aligned_storage<sizeof(FunctionCallTrie::Allocators)>::type + AllocatorsStorage; +thread_local std::aligned_storage<sizeof(FunctionCallTrie)>::type + FunctionCallTrieStorage; thread_local std::aligned_storage<sizeof(ProfilingData)>::type ThreadStorage{}; + static ProfilingData &getThreadLocalData() XRAY_NEVER_INSTRUMENT { thread_local auto ThreadOnce = [] { new (&ThreadStorage) ProfilingData{}; + auto *Allocators = + reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage); + new (Allocators) FunctionCallTrie::Allocators(); + *Allocators = FunctionCallTrie::InitAllocators(); + auto *FCT = reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage); + new (FCT) FunctionCallTrie(*Allocators); + auto &TLD = *reinterpret_cast<ProfilingData *>(&ThreadStorage); + TLD.Allocators = Allocators; + TLD.FCT = FCT; pthread_setspecific(ProfilingKey, &ThreadStorage); return false; }(); @@ -57,25 +70,18 @@ static ProfilingData &getThreadLocalData() XRAY_NEVER_INSTRUMENT { auto &TLD = *reinterpret_cast<ProfilingData *>(&ThreadStorage); - // We need to check whether the global flag to finalizing/finalized has been - // switched. If it is, then we ought to not actually initialise the data. - auto Status = atomic_load(&ProfilerLogStatus, memory_order_acquire); - if (Status == XRayLogInitStatus::XRAY_LOG_FINALIZING || - Status == XRayLogInitStatus::XRAY_LOG_FINALIZED) - return TLD; - - // If we're live, then we re-initialize TLD if the pointers are not null. 
- if (UNLIKELY(TLD.Allocators == nullptr && TLD.FCT == nullptr)) { - TLD.Allocators = reinterpret_cast<FunctionCallTrie::Allocators *>( - InternalAlloc(sizeof(FunctionCallTrie::Allocators))); - new (TLD.Allocators) FunctionCallTrie::Allocators(); - *TLD.Allocators = FunctionCallTrie::InitAllocators(); - TLD.FCT = reinterpret_cast<FunctionCallTrie *>( - InternalAlloc(sizeof(FunctionCallTrie))); - new (TLD.FCT) FunctionCallTrie(*TLD.Allocators); + if (UNLIKELY(TLD.Allocators == nullptr || TLD.FCT == nullptr)) { + auto *Allocators = + reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage); + new (Allocators) FunctionCallTrie::Allocators(); + *Allocators = FunctionCallTrie::InitAllocators(); + auto *FCT = reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage); + new (FCT) FunctionCallTrie(*Allocators); + TLD.Allocators = Allocators; + TLD.FCT = FCT; } - return TLD; + return *reinterpret_cast<ProfilingData *>(&ThreadStorage); } static void cleanupTLD() XRAY_NEVER_INSTRUMENT { @@ -83,8 +89,6 @@ static void cleanupTLD() XRAY_NEVER_INSTRUMENT { if (TLD.Allocators != nullptr && TLD.FCT != nullptr) { TLD.FCT->~FunctionCallTrie(); TLD.Allocators->~Allocators(); - InternalFree(TLD.FCT); - InternalFree(TLD.Allocators); TLD.FCT = nullptr; TLD.Allocators = nullptr; } @@ -162,11 +166,13 @@ namespace { thread_local atomic_uint8_t ReentranceGuard{0}; -static void postCurrentThreadFCT(ProfilingData &TLD) { +static void postCurrentThreadFCT(ProfilingData &TLD) XRAY_NEVER_INSTRUMENT { if (TLD.Allocators == nullptr || TLD.FCT == nullptr) return; - profileCollectorService::post(*TLD.FCT, GetTid()); + if (!TLD.FCT->getRoots().empty()) + profileCollectorService::post(*TLD.FCT, GetTid()); + cleanupTLD(); } @@ -181,13 +187,14 @@ void profilingHandleArg0(int32_t FuncId, return; auto Status = atomic_load(&ProfilerLogStatus, memory_order_acquire); - auto &TLD = getThreadLocalData(); if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_FINALIZED || Status == XRayLogInitStatus::XRAY_LOG_FINALIZING)) { + auto &TLD = getThreadLocalData(); postCurrentThreadFCT(TLD); return; } + auto &TLD = getThreadLocalData(); switch (Entry) { case XRayEntryType::ENTRY: case XRayEntryType::LOG_ARGS_ENTRY: @@ -235,15 +242,8 @@ XRayLogInitStatus profilingFinalize() XRAY_NEVER_INSTRUMENT { } XRayLogInitStatus -profilingLoggingInit(size_t BufferSize, size_t BufferMax, void *Options, - size_t OptionsSize) XRAY_NEVER_INSTRUMENT { - if (BufferSize != 0 || BufferMax != 0) { - if (Verbosity()) - Report("__xray_log_init() being used, and is unsupported. Use " - "__xray_log_init_mode(...) instead. Bailing out."); - return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; - } - +profilingLoggingInit(UNUSED size_t BufferSize, UNUSED size_t BufferMax, + void *Options, size_t OptionsSize) XRAY_NEVER_INSTRUMENT { s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZING, diff --git a/lib/xray/xray_segmented_array.h b/lib/xray/xray_segmented_array.h index dbbfc8e78..c723c7de0 100644 --- a/lib/xray/xray_segmented_array.h +++ b/lib/xray/xray_segmented_array.h @@ -88,7 +88,7 @@ private: // segments when elements are trimmed off the end. SegmentBase *Freelist = &SentinelSegment; - Segment *NewSegment() { + Segment *NewSegment() XRAY_NEVER_INSTRUMENT { // We need to handle the case in which enough elements have been trimmed to // allow us to re-use segments we've allocated before. 
For this we look into // the Freelist, to see whether we need to actually allocate new blocks or @@ -111,7 +111,7 @@ private: return S; } - Segment *InitHeadAndTail() { + Segment *InitHeadAndTail() XRAY_NEVER_INSTRUMENT { DCHECK_EQ(Head, &SentinelSegment); DCHECK_EQ(Tail, &SentinelSegment); auto Segment = NewSegment(); @@ -123,7 +123,7 @@ private: return Segment; } - Segment *AppendNewSegment() { + Segment *AppendNewSegment() XRAY_NEVER_INSTRUMENT { auto S = NewSegment(); if (S == nullptr) return nullptr; @@ -144,16 +144,18 @@ private: size_t Size = 0; public: - Iterator(SegmentBase *IS, size_t Off, size_t S) - : S(IS), Offset(Off), Size(S) {} - Iterator(const Iterator &) noexcept = default; - Iterator() noexcept = default; - Iterator(Iterator &&) noexcept = default; - Iterator &operator=(const Iterator &) = default; - Iterator &operator=(Iterator &&) = default; - ~Iterator() = default; - - Iterator &operator++() { + Iterator(SegmentBase *IS, size_t Off, size_t S) XRAY_NEVER_INSTRUMENT + : S(IS), + Offset(Off), + Size(S) {} + Iterator(const Iterator &) NOEXCEPT XRAY_NEVER_INSTRUMENT = default; + Iterator() NOEXCEPT XRAY_NEVER_INSTRUMENT = default; + Iterator(Iterator &&) NOEXCEPT XRAY_NEVER_INSTRUMENT = default; + Iterator &operator=(const Iterator &) XRAY_NEVER_INSTRUMENT = default; + Iterator &operator=(Iterator &&) XRAY_NEVER_INSTRUMENT = default; + ~Iterator() XRAY_NEVER_INSTRUMENT = default; + + Iterator &operator++() XRAY_NEVER_INSTRUMENT { if (++Offset % ElementsPerSegment || Offset == Size) return *this; @@ -168,7 +170,7 @@ private: return *this; } - Iterator &operator--() { + Iterator &operator--() XRAY_NEVER_INSTRUMENT { DCHECK_NE(S, &SentinelSegment); DCHECK_GT(Offset, 0); @@ -181,29 +183,31 @@ private: return *this; } - Iterator operator++(int) { + Iterator operator++(int) XRAY_NEVER_INSTRUMENT { Iterator Copy(*this); ++(*this); return Copy; } - Iterator operator--(int) { + Iterator operator--(int) XRAY_NEVER_INSTRUMENT { Iterator Copy(*this); --(*this); return Copy; } template <class V, class W> - friend bool operator==(const Iterator<V> &L, const Iterator<W> &R) { + friend bool operator==(const Iterator<V> &L, + const Iterator<W> &R) XRAY_NEVER_INSTRUMENT { return L.S == R.S && L.Offset == R.Offset; } template <class V, class W> - friend bool operator!=(const Iterator<V> &L, const Iterator<W> &R) { + friend bool operator!=(const Iterator<V> &L, + const Iterator<W> &R) XRAY_NEVER_INSTRUMENT { return !(L == R); } - U &operator*() const { + U &operator*() const XRAY_NEVER_INSTRUMENT { DCHECK_NE(S, &SentinelSegment); auto RelOff = Offset % ElementsPerSegment; @@ -214,11 +218,11 @@ private: return *reinterpret_cast<U *>(AlignedOffset); } - U *operator->() const { return &(**this); } + U *operator->() const XRAY_NEVER_INSTRUMENT { return &(**this); } }; public: - explicit Array(AllocatorType &A) : Alloc(&A) {} + explicit Array(AllocatorType &A) XRAY_NEVER_INSTRUMENT : Alloc(&A) {} Array(const Array &) = delete; Array(Array &&O) NOEXCEPT : Alloc(O.Alloc), @@ -230,16 +234,16 @@ public: O.Size = 0; } - bool empty() const { return Size == 0; } + bool empty() const XRAY_NEVER_INSTRUMENT { return Size == 0; } - AllocatorType &allocator() const { + AllocatorType &allocator() const XRAY_NEVER_INSTRUMENT { DCHECK_NE(Alloc, nullptr); return *Alloc; } - size_t size() const { return Size; } + size_t size() const XRAY_NEVER_INSTRUMENT { return Size; } - T *Append(const T &E) { + T *Append(const T &E) XRAY_NEVER_INSTRUMENT { if (UNLIKELY(Head == &SentinelSegment)) if (InitHeadAndTail() == 
nullptr) return nullptr; @@ -257,7 +261,8 @@ public: return Position; } - template <class... Args> T *AppendEmplace(Args &&... args) { + template <class... Args> + T *AppendEmplace(Args &&... args) XRAY_NEVER_INSTRUMENT { if (UNLIKELY(Head == &SentinelSegment)) if (InitHeadAndTail() == nullptr) return nullptr; @@ -281,7 +286,7 @@ public: return reinterpret_cast<T *>(Position); } - T &operator[](size_t Offset) const { + T &operator[](size_t Offset) const XRAY_NEVER_INSTRUMENT { DCHECK_LE(Offset, Size); // We need to traverse the array enough times to find the element at Offset. auto S = Head; @@ -296,13 +301,13 @@ public: return *reinterpret_cast<T *>(Position); } - T &front() const { + T &front() const XRAY_NEVER_INSTRUMENT { DCHECK_NE(Head, &SentinelSegment); DCHECK_NE(Size, 0u); return *begin(); } - T &back() const { + T &back() const XRAY_NEVER_INSTRUMENT { DCHECK_NE(Tail, &SentinelSegment); DCHECK_NE(Size, 0u); auto It = end(); @@ -310,7 +315,8 @@ public: return *It; } - template <class Predicate> T *find_element(Predicate P) const { + template <class Predicate> + T *find_element(Predicate P) const XRAY_NEVER_INSTRUMENT { if (empty()) return nullptr; @@ -324,7 +330,7 @@ public: /// Remove N Elements from the end. This leaves the blocks behind, and not /// require allocation of new blocks for new elements added after trimming. - void trim(size_t Elements) { + void trim(size_t Elements) XRAY_NEVER_INSTRUMENT { if (Elements == 0) return; @@ -360,10 +366,18 @@ public: } // Provide iterators. - Iterator<T> begin() const { return Iterator<T>(Head, 0, Size); } - Iterator<T> end() const { return Iterator<T>(Tail, Size, Size); } - Iterator<const T> cbegin() const { return Iterator<const T>(Head, 0, Size); } - Iterator<const T> cend() const { return Iterator<const T>(Tail, Size, Size); } + Iterator<T> begin() const XRAY_NEVER_INSTRUMENT { + return Iterator<T>(Head, 0, Size); + } + Iterator<T> end() const XRAY_NEVER_INSTRUMENT { + return Iterator<T>(Tail, Size, Size); + } + Iterator<const T> cbegin() const XRAY_NEVER_INSTRUMENT { + return Iterator<const T>(Head, 0, Size); + } + Iterator<const T> cend() const XRAY_NEVER_INSTRUMENT { + return Iterator<const T>(Tail, Size, Size); + } }; // We need to have this storage definition out-of-line so that the compiler can diff --git a/lib/xray/xray_trampoline_x86_64.S b/lib/xray/xray_trampoline_x86_64.S index 99ad3966e..9dffae048 100644 --- a/lib/xray/xray_trampoline_x86_64.S +++ b/lib/xray/xray_trampoline_x86_64.S @@ -19,6 +19,7 @@ .macro SAVE_REGISTERS + pushfq subq $240, %rsp CFI_DEF_CFA_OFFSET(248) movq %rbp, 232(%rsp) @@ -69,6 +70,7 @@ movq 8(%rsp), %r14 movq 0(%rsp), %r15 addq $240, %rsp + popfq CFI_DEF_CFA_OFFSET(8) .endm diff --git a/lib/xray/xray_x86_64.cc b/lib/xray/xray_x86_64.cc index 51dc4ce43..508f749c2 100644 --- a/lib/xray/xray_x86_64.cc +++ b/lib/xray/xray_x86_64.cc @@ -3,7 +3,7 @@ #include "xray_defs.h" #include "xray_interface_internal.h" -#if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD +#if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD || SANITIZER_MAC #include <sys/types.h> #if SANITIZER_OPENBSD #include <sys/time.h> @@ -81,13 +81,16 @@ uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT { } return TSCFrequency == -1 ? 
0 : static_cast<uint64_t>(TSCFrequency); } -#elif SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD +#elif SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD || SANITIZER_MAC uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT { long long TSCFrequency = -1; size_t tscfreqsz = sizeof(TSCFrequency); #if SANITIZER_OPENBSD int Mib[2] = { CTL_MACHDEP, CPU_TSCFREQ }; if (sysctl(Mib, 2, &TSCFrequency, &tscfreqsz, NULL, 0) != -1) { +#elif SANITIZER_MAC + if (sysctlbyname("machdep.tsc.frequency", &TSCFrequency, &tscfreqsz, + NULL, 0) != -1 ) { #else if (sysctlbyname("machdep.tsc_freq", &TSCFrequency, &tscfreqsz, |