summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/base/low_level_alloc.cc89
-rw-r--r--src/base/low_level_alloc.h22
-rw-r--r--src/google/malloc_hook.h6
-rw-r--r--src/malloc_hook.cc28
-rwxr-xr-xsrc/pprof7
-rw-r--r--src/stacktrace_x86-inl.h5
-rw-r--r--src/tcmalloc.cc4
-rwxr-xr-xsrc/tests/sampling_test.sh16
-rw-r--r--src/thread_cache.cc6
-rw-r--r--src/windows/patch_functions.cc264
10 files changed, 333 insertions, 114 deletions
diff --git a/src/base/low_level_alloc.cc b/src/base/low_level_alloc.cc
index 900ae4e..2bbce54 100644
--- a/src/base/low_level_alloc.cc
+++ b/src/base/low_level_alloc.cc
@@ -67,7 +67,7 @@ namespace {
// first. Valid in both allocated and unallocated blocks
intptr_t magic; // kMagicAllocated or kMagicUnallocated xor this
LowLevelAlloc::Arena *arena; // pointer to parent arena
- void *dummy_for_alignment; // aligns regions to 0 mod 2*sizeof(void*)
+ void *dummy_for_alignment; // aligns regions to 0 mod 2*sizeof(void*)
} header;
// Next two fields: in unallocated blocks: freelist skiplist data
@@ -197,15 +197,57 @@ struct LowLevelAlloc::Arena {
// pointer.
static struct LowLevelAlloc::Arena default_arena;
-// A non-malloc-hooked arena: used only to allocate metadata for arenas that
+// Non-malloc-hooked arenas: used only to allocate metadata for arenas that
// do not want malloc hook reporting, so that for them there's no malloc hook
// reporting even during arena creation.
static struct LowLevelAlloc::Arena unhooked_arena;
+static struct LowLevelAlloc::Arena unhooked_async_sig_safe_arena;
// magic numbers to identify allocated and unallocated blocks
static const intptr_t kMagicAllocated = 0x4c833e95;
static const intptr_t kMagicUnallocated = ~kMagicAllocated;
+namespace {
+ class ArenaLock {
+ public:
+ explicit ArenaLock(LowLevelAlloc::Arena *arena) :
+ left_(false), mask_valid_(false), arena_(arena) {
+ if ((arena->flags & LowLevelAlloc::kAsyncSignalSafe) != 0) {
+ // We've decided not to support async-signal-safe arena use until
+ // there is a demonstrated need. Here's how one could do it though
+ // (would need to be made more portable).
+#if 0
+ sigset_t all;
+ sigfillset(&all);
+ this->mask_valid_ =
+ (pthread_sigmask(SIG_BLOCK, &all, &this->mask_) == 0);
+#else
+ RAW_CHECK(false, "We do not yet support async-signal-safe arena.");
+#endif
+ }
+ this->arena_->mu.Lock();
+ }
+ ~ArenaLock() { RAW_CHECK(this->left_, "haven't left Arena region"); }
+ void Leave() {
+ this->arena_->mu.Unlock();
+#if 0
+ if (this->mask_valid_) {
+ pthread_sigmask(SIG_SETMASK, &this->mask_, 0);
+ }
+#endif
+ this->left_ = true;
+ }
+ private:
+ bool left_; // whether left region
+ bool mask_valid_;
+#if 0
+ sigset_t mask_; // old mask of blocked signals
+#endif
+ LowLevelAlloc::Arena *arena_;
+ DISALLOW_COPY_AND_ASSIGN(ArenaLock);
+ };
+} // anonymous namespace
+
// create an appropriate magic number for an object at "ptr"
// "magic" should be kMagicAllocated or kMagicUnallocated
inline static intptr_t Magic(intptr_t magic, AllocList::Header *ptr) {
@@ -235,6 +277,8 @@ static void ArenaInit(LowLevelAlloc::Arena *arena) {
// Default arena should be hooked, e.g. for heap-checker to trace
// pointer chains through objects in the default arena.
arena->flags = LowLevelAlloc::kCallMallocHook;
+ } else if (arena == &unhooked_async_sig_safe_arena) {
+ arena->flags = LowLevelAlloc::kAsyncSignalSafe;
} else {
arena->flags = 0; // other arenas' flags may be overridden by client,
// but unhooked_arena will have 0 in 'flags'.
@@ -246,9 +290,12 @@ static void ArenaInit(LowLevelAlloc::Arena *arena) {
LowLevelAlloc::Arena *LowLevelAlloc::NewArena(int32 flags,
Arena *meta_data_arena) {
RAW_CHECK(meta_data_arena != 0, "must pass a valid arena");
- if (meta_data_arena == &default_arena &&
- (flags & LowLevelAlloc::kCallMallocHook) == 0) {
- meta_data_arena = &unhooked_arena;
+ if (meta_data_arena == &default_arena) {
+ if ((flags & LowLevelAlloc::kAsyncSignalSafe) != 0) {
+ meta_data_arena = &unhooked_async_sig_safe_arena;
+ } else if ((flags & LowLevelAlloc::kCallMallocHook) == 0) {
+ meta_data_arena = &unhooked_arena;
+ }
}
// Arena(0) uses the constructor for non-static contexts
Arena *result =
@@ -262,9 +309,9 @@ LowLevelAlloc::Arena *LowLevelAlloc::NewArena(int32 flags,
bool LowLevelAlloc::DeleteArena(Arena *arena) {
RAW_CHECK(arena != 0 && arena != &default_arena && arena != &unhooked_arena,
"may not delete default arena");
- arena->mu.Lock();
+ ArenaLock section(arena);
bool empty = (arena->allocation_count == 0);
- arena->mu.Unlock();
+ section.Leave();
if (empty) {
while (arena->freelist.next[0] != 0) {
AllocList *region = arena->freelist.next[0];
@@ -279,7 +326,13 @@ bool LowLevelAlloc::DeleteArena(Arena *arena) {
"empty arena has non-page-aligned block size");
RAW_CHECK(reinterpret_cast<intptr_t>(region) % arena->pagesize == 0,
"empty arena has non-page-aligned block");
- RAW_CHECK(munmap(region, size) == 0,
+ int munmap_result;
+ if ((arena->flags & LowLevelAlloc::kAsyncSignalSafe) == 0) {
+ munmap_result = munmap(region, size);
+ } else {
+ munmap_result = MallocHook::UnhookedMUnmap(region, size);
+ }
+ RAW_CHECK(munmap_result == 0,
"LowLevelAlloc::DeleteArena: munmap failed address");
}
Free(arena);
@@ -363,21 +416,21 @@ void LowLevelAlloc::Free(void *v) {
if ((arena->flags & kCallMallocHook) != 0) {
MallocHook::InvokeDeleteHook(v);
}
- arena->mu.Lock();
+ ArenaLock section(arena);
AddToFreelist(v, arena);
RAW_CHECK(arena->allocation_count > 0, "nothing in arena to free");
arena->allocation_count--;
- arena->mu.Unlock();
+ section.Leave();
}
}
// allocates and returns a block of size bytes, to be freed with Free()
// L < arena->mu
-void *DoAllocWithArena(size_t request, LowLevelAlloc::Arena *arena) {
+static void *DoAllocWithArena(size_t request, LowLevelAlloc::Arena *arena) {
void *result = 0;
if (request != 0) {
AllocList *s; // will point to region that satisfies request
- arena->mu.Lock();
+ ArenaLock section(arena);
ArenaInit(arena);
// round up with header
size_t req_rnd = RoundUp(request + sizeof (s->header), arena->roundup);
@@ -399,8 +452,14 @@ void *DoAllocWithArena(size_t request, LowLevelAlloc::Arena *arena) {
// mmap generous 64K chunks to decrease
// the chances/impact of fragmentation:
size_t new_pages_size = RoundUp(req_rnd, arena->pagesize * 16);
- void *new_pages = mmap(0, new_pages_size,
- PROT_WRITE|PROT_READ, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+ void *new_pages;
+ if ((arena->flags & LowLevelAlloc::kAsyncSignalSafe) != 0) {
+ new_pages = MallocHook::UnhookedMMap(0, new_pages_size,
+ PROT_WRITE|PROT_READ, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+ } else {
+ new_pages = mmap(0, new_pages_size,
+ PROT_WRITE|PROT_READ, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+ }
RAW_CHECK(new_pages != MAP_FAILED, "mmap error");
arena->mu.Lock();
s = reinterpret_cast<AllocList *>(new_pages);
@@ -425,7 +484,7 @@ void *DoAllocWithArena(size_t request, LowLevelAlloc::Arena *arena) {
s->header.magic = Magic(kMagicAllocated, &s->header);
RAW_CHECK(s->header.arena == arena, "");
arena->allocation_count++;
- arena->mu.Unlock();
+ section.Leave();
result = &s->levels;
}
ANNOTATE_NEW_MEMORY(result, request);
diff --git a/src/base/low_level_alloc.h b/src/base/low_level_alloc.h
index 0df1298..393b3d2 100644
--- a/src/base/low_level_alloc.h
+++ b/src/base/low_level_alloc.h
@@ -72,14 +72,20 @@ class LowLevelAlloc {
// meta_data_arena; the DefaultArena() can be passed for meta_data_arena.
// These values may be ored into flags:
enum {
- // Calls to Alloc() and Free() will be reported
- // via the MallocHook interface.
- // The DefaultArena() has this flag on.
- // NewArena(flags, DefaultArena()) w/o this bit in 'flags',
- // on the other hand, will not cause any MallocHook
- // calls even during the NewArena call itself
- // (it will in fact use meta_data_arena different from DefaultArena()).
- kCallMallocHook = 0x0001
+ // Report calls to Alloc() and Free() via the MallocHook interface.
+ // Set in the DefaultArena.
+ kCallMallocHook = 0x0001,
+
+ // Make calls to Alloc(), Free() be async-signal-safe. Not set in
+ // DefaultArena().
+ kAsyncSignalSafe = 0x0002,
+
+ // When used with DefaultArena(), the NewArena() and DeleteArena() calls
+ // obey the flags given explicitly in the NewArena() call, even if those
+ // flags differ from the settings in DefaultArena(). So the call
+ // NewArena(kAsyncSignalSafe, DefaultArena()) is itself async-signal-safe,
+ // as well as generating an arena that provides async-signal-safe
+ // Alloc/Free.
};
static Arena *NewArena(int32 flags, Arena *meta_data_arena);
diff --git a/src/google/malloc_hook.h b/src/google/malloc_hook.h
index f2713e9..48d92da 100644
--- a/src/google/malloc_hook.h
+++ b/src/google/malloc_hook.h
@@ -191,6 +191,12 @@ class PERFTOOLS_DLL_DECL MallocHook {
int skip_count) {
return MallocHook_GetCallerStackTrace(result, max_depth, skip_count);
}
+
+ // Unhooked versions of mmap() and munmap(). These should be used
+ // only by experts, since they bypass heapchecking, etc.
+ static void* UnhookedMMap(void *start, size_t length, int prot, int flags,
+ int fd, off_t offset);
+ static int UnhookedMUnmap(void *start, size_t length);
};
#endif /* _MALLOC_HOOK_H_ */
diff --git a/src/malloc_hook.cc b/src/malloc_hook.cc
index d1ad12a..2a7f542 100644
--- a/src/malloc_hook.cc
+++ b/src/malloc_hook.cc
@@ -423,7 +423,7 @@ static inline void* do_mmap64(void *start, size_t length,
return result;
}
-# endif
+# endif // defined(__x86_64__)
// We use do_mmap64 abstraction to put MallocHook::InvokeMmapHook
// calls right into mmap and mmap64, so that the stack frames in the caller's
@@ -472,7 +472,7 @@ extern "C" void* mmap(void *start, size_t length, int prot, int flags,
return result;
}
-#endif
+#endif // !defined(__USE_FILE_OFFSET64) || !defined(__REDIRECT_NTH)
extern "C" int munmap(void* start, size_t length) __THROW {
MallocHook::InvokeMunmapHook(start, length);
@@ -501,4 +501,26 @@ extern "C" void* sbrk(ptrdiff_t increment) __THROW {
return result;
}
-#endif
+/*static*/void* MallocHook::UnhookedMMap(void *start, size_t length, int prot,
+ int flags, int fd, off_t offset) {
+ return do_mmap64(start, length, prot, flags, fd, offset);
+}
+
+/*static*/int MallocHook::UnhookedMUnmap(void *start, size_t length) {
+ return sys_munmap(start, length);
+}
+
+#else // defined(__linux) &&
+ // (defined(__i386__) || defined(__x86_64__) || defined(__PPC__))
+
+/*static*/void* MallocHook::UnhookedMMap(void *start, size_t length, int prot,
+ int flags, int fd, off_t offset) {
+ return mmap(start, length, prot, flags, fd, offset);
+}
+
+/*static*/int MallocHook::UnhookedMUnmap(void *start, size_t length) {
+ return munmap(start, length);
+}
+
+#endif // defined(__linux) &&
+ // (defined(__i386__) || defined(__x86_64__) || defined(__PPC__))
diff --git a/src/pprof b/src/pprof
index b7c7d07..62e19c7 100755
--- a/src/pprof
+++ b/src/pprof
@@ -1977,7 +1977,12 @@ sub RemoveUninterestingFrames {
'__builtin_vec_delete',
'__builtin_vec_new',
'operator new',
- 'operator new[]') {
+ 'operator new[]',
+ # These mark the beginning/end of our custom sections
+ '__start_google_malloc',
+ '__stop_google_malloc',
+ '__start_malloc_hook',
+ '__stop_malloc_hook') {
$skip{$name} = 1;
$skip{"_" . $name} = 1; # Mach (OS X) adds a _ prefix to everything
}
diff --git a/src/stacktrace_x86-inl.h b/src/stacktrace_x86-inl.h
index 5650c86..05701e7 100644
--- a/src/stacktrace_x86-inl.h
+++ b/src/stacktrace_x86-inl.h
@@ -49,6 +49,11 @@
#elif defined(HAVE_UCONTEXT_H)
#include <ucontext.h> // for ucontext_t
#elif defined(HAVE_CYGWIN_SIGNAL_H)
+// cygwin/signal.h has a buglet where it uses pthread_attr_t without
+// #including <pthread.h> itself. So we have to do it.
+# ifdef HAVE_PTHREAD
+# include <pthread.h>
+# endif
#include <cygwin/signal.h>
typedef ucontext ucontext_t;
#endif
diff --git a/src/tcmalloc.cc b/src/tcmalloc.cc
index 70dd8c6..625301e 100644
--- a/src/tcmalloc.cc
+++ b/src/tcmalloc.cc
@@ -762,9 +762,9 @@ TCMallocGuard::TCMallocGuard() {
// patch the windows VirtualAlloc, etc.
PatchWindowsFunctions(); // defined in windows/patch_functions.cc
#endif
- free(malloc(1));
+ tc_free(tc_malloc(1));
ThreadCache::InitTSD();
- free(malloc(1));
+ tc_free(tc_malloc(1));
MallocExtension::Register(new TCMallocImplementation);
}
}
diff --git a/src/tests/sampling_test.sh b/src/tests/sampling_test.sh
index 149d27b..8c96bc1 100755
--- a/src/tests/sampling_test.sh
+++ b/src/tests/sampling_test.sh
@@ -62,10 +62,10 @@ if ! cat "$SAMPLING_TEST_BINARY" >/dev/null 2>&1; then
SAMPLING_TEST_BINARY="$SAMPLING_TEST_BINARY".exe
fi
-die() {
- echo "FAILED"
- echo "reason:"
- echo "$@"
+die() { # runs the command given as arguments, and then dies.
+ echo "FAILED. Output from $@"
+ echo "----"
+ "$@"
echo "----"
exit 1
}
@@ -79,16 +79,16 @@ mkdir "$OUTDIR" || die "Unable to create $OUTDIR"
# 50M to 99M.
"$SAMPLING_TEST" "$OUTDIR/out"
-echo -n "Testing heap output..."
+echo "Testing heap output..."
"$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.heap" \
| grep '^ *[5-9][0-9]\.[0-9][ 0-9.%]*_*AllocateAllocate' >/dev/null \
- || die `"$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.heap"`
+ || die "$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.heap"
echo "OK"
-echo -n "Testing growth output..."
+echo "Testing growth output..."
"$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.growth" \
| grep '^ *[5-9][0-9]\.[0-9][ 0-9.%]*_*AllocateAllocate' >/dev/null \
- || die `"$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.growth"`
+ || die "$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.growth"
echo "OK"
echo "PASS"
diff --git a/src/thread_cache.cc b/src/thread_cache.cc
index fd44a70..64f4deb 100644
--- a/src/thread_cache.cc
+++ b/src/thread_cache.cc
@@ -299,12 +299,6 @@ int ThreadCache::GetSamplePeriod() {
}
void ThreadCache::InitModule() {
- // There is a slight potential race here because of double-checked
- // locking idiom. However, as long as the program does a small
- // allocation before switching to multi-threaded mode, we will be
- // fine. We increase the chances of doing such a small allocation
- // by doing one in the constructor of the module_enter_exit_hook
- // object declared below.
SpinLockHolder h(Static::pageheap_lock());
if (!phinited) {
Static::InitStaticVars();
diff --git a/src/windows/patch_functions.cc b/src/windows/patch_functions.cc
index 992161f..d13b7b8 100644
--- a/src/windows/patch_functions.cc
+++ b/src/windows/patch_functions.cc
@@ -72,6 +72,11 @@
#error This file is intended for patching allocators - use override_functions.cc instead.
#endif
+// We use psapi. Non-MSVC systems will have to link this in themselves.
+#ifdef _MSC_VER
+#pragma comment(lib, "Psapi.lib")
+#endif
+
// Make sure we always use the 'old' names of the psapi functions.
#ifndef PSAPI_VERSION
#define PSAPI_VERSION 1
@@ -80,6 +85,8 @@
#include <windows.h>
#include <malloc.h> // for _msize and _expand
#include <Psapi.h> // for EnumProcessModules, GetModuleInformation, etc.
+#include <set>
+#include <map>
#include <vector>
#include <base/logging.h>
#include "base/spinlock.h"
@@ -122,29 +129,7 @@ typedef void (*GenericFnPtr)();
using sidestep::PreamblePatcher;
-// This is a subset of MODDULEENTRY32, that we need for patching
-struct ModuleEntryCopy {
- LPVOID modBaseAddr;
- DWORD modBaseSize;
- HMODULE hModule;
- TCHAR szModule[kMaxModuleNameSize];
-
- ModuleEntryCopy() {
- modBaseAddr = NULL;
- modBaseSize = 0;
- hModule = NULL;
- strcpy(szModule, "<executable>");
- }
- ModuleEntryCopy(HANDLE hprocess, HMODULE hmodule, const MODULEINFO& mi) {
- this->modBaseAddr = mi.lpBaseOfDll;
- this->modBaseSize = mi.SizeOfImage;
- this->hModule = hmodule;
- // TODO(csilvers): we could make more efficient by calling this
- // lazily (not until this->szModule is needed, which is often never).
- GetModuleBaseNameA(hprocess, hmodule,
- this->szModule, sizeof(this->szModule));
- }
-};
+struct ModuleEntryCopy; // defined below
// These functions are how we override the memory allocation
// functions, just like tcmalloc.cc and malloc_hook.cc do.
@@ -160,33 +145,19 @@ class LibcInfo {
LibcInfo() {
memset(this, 0, sizeof(*this)); // easiest way to initialize the array
}
- bool SameAs(const LibcInfo& that) const {
- return (is_valid() &&
- module_base_address_ == that.module_base_address_ &&
- module_base_size_ == that.module_base_size_);
- }
- bool SameAsME32(const ModuleEntryCopy& me32) const {
- return (is_valid() &&
- module_base_address_ == me32.modBaseAddr &&
- module_base_size_ == me32.modBaseSize);
- }
+ bool SameAs(const LibcInfo& that) const;
+ bool SameAsModuleEntry(const ModuleEntryCopy& module_entry) const;
+
bool patched() const { return is_valid() && module_name_[0] != '\0'; }
const char* module_name() const { return is_valid() ? module_name_ : ""; }
void set_is_valid(bool b) { is_valid_ = b; }
- // These shouldn't have to be public, since only subclasses of
- // LibcInfo need it, but they do. Maybe something to do with
- // templates. Shrug.
- bool is_valid() const { return is_valid_; }
- GenericFnPtr windows_fn(int ifunction) const {
- return windows_fn_[ifunction];
- }
-
// Populates all the windows_fn_[] vars based on our module info.
// Returns false if windows_fn_ is all NULL's, because there's
- // nothing to patch. Also populates the me32 info.
- bool PopulateWindowsFn(const ModuleEntryCopy& me32);
+ // nothing to patch. Also populates the rest of the module_entry
+ // info, such as the module's name.
+ bool PopulateWindowsFn(const ModuleEntryCopy& module_entry);
protected:
void CopyFrom(const LibcInfo& that) {
@@ -237,6 +208,24 @@ class LibcInfo {
const void *module_base_address_;
size_t module_base_size_;
char module_name_[kMaxModuleNameSize];
+
+ public:
+ // These shouldn't have to be public, since only subclasses of
+ // LibcInfo need it, but they do. Maybe something to do with
+ // templates. Shrug. I hide them down here so users won't see
+ // them. :-) (OK, I also need to define ctrgProcAddress late.)
+ bool is_valid() const { return is_valid_; }
+ GenericFnPtr windows_fn(int ifunction) const {
+ return windows_fn_[ifunction];
+ }
+ // These three are needed by ModuleEntryCopy.
+ static const int ctrgProcAddress = kNumFunctions;
+ static GenericFnPtr static_fn(int ifunction) {
+ return static_fn_[ifunction];
+ }
+ static const char* const function_name(int ifunction) {
+ return function_name_[ifunction];
+ }
};
// Template trickiness: logically, a LibcInfo would include
@@ -294,6 +283,43 @@ template<int> class LibcInfoWithPatchFunctions : public LibcInfo {
// But they seem pretty obscure, and I'm fine not overriding them for now.
};
+// This is a subset of MODULEENTRY32 that we need for patching.
+struct ModuleEntryCopy {
+ LPVOID modBaseAddr;
+ DWORD modBaseSize;
+ HMODULE hModule;
+ TCHAR szModule[kMaxModuleNameSize];
+ // This is not part of MODULEENTRY32, but is needed to avoid making
+ // windows syscalls while we're holding patch_all_modules_lock (see
+ // lock-inversion comments at patch_all_modules_lock definition, below).
+ GenericFnPtr rgProcAddresses[LibcInfo::ctrgProcAddress];
+
+ ModuleEntryCopy() {
+ modBaseAddr = NULL;
+ modBaseSize = 0;
+ hModule = NULL;
+ strcpy(szModule, "<executable>");
+ for (int i = 0; i < sizeof(rgProcAddresses)/sizeof(*rgProcAddresses); i++)
+ rgProcAddresses[i] = LibcInfo::static_fn(i);
+ }
+ ModuleEntryCopy(HANDLE hprocess, HMODULE hmodule, const MODULEINFO& mi) {
+ this->modBaseAddr = mi.lpBaseOfDll;
+ this->modBaseSize = mi.SizeOfImage;
+ this->hModule = hmodule;
+ // TODO(csilvers): we could make more efficient by calling these
+ // lazily (not until the vars are needed, which is often never).
+ // However, there's tricky business with calling windows functions
+ // inside the patch_all_modules_lock (see the lock inversion
+ // comments with the patch_all_modules_lock definition, below), so
+ // it's safest to do it all here, where no lock is needed.
+ ::GetModuleBaseNameA(hprocess, hmodule,
+ this->szModule, sizeof(this->szModule));
+ for (int i = 0; i < sizeof(rgProcAddresses)/sizeof(*rgProcAddresses); i++)
+ rgProcAddresses[i] =
+ (GenericFnPtr)::GetProcAddress(hModule, LibcInfo::function_name(i));
+ }
+};
+
// This class is easier because there's only one of them.
class WindowsInfo {
public:
@@ -347,6 +373,11 @@ class WindowsInfo {
// If you run out, just add a few more to the array. You'll also need
// to update the switch statement in PatchOneModule(), and the list in
// UnpatchWindowsFunctions().
+// main_executable and main_executable_windows are two windows into
+// the same executable. One is responsible for patching the libc
+// routines that live in the main executable (if any) to use tcmalloc;
+// the other is responsible for patching the windows routines like
+// HeapAlloc/etc to use tcmalloc.
static LibcInfoWithPatchFunctions<0> main_executable;
static LibcInfoWithPatchFunctions<1> libc1;
static LibcInfoWithPatchFunctions<2> libc2;
@@ -448,20 +479,28 @@ const GenericFnPtr LibcInfoWithPatchFunctions<T>::perftools_fn_[] = {
{ "FreeLibrary", NULL, NULL, (GenericFnPtr)&Perftools_FreeLibrary },
};
-bool LibcInfo::PopulateWindowsFn(const ModuleEntryCopy& me32) {
+bool LibcInfo::SameAs(const LibcInfo& that) const {
+ return (is_valid() &&
+ module_base_address_ == that.module_base_address_ &&
+ module_base_size_ == that.module_base_size_);
+}
+
+bool LibcInfo::SameAsModuleEntry(const ModuleEntryCopy& module_entry) const {
+ return (is_valid() &&
+ module_base_address_ == module_entry.modBaseAddr &&
+ module_base_size_ == module_entry.modBaseSize);
+}
+
+bool LibcInfo::PopulateWindowsFn(const ModuleEntryCopy& module_entry) {
// First, store the location of the function to patch before
// patching it. If none of these functions are found in the module,
// then this module has no libc in it, and we just return false.
for (int i = 0; i < kNumFunctions; i++) {
if (!function_name_[i]) // we can turn off patching by unsetting name
continue;
- GenericFnPtr fn = NULL;
- if (me32.hModule == NULL) { // used for the main executable
- // This is used only for a statically-linked-in libc.
- fn = static_fn_[i];
- } else {
- fn = (GenericFnPtr)::GetProcAddress(me32.hModule, function_name_[i]);
- }
+ // The ::GetProcAddress calls were done in the ModuleEntryCopy
+ // constructor, so we don't have to make any windows calls here.
+ const GenericFnPtr fn = module_entry.rgProcAddresses[i];
if (fn) {
windows_fn_[i] = PreamblePatcher::ResolveTarget(fn);
}
@@ -514,15 +553,15 @@ bool LibcInfo::PopulateWindowsFn(const ModuleEntryCopy& me32) {
CHECK(windows_fn_[kRealloc]);
// OK, we successfully patched. Let's store our member information.
- module_base_address_ = me32.modBaseAddr;
- module_base_size_ = me32.modBaseSize;
- strcpy(module_name_, me32.szModule);
+ module_base_address_ = module_entry.modBaseAddr;
+ module_base_size_ = module_entry.modBaseSize;
+ strcpy(module_name_, module_entry.szModule);
return true;
}
template<int T>
bool LibcInfoWithPatchFunctions<T>::Patch(const LibcInfo& me_info) {
- CopyFrom(me_info); // copies the me32 and the windows_fn_ array
+ CopyFrom(me_info); // copies the module_entry and the windows_fn_ array
for (int i = 0; i < kNumFunctions; i++) {
if (windows_fn_[i] && windows_fn_[i] != perftools_fn_[i]) {
// if origstub_fn_ is not NULL, it's left around from a previous
@@ -530,7 +569,10 @@ bool LibcInfoWithPatchFunctions<T>::Patch(const LibcInfo& me_info) {
// Since we've patched Unpatch() not to delete origstub_fn_ (it
// causes problems in some contexts, though obviously not this
// one), we should delete it now, before setting it to NULL.
- delete[] reinterpret_cast<char*>(origstub_fn_[i]);
+ // NOTE: casting from a function to a pointer is contra the C++
+ // spec. It's not safe on IA64, but is on i386. We use
+ // a C-style cast here to emphasize this is not legal C++.
+ delete[] (char*)(origstub_fn_[i]);
origstub_fn_[i] = NULL; // Patch() will fill this in
CHECK_EQ(sidestep::SIDESTEP_SUCCESS,
PreamblePatcher::Patch(windows_fn_[i], perftools_fn_[i],
@@ -569,7 +611,10 @@ void WindowsInfo::Patch() {
// Since we've patched Unpatch() not to delete origstub_fn_ (it
// causes problems in some contexts, though obviously not this
// one), we should delete it now, before setting it to NULL.
- delete[] reinterpret_cast<char*>(function_info_[i].origstub_fn);
+ // NOTE: casting from a function to a pointer is contra the C++
+ // spec. It's not safe on IA64, but is on i386. We use
+ // a C-style cast here to emphasize this is not legal C++.
+ delete[] (char*)(function_info_[i].origstub_fn);
function_info_[i].origstub_fn = NULL; // Patch() will fill this in
CHECK_EQ(sidestep::SIDESTEP_SUCCESS,
PreamblePatcher::Patch(function_info_[i].windows_fn,
@@ -594,7 +639,7 @@ void PatchOneModuleLocked(const LibcInfo& me_info) {
// Double-check we haven't seen this module before.
for (int i = 0; i < sizeof(g_module_libcs)/sizeof(*g_module_libcs); i++) {
if (g_module_libcs[i]->SameAs(me_info)) {
- fprintf(stderr, "%s:%d: FATAL ERROR: %s double-patched somehow.\n",
+ fprintf(stderr, "%s:%d: FATAL PERFTOOLS ERROR: %s double-patched somehow.\n",
__FILE__, __LINE__, g_module_libcs[i]->module_name());
CHECK(false);
}
@@ -617,34 +662,53 @@ void PatchOneModuleLocked(const LibcInfo& me_info) {
}
}
}
- printf("ERROR: Too many modules containing libc in this executable\n");
+ printf("PERFTOOLS ERROR: Too many modules containing libc in this executable\n");
}
void PatchMainExecutableLocked() {
if (main_executable.patched())
return; // main executable has already been patched
- ModuleEntryCopy fake_me32; // we make a fake one to pass into Patch()
- main_executable.PopulateWindowsFn(fake_me32);
+ ModuleEntryCopy fake_module_entry; // make a fake one to pass into Patch()
+ // No need to call PopulateModuleEntryProcAddresses on the main executable.
+ main_executable.PopulateWindowsFn(fake_module_entry);
main_executable.Patch(main_executable);
}
+// This lock is subject to a subtle and annoying lock inversion
+// problem: it may interact badly with unknown internal windows locks.
+// In particular, windows may be holding a lock when it calls
+// LoadLibraryExW and FreeLibrary, which we've patched. We have those
+// routines call PatchAllModules, which acquires this lock. If we
+// make windows system calls while holding this lock, those system
+// calls may need the internal windows locks that are being held in
+// the call to LoadLibraryExW, resulting in deadlock. The solution is
+// to be very careful not to call *any* windows routines while holding
+// patch_all_modules_lock, inside PatchAllModules().
static SpinLock patch_all_modules_lock(SpinLock::LINKER_INITIALIZED);
// Iterates over all the modules currently loaded by the executable,
// and makes sure they're all patched. For ones that aren't, we patch
// them in. We also check that every module we had patched in the
// past is still loaded, and update internal data structures if so.
-void PatchAllModules() {
+// We return true if this PatchAllModules did any work, false else.
+bool PatchAllModules() {
std::vector<ModuleEntryCopy> modules;
+ bool made_changes = false;
const HANDLE hCurrentProcess = GetCurrentProcess();
MODULEINFO mi;
DWORD cbNeeded = 0;
HMODULE hModules[kMaxModules]; // max # of modules we support in one process
- if (EnumProcessModules(hCurrentProcess, hModules, sizeof(hModules),
- &cbNeeded)) {
+ if (::EnumProcessModules(hCurrentProcess, hModules, sizeof(hModules),
+ &cbNeeded)) {
for (int i = 0; i < cbNeeded / sizeof(*hModules); ++i) {
- if (GetModuleInformation(hCurrentProcess, hModules[i], &mi, sizeof(mi)))
+ if (i >= kMaxModules) {
+ printf("PERFTOOLS ERROR: Too many modules in this executable to try"
+ " to patch them all (if you need to, raise kMaxModules in"
+ " patch_functions.cc).\n");
+ break;
+ }
+ if (::GetModuleInformation(hCurrentProcess, hModules[i], &mi, sizeof(mi)))
modules.push_back(ModuleEntryCopy(hCurrentProcess, hModules[i], mi));
}
}
@@ -658,7 +722,7 @@ void PatchAllModules() {
bool still_loaded = false;
for (std::vector<ModuleEntryCopy>::iterator it = modules.begin();
it != modules.end(); ++it) {
- if (g_module_libcs[i]->SameAsME32(*it)) {
+ if (g_module_libcs[i]->SameAsModuleEntry(*it)) {
// Both g_module_libcs[i] and it are still valid. Mark it by
// removing it from the vector; mark g_module_libcs[i] by
// setting a bool.
@@ -672,23 +736,30 @@ void PatchAllModules() {
// We could call Unpatch() here, but why bother? The module
// has gone away, so nobody is going to call into it anyway.
g_module_libcs[i]->set_is_valid(false);
+ made_changes = true;
}
}
// We've handled all the g_module_libcs. Now let's handle the rest
- // of the me32's: those that haven't already been loaded.
+ // of the module-entries: those that haven't already been loaded.
for (std::vector<ModuleEntryCopy>::const_iterator it = modules.begin();
it != modules.end(); ++it) {
LibcInfo libc_info;
- if (libc_info.PopulateWindowsFn(*it)) // true==module has libc routines
- PatchOneModuleLocked(libc_info); // updates num_patched_modules
+ if (libc_info.PopulateWindowsFn(*it)) { // true==module has libc routines
+ PatchOneModuleLocked(libc_info); // updates num_patched_modules
+ made_changes = true;
+ }
}
// Now that we've dealt with the modules (dlls), update the main
// executable. We do this last because PatchMainExecutableLocked
// wants to look at how other modules were patched.
- PatchMainExecutableLocked();
+ if (!main_executable.patched()) {
+ PatchMainExecutableLocked();
+ made_changes = true;
+ }
}
+ return made_changes;
}
@@ -697,6 +768,8 @@ void PatchAllModules() {
// ---------------------------------------------------------------------
// PatchWindowsFunctions()
// This is the function that is exposed to the outside world.
+// It should be called before the program becomes multi-threaded,
+// since main_executable_windows.Patch() is not thread-safe.
// ---------------------------------------------------------------------
void PatchWindowsFunctions() {
@@ -956,19 +1029,68 @@ BOOL WINAPI WindowsInfo::Perftools_UnmapViewOfFile(LPCVOID lpBaseAddress) {
lpBaseAddress);
}
+// * nopatching_set holds modules (specified by full file-path) that
+// don't contain any libc routines, nor depend on any DLLs that do:
+// that is, files where PatchAllModules ended up being a no-op. We
+// know we don't need to patch those files if we see them again in the
+// future (which can happen in windows apps, which can load+unload a
+// given module many times during a single program run).
+// * patching_set holds modules that *do* require us to do patching
+// work when we load them. (This map is keyed by handle, so we need
+// to be careful to clear each entry in FreeLibrary; the map data is
+// the necessary refcount.) This set is used as an optimization in
+// FreeLibrary to tell if we had done any patching in LoadLibraryExW.
+// If we didn't, FreeLibrary doesn't need to clear up the
+// patching-work, but if we did, it does.
+static std::map<HMODULE, int>* patching_map = NULL;
+static std::set<std::string>* nopatching_set = NULL;
+
HMODULE WINAPI WindowsInfo::Perftools_LoadLibraryExW(LPCWSTR lpFileName,
HANDLE hFile,
DWORD dwFlags) {
HMODULE rv = ((HMODULE (WINAPI *)(LPCWSTR, HANDLE, DWORD))
function_info_[kLoadLibraryExW].origstub_fn)(
lpFileName, hFile, dwFlags);
- PatchAllModules(); // this will patch any newly loaded libraries
+ // This will patch any newly loaded libraries, if patching needs to
+ // be done. If so, PatchAllModules() will return true. If no
+ // patching needs to be done (this library didn't contain a libc),
+ // we'll mark it in a set, so we can skip patching next time.
+ char szFullPath[PATH_MAX];
+ if (::GetModuleFileNameA(rv, szFullPath, sizeof(szFullPath))) {
+ SpinLockHolder h(&patch_all_modules_lock);
+ if (nopatching_set && nopatching_set->count(szFullPath) > 0)
+ return rv;
+ }
+ const bool made_changes = PatchAllModules();
+ {
+ SpinLockHolder h(&patch_all_modules_lock);
+ if (made_changes) {
+ if (!patching_map) patching_map = new std::map<HMODULE, int>;
+ ++(*patching_map)[rv];
+ } else {
+ if (!nopatching_set) nopatching_set = new std::set<std::string>;
+ nopatching_set->insert(szFullPath);
+ }
+ }
return rv;
}
BOOL WINAPI WindowsInfo::Perftools_FreeLibrary(HMODULE hLibModule) {
BOOL rv = ((BOOL (WINAPI *)(HMODULE))
function_info_[kFreeLibrary].origstub_fn)(hLibModule);
+ {
+ // This is an optimization: if we know this module never did any work
+ // in LoadLibraryEx (because it didn't get put into patching_map), we
+ // know we don't need to revisit the patching here.
+ SpinLockHolder h(&patch_all_modules_lock);
+ if (patching_map) {
+ std::map<HMODULE, int>::iterator it = patching_map->find(hLibModule);
+ if (it == patching_map->end())
+ return rv;
+ if (--it->second <= 0) // decrement the refcount
+ patching_map->erase(it); // hLibModule is now a dangling handle
+ }
+ }
PatchAllModules(); // this will fix up the list of patched libraries
return rv;
}