author     csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50>   2007-07-18 18:30:50 +0000
committer  csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50>   2007-07-18 18:30:50 +0000
commit     c437e1fcdd1e6ff3f032928d460cbfc115e2324f (patch)
tree       482360b82db4bc64880e187b9b5af82e840df47c /src/tcmalloc.cc
parent     6878379d5bab87c787cdd3487b5620a9c8adf376 (diff)
download   gperftools-c437e1fcdd1e6ff3f032928d460cbfc115e2324f.tar.gz
Tue Jul 17 22:26:27 2007 Google Inc. <opensource@google.com>
* google-perftools: version 0.92 release
* PERFORMANCE: use a packed cache to speed up tcmalloc
* PORTING: preliminary windows support! (see README.windows)
* PORTING: better support for solaris, OS X, FreeBSD (see INSTALL)
* Envvar support for running the heap-checker under gdb
* Add weak declarations to maybe_threads to fix no-pthreads compile bugs
* Some 64bit fixes, especially with pprof
* Better heap-checker support for some low-level allocations
* Fix bug where heap-profiles would sometimes get truncated
* New documentation about how to handle common heap leak situations
* Use computed includes for hash_map/set: easier config
* Added all used .m4 templates to the distribution
git-svn-id: http://gperftools.googlecode.com/svn/trunk@36 6b5cf1ce-ec42-a296-1ba9-69fdba395a50
Diffstat (limited to 'src/tcmalloc.cc')
-rw-r--r--   src/tcmalloc.cc   258
1 file changed, 175 insertions, 83 deletions
diff --git a/src/tcmalloc.cc b/src/tcmalloc.cc
index a23449b..08ae2fa 100644
--- a/src/tcmalloc.cc
+++ b/src/tcmalloc.cc
@@ -45,12 +45,24 @@
 // 4. The pagemap (which maps from page-number to descriptor),
 //    can be read without holding any locks, and written while holding
 //    the "pageheap_lock".
+// 5. To improve performance, a subset of the information one can get
+//    from the pagemap is cached in a data structure, pagemap_cache_,
+//    that atomically reads and writes its entries.  This cache can be
+//    read and written without locking.
 //
 // This multi-threaded access to the pagemap is safe for fairly
 // subtle reasons.  We basically assume that when an object X is
 // allocated by thread A and deallocated by thread B, there must
 // have been appropriate synchronization in the handoff of object
-// X from thread A to thread B.
+// X from thread A to thread B.  The same logic applies to pagemap_cache_.
+//
+// THE PAGEID-TO-SIZECLASS CACHE
+// Hot PageID-to-sizeclass mappings are held by pagemap_cache_.  If this cache
+// returns 0 for a particular PageID then that means "no information," not that
+// the sizeclass is 0.  The cache may have stale information for pages that do
+// not hold the beginning of any free()'able object.  Staleness is eliminated
+// in Populate() for pages with sizeclass > 0 objects, and in do_malloc() and
+// do_memalign() for all other relevant pages.
 //
 // TODO: Bias reclamation to larger addresses
 // TODO: implement mallinfo/mallopt
@@ -72,26 +84,39 @@
 #else
 #include <sys/types.h>
 #endif
-#ifdef HAVE_STRUCT_MALLINFO
+#if defined(HAVE_MALLOC_H) && defined(HAVE_STRUCT_MALLINFO)
 #include <malloc.h>                     // for struct mallinfo
 #endif
 #include <string.h>
+#ifdef HAVE_PTHREAD
 #include <pthread.h>
+#endif
+#ifdef HAVE_UNISTD_H
 #include <unistd.h>
+#endif
 #include <errno.h>
 #include <stdarg.h>
+#include "packed-cache-inl.h"
 #include "base/commandlineflags.h"
 #include "base/basictypes.h"            // gets us PRIu64
 #include "base/sysinfo.h"
 #include "base/spinlock.h"
 #include <google/malloc_hook.h>
 #include <google/malloc_extension.h>
-#include <google/stacktrace.h>
 #include "internal_logging.h"
 #include "pagemap.h"
 #include "system-alloc.h"
 #include "maybe_threads.h"
+// This #ifdef should almost never be set.  Set NO_TCMALLOC_SAMPLES if
+// you're porting to a system where you really can't get a stacktrace.
+#ifdef NO_TCMALLOC_SAMPLES
+  // We use #define so code compiles even if you #include stacktrace.h somehow.
+# define GetStackTrace(stack, depth, skip)  (0)
+#else
+# include <google/stacktrace.h>
+#endif
+
 // Even if we have support for thread-local storage in the compiler
 // and linker, the OS may not support it.  We need to check that at
 // runtime.  Right now, we have to keep a manual set of "bad" OSes.
@@ -101,9 +126,9 @@
   return kernel_supports_tls;
 }
 # if !HAVE_DECL_UNAME   // if too old for uname, probably too old for TLS
-    static void CheckIfKernelSupportsTLS() {
-      kernel_supports_tls = false;
-    }
+  static void CheckIfKernelSupportsTLS() {
+    kernel_supports_tls = false;
+  }
 # else
 #   include <sys/utsname.h>    // DECL_UNAME checked for <sys/utsname.h> too
     static void CheckIfKernelSupportsTLS() {
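The packed cache itself lives in the new "packed-cache-inl.h" header, which this page does not show. As a rough, hedged sketch of the semantics the comment block above describes -- a direct-mapped cache whose entries pack a key tag and a small value into a single word, with 0 meaning "no information" -- here is a stand-alone toy. The GetOrDefault/Put names follow the patch, but the slot count, field widths, and everything else here are invented for illustration, not the real PackedCache layout:

    // Toy sketch only: a direct-mapped cache that packs (key tag, value)
    // into one word, so every Get/Put is a single aligned word access.
    #include <cstdio>
    #include <stdint.h>

    template <int kValuebits>
    class ToyPackedCache {
     public:
      static const int kHashbits = 12;            // 4096 slots -- illustrative
      explicit ToyPackedCache(uint64_t initial) {
        for (int i = 0; i < (1 << kHashbits); ++i)
          array_[i] = initial;                    // tag 0: only key 0 hits here
      }
      void Put(uint64_t key, uint64_t value) {    // single word write
        array_[Index(key)] = (Tag(key) << kValuebits) | value;
      }
      uint64_t GetOrDefault(uint64_t key, uint64_t dflt) const {
        const uint64_t entry = array_[Index(key)];       // single word read
        const uint64_t mask = (1ULL << kValuebits) - 1;
        // A tag mismatch means this slot currently describes a different key.
        return (entry >> kValuebits) == Tag(key) ? (entry & mask) : dflt;
      }
     private:
      static uint64_t Index(uint64_t key) { return key & ((1 << kHashbits) - 1); }
      static uint64_t Tag(uint64_t key)   { return key >> kHashbits; }
      uint64_t array_[1 << kHashbits];
    };

    int main() {
      ToyPackedCache<7> cache(0);
      cache.Put(12345, 17);                                 // PageID -> sizeclass
      printf("%d\n", (int)cache.GetOrDefault(12345, 0));    // 17 (hit)
      printf("%d\n", (int)cache.GetOrDefault(99999, 0));    // 0  ("no information")
      return 0;
    }

Because a Put overwrites whatever key previously hashed to the slot, a lookup either returns a value that was correct at the time it was written or the default -- never a torn entry -- which is exactly the raciness tolerance the comments above rely on.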
@@ -201,11 +226,17 @@ static unsigned int primes_list[] = {
 //      tcmalloc_sample_parameter/2
 // bytes of allocation, i.e., ~ once every 128KB.
 // Must be a prime number.
+#ifdef NO_TCMALLOC_SAMPLES
+DEFINE_int64(tcmalloc_sample_parameter, 0,
+             "Unused: code is compiled with NO_TCMALLOC_SAMPLES");
+static size_t sample_period = 0;
+#else
 DEFINE_int64(tcmalloc_sample_parameter, 262147,
              "Twice the approximate gap between sampling actions."
              " Must be a prime number. Otherwise will be rounded up to a "
              " larger prime number");
 static size_t sample_period = 262147;
+#endif
 
 // Protects sample_period above
 static SpinLock sample_period_lock(SpinLock::LINKER_INITIALIZED);
@@ -227,35 +258,28 @@ DEFINE_double(tcmalloc_release_rate, 1,
 // So for these larger sizes we have an array indexed by ceil(size/128).
 //
 // We flatten both logical arrays into one physical array and use
-// arithmetic to compute an appropriate index.  The "base_index[]"
-// array contains the bases of the two logical arrays.
-//
-// base_index[] contains non-obvious values.  We always add 127 to the
-// size before dividing it by either 8 or 128 to implement ceil()
-// efficiently.  Therefore base_index[0] is -15 to compensate for the
-// extra 127/8 we added to small sizes.  Similarly base_index[1] is
-// 120, so that the first index used by the second logical array is
-// just past the last index used by the first logical array.
+// arithmetic to compute an appropriate index.  The constants used by
+// ClassIndex() were selected to make the flattening work.
 //
 // Examples:
 //   Size       Expression                      Index
 //   -------------------------------------------------------
-//   0          -15 + ((0+127) / 8)             0
-//   1          -15 + ((1+127) / 8)             1
+//   0          (0 + 7) / 8                     0
+//   1          (1 + 7) / 8                     1
 //   ...
-//   1024       -15 + ((1024+127) / 8)          128
-//   1025       120 + ((1025+127) / 128)        129
+//   1024       (1024 + 7) / 8                  128
+//   1025       (1025 + 127 + (120<<7)) / 128   129
 //   ...
-//   32768      120 + ((32768+127) / 128)       376
+//   32768      (32768 + 127 + (120<<7)) / 128  376
 static const int kMaxSmallSize = 1024;
 static const int shift_amount[2] = { 3, 7 };  // For divides by 8 or 128
-static const int base_index[2] = { -15, 120 };  // For finding array bases
+static const int add_amount[2] = { 7, 127 + (120 << 7) };
 static unsigned char class_array[377];
 
 // Compute index of the class_array[] entry for a given size
 static inline int ClassIndex(size_t s) {
   const int i = (s > kMaxSmallSize);
-  return base_index[i] + ((s+127) >> shift_amount[i]);
+  return (s + add_amount[i]) >> shift_amount[i];
 }
 
 // Mapping from size class to max size storable in that class
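The payoff of the add_amount[] change is that ClassIndex() no longer needs a signed base offset: adding 7 (small sizes) or 127 + (120 << 7) (large sizes) before the shift performs the ceil() rounding and the flattened-array base adjustment in one unsigned step. The table above can be verified with a small stand-alone snippet that copies just the three constants from the patch:

    #include <cstdio>
    #include <cstddef>

    static const int kMaxSmallSize = 1024;
    static const int shift_amount[2] = { 3, 7 };          // divide by 8 or 128
    static const int add_amount[2]   = { 7, 127 + (120 << 7) };

    static inline int ClassIndex(size_t s) {
      const int i = (s > kMaxSmallSize);
      return (s + add_amount[i]) >> shift_amount[i];
    }

    int main() {
      const size_t sizes[] = { 0, 1, 1024, 1025, 32768 };
      for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); ++i)
        printf("ClassIndex(%5u) = %d\n",
               (unsigned)sizes[i], ClassIndex(sizes[i]));
      // Prints 0, 1, 128, 129, 376 -- matching the examples table above.
      return 0;
    }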
@@ -392,7 +416,7 @@ static int NumMoveSize(size_t size) {
 
 // Initialize the mapping arrays
 static void InitSizeClasses() {
-  // Do some sanity checking on base_index[]/shift_amount[]/class_array[]
+  // Do some sanity checking on add_amount[]/shift_amount[]/class_array[]
   if (ClassIndex(0) < 0) {
     MESSAGE("Invalid class index %d for size 0\n", ClassIndex(0));
     abort();
@@ -600,9 +624,13 @@ typedef uintptr_t PageID;
 // Type that can hold the length of a run of pages
 typedef uintptr_t Length;
 
-// Convert byte size into pages
+static const Length kMaxValidPages = (~static_cast<Length>(0)) >> kPageShift;
+
+// Convert byte size into pages.  This won't overflow, but may return
+// an unreasonably large value if bytes is huge enough.
 static inline Length pages(size_t bytes) {
-  return ((bytes + kPageSize - 1) >> kPageShift);
+  return (bytes >> kPageShift) +
+      ((bytes & (kPageSize - 1)) > 0 ? 1 : 0);
 }
 
 // Convert a user size into the number of bytes that will actually be
@@ -610,6 +638,7 @@ static inline Length pages(size_t bytes) {
 static size_t AllocationSize(size_t bytes) {
   if (bytes > kMaxSize) {
     // Large object: we allocate an integral number of pages
+    ASSERT(bytes <= (kMaxValidPages << kPageShift));
     return pages(bytes) << kPageShift;
   } else {
     // Small object: find the size class to which it belongs
@@ -722,10 +751,12 @@ static void DLL_Prepend(Span* list, Span* span) {
 // The following state is protected by pageheap_lock_.
 // -------------------------------------------------------------------------
 
+// size/depth are made the same size as a pointer so that some generic
+// code below can conveniently cast them back and forth to void*.
 static const int kMaxStackDepth = 31;
 struct StackTrace {
   uintptr_t size;          // Size of object
-  int       depth;         // Number of PC values stored in array below
+  uintptr_t depth;         // Number of PC values stored in array below
   void*     stack[kMaxStackDepth];
 };
 static PageHeapAllocator<StackTrace> stacktrace_allocator;
@@ -742,17 +773,21 @@ static StackTrace* growth_stacks = NULL;
 // -------------------------------------------------------------------------
 
 // We use PageMap2<> for 32-bit and PageMap3<> for 64-bit machines.
+// We also use a simple one-level cache for hot PageID-to-sizeclass mappings,
+// because sometimes the sizeclass is all the information we need.
 
 // Selector class -- general selector uses 3-level map
 template <int BITS> class MapSelector {
  public:
   typedef TCMalloc_PageMap3<BITS-kPageShift> Type;
+  typedef PackedCache<BITS, uint64> CacheType;
 };
 
 // A two-level map for 32-bit machines
 template <> class MapSelector<32> {
  public:
   typedef TCMalloc_PageMap2<32-kPageShift> Type;
+  typedef PackedCache<32-kPageShift, uint16> CacheType;
 };
 
 // -------------------------------------------------------------------------
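The pages() rewrite above is why TCMalloc_PageHeap::New() can drop its n == 0 overflow check later in this patch: the old form, (bytes + kPageSize - 1) >> kPageShift, wraps around to 0 when bytes lies within kPageSize - 1 of the top of size_t's range, whereas shifting first and testing the remainder cannot overflow. A quick demonstration (kPageShift is hard-coded to 12 here purely for the example; the real value is a tcmalloc compile-time constant):

    #include <cstdio>
    #include <stdint.h>

    typedef uintptr_t Length;
    static const size_t kPageShift = 12;               // assumed 4KB pages
    static const size_t kPageSize  = 1 << kPageShift;

    static inline Length old_pages(size_t bytes) {     // wraps near SIZE_MAX
      return (bytes + kPageSize - 1) >> kPageShift;
    }
    static inline Length new_pages(size_t bytes) {     // ceil() without overflow
      return (bytes >> kPageShift) + ((bytes & (kPageSize - 1)) > 0 ? 1 : 0);
    }

    int main() {
      const size_t huge = (size_t)-1;                  // SIZE_MAX
      printf("old: %lu\n", (unsigned long)old_pages(huge));   // 0 -- wrapped!
      printf("new: %lu\n", (unsigned long)new_pages(huge));   // huge, but nonzero
      printf("%lu %lu\n", (unsigned long)new_pages(1),
             (unsigned long)new_pages(kPageSize));     // 1 1
      return 0;
    }

The "unreasonably large value" case is then caught explicitly: AllocationSize() asserts against kMaxValidPages, and GrowHeap() (below) refuses requests above it.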
@@ -815,10 +850,22 @@ class TCMalloc_PageHeap {
   // Release all pages on the free list for reuse by the OS:
   void ReleaseFreePages();
 
+  // Return 0 if we have no information, or else the correct sizeclass for p.
+  // Reads and writes to pagemap_cache_ do not require locking.
+  // The entries are 64 bits on 64-bit hardware and 16 bits on
+  // 32-bit hardware, and we don't mind raciness as long as each read of
+  // an entry yields a valid entry, not a partially updated entry.
+  size_t GetSizeClassIfCached(PageID p) const {
+    return pagemap_cache_.GetOrDefault(p, 0);
+  }
+  void CacheSizeClass(PageID p, size_t cl) const { pagemap_cache_.Put(p, cl); }
+
  private:
-  // Pick the appropriate map type based on pointer size
+  // Pick the appropriate map and cache types based on pointer size
   typedef MapSelector<8*sizeof(uintptr_t)>::Type PageMap;
+  typedef MapSelector<8*sizeof(uintptr_t)>::CacheType PageMapCache;
   PageMap pagemap_;
+  mutable PageMapCache pagemap_cache_;
 
   // We segregate spans of a given size into two circular linked
   // lists: one for normal spans, and one for spans whose memory
@@ -875,11 +922,13 @@ class TCMalloc_PageHeap {
 
 TCMalloc_PageHeap::TCMalloc_PageHeap()
     : pagemap_(MetaDataAlloc),
+      pagemap_cache_(0),
       free_pages_(0),
       system_bytes_(0),
       scavenge_counter_(0),
       // Start scavenging at kMaxPages list
       scavenge_index_(kMaxPages-1) {
+  COMPILE_ASSERT(kNumClasses <= (1 << PageMapCache::kValuebits), valuebits);
   DLL_Init(&large_.normal);
   DLL_Init(&large_.returned);
   for (int i = 0; i < kMaxPages; i++) {
@@ -890,9 +939,7 @@ TCMalloc_PageHeap::TCMalloc_PageHeap()
 
 Span* TCMalloc_PageHeap::New(Length n) {
   ASSERT(Check());
-
-  // n==0 occurs iff pages() overflowed when we added kPageSize-1 to n
-  if (n == 0) return NULL;
+  ASSERT(n > 0);
 
   // Find first size >= n that has a non-empty list
   for (Length s = n; s < kMaxPages; s++) {
@@ -1211,6 +1258,7 @@ static void RecordGrowth(size_t growth) {
 
 bool TCMalloc_PageHeap::GrowHeap(Length n) {
   ASSERT(kMaxPages >= kMinSystemAlloc);
+  if (n > kMaxValidPages) return false;
   Length ask = (n>kMinSystemAlloc) ? n : static_cast<Length>(kMinSystemAlloc);
   void* ptr = TCMalloc_SystemAlloc(ask << kPageShift, kPageSize);
   if (ptr == NULL) {
@@ -1812,6 +1860,13 @@ void TCMalloc_Central_FreeList::Populate() {
     lock_.Lock();
     return;
   }
+  ASSERT(span->length == npages);
+  // Cache sizeclass info eagerly.  Locking is not necessary.
+  // (Instead of being eager, we could just replace any stale info
+  // about this span, but that seems to be no better in practice.)
+  for (int i = 0; i < npages; i++) {
+    pageheap->CacheSizeClass(span->start + i, size_class_);
+  }
 
   // Split the block into pieces and add to the free-list
   // TODO: coloring of objects to avoid cache conflicts?
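The constructor's new COMPILE_ASSERT guards against the 16-bit cache entries on 32-bit machines silently truncating a sizeclass: the number of size classes must fit in the cache's value field. COMPILE_ASSERT comes from base/basictypes.h, which this page does not show; sketched below in its classic form (assuming that header's usual implementation), it turns a false predicate into a negative-sized array type, so a violation fails at compile time rather than corrupting entries at run time:

    template <bool> struct CompileAssert {};
    #define COMPILE_ASSERT(expr, msg) \
      typedef CompileAssert<(bool(expr))> msg[bool(expr) ? 1 : -1]

    static const int kNumClasses = 68;   // illustrative value only
    static const int kValuebits  = 7;    // illustrative value-field width

    COMPILE_ASSERT(kNumClasses <= (1 << kValuebits), valuebits);    // compiles
    // COMPILE_ASSERT(kNumClasses <= (1 << 5), too_narrow);         // would not

    int main() { return 0; }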
@@ -1980,7 +2035,7 @@ void TCMalloc_ThreadCache::PickNextSample(size_t k) {
   }
 
   bytes_until_sample_ += rnd_ % sample_period;
-  
+
   if (k > (static_cast<size_t>(-1) >> 2)) {
     // If the user has asked for a huge allocation then it is possible
     // for the code below to loop infinitely.  Just return (note that
@@ -2044,8 +2099,8 @@ inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetThreadHeap() {
   if (KernelSupportsTLS())
     return threadlocal_heap;
 #endif
-  return
-    reinterpret_cast<TCMalloc_ThreadCache *>(perftools_pthread_getspecific(heap_key));
+  return reinterpret_cast<TCMalloc_ThreadCache *>(
+      perftools_pthread_getspecific(heap_key));
 }
 
 inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCache() {
@@ -2337,7 +2392,7 @@ static void** DumpStackTraces() {
       break;
     }
 
-    result[used_slots+0] = reinterpret_cast<void*>(1);
+    result[used_slots+0] = reinterpret_cast<void*>(static_cast<uintptr_t>(1));
     result[used_slots+1] = reinterpret_cast<void*>(stack->size);
     result[used_slots+2] = reinterpret_cast<void*>(stack->depth);
     for (int d = 0; d < stack->depth; d++) {
@@ -2345,7 +2400,7 @@ static void** DumpStackTraces() {
     }
     used_slots += 3 + stack->depth;
   }
-  result[used_slots] = reinterpret_cast<void*>(0);
+  result[used_slots] = reinterpret_cast<void*>(static_cast<uintptr_t>(0));
   return result;
 }
 
@@ -2381,7 +2436,7 @@ static void** DumpHeapGrowthStackTraces() {
       break;
     }
 
-    result[used_slots+0] = reinterpret_cast<void*>(1);
+    result[used_slots+0] = reinterpret_cast<void*>(static_cast<uintptr_t>(1));
    result[used_slots+1] = reinterpret_cast<void*>(t->size);
     result[used_slots+2] = reinterpret_cast<void*>(t->depth);
     for (int d = 0; d < t->depth; d++) {
@@ -2389,7 +2444,7 @@ static void** DumpHeapGrowthStackTraces() {
     }
     used_slots += 3 + t->depth;
   }
-  result[used_slots] = reinterpret_cast<void*>(0);
+  result[used_slots] = reinterpret_cast<void*>(static_cast<uintptr_t>(0));
   return result;
 }
 
@@ -2541,7 +2596,7 @@ static Span* DoSampledAllocation(size_t size) {
   if (span == NULL) {
     return NULL;
   }
-  
+
   // Allocate stack trace
   StackTrace *stack = stacktrace_allocator.New();
   if (stack == NULL) {
@@ -2557,6 +2612,25 @@ static Span* DoSampledAllocation(size_t size) {
   return span;
 }
 
+static inline bool CheckCachedSizeClass(void *ptr) {
+  PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
+  size_t cached_value = pageheap->GetSizeClassIfCached(p);
+  return cached_value == 0 ||
+      cached_value == pageheap->GetDescriptor(p)->sizeclass;
+}
+
+static inline void* CheckedMallocResult(void *result)
+{
+  ASSERT(result == 0 || CheckCachedSizeClass(result));
+  return result;
+}
+
+static inline void* SpanToMallocResult(Span *span) {
+  pageheap->CacheSizeClass(span->start, 0);
+  return
+      CheckedMallocResult(reinterpret_cast<void*>(span->start << kPageShift));
+}
+
 static inline void* do_malloc(size_t size) {
   void* ret = NULL;
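A small portability note on the reinterpret_cast changes in DumpStackTraces() and DumpHeapGrowthStackTraces() above: on LP64 platforms, converting a 32-bit int literal straight to a 64-bit pointer draws a "cast to pointer from integer of different size" diagnostic from some compilers (gcc's -Wint-to-pointer-cast), so the value is widened through uintptr_t first. The pattern in isolation:

    #include <stdint.h>

    int main() {
      // Widening to uintptr_t first keeps the int-to-pointer conversion
      // value-preserving and warning-free on 64-bit targets.
      void* tag = reinterpret_cast<void*>(static_cast<uintptr_t>(1));
      return (tag == 0) ? 1 : 0;   // tag is the non-NULL sentinel, so returns 0
    }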
@@ -2565,17 +2639,19 @@ static inline void* do_malloc(size_t size) {
   if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) {
     Span* span = DoSampledAllocation(size);
     if (span != NULL) {
-      ret = reinterpret_cast<void*>(span->start << kPageShift);
+      ret = SpanToMallocResult(span);
     }
   } else if (size > kMaxSize) {
     // Use page-level allocator
     SpinLockHolder h(&pageheap_lock);
     Span* span = pageheap->New(pages(size));
     if (span != NULL) {
-      ret = reinterpret_cast<void*>(span->start << kPageShift);
+      ret = SpanToMallocResult(span);
     }
   } else {
-    ret = heap->Allocate(size);
+    // The common case, and also the simplest.  This just pops the
+    // size-appropriate freelist, afer replenishing it if it's empty.
+    ret = CheckedMallocResult(heap->Allocate(size));
   }
   if (ret == NULL) errno = ENOMEM;
   return ret;
@@ -2585,13 +2661,16 @@ static inline void do_free(void* ptr) {
   if (ptr == NULL) return;
   ASSERT(pageheap != NULL);  // Should not call free() before malloc()
   const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
-  Span* span = pageheap->GetDescriptor(p);
+  Span* span = NULL;
+  size_t cl = pageheap->GetSizeClassIfCached(p);
 
-  ASSERT(span != NULL);
-  ASSERT(!span->free);
-  const size_t cl = span->sizeclass;
+  if (cl == 0) {
+    span = pageheap->GetDescriptor(p);
+    cl = span->sizeclass;
+    pageheap->CacheSizeClass(p, cl);
+  }
   if (cl != 0) {
-    ASSERT(!span->sample);
+    ASSERT(!pageheap->GetDescriptor(p)->sample);
     TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCacheIfPresent();
     if (heap != NULL) {
       heap->Deallocate(ptr, cl);
@@ -2603,7 +2682,7 @@ static inline void do_free(void* ptr) {
   } else {
     SpinLockHolder h(&pageheap_lock);
     ASSERT(reinterpret_cast<uintptr_t>(ptr) % kPageSize == 0);
-    ASSERT(span->start == p);
+    ASSERT(span != NULL && span->start == p);
     if (span->sample) {
       DLL_Remove(span);
       stacktrace_allocator.Delete(reinterpret_cast<StackTrace*>(span->objects));
@@ -2643,7 +2722,7 @@ static void* do_memalign(size_t align, size_t size) {
     }
     if (cl < kNumClasses) {
       TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCache();
-      return heap->Allocate(class_to_size[cl]);
+      return CheckedMallocResult(heap->Allocate(class_to_size[cl]));
     }
   }
 
@@ -2655,17 +2734,16 @@ static void* do_memalign(size_t align, size_t size) {
     // TODO: We could put the rest of this page in the appropriate
     // TODO: cache but it does not seem worth it.
     Span* span = pageheap->New(pages(size));
-    if (span == NULL) return NULL;
-    return reinterpret_cast<void*>(span->start << kPageShift);
+    return span == NULL ? NULL : SpanToMallocResult(span);
   }
 
   // Allocate extra pages and carve off an aligned portion
-  const int alloc = pages(size + align);
+  const Length alloc = pages(size + align);
   Span* span = pageheap->New(alloc);
   if (span == NULL) return NULL;
 
   // Skip starting portion so that we end up aligned
-  int skip = 0;
+  Length skip = 0;
   while ((((span->start+skip) << kPageShift) & (align - 1)) != 0) {
     skip++;
   }
@@ -2677,13 +2755,13 @@ static void* do_memalign(size_t align, size_t size) {
   }
 
   // Skip trailing portion that we do not need to return
-  const int needed = pages(size);
+  const Length needed = pages(size);
   ASSERT(span->length >= needed);
   if (span->length > needed) {
     Span* trailer = pageheap->Split(span, needed);
     pageheap->Delete(trailer);
   }
-  return reinterpret_cast<void*>(span->start << kPageShift);
+  return SpanToMallocResult(span);
 }
 
 // Helpers for use by exported routines below:
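The reworked do_free() is the main consumer of the cache's 0-means-no-information convention: a nonzero cache hit skips the pagemap walk entirely; a miss does one descriptor lookup and warms the cache; and a cl of 0 even after that lookup identifies a large or sampled allocation (SpanToMallocResult cached 0 for it), which still needs its Span. The control flow, restated as a stand-alone sketch against stub types and functions (everything here is a stand-in, not tcmalloc's real declarations):

    #include <cstdio>
    #include <cstddef>
    #include <stdint.h>

    // Stand-ins for this sketch only.
    struct Span { uintptr_t start; size_t sizeclass; };
    static const size_t kPageShift = 12;                 // assumed
    static size_t cache_entry = 0;                       // one-slot "cache"
    static Span the_span = { 0, 3 };                     // pretend descriptor

    static size_t GetSizeClassIfCached(uintptr_t p) { return cache_entry; }
    static void   CacheSizeClass(uintptr_t p, size_t cl) { cache_entry = cl; }
    static Span*  GetDescriptor(uintptr_t p) { return &the_span; }
    static void   DeallocateSmall(void* ptr, size_t cl) {
      printf("small object, cl=%u\n", (unsigned)cl);
    }
    static void   DeleteSpan(Span* span) { printf("span path\n"); }

    static void do_free_sketch(void* ptr) {
      const uintptr_t p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
      Span* span = NULL;
      size_t cl = GetSizeClassIfCached(p);               // lock-free fast path
      if (cl == 0) {                                     // miss: one pagemap walk
        span = GetDescriptor(p);
        cl = span->sizeclass;
        CacheSizeClass(p, cl);                           // warm cache for next time
      }
      if (cl != 0) {
        DeallocateSmall(ptr, cl);                        // thread-cache path
      } else {
        DeleteSpan(span);                                // large/sampled: needs Span
      }
    }

    int main() {
      int x;
      do_free_sketch(&x);   // miss, then the small-object path (cl = 3)
      do_free_sketch(&x);   // cache hit: no descriptor lookup this time
      return 0;
    }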
extern "C" { void* malloc(size_t size) - __THROW ATTRIBUTE_SECTION(google_malloc_allocators); + __THROW ATTRIBUTE_SECTION(google_malloc); void free(void* ptr) - __THROW ATTRIBUTE_SECTION(google_malloc_allocators); + __THROW ATTRIBUTE_SECTION(google_malloc); void* realloc(void* ptr, size_t size) - __THROW ATTRIBUTE_SECTION(google_malloc_allocators); + __THROW ATTRIBUTE_SECTION(google_malloc); void* calloc(size_t nmemb, size_t size) - __THROW ATTRIBUTE_SECTION(google_malloc_allocators); + __THROW ATTRIBUTE_SECTION(google_malloc); void cfree(void* ptr) - __THROW ATTRIBUTE_SECTION(google_malloc_allocators); + __THROW ATTRIBUTE_SECTION(google_malloc); void* memalign(size_t __alignment, size_t __size) - __THROW ATTRIBUTE_SECTION(google_malloc_allocators); + __THROW ATTRIBUTE_SECTION(google_malloc); int posix_memalign(void** ptr, size_t align, size_t size) - __THROW ATTRIBUTE_SECTION(google_malloc_allocators); + __THROW ATTRIBUTE_SECTION(google_malloc); void* valloc(size_t __size) - __THROW ATTRIBUTE_SECTION(google_malloc_allocators); + __THROW ATTRIBUTE_SECTION(google_malloc); void* pvalloc(size_t __size) - __THROW ATTRIBUTE_SECTION(google_malloc_allocators); + __THROW ATTRIBUTE_SECTION(google_malloc); } static void *MemalignOverride(size_t align, size_t size, const void *caller) - __THROW ATTRIBUTE_SECTION(google_malloc_allocators); + __THROW ATTRIBUTE_SECTION(google_malloc); void* operator new(size_t size) - ATTRIBUTE_SECTION(google_malloc_allocators); + ATTRIBUTE_SECTION(google_malloc); void operator delete(void* p) - __THROW ATTRIBUTE_SECTION(google_malloc_allocators); + __THROW ATTRIBUTE_SECTION(google_malloc); void* operator new[](size_t size) - ATTRIBUTE_SECTION(google_malloc_allocators); + ATTRIBUTE_SECTION(google_malloc); void operator delete[](void* p) - __THROW ATTRIBUTE_SECTION(google_malloc_allocators); + __THROW ATTRIBUTE_SECTION(google_malloc); // And the nothrow variants of these: void* operator new(size_t size, const std::nothrow_t&) - __THROW ATTRIBUTE_SECTION(google_malloc_allocators); + __THROW ATTRIBUTE_SECTION(google_malloc); void operator delete(void* p, const std::nothrow_t&) - __THROW ATTRIBUTE_SECTION(google_malloc_allocators); + __THROW ATTRIBUTE_SECTION(google_malloc); void* operator new[](size_t size, const std::nothrow_t&) - __THROW ATTRIBUTE_SECTION(google_malloc_allocators); + __THROW ATTRIBUTE_SECTION(google_malloc); void operator delete[](void* p, const std::nothrow_t&) - __THROW ATTRIBUTE_SECTION(google_malloc_allocators); + __THROW ATTRIBUTE_SECTION(google_malloc); extern "C" void* malloc(size_t size) __THROW { void* result = do_malloc(size); @@ -2823,11 +2901,18 @@ extern "C" void* realloc(void* old_ptr, size_t new_size) __THROW { // Get the size of the old entry const PageID p = reinterpret_cast<uintptr_t>(old_ptr) >> kPageShift; - Span* span = pageheap->GetDescriptor(p); + size_t cl = pageheap->GetSizeClassIfCached(p); + Span *span = NULL; size_t old_size; - if (span->sizeclass != 0) { - old_size = ByteSizeForClass(span->sizeclass); + if (cl == 0) { + span = pageheap->GetDescriptor(p); + cl = span->sizeclass; + pageheap->CacheSizeClass(p, cl); + } + if (cl != 0) { + old_size = ByteSizeForClass(cl); } else { + ASSERT(span != NULL); old_size = span->length << kPageShift; } @@ -2842,6 +2927,9 @@ extern "C" void* realloc(void* old_ptr, size_t new_size) __THROW { MallocHook::InvokeNewHook(new_ptr, new_size); memcpy(new_ptr, old_ptr, ((old_size < new_size) ? 
     memcpy(new_ptr, old_ptr, ((old_size < new_size) ? old_size : new_size));
     MallocHook::InvokeDeleteHook(old_ptr);
+    // We could use a variant of do_free() that leverages the fact
+    // that we already know the sizeclass of old_ptr.  The benefit
+    // would be small, so don't bother.
     do_free(old_ptr);
     return new_ptr;
   } else {
@@ -3004,13 +3092,16 @@ extern "C" struct mallinfo mallinfo(void) {
 // Some library routines on RedHat 9 allocate memory using malloc()
 // and free it using __libc_free() (or vice-versa).  Since we provide
 // our own implementations of malloc/free, we need to make sure that
-// the __libc_XXX variants also point to the same implementations.
+// the __libc_XXX variants (defined as part of glibc) also point to
+// the same implementations.
 //-------------------------------------------------------------------
 
+#if defined(__GLIBC__)
 extern "C" {
-#if defined(__GNUC__) && defined(HAVE___ATTRIBUTE__)
-  // Potentially faster variants that use the gcc alias extension
-#define ALIAS(x) __attribute__ ((weak, alias (x)))
+# if defined(__GNUC__) && !defined(__MACH__) && defined(HAVE___ATTRIBUTE__)
+  // Potentially faster variants that use the gcc alias extension.
+  // Mach-O (Darwin) does not support weak aliases, hence the __MACH__ check.
+# define ALIAS(x) __attribute__ ((weak, alias (x)))
   void* __libc_malloc(size_t size)              ALIAS("malloc");
   void  __libc_free(void* ptr)                  ALIAS("free");
   void* __libc_realloc(void* ptr, size_t size)  ALIAS("realloc");
@@ -3020,8 +3111,8 @@ extern "C" {
   void* __libc_valloc(size_t size)              ALIAS("valloc");
   void* __libc_pvalloc(size_t size)             ALIAS("pvalloc");
   int __posix_memalign(void** r, size_t a, size_t s) ALIAS("posix_memalign");
-#undef ALIAS
-#else
+# undef ALIAS
+# else   /* not __GNUC__ */
   // Portable wrappers
   void* __libc_malloc(size_t size) { return malloc(size); }
   void  __libc_free(void* ptr)     { free(ptr); }
@@ -3034,8 +3125,9 @@ extern "C" {
   int __posix_memalign(void** r, size_t a, size_t s) {
     return posix_memalign(r, a, s);
   }
-#endif
+# endif  /* __GNUC__ */
 }
+#endif   /* __GLIBC__ */
 
 // Override __libc_memalign in libc on linux boxes specially.
 // They have a bug in libc that causes them to (very rarely) allocate
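For readers unfamiliar with the alias extension used above: __attribute__((weak, alias("name"))) makes an additional symbol refer to an existing definition in the same translation unit, so each __libc_* entry point costs nothing at run time (no wrapper call, just a second name for the same code). A minimal gcc/ELF demonstration of the mechanism, with toy names; as the new comment in the patch notes, this is unavailable on Mach-O:

    #include <stdio.h>

    extern "C" int my_impl(int x) { return x + 1; }     // the real definition
    extern "C" int my_alias(int x)                      // second name, same code
        __attribute__((weak, alias("my_impl")));

    int main() {
      printf("%d\n", my_alias(41));   // prints 42; both symbols share one body
      return 0;
    }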