Diffstat (limited to 'src')
44 files changed, 12041 insertions, 0 deletions
diff --git a/src/addressmap-inl.h b/src/addressmap-inl.h new file mode 100644 index 0000000..1daaeb2 --- /dev/null +++ b/src/addressmap-inl.h @@ -0,0 +1,325 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// +// A fast map from addresses to values. Assumes that addresses are +// clustered. The main use is intended to be for heap-profiling. +// May be too memory-hungry for other uses. +// +// We use a user-defined allocator/de-allocator so that we can use +// this data structure during heap-profiling. +// +// IMPLEMENTATION DETAIL: +// +// Some default definitions/parameters: +// * Block -- aligned 128-byte region of the address space +// * Cluster -- aligned 1-MB region of the address space +// * Block-ID -- block-number within a cluster +// * Cluster-ID -- Starting address of cluster divided by cluster size +// +// We use a three-level map to represent the state: +// 1. A hash-table maps from a cluster-ID to the data for that cluster. +// 2. For each non-empty cluster we keep an array indexed by +// block-ID tht points to the first entry in the linked-list +// for the block. +// 3. At the bottom, we keep a singly-linked list of all +// entries in a block (for non-empty blocks). +// +// hash table +// +-------------+ +// | id->cluster |---> ... +// | ... | +// | id->cluster |---> Cluster +// +-------------+ +-------+ Data for one block +// | nil | +------------------------------------+ +// | ----+---|->[addr/value]-->[addr/value]-->... | +// | nil | +------------------------------------+ +// | ----+--> ... +// | nil | +// | ... | +// +-------+ +// +// Note that we require zero-bytes of overhead for completely empty +// clusters. The minimum space requirement for a cluster is the size +// of the hash-table entry plus a pointer value for each block in +// the cluster. Empty blocks impose no extra space requirement. +// +// The cost of a lookup is: +// a. A hash-table lookup to find the cluster +// b. An array access in the cluster structure +// c. 
A traversal over the linked-list for a block + +#ifndef _ADDRESSMAP_H +#define _ADDRESSMAP_H + +#include "google/perftools/config.h" +#include <stddef.h> +#include <string.h> +#if defined HAVE_STDINT_H +#include <stdint.h> // to get uint16_t (ISO naming madness) +#elif defined HAVE_INTTYPES_H +#include <inttypes.h> // another place uint16_t might be defined +#else +#include <sys/types.h> // our last best hope +#endif + +template <class Value> +class AddressMap { + public: + typedef void* (*Allocator)(size_t); + typedef void (*DeAllocator)(void*); + typedef void* Key; + + // Create an AddressMap that uses the specified allocator/deallocator. + // The allocator/deallocator should behave like malloc/free. + // For instance, the allocator does not need to return initialized memory. + AddressMap(Allocator alloc, DeAllocator dealloc); + ~AddressMap(); + + // If the map contains an entry for "key", store the associated + // value in "*result" and return true. Else return false. + bool Find(Key key, Value* result); + + // Insert <key,value> into the map. Any old value associated + // with key is forgotten. + void Insert(Key key, Value value); + + // Remove any entry for key in the map. If an entry was found + // and removed, stores the associated value in "*removed_value" + // and returns true. Else returns false. + bool FindAndRemove(Key key, Value* removed_value); + + private: + typedef uintptr_t Number; + + // The implementation assumes that addresses inserted into the map + // will be clustered. We take advantage of this fact by splitting + // up the address-space into blocks and using a linked-list entry + // for each block. + + // Size of each block. There is one linked-list for each block, so + // do not make the block-size too big. Oterwise, a lot of time + // will be spent traversing linked lists. + static const int kBlockBits = 7; + static const int kBlockSize = 1 << kBlockBits; + + // Entry kept in per-block linked-list + struct Entry { + Entry* next; + Key key; + Value value; + }; + + // We further group a sequence of consecutive blocks into a cluster. + // The data for a cluster is represented as a dense array of + // linked-lists, one list per contained block. + static const int kClusterBits = 13; + static const int kClusterSize = 1 << (kBlockBits + kClusterBits); + static const int kClusterBlocks = 1 << kClusterBits; + + // We use a simple chaining hash-table to represent the clusters. + struct Cluster { + Cluster* next; // Next cluster in chain + Number id; // Cluster ID + Entry* blocks[kClusterBlocks]; // Per-block linked-lists + }; + + // Number of hash-table entries. With the block-size/cluster-size + // defined above, each cluster covers 1 MB, so an 4K entry + // hash-table will give an average hash-chain length of 1 for 4GB of + // in-use memory. + static const int kHashBits = 12; + static const int kHashSize = 1 << 12; + + // Number of entry objects allocated at a time + static const int ALLOC_COUNT = 64; + + Cluster** hashtable_; // The hash-table + Entry* free_; // Free list of unused Entry objects + + // Multiplicative hash function: + // The value "kHashMultiplier" is the bottom 32 bits of + // int((sqrt(5)-1)/2 * 2^32) + // This is a good multiplier as suggested in CLR, Knuth. The hash + // value is taken to be the top "k" bits of the bottom 32 bits + // of the muliplied value. + static const uint32_t kHashMultiplier = 2654435769u; + static int HashInt(Number x) { + // Multiply by a constant and take the top bits of the result. 
+ const uint32_t m = static_cast<uint32_t>(x) * kHashMultiplier; + return static_cast<int>(m >> (32 - kHashBits)); + } + + // Find cluster object for specified address. If not found + // and "create" is true, create the object. If not found + // and "create" is false, return NULL. + Cluster* FindCluster(Number address, bool create) { + // Look in hashtable + const Number cluster_id = address >> (kBlockBits + kClusterBits); + const int h = HashInt(cluster_id); + for (Cluster* c = hashtable_[h]; c != NULL; c = c->next) { + if (c->id == cluster_id) { + return c; + } + } + + // Create cluster if necessary + if (create) { + Cluster* c = New<Cluster>(1); + c->id = cluster_id; + c->next = hashtable_[h]; + hashtable_[h] = c; + return c; + } + return NULL; + } + + // Return the block ID for an address within its cluster + static int BlockID(Number address) { + return (address >> kBlockBits) & (kClusterBlocks - 1); + } + + //-------------------------------------------------------------- + // Memory management -- we keep all objects we allocate linked + // together in a singly linked list so we can get rid of them + // when we are all done. Furthermore, we allow the client to + // pass in custom memory allocator/deallocator routines. + //-------------------------------------------------------------- + struct Object { + Object* next; + // The real data starts here + }; + + Allocator alloc_; // The allocator + DeAllocator dealloc_; // The deallocator + Object* allocated_; // List of allocated objects + + // Allocates a zeroed array of T with length "num". Also inserts + // the allocated block into a linked list so it can be deallocated + // when we are all done. + template <class T> T* New(int num) { + void* ptr = (*alloc_)(sizeof(Object) + num*sizeof(T)); + memset(ptr, 0, sizeof(Object) + num*sizeof(T)); + Object* obj = reinterpret_cast<Object*>(ptr); + obj->next = allocated_; + allocated_ = obj; + return reinterpret_cast<T*>(reinterpret_cast<Object*>(ptr) + 1); + } +}; + +// More implementation details follow: + +template <class Value> +AddressMap<Value>::AddressMap(Allocator alloc, DeAllocator dealloc) + : free_(NULL), + alloc_(alloc), + dealloc_(dealloc), + allocated_(NULL) { + hashtable_ = New<Cluster*>(kHashSize); +} + +template <class Value> +AddressMap<Value>::~AddressMap() { + // De-allocate all of the objects we allocated + for (Object* obj = allocated_; obj != NULL; ) { + Object* next = obj->next; + (*dealloc_)(obj); + obj = next; + } +} + +template <class Value> +bool AddressMap<Value>::Find(Key key, Value* result) { + const Number num = reinterpret_cast<Number>(key); + const Cluster* const c = FindCluster(num, false/*do not create*/); + if (c != NULL) { + for (const Entry* e = c->blocks[BlockID(num)]; e != NULL; e = e->next) { + if (e->key == key) { + *result = e->value; + return true; + } + } + } + return false; +} + +template <class Value> +void AddressMap<Value>::Insert(Key key, Value value) { + const Number num = reinterpret_cast<Number>(key); + Cluster* const c = FindCluster(num, true/*create*/); + + // Look in linked-list for this block + const int block = BlockID(num); + for (Entry* e = c->blocks[block]; e != NULL; e = e->next) { + if (e->key == key) { + e->value = value; + return; + } + } + + // Create entry + if (free_ == NULL) { + // Allocate a new batch of entries and add to free-list + Entry* array = New<Entry>(ALLOC_COUNT); + for (int i = 0; i < ALLOC_COUNT-1; i++) { + array[i].next = &array[i+1]; + } + array[ALLOC_COUNT-1].next = free_; + free_ = &array[0]; + } + Entry* e = 
free_; + free_ = e->next; + e->key = key; + e->value = value; + e->next = c->blocks[block]; + c->blocks[block] = e; +} + +template <class Value> +bool AddressMap<Value>::FindAndRemove(Key key, Value* removed_value) { + const Number num = reinterpret_cast<Number>(key); + Cluster* const c = FindCluster(num, false/*do not create*/); + if (c != NULL) { + for (Entry** p = &c->blocks[BlockID(num)]; *p != NULL; p = &(*p)->next) { + Entry* e = *p; + if (e->key == key) { + *removed_value = e->value; + *p = e->next; // Remove e from linked-list + e->next = free_; // Add e to free-list + free_ = e; + return true; + } + } + } + return false; +} + +#endif /* _ADDRESSMAP_H */ diff --git a/src/base/commandlineflags.h b/src/base/commandlineflags.h new file mode 100644 index 0000000..a9aceba --- /dev/null +++ b/src/base/commandlineflags.h @@ -0,0 +1,110 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// This file is a compatibility layer that defines Google's version of +// command line flags that are used for configuration. +// +// We put flags into their own namespace. It is purposefully +// named in an opaque way that people should have trouble typing +// directly. The idea is that DEFINE puts the flag in the weird +// namespace, and DECLARE imports the flag from there into the +// current namespace. The net result is to force people to use +// DECLARE to get access to a flag, rather than saying +// extern bool FLAGS_logtostderr; +// or some such instead. We want this so we can put extra +// functionality (like sanity-checking) in DECLARE if we want, +// and make sure it is picked up everywhere. +// +// We also put the type of the variable in the namespace, so that +// people can't DECLARE_int32 something that they DEFINE_bool'd +// elsewhere. 
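A minimal usage sketch of the DEFINE_*/DECLARE_* macros this header provides (illustrative only; the flag name, functions, and file split are hypothetical, not part of this commit):

// fast_path.cc -- the one translation unit that defines the flag
#include "base/commandlineflags.h"   // include path assumed from this tree's layout
DEFINE_bool(use_fast_path, false, "Enable the experimental fast path");

// caller.cc -- any other file imports the flag with DECLARE
#include "base/commandlineflags.h"
DECLARE_bool(use_fast_path);

bool MaybeUseFastPath() {
  // The using-declaration emitted by DECLARE_bool makes FLAGS_use_fast_path
  // visible here without naming the hidden FLAG__namespace directly.
  return FLAGS_use_fast_path;
}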
+#ifndef BASE_COMMANDLINEFLAGS_H__ +#define BASE_COMMANDLINEFLAGS_H__ + +#include <string> +#include <google/perftools/basictypes.h> + +#define DECLARE_VARIABLE(type, name) \ + namespace FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead { \ + extern type FLAGS_##name; \ + } \ + using FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead::FLAGS_##name + +#define DEFINE_VARIABLE(type, name, value, meaning) \ + namespace FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead { \ + type FLAGS_##name(value); \ + char FLAGS_no##name; \ + } \ + using FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead::FLAGS_##name + +// bool specialization +#define DECLARE_bool(name) \ + DECLARE_VARIABLE(bool, name) +#define DEFINE_bool(name, value, meaning) \ + DEFINE_VARIABLE(bool, name, value, meaning) + +// int32 specialization +#define DECLARE_int32(name) \ + DECLARE_VARIABLE(int32, name) +#define DEFINE_int32(name, value, meaning) \ + DEFINE_VARIABLE(int32, name, value, meaning) + +// int64 specialization +#define DECLARE_int64(name) \ + DECLARE_VARIABLE(int64, name) +#define DEFINE_int64(name, value, meaning) \ + DEFINE_VARIABLE(int64, name, value, meaning) + +#define DECLARE_uint64(name) \ + DECLARE_VARIABLE(uint64, name) +#define DEFINE_uint64(name, value, meaning) \ + DEFINE_VARIABLE(uint64, name, value, meaning) + +// double specialization +#define DECLARE_double(name) \ + DECLARE_VARIABLE(double, name) +#define DEFINE_double(name, value, meaning) \ + DEFINE_VARIABLE(double, name, value, meaning) + +// Special case for string, because we have to specify the namespace +// std::string, which doesn't play nicely with our FLAG__namespace hackery. +#define DECLARE_string(name) \ + namespace FLAG__namespace_do_not_use_directly_use_DECLARE_string_instead { \ + extern std::string FLAGS_##name; \ + } \ + using FLAG__namespace_do_not_use_directly_use_DECLARE_string_instead::FLAGS_##name +#define DEFINE_string(name, value, meaning) \ + namespace FLAG__namespace_do_not_use_directly_use_DECLARE_string_instead { \ + std::string FLAGS_##name(value); \ + char FLAGS_no##name; \ + } \ + using FLAG__namespace_do_not_use_directly_use_DECLARE_string_instead::FLAGS_##name + +#endif // BASE_COMMANDLINEFLAGS_H__ diff --git a/src/base/googleinit.h b/src/base/googleinit.h new file mode 100644 index 0000000..029bc81 --- /dev/null +++ b/src/base/googleinit.h @@ -0,0 +1,49 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Jacob Hoffman-Andrews + +#ifndef _GOOGLEINIT_H +#define _GOOGLEINIT_H + +class GoogleInitializer { + public: + typedef void (*void_function)(void); + GoogleInitializer(const char* name, void_function f) { + f(); + } +}; + +#define REGISTER_MODULE_INITIALIZER(name,body) \ + static void google_init_module_##name () { body; } \ + GoogleInitializer google_initializer_module_##name(#name, \ + google_init_module_##name) + +#endif /* _GOOGLEINIT_H */ diff --git a/src/base/logging.h b/src/base/logging.h new file mode 100644 index 0000000..693926b --- /dev/null +++ b/src/base/logging.h @@ -0,0 +1,88 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// This file contains #include information about logging-related stuff. +// Pretty much everybody needs to #include this file so that they can +// log various happenings. +// +#ifndef _LOGGING_H_ +#define _LOGGING_H_ + +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> + +// CHECK dies with a fatal error if condition is not true. It is *not* +// controlled by NDEBUG, so the check will be executed regardless of +// compilation mode. 
Therefore, it is safe to do things like: +// CHECK(fp->Write(x) == 4) +#define CHECK(condition) \ + do { \ + if (!(condition)) { \ + fprintf(stderr, "Check failed: %s\n", #condition); \ + exit(1); \ + } \ + } while (0) + +// Helper macro for binary operators; prints the two values on error +// Don't use this macro directly in your code, use CHECK_EQ et al below + +// WARNING: These don't compile correctly if one of the arguments is a pointer +// and the other is NULL. To work around this, simply static_cast NULL to the +// type of the desired pointer. + +// TODO(jandrews): Also print the values in case of failure. Requires some +// sort of type-sensitive ToString() function. +#define CHECK_OP(op, val1, val2) \ + do { \ + if (!((val1) op (val2))) { \ + fprintf(stderr, "Check failed: %s %s %s\n", #val1, #op, #val2); \ + exit(1); \ + } \ + } while (0) + +#define CHECK_EQ(val1, val2) CHECK_OP(==, val1, val2) +#define CHECK_NE(val1, val2) CHECK_OP(!=, val1, val2) +#define CHECK_LE(val1, val2) CHECK_OP(<=, val1, val2) +#define CHECK_LT(val1, val2) CHECK_OP(< , val1, val2) +#define CHECK_GE(val1, val2) CHECK_OP(>=, val1, val2) +#define CHECK_GT(val1, val2) CHECK_OP(> , val1, val2) + +enum {INFO, WARNING, ERROR, FATAL, NUM_SEVERITIES}; + +inline void LogPrintf(int severity, const char* pat, ...) { + va_list ap; + va_start(ap, pat); + vfprintf(stderr, pat, ap); + va_end(ap); + fprintf(stderr, "\n"); + if ((severity) == FATAL) + exit(1); +} + +#endif // _LOGGING_H_ diff --git a/src/config.h.in b/src/config.h.in new file mode 100644 index 0000000..4699bee --- /dev/null +++ b/src/config.h.in @@ -0,0 +1,173 @@ +/* src/config.h.in. Generated from configure.ac by autoheader. */ + +/* the namespace of hash_map */ +#undef HASH_NAMESPACE + +/* Define to 1 if compiler supports __builtin_stack_pointer */ +#undef HAVE_BUILTIN_STACK_POINTER + +/* Define to 1 if you have the <conflict-signal.h> header file. */ +#undef HAVE_CONFLICT_SIGNAL_H + +/* Define to 1 if you have the <dlfcn.h> header file. */ +#undef HAVE_DLFCN_H + +/* Define to 1 if you have the <execinfo.h> header file. */ +#undef HAVE_EXECINFO_H + +/* define if the compiler has hash_map */ +#undef HAVE_EXT_HASH_MAP + +/* define if the compiler has hash_set */ +#undef HAVE_EXT_HASH_SET + +/* Define to 1 if you have the `getpagesize' function. */ +#undef HAVE_GETPAGESIZE + +/* Define to 1 if you have the <grp.h> header file. */ +#undef HAVE_GRP_H + +/* define if the compiler has hash_map */ +#undef HAVE_HASH_MAP + +/* define if the compiler has hash_set */ +#undef HAVE_HASH_SET + +/* Define to 1 if you have the <inttypes.h> header file. */ +#undef HAVE_INTTYPES_H + +/* Define to 1 if you have the <libunwind.h> header file. */ +#undef HAVE_LIBUNWIND_H + +/* Define to 1 if you have the <linux/ptrace.h> header file. */ +#undef HAVE_LINUX_PTRACE_H + +/* Define to 1 if you have the <memory.h> header file. */ +#undef HAVE_MEMORY_H + +/* Define to 1 if you have a working `mmap' system call. */ +#undef HAVE_MMAP + +/* Define to 1 if you have the `munmap' function. */ +#undef HAVE_MUNMAP + +/* define if the compiler implements namespaces */ +#undef HAVE_NAMESPACES + +/* define if libc has program_invocation_name */ +#undef HAVE_PROGRAM_INVOCATION_NAME + +/* Define if you have POSIX threads libraries and header files. */ +#undef HAVE_PTHREAD + +/* Define to 1 if you have the <pwd.h> header file. */ +#undef HAVE_PWD_H + +/* Define to 1 if you have the `sbrk' function. */ +#undef HAVE_SBRK + +/* Define to 1 if you have the <stdint.h> header file. 
*/ +#undef HAVE_STDINT_H + +/* Define to 1 if you have the <stdlib.h> header file. */ +#undef HAVE_STDLIB_H + +/* Define to 1 if you have the <strings.h> header file. */ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the <string.h> header file. */ +#undef HAVE_STRING_H + +/* Define to 1 if `eip' is member of `struct sigcontext'. */ +#undef HAVE_STRUCT_SIGCONTEXT_EIP + +/* Define to 1 if `regs->nip' is member of `struct sigcontext'. */ +#undef HAVE_STRUCT_SIGCONTEXT_REGS__NIP + +/* Define to 1 if `rip' is member of `struct sigcontext'. */ +#undef HAVE_STRUCT_SIGCONTEXT_RIP + +/* Define to 1 if `sc_eip' is member of `struct sigcontext'. */ +#undef HAVE_STRUCT_SIGCONTEXT_SC_EIP + +/* Define to 1 if `sc_ip' is member of `struct sigcontext'. */ +#undef HAVE_STRUCT_SIGCONTEXT_SC_IP + +/* Define to 1 if `si_faddr' is member of `struct siginfo'. */ +#undef HAVE_STRUCT_SIGINFO_SI_FADDR + +/* Define to 1 if `uc_mcontext' is member of `struct ucontext'. */ +#undef HAVE_STRUCT_UCONTEXT_UC_MCONTEXT + +/* Define to 1 if you have the <syscall.h> header file. */ +#undef HAVE_SYSCALL_H + +/* Define to 1 if you have the <sys/stat.h> header file. */ +#undef HAVE_SYS_STAT_H + +/* Define to 1 if you have the <sys/types.h> header file. */ +#undef HAVE_SYS_TYPES_H + +/* Define to 1 if compiler supports __thread */ +#undef HAVE_TLS + +/* Define to 1 if you have the <unistd.h> header file. */ +#undef HAVE_UNISTD_H + +/* Define to 1 if you have the <unwind.h> header file. */ +#undef HAVE_UNWIND_H + +/* define if your compiler has __attribute__ */ +#undef HAVE___ATTRIBUTE__ + +/* Define to 1 if the system has the type `__int64'. */ +#undef HAVE___INT64 + +/* prefix where we look for installed files */ +#undef INSTALL_PREFIX + +/* Name of package */ +#undef PACKAGE + +/* Define to the address where bug reports for this package should be sent. */ +#undef PACKAGE_BUGREPORT + +/* Define to the full name of this package. */ +#undef PACKAGE_NAME + +/* Define to the full name and version of this package. */ +#undef PACKAGE_STRING + +/* Define to the one symbol short name of this package. */ +#undef PACKAGE_TARNAME + +/* Define to the version of this package. */ +#undef PACKAGE_VERSION + +/* printf format code for printing a size_t and ssize_t */ +#undef PRIdS + +/* printf format code for printing a size_t and ssize_t */ +#undef PRIuS + +/* printf format code for printing a size_t and ssize_t */ +#undef PRIxS + +/* Define to necessary symbol if this constant uses a non-standard name on + your system. */ +#undef PTHREAD_CREATE_JOINABLE + +/* Define to 1 if you have the ANSI C header files. */ +#undef STDC_HEADERS + +/* the namespace where STL code like vector<> is defined */ +#undef STL_NAMESPACE + +/* Version number of package */ +#undef VERSION + +/* Define to `__inline__' or `__inline' if that's what the C compiler + calls it, or to nothing if 'inline' is not supported under any name. */ +#ifndef __cplusplus +#undef inline +#endif diff --git a/src/google/heap-checker.h b/src/google/heap-checker.h new file mode 100644 index 0000000..323da5d --- /dev/null +++ b/src/google/heap-checker.h @@ -0,0 +1,475 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Maxim Lifantsev (with design ideas by Sanjay Ghemawat) +// +// Heap memory leak checker (utilizes heap-profiler and pprof). +// + +#ifndef BASE_HEAP_CHECKER_H__ +#define BASE_HEAP_CHECKER_H__ + +#include <google/perftools/basictypes.h> +#include <vector> + +// TODO(jandrews): rewrite this documentation +// HeapLeakChecker, a memory leak checking class. +// +// Verifies that there are no memory leaks between its +// construction and call to its *NoLeaks() or *SameHeap() member. +// +// It will dump two profiles at these two events +// (named <prefix>.<name>-beg.heap and <prefix>.<name>-end.heap +// where <prefix> is given by --heap_profile= and <name> by our costructor) +// and will return false in case the amount of in-use memory +// is more at the time of *NoLeaks() call than +// (or respectively differs at the time of *SameHeap() from) +// what it was at the time of our construction. +// It will also in this case print a message on how to process the dumped +// profiles to locate leaks. +// +// GUIDELINE: In addition to the local heap leak checking between two arbitrary +// points in program's execution, we provide a way for overall +// whole-program heap leak checking, which is WHAT ONE SHOULD NORMALLY USE. +// +// In order to enable the recommended whole-program heap leak checking +// in the BUILD rule for your binary, just depend on "//base:heapcheck" +// Alternatively you can call your binary with e.g. "--heap_check=normal" +// as one of the *early* command line arguments. +// +// CAVEAT: Doing the following alone will not work in many cases +// int main(int argc, char** argv) { +// FLAGS_heap_check = "normal"; +// InitGoogle(argv[0], &argc, &argv, true); +// <do things> +// } +// The reason is that the program must know that it's going to be +// heap leak checking itself before construction of +// its global variables happens and before main() is executed. +// NOTE: Once "--heap_check=<smth>" is in the command line or //base:heapcheck +// is linked in, you can change the value of FLAGS_heap_check in your program +// any way you wish but before InitGoogle() exits +// (which includes any REGISTER_MODULE_INITIALIZER). 
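A sketch of the pattern the NOTE above allows, assuming the binary already links in heapcheck (or gets --heap_check on its command line) and that heap_check is a string flag defined elsewhere in this tree; the initializer name and include paths are assumptions:

#include "base/commandlineflags.h"
#include "base/googleinit.h"

DECLARE_string(heap_check);

// Static initializers run before InitGoogle() exits, so this change is
// still seen before whole-program heap leak checking is finalized.
REGISTER_MODULE_INITIALIZER(relax_heap_check,
                            FLAGS_heap_check = "minimal");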
+// +// GUIDELINE CONT.: Depending on the value of the FLAGS_heap_check +// -- as well as other flags of this module -- +// different modifications of leak checking between different points in +// program's execution take place. +// Currently supported values from less strict to more strict are: +// "minimal", "normal", "strict", "draconian". +// The "as-is" value leaves control to the other flags of this module. +// The "local" value does not start whole-program heap leak checking +// but activates all our Disable*() methods +// for the benefit of local heap leak checking via HeapLeakChecker objects. +// +// For the case of FLAGS_heap_check == "normal" +// everything from before execution of all global variable constructors +// to normal program exit +// (namely after main() returns and after all REGISTER_HEAPCHECK_CLEANUP's +// are executed, but before any global variable destructors are executed) +// is checked for absence of heap memory leaks. +// +// NOTE: For all but "draconian" whole-program leak check we also +// ignore all heap objects reachable (at the time of the check) +// from any global variable or any live thread stack variable +// or from any object identified by a HeapLeakChecker::IgnoreObject() call. +// The liveness check we do is not very portable and is not 100% exact +// (it might ignore real leaks occasionally +// -- it might potentially not find some global data region to start from +// but we consider such cases to be our bugs to fix), +// but it works in most cases and saves us from +// writing a lot of explicit clean up code. +// +// THREADS and heap leak checking: At the beginning of HeapLeakChecker's +// construction and during *NoLeaks()/*SameHeap() calls we grab a lock so that +// heap activity in other threads is paused for the time +// we are recording or analyzing the state of the heap. +// To make non whole-program heap leak check meaningful there should be +// no heap activity in other threads at these times. +// +// For the whole-program heap leak check it is possible to have +// other threads active and working with the heap when the program exits. +// +// HINT: If you are debugging detected leaks, you can try different +// (e.g. less strict) values for FLAGS_heap_check +// to determine the cause of the reported leaks +// (see the code of HeapLeakChecker::InternalInitStart for details). +// +// GUIDELINE: Below are the preferred ways of making your (test) binary +// pass the above recommended overall heap leak check +// in the order of decreasing preference: +// +// 1. Fix the leaks if they are real leaks. +// +// 2. If you are sure that the reported leaks are not dangerous +// and there is no good way to fix them, then you can use +// HeapLeakChecker::DisableChecks(Up|In|At) calls (see below) +// in the relevant modules to disable certain stack traces +// for the purpose of leak checking. +// You can also use HeapLeakChecker::IgnoreObject() call +// to ignore certain leaked heap objects and everything reachable from them. +// +// 3. If the leaks are due to some initialization in a third-party package, +// you might be able to force that initialization before the +// heap checking starts. +// +// I.e. if FLAGS_heap_check == "minimal" or less strict, it is before +// calling InitGoogle or within some REGISTER_MODULE_INITIALIZER. +// If FLAGS_heap_check == "normal" or stricter, only +// HeapLeakChecker::LibCPreallocate() happens before heap checking starts. 
+// +// CAVEAT: Most Google (test) binaries are expected to pass heap leak check +// at the FLAGS_heap_check == "normal" level. +// In certain cases reverting to FLAGS_heap_check == "minimal" level is also +// fine (provided there's no easy way to make it pass at the "normal" level). +// Making a binary pass at "strict" or "draconian" level is not necessary +// or even desirable in the numerous cases when it requires adding +// a lot of (otherwise unused) heap cleanup code to various core libraries. +// +// NOTE: the following should apply only if +// FLAGS_heap_check == "strict" or stricter +// +// 4. If the found leaks are due to incomplete cleanup +// in destructors of global variables, +// extend or add those destructors +// or use a REGISTER_HEAPCHECK_CLEANUP to do the deallocations instead +// to avoid cleanup overhead during normal execution. +// This type of leaks get reported when one goes +// from "normal" to "strict" checking. +// +// NOTE: the following should apply only if +// FLAGS_heap_check == "draconian" or stricter +// +// 5. If the found leaks are for global static pointers whose values are +// allocated/grown (e.g on-demand) and never deallocated, +// then you should be able to add REGISTER_HEAPCHECK_CLEANUP's +// or appropriate destructors into these modules +// to free those objects. +// +// +// Example of local usage (anywhere in the program) -- but read caveat below: +// +// HeapLeakChecker heap_checker("test_foo"); +// +// { <code that exercises some foo functionality +// that should preserve memory allocation state> } +// +// CHECK(heap_checker.SameHeap()); +// +// NOTE: One should set FLAGS_heap_check to a non-empty value e.g. "local" +// to help suppress some false leaks for these local checks. +// CAVEAT: The problem with the above example of local checking +// is that you can easily get false leak reports if the checked code +// (indirectly) causes initialization or growth of some global structures +// like caches or reused global temporaries. +// In such cases you should either +// switch to the above *preferred* whole-program checking, +// or somehow *reliably* ensure that false leaks do not happen +// in the portion of the code you are checking. +// +// IMPORTANT: One way people have been using in unit-tests +// is to run some test functionality once +// and then run it again under a HeapLeakChecker object. +// While this helped in many cases, it is not guaranteed to always work +// -- read it will someday break for some hard to debug reason. +// These tricks are no longer needed and are now DEPRECATED +// in favor of using the whole-program checking by just +// adding a dependency on //base:heapcheck. +// +// CONCLUSION: Use the preferred checking via //base:heapcheck +// in your tests even when it means fixing (or bugging someone to fix) +// the leaks in the libraries the test depends on. +// + +// A macro to declare module heap check cleanup tasks +// (they run only if we are doing heap leak checking.) +// Use +// public: +// void Class::HeapCleanup(); +// if you need to do heap check cleanup on private members of a class. +#define REGISTER_HEAPCHECK_CLEANUP(name, body) \ + namespace { \ + void heapcheck_cleanup_##name() { body; } \ + static HeapCleaner heapcheck_cleaner_##name(&heapcheck_cleanup_##name); \ + } + +// A class that exists solely to run its destructor. This class should not be +// used directly, but instead by the REGISTER_HEAPCHECK_CLEANUP macro above. 
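A usage sketch for the cleanup macro above (the cache and its teardown function are hypothetical; the include path is assumed):

#include <google/heap-checker.h>

extern void DeletePathCache();   // hypothetical helper that frees a global cache

// Runs only when heap leak checking is active; frees the lazily grown
// cache so stricter checks do not report it as a leak.
REGISTER_HEAPCHECK_CLEANUP(free_path_cache, DeletePathCache())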
+class HeapCleaner { + public: + typedef void (*void_function)(void); + HeapCleaner(void_function f); + static void RunHeapCleanups(); + private: + static std::vector<void_function>* heap_cleanups_; +}; + +class HeapLeakChecker { + public: // Non-static functions for starting and doing leak checking. + + // Start checking and name the leak check performed. + // The name is used in naming dumped profiles + // and needs to be unique only within your binary. + // It must also be a string that can be a part of a file name, + // in particular not contain path expressions. + explicit HeapLeakChecker(const char *name); + + // Return true iff the heap does not have more objects allocated + // w.r.t. its state at the time of our construction. + // This does full pprof heap change checking and reporting. + // To detect tricky leaks it depends on correct working pprof implementation + // referred by FLAGS_heap_profile_pprof. + // (By 'tricky leaks' we mean a change of heap state that e.g. for SameHeap + // preserves the number of allocated objects and bytes + // -- see TestHeapLeakCheckerTrick in heap-checker_unittest.cc -- + // and thus is not detected by BriefNoLeaks.) + // CAVEAT: pprof will do no checking over stripped binaries + // (our automatic test binaries are stripped) + bool NoLeaks() { return DoNoLeaks(false, true, true); } + + // Return true iff the heap does not seem to have more objects allocated + // w.r.t. its state at the time of our construction + // by looking at the number of objects & bytes allocated. + // This also tries to do pprof reporting of detected leaks. + bool QuickNoLeaks() { return DoNoLeaks(false, false, true); } + + // Return true iff the heap does not seem to have more objects allocated + // w.r.t. its state at the time of our construction + // by looking at the number of objects & bytes allocated. + // This does not try to use pprof at all. + bool BriefNoLeaks() { return DoNoLeaks(false, false, false); } + + // These are similar to their *NoLeaks counterparts, + // but they in addition require no negative leaks, + // i.e. the state of the heap must be exactly the same + // as at the time of our construction. + bool SameHeap() { return DoNoLeaks(true, true, true); } + bool QuickSameHeap() { return DoNoLeaks(true, false, true); } + bool BriefSameHeap() { return DoNoLeaks(true, false, false); } + + // Destructor (verifies that some *NoLeaks method has been called). + ~HeapLeakChecker(); + + // Accessors to determine various internal parameters. These should + // be set as early as possible. + + // If overall heap check reports found leaks via pprof. Default: true + static void set_heap_check_report(bool); + // Location of pprof script. Default: $prefix/bin/pprof + static void set_pprof_path(const char*); + // Location to write profile dumps. 
Default: /tmp + static void set_dump_directory(const char*); + + static bool heap_check_report(); + static const char* pprof_path(); + static const char* dump_directory(); + + private: // data + + char* name_; // our remembered name + size_t name_length_; // length of the base part of name_ + int64 start_inuse_bytes_; // bytes in use at our construction + int64 start_inuse_allocs_; // allocations in use at our construction + + static pid_t main_thread_pid_; // For naming output files + static const char* invocation_name_; // For naming output files + static const char* invocation_path_; // For running 'pprof' + static std::string dump_directory_; // Location to write profile dumps + + public: // Static helpers to make us ignore certain leaks. + + // NOTE: All calls to DisableChecks* affect all later heap profile generation + // that happens in our construction and inside of *NoLeaks(). + // They do nothing when heap leak checking is turned off. + + // CAVEAT: Disabling via all the DisableChecks* functions happens only + // up to kMaxStackTrace (see heap-profiler.cc) + // stack frames down from the stack frame identified by the function. + // Hence, this disabling will stop working for very deep call stacks + // and you might see quite wierd leak profile dumps in such cases. + + // Register 'pattern' as another variant of a regular expression to match + // function_name, file_name:line_number, or function_address + // of function call/return points for which allocations below them should be + // ignored during heap leak checking. + // (This becomes a part of pprof's '--ignore' argument.) + // Usually this should be caled from a REGISTER_HEAPCHECK_CLEANUP + // in the source file that is causing the leaks being ignored. + // CAVEAT: Disabling via DisableChecksIn works only with non-strip'ped + // binaries, but Google's automated unit tests currently run strip'ped. + static void DisableChecksIn(const char* pattern); + + // A pair of functions to disable heap checking between them. + // For example + // ... + // void* start_address = HeapLeakChecker::GetDisableChecksStart(); + // <do things> + // HeapLeakChecker::DisableChecksToHereFrom(start_address); + // ... + // will disable heap leak checking for everything that happens + // during any execution of <do things> (including any calls from it). + // Each such pair of function calls must be from the same function, + // because this disabling works by remembering the range of + // return addresses for the two calls. + static void* GetDisableChecksStart(); + static void DisableChecksToHereFrom(void* start_address); + + // Register the function call point (address) 'stack_frames' above us for + // which allocations below it should be ignored during heap leak checking. + // 'stack_frames' must be >= 1 (in most cases one would use the value of 1). + // For example + // void Foo() { // Foo() should not get inlined + // HeapLeakChecker::DisableChecksUp(1); + // <do things> + // } + // will disable heap leak checking for everything that happens + // during any execution of <do things> (including any calls from it). + // CAVEAT: If Foo() is inlined this will disable heap leak checking + // under all processing of all functions Foo() is inlined into. + // Hence, for potentially inlined functions, use the GetDisableChecksStart, + // DisableChecksToHereFrom calls instead. + // (In the above example we store and use the return addresses + // from Foo to do the disabling.) 
+ static void DisableChecksUp(int stack_frames); + + // Same as DisableChecksUp, + // but the function return address is given explicitly. + static void DisableChecksAt(void* address); + + // Ignore an object at 'ptr' + // (as well as all heap objects (transitively) referenced from it) + // for the purposes of heap leak checking. + // If 'ptr' does not point to an active allocated object + // at the time of this call, it is ignored; + // but if it does, the object must not get deleted from the heap later on; + // it must also be not already ignored at the time of this call. + // CAVEAT: Use one of the DisableChecks* calls instead of this if possible + // if you want somewhat easier future heap leak check portability. + static void IgnoreObject(void* ptr); + + // CAVEAT: DisableChecks* calls will not help you in such cases + // when you disable only e.g. "new vector<int>", but later grow + // this vector forcing it to allocate more memory. + + // NOTE: All calls to *IgnoreObject affect only + // the overall whole-program heap leak check, not local checks with + // explicit HeapLeakChecker objects. + // They do nothing when heap leak checking is turned off. + + // Undo what an earlier IgnoreObject() call promised and asked to do. + // At the time of this call 'ptr' must point to an active allocated object + // that was previously registered with IgnoreObject(). + static void UnIgnoreObject(void* ptr); + + // NOTE: One of the standard uses of IgnoreObject() and UnIgnoreObject() + // is to ignore thread-specific objects allocated on heap. + + public: // Initializations; to be called from main() only. + + // Full starting of recommended whole-program checking. This runs after + // HeapChecker::BeforeConstructors and can do initializations which may + // depend on configuration parameters set by initialization code. + // Valid values of heap_check type are: + // - "minimal" + // - "normal" + // - "strict" + // - "draconian" + // - "local" + static void StartFromMain(const std::string& heap_check_type); + + private: // Various helpers + + // Helper for constructors + void Create(const char *name); + // Helper for *NoLeaks and *SameHeap + bool DoNoLeaks(bool same_heap, bool do_full, bool do_report); + // Helper for DisableChecksAt + static void DisableChecksAtLocked(void* address); + // Helper for DisableChecksIn + static void DisableChecksInLocked(const char* pattern); + // Helper for DisableChecksToHereFrom + static void DisableChecksFromTo(void* start_address, + void* end_address, + int max_depth); + // Helper for DoNoLeaks to ignore all objects reachable from all live data + static void IgnoreAllLiveObjectsLocked(); + // Helper for IgnoreAllLiveObjectsLocked to ignore all heap objects + // reachable from currently considered live objects + static void IgnoreLiveObjectsLocked(const char* name, const char* name2); + // Preallocates some libc data + static void LibCPreallocate(); + // Runs REGISTER_HEAPCHECK_CLEANUP cleanups and potentially + // calls DoMainHeapCheck + static void RunHeapCleanups(void); + // Do the overall whole-program heap leak check + static void DoMainHeapCheck(); + + // Type of task for UseProcMaps + enum ProcMapsTask { IGNORE_GLOBAL_DATA_LOCKED, DISABLE_LIBRARY_ALLOCS }; + // Read /proc/self/maps, parse it, and do the 'proc_maps_task' for each line. + static void UseProcMaps(ProcMapsTask proc_maps_task); + // A ProcMapsTask to disable allocations from 'library' + // that is mapped to [start_address..end_address) + // (only if library is a certain system library). 
+ static void DisableLibraryAllocs(const char* library, + uint64 start_address, + uint64 end_address); + // A ProcMapsTask to ignore global data belonging to 'library' + // mapped at 'start_address' with 'file_offset'. + static void IgnoreGlobalDataLocked(const char* library, + uint64 start_address, + uint64 file_offset); + + private: + + // This gets to execute before constructors for all global objects + static void BeforeConstructors(); + friend void HeapLeakChecker_BeforeConstructors(); + // This gets to execute after destructors for all global objects + friend void HeapLeakChecker_AfterDestructors(); + + public: // TODO(maxim): make this private and remove 'Kind' + // when all old clients are retired + + // Kind of checker we want to create + enum Kind { MAIN, MAIN_DEBUG }; + + // Start whole-executable checking + // (this is public to support existing deprecated usage). + // This starts heap profiler with a good unique name for the dumped profiles. + // If kind == MAIN_DEBUG the checking and profiling + // happen only in the debug compilation mode. + explicit HeapLeakChecker(Kind kind); // DEPRECATED + + private: + DISALLOW_EVIL_CONSTRUCTORS(HeapLeakChecker); +}; + +#endif // BASE_HEAP_CHECKER_H__ diff --git a/src/google/heap-profiler.h b/src/google/heap-profiler.h new file mode 100644 index 0000000..565428d --- /dev/null +++ b/src/google/heap-profiler.h @@ -0,0 +1,138 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// +// Module for heap-profiling. +// +// This module is safe to link into any program you may wish to profile at some +// point. It will not cause any noticeable slowdowns unless you activate it at +// some point in your program. 
So, for instance, you can do something like +// this (using GNU getopt-long extensions): +// +// int main (int argc, char **argv) { +// static struct option long_options[] = { +// {"heap-profile", 1, 0, 0}, +// }; +// int option_index = 0; +// int c = getopt_long (argc, argv, "", long_options, &option_index); +// +// if (c == 0 && !strcmp(long_options[option_index].name, "heap-profile")) { +// HeapProfilerStart(optarg); +// } +// +// /* ... */ +// } +// +// This allows you to easily profile your program at any time without having to +// recompile, and doesn't slow things down if you are not profiling. +// +// Heap profiles will be written to a sequence of files whose name +// starts with the supplied prefix. +// +// Example: +// % bin/programname --heap_profile=foo ... +// % ls foo.* +// foo.0000.heap +// foo.0001.heap +// foo.0002.heap +// ... +// +// If heap-profiling is turned on, a profile file is dumped every GB +// of allocated data. You can override this behavior by calling +// HeapProfilerSetAllocationInterval() to a number of bytes N. If +// you do that, a profile file will be dumped after every N bytes of +// allocations. +// +// If heap profiling is on, we also dump a profile when the +// in-use-bytes reach a new high-water-mark. Only increases of at +// least 100MB are considered significant changes in the +// high-water-mark. This number can be changed by calling +// HeapProfilerSetInuseInterval() with a different byte-value. +// +// STL WARNING: The HeapProfiler does not accurately track allocations in +// many STL implementations. This is because it is common for the default STL +// allocator to keep an internal pool of memory and nevery return it to the +// system. This means that large allocations may be attributed to an object +// that you know was destroyed. For a simple example, see +// TestHeapLeakCheckerSTL in src/tests/heap-checker_unittest.cc. +// +// This issue is resolved for GCC 3.3 and 3.4 by setting the environment +// variable GLIBCXX_FORCE_NEW, which forces the STL allocator to call `new' and +// `delete' explicitly for every allocation and deallocation. For GCC 3.2 and +// previous you will need to compile your source with -D__USE_MALLOC. For +// other compilers / STL libraries, there may be a similar solution; See your +// implementation's documentation for information. + +#ifndef _HEAP_PROFILER_H +#define _HEAP_PROFILER_H + +#include <google/perftools/basictypes.h> // For int64 definition +#include <stddef.h> + +// Start profiling and arrange to write profile data to file names +// of the form: "prefix.0000", "prefix.0001", ... +extern void HeapProfilerStart(const char* prefix); + +// Stop heap profiling. Can be restarted again with HeapProfilerStart(), +// but the currently accumulated profiling information will be cleared. +extern void HeapProfilerStop(); + +// Dump a profile now - can be used for dumping at a hopefully +// quiescent state in your program, in order to more easily track down +// memory leaks. Will include the reason in the logged message +extern void HeapProfilerDump(const char *reason); + +// Generate current heap profiling information. The returned pointer +// is a null-terminated string allocated using malloc() and should be +// free()-ed as soon as the caller does not need it anymore. +extern char* GetHeapProfile(); + +// ---- Configuration accessors ---- + +// Prefix to which we dump heap profiles. If empty, we do not dump. This +// must be set to your desired value before HeapProfiler::Init() is called. 
+// Default: empty +extern void HeapProfilerSetDumpPath(const char* path); + +// Level of logging used by the heap profiler and heap checker (if applicable) +// Default: 0 +extern void HeapProfilerSetLogLevel(int level); + +// Dump heap profiling information once every specified number of bytes +// allocated by the program. Default: 1GB +extern void HeapProfilerSetAllocationInterval(int64 interval); + +// Dump heap profiling information whenever the high-water +// memory usage mark increases by the specified number of +// bytes. Default: 100MB +extern void HeapProfilerSetInuseInterval(int64 interval); + +#endif /* _HEAP_PROFILER_H */ diff --git a/src/google/malloc_hook.h b/src/google/malloc_hook.h new file mode 100644 index 0000000..4da4593 --- /dev/null +++ b/src/google/malloc_hook.h @@ -0,0 +1,127 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// +// Some of our malloc implementations can invoke the following hooks +// whenever memory is allocated or deallocated. If the hooks are +// NULL, they are not invoked. +// +// One important user of these hooks is the heap profiler. + +#ifndef _GOOGLE_MALLOC_HOOK_H +#define _GOOGLE_MALLOC_HOOK_H + +#include <google/perftools/config.h> +#include <stddef.h> +#if defined HAVE_STDINT_H +#include <stdint.h> +#elif defined HAVE_INTTYPES_H +#include <inttypes.h> +#endif +#include <sys/types.h> + +class MallocHook { + public: + // The NewHook is invoked whenever an object is allocated. + // It may be passed NULL if the allocator returned NULL. + typedef void (*NewHook)(void* ptr, size_t size); + inline static NewHook GetNewHook() { return new_hook_; } + inline static NewHook SetNewHook(NewHook hook) { + NewHook result = new_hook_; + new_hook_ = hook; + return result; + } + inline static void InvokeNewHook(void* p, size_t s) { + if (new_hook_ != NULL) (*new_hook_)(p, s); + } + + // The DeleteHook is invoked whenever an object is deallocated. + // It may be passed NULL if the caller is trying to delete NULL. 
+ typedef void (*DeleteHook)(void* ptr); + inline static DeleteHook GetDeleteHook() { return delete_hook_; } + inline static DeleteHook SetDeleteHook(DeleteHook hook) { + DeleteHook result = delete_hook_; + delete_hook_ = hook; + return result; + } + inline static void InvokeDeleteHook(void* p) { + if (delete_hook_ != NULL) (*delete_hook_)(p); + } + + // The MmapHook is invoked whenever a region of memory is mapped. + // It may be passed MAP_FAILED if the mmap failed. + typedef void (*MmapHook)(void* result, + void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset); + inline static MmapHook GetMmapHook() { return mmap_hook_; } + inline static MmapHook SetMmapHook(MmapHook hook) { + MmapHook result = mmap_hook_; + mmap_hook_ = hook; + return result; + } + inline static void InvokeMmapHook(void* result, + void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset) { + if (mmap_hook_ != NULL) (*mmap_hook_)(result, + start, size, + protection, flags, + fd, offset); + } + + // The MunmapHook is invoked whenever an object is deallocated. + typedef void (*MunmapHook)(void* ptr, size_t size); + inline static MunmapHook GetMunmapHook() { return munmap_hook_; } + inline static MunmapHook SetMunmapHook(MunmapHook hook) { + MunmapHook result = munmap_hook_; + munmap_hook_ = hook; + return result; + } + inline static void InvokeMunmapHook(void* p, size_t size) { + if (munmap_hook_ != NULL) (*munmap_hook_)(p, size); + } + + private: + static NewHook new_hook_; + static DeleteHook delete_hook_; + static MmapHook mmap_hook_; + static MunmapHook munmap_hook_; + +}; + +#endif /* _GOOGLE_MALLOC_HOOK_H */ diff --git a/src/google/malloc_interface.h b/src/google/malloc_interface.h new file mode 100644 index 0000000..a6cfe17 --- /dev/null +++ b/src/google/malloc_interface.h @@ -0,0 +1,153 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
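Pairing the new and delete hooks from malloc_hook.h above gives a cheap live-object
counter. A sketch only (not thread-safe; the names are invented, and the delete hook
deliberately ignores size since none is provided to it):

#include <google/malloc_hook.h>
#include <stddef.h>

static long live_objects = 0;              // allocations minus deallocations

static void NoteNew(void* ptr, size_t size) {
  (void)size;
  if (ptr != NULL) ++live_objects;
}

static void NoteDelete(void* ptr) {
  if (ptr != NULL) --live_objects;
}

void TrackLiveObjects() {
  MallocHook::SetNewHook(&NoteNew);
  MallocHook::SetDeleteHook(&NoteDelete);
}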
+ +// --- +// Author: Sanjay Ghemawat <opensource@google.com> +// +// Extra interfaces exported by some malloc implementations. These +// interfaces are accessed through a virtual base class so an +// application can link against a malloc that does not implement these +// interfaces, and it will get default versions that do nothing. + +#ifndef _GOOGLE_MALLOC_INTERFACE_H__ +#define _GOOGLE_MALLOC_INTERFACE_H__ + +#include <google/perftools/config.h> +#include <stddef.h> +#include <string> + +static const int kMallocHistogramSize = 64; + +// The default implementations of the following routines do nothing. +class MallocInterface { + public: + virtual ~MallocInterface(); + + // See "verify_memory.h" to see what these routines do + virtual bool VerifyAllMemory(); + virtual bool VerifyNewMemory(void* p); + virtual bool VerifyArrayNewMemory(void* p); + virtual bool VerifyMallocMemory(void* p); + virtual bool MallocMemoryStats(int* blocks, size_t* total, + int histogram[kMallocHistogramSize]); + + // Get a human readable description of the current state of the malloc + // data structures. The state is stored as a null-terminated string + // in a prefix of "buffer[0,buffer_length-1]". + // REQUIRES: buffer_length > 0. + virtual void GetStats(char* buffer, int buffer_length); + + // Get a string that contains a sample of live objects and the stack + // traces that allocated these objects. The format of the returned + // string is equivalent to the output of the heap profiler and can + // therefore be passed to "pprof". + // + // The generated data is *appended* to "*result". I.e., the old + // contents of "*result" are preserved. + virtual void GetHeapSample(STL_NAMESPACE::string* result); + + // ------------------------------------------------------------------- + // Control operations for getting and setting malloc implementation + // specific parameters. Some currently useful properties: + // + // generic + // ------- + // "generic.current_allocated_bytes" + // Number of bytes currently allocated by application + // This property is not writable. + // + // "generic.heap_size" + // Number of bytes in the heap == + // current_allocated_bytes + + // fragmentation + + // freed memory regions + // This property is not writable. + // + // tcmalloc + // -------- + // "tcmalloc.max_total_thread_cache_bytes" + // Upper limit on total number of bytes stored across all + // per-thread caches. Default: 16MB. + // + // "tcmalloc.current_total_thread_cache_bytes" + // Number of bytes used across all thread caches. + // This property is not writable. + // + // "tcmalloc.slack_bytes" + // Number of bytes allocated from system, but not currently + // in use by malloced objects. I.e., bytes available for + // allocation without needing more bytes from system. + // This property is not writable. + // + // TODO: Add more properties as necessary + // ------------------------------------------------------------------- + + // Get the named "property"'s value. Returns true if the property + // is known. Returns false if the property is not a valid property + // name for the current malloc implementation. + // REQUIRES: property != NULL; value != NULL + virtual bool GetNumericProperty(const char* property, size_t* value); + + // Set the named "property"'s value. Returns true if the property + // is known and writable. Returns false if the property is not a + // valid property name for the current malloc implementation, or + // is not writable. 
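A sketch of reading and writing these properties through the singleton accessor
declared further below (the values and the 32MB figure are arbitrary; the tcmalloc
property is only writable when actually running under tcmalloc):

#include <google/malloc_interface.h>
#include <stdio.h>

void ReportMallocState() {
  MallocInterface* mi = MallocInterface::instance();      // always non-NULL
  size_t allocated = 0;
  if (mi->GetNumericProperty("generic.current_allocated_bytes", &allocated)) {
    fprintf(stderr, "currently allocated: %lu bytes\n",
            static_cast<unsigned long>(allocated));
  }
  mi->SetNumericProperty("tcmalloc.max_total_thread_cache_bytes", 32 << 20);
}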
+ // REQUIRES: property != NULL + virtual bool SetNumericProperty(const char* property, size_t value); + + // The current malloc implementation. Always non-NULL. + static MallocInterface* instance(); + + // Change the malloc implementation. Typically called by the + // malloc implementation during initialization. + static void Register(MallocInterface* implementation); + + protected: + // Get a list of stack traces of sampled allocation points. + // Returns a pointer to a "new[]-ed" result array. + // + // The state is stored as a sequence of adjacent entries + // in the returned array. Each entry has the following form: + // uintptr_t count; // Number of objects with following trace + // uintptr_t size; // Size of object + // uintptr_t depth; // Number of PC values in stack trace + // void* stack[depth]; // PC values that form the stack trace + // + // The list of entries is terminated by a "count" of 0. + // + // It is the responsibility of the caller to "delete[]" the returned array. + // + // May return NULL to indicate no results. + // + // This is an internal interface. Callers should use the more + // convenient "GetHeapSample(string*)" method defined above. + virtual void** ReadStackTraces(); +}; + +#endif // _GOOGLE_MALLOC_INTERFACE_H__ diff --git a/src/google/perftools/basictypes.h b/src/google/perftools/basictypes.h new file mode 100644 index 0000000..ed6af90 --- /dev/null +++ b/src/google/perftools/basictypes.h @@ -0,0 +1,97 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
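For reference, the ReadStackTraces() entry layout described in malloc_interface.h
above can be walked as follows. A sketch only: it assumes each uintptr_t field is
stored directly in a void** slot, which is how the comment describes the array:

#include <stdint.h>
#include <stdio.h>

static void PrintSamples(void** entries) {
  if (entries == NULL) return;                            // no results
  void** p = entries;
  while (reinterpret_cast<uintptr_t>(p[0]) != 0) {        // count == 0 ends the list
    uintptr_t count = reinterpret_cast<uintptr_t>(p[0]);  // objects with this trace
    uintptr_t size  = reinterpret_cast<uintptr_t>(p[1]);  // size of each object
    uintptr_t depth = reinterpret_cast<uintptr_t>(p[2]);  // number of PC values following
    fprintf(stderr, "%lu x %lu bytes (stack depth %lu)\n",
            (unsigned long)count, (unsigned long)size, (unsigned long)depth);
    p += 3 + depth;                                       // skip over the PC values
  }
  delete[] entries;                                       // caller must delete[] the array
}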
+ +#ifndef _BASICTYPES_H_ +#define _BASICTYPES_H_ + +#include <google/perftools/config.h> + +// To use this in an autoconf setting, make sure you run the following +// autoconf macros: +// AC_HEADER_STDC /* for stdint_h and inttypes_h */ +// AC_CHECK_TYPES([__int64]) /* defined in some windows platforms */ + +#if defined HAVE_STDINT_H +#include <stdint.h> // to get uint16_t (ISO naming madness) +#elif defined HAVE_INTTYPES_H +#include <inttypes.h> // another place uint16_t might be defined +#else +#include <sys/types.h> // our last best hope +#endif + +// Standard typedefs +// All Google code is compiled with -funsigned-char to make "char" +// unsigned. Google code therefore doesn't need a "uchar" type. +// TODO(csilvers): how do we make sure unsigned-char works on non-gcc systems? +typedef signed char schar; +typedef int8_t int8; +typedef int16_t int16; +typedef int32_t int32; +#ifdef HAVE___INT64 +typedef __int64 int64; +#else +typedef int64_t int64; +#endif + +// NOTE: unsigned types are DANGEROUS in loops and other arithmetical +// places. Use the signed types unless your variable represents a bit +// pattern (eg a hash value) or you really need the extra bit. Do NOT +// use 'unsigned' to express "this value should always be positive"; +// use assertions for this. + +typedef uint8_t uint8; +typedef uint16_t uint16; +typedef uint32_t uint32; +#ifdef HAVE___INT64 +typedef unsigned __int64 uint64; +#else +typedef uint64_t uint64; +#endif + +const uint16 kuint16max = ( (uint16) 0xFFFF); +const uint32 kuint32max = ( (uint32) 0xFFFFFFFF); +const uint64 kuint64max = ( (((uint64) kuint32max) << 32) | kuint32max ); + +const int8 kint8max = ( ( int8) 0x7F); +const int16 kint16max = ( ( int16) 0x7FFF); +const int32 kint32max = ( ( int32) 0x7FFFFFFF); +const int64 kint64max = ( ((( int64) kint32max) << 32) | kuint32max ); + +const int8 kint8min = ( ( int8) 0x80); +const int16 kint16min = ( ( int16) 0x8000); +const int32 kint32min = ( ( int32) 0x80000000); +const int64 kint64min = ( ((( int64) kint32min) << 32) | 0 ); + +// A macro to disallow the evil copy constructor and operator= functions +// This should be used in the private: declarations for a class +#define DISALLOW_EVIL_CONSTRUCTORS(TypeName) \ + TypeName(const TypeName&); \ + void operator=(const TypeName&) + +#endif // _BASICTYPES_H_ diff --git a/src/google/perftools/config.h.in b/src/google/perftools/config.h.in new file mode 100644 index 0000000..1ba97f8 --- /dev/null +++ b/src/google/perftools/config.h.in @@ -0,0 +1,136 @@ +/* src/google/perftools/config.h.in. Generated from configure.ac by autoheader. */ + +/* the namespace of hash_map */ +#undef HASH_NAMESPACE + +/* Define to 1 if you have the <conflict-signal.h> header file. */ +#undef HAVE_CONFLICT_SIGNAL_H + +/* Define to 1 if you have the <dlfcn.h> header file. */ +#undef HAVE_DLFCN_H + +/* Define to 1 if you have the <execinfo.h> header file. */ +#undef HAVE_EXECINFO_H + +/* define if the compiler has hash_map */ +#undef HAVE_EXT_HASH_MAP + +/* define if the compiler has hash_set */ +#undef HAVE_EXT_HASH_SET + +/* Define to 1 if you have the `getpagesize' function. */ +#undef HAVE_GETPAGESIZE + +/* define if the compiler has hash_map */ +#undef HAVE_HASH_MAP + +/* define if the compiler has hash_set */ +#undef HAVE_HASH_SET + +/* Define to 1 if you have the <inttypes.h> header file. */ +#undef HAVE_INTTYPES_H + +/* Define to 1 if you have the <memory.h> header file. */ +#undef HAVE_MEMORY_H + +/* Define to 1 if you have a working `mmap' system call. 
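The DISALLOW_EVIL_CONSTRUCTORS macro from basictypes.h above is meant to be used
like this (Widget is an invented name); copying or assigning then fails at compile
or link time because the members are declared but never defined:

#include <google/perftools/basictypes.h>

class Widget {
 public:
  Widget() { }
 private:
  DISALLOW_EVIL_CONSTRUCTORS(Widget);   // declares, but never defines, copy/assign
};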
*/ +#undef HAVE_MMAP + +/* Define to 1 if you have the `munmap' function. */ +#undef HAVE_MUNMAP + +/* define if the compiler implements namespaces */ +#undef HAVE_NAMESPACES + +/* define if libc has program_invocation_name */ +#undef HAVE_PROGRAM_INVOCATION_NAME + +/* Define if you have POSIX threads libraries and header files. */ +#undef HAVE_PTHREAD + +/* Define to 1 if you have the `sbrk' function. */ +#undef HAVE_SBRK + +/* Define to 1 if you have the <stdint.h> header file. */ +#undef HAVE_STDINT_H + +/* Define to 1 if you have the <stdlib.h> header file. */ +#undef HAVE_STDLIB_H + +/* Define to 1 if you have the <strings.h> header file. */ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the <string.h> header file. */ +#undef HAVE_STRING_H + +/* Define to 1 if `eip' is member of `struct sigcontext'. */ +#undef HAVE_STRUCT_SIGCONTEXT_EIP + +/* Define to 1 if `sc_eip' is member of `struct sigcontext'. */ +#undef HAVE_STRUCT_SIGCONTEXT_SC_EIP + +/* Define to 1 if `sc_ip' is member of `struct sigcontext'. */ +#undef HAVE_STRUCT_SIGCONTEXT_SC_IP + +/* Define to 1 if `si_faddr' is member of `struct siginfo'. */ +#undef HAVE_STRUCT_SIGINFO_SI_FADDR + +/* Define to 1 if you have the <sys/stat.h> header file. */ +#undef HAVE_SYS_STAT_H + +/* Define to 1 if you have the <sys/types.h> header file. */ +#undef HAVE_SYS_TYPES_H + +/* Define to 1 if you have the <unistd.h> header file. */ +#undef HAVE_UNISTD_H + +/* define if your compiler has __attribute__ */ +#undef HAVE___ATTRIBUTE__ + +/* Define to 1 if `uc_mcontext' is member of `# for the cpu-profiler struct + ucontext'. */ +#undef HAVE___FOR_THE_CPU_PROFILER___________________STRUCT_UCONTEXT_UC_MCONTEXT + +/* Define to 1 if the system has the type `__int64'. */ +#undef HAVE___INT64 + +/* prefix where we look for installed files */ +#undef INSTALL_PREFIX + +/* Name of package */ +#undef PACKAGE + +/* Define to the address where bug reports for this package should be sent. */ +#undef PACKAGE_BUGREPORT + +/* Define to the full name of this package. */ +#undef PACKAGE_NAME + +/* Define to the full name and version of this package. */ +#undef PACKAGE_STRING + +/* Define to the one symbol short name of this package. */ +#undef PACKAGE_TARNAME + +/* Define to the version of this package. */ +#undef PACKAGE_VERSION + +/* printf format code for printing a size_t */ +#undef PRIuS + +/* Define to necessary symbol if this constant uses a non-standard name on + your system. */ +#undef PTHREAD_CREATE_JOINABLE + +/* Define to 1 if you have the ANSI C header files. */ +#undef STDC_HEADERS + +/* the namespace where STL code like vector<> is defined */ +#undef STL_NAMESPACE + +/* Version number of package */ +#undef VERSION + +/* Define as `__inline' if that's what the C compiler calls it, or to nothing + if it is not supported. */ +#undef inline diff --git a/src/google/profiler.h b/src/google/profiler.h new file mode 100644 index 0000000..249d7be --- /dev/null +++ b/src/google/profiler.h @@ -0,0 +1,106 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// +// Module for CPU profiling based on periodic pc-sampling. +// +// To use this module, link it into your program. To activate it +// at runtime, set the environment variable "CPUPROFILE" to be the +// name of the file in which the profile data should be written. +// (If you don't set the environment variable, no profiling will +// happen, and the program should run without any slowdowns.) +// +// Once you have done this, there are two ways to determine which +// region(s) of code should be profiled: +// +// 1. If you set the "PROFILESELECTED" environment variable, +// only regions of code that are surrounded with "ProfilerEnable()" +// and "ProfilerDisable()" will be profiled. +// 2. Otherwise, the main thread, and any thread that has had +// ProfilerRegisterThread() called on it, will be profiled. +// +// Use pprof to view the resulting profile output. If you have dot and +// gv installed, you can also get a graphical representation of CPU usage. +// % pprof <path_to_executable> <profile_file_name> +// % pprof --dot <path_to_executable> <profile_file_name> +// % pprof --gv <path_to_executable> <profile_file_name> + +#ifndef _GOOGLE_PROFILER_H +#define _GOOGLE_PROFILER_H + +// Start profiling and write profile info into fname. +extern bool ProfilerStart(const char* fname); + +// Stop profiling. Can be started again with ProfilerStart(), but +// the currently accumulated profiling data will be cleared. +extern void ProfilerStop(); + + +// These functions have no effect if profiling has not been activated +// globally (by specifying the "CPUPROFILE" environment variable or by +// calling ProfilerStart() ). + +// Profile in the given thread. This is most usefully called when a +// new thread is first entered. Note this may not work if +// PROFILESELECTED is set. +extern void ProfilerRegisterThread(); + +// Turn profiling on and off, if PROFILESELECTED has been called. +extern void ProfilerEnable(); +extern void ProfilerDisable(); + +// Write out the current profile information to disk. 
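A minimal sketch of profiling one region from code (the file name and
ExpensiveComputation() are placeholders; ProfilerFlush() is declared just below):

#include <google/profiler.h>

static void ExpensiveComputation() { /* the work you want sampled */ }

void ProfileOneRegion() {
  ProfilerStart("/tmp/myprog.prof");   // activate profiling, writing to this file
  ExpensiveComputation();
  ProfilerFlush();                     // push accumulated samples to disk
  ProfilerStop();                      // accumulated data is cleared after this
}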
+extern void ProfilerFlush(); + +// ------------------------- ProfilerThreadState ----------------------- +// A small helper class that allows a thread to periodically check if +// profiling has been enabled or disabled, and to react appropriately +// to ensure that activity in the current thread is included in the +// profile. Usage: +// +// ProfileThreadState profile_state; +// while (true) { +// ... do some thread work ... +// profile_state.ThreadCheck(); +// } +class ProfilerThreadState { +public: + ProfilerThreadState(); + + // Called in a thread to enable or disable profiling on the thread + // based on whether profiling is currently on or off. + void ThreadCheck(); + +private: + bool was_enabled_; // True if profiling was on in our last call +}; + +#endif /* _GOOGLE_PROFILER_H */ diff --git a/src/google/stacktrace.h b/src/google/stacktrace.h new file mode 100644 index 0000000..6132d55 --- /dev/null +++ b/src/google/stacktrace.h @@ -0,0 +1,63 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// +// Routine to extract the current stack trace. + +#ifndef _GOOGLE_STACKTRACE_H +#define _GOOGLE_STACKTRACE_H + +extern int GetStackTrace(void** result, int max_depth, int skip_count); +// Skip the most recent "skip_count" stack frames (also skips the +// frame generated for the "GetStackTrace" routine itself), and then +// record the pc values for upto the next "max_depth" frames in +// "result". Returns the number of values recorded in "result". +// +// Example: +// main() { foo(); } +// foo() { bar(); } +// bar() { +// void* result[10]; +// int depth = GetStackTrace(result, 10, 1); +// } +// +// The GetStackTrace call will skip the frame for "bar". It will +// return 2 and will produce pc values that map to the following +// procedures: +// result[0] foo +// result[1] main +// (Actually, there may be a few more entries after "main" to account for +// startup procedures.) 
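A runnable variant of the example above that simply prints the raw PC values
(symbolization is left to external tools such as pprof or addr2line):

#include <google/stacktrace.h>
#include <stdio.h>

void DumpCallers() {
  void* pcs[32];
  int depth = GetStackTrace(pcs, 32, 1);   // the extra skip omits DumpCallers itself
  for (int i = 0; i < depth; ++i) {
    fprintf(stderr, "  #%d %p\n", i, pcs[i]);
  }
}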
+// +// This routine currently produces non-empty stack traces only for +// Linux/x86 machines. + +#endif /* _GOOGLE_STACKTRACE_H */ diff --git a/src/heap-checker-bcad.cc b/src/heap-checker-bcad.cc new file mode 100644 index 0000000..878bc8f --- /dev/null +++ b/src/heap-checker-bcad.cc @@ -0,0 +1,76 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// All Rights Reserved. +// +// Author: Maxim Lifantsev +// +// A file to ensure that components of heap leak checker run +// before all global object constructors +// and after all global object destructors. +// +// This file must be the last google library any google binary links against +// (we achieve this by making //base:base depend +// on //base:heap-checker-bcad, the library containing this .cc) +// + +#include <stdlib.h> // for abort() + +// A dummy variable to refer from heap-checker.cc. +// This is to make sure this file is not optimized out by the linker. +bool heap_leak_checker_bcad_variable; + +extern void HeapLeakChecker_BeforeConstructors(); // in heap-checker.cc +extern void HeapLeakChecker_AfterDestructors(); // in heap-checker.cc + +// A helper class to ensure that some components of heap leak checking +// can happen before construction and after destruction +// of all global/static objects. +class HeapLeakCheckerGlobalPrePost { + public: + HeapLeakCheckerGlobalPrePost() { + if (count_ == 0) HeapLeakChecker_BeforeConstructors(); + ++count_; + } + ~HeapLeakCheckerGlobalPrePost() { + if (count_ <= 0) abort(); + --count_; + if (count_ == 0) HeapLeakChecker_AfterDestructors(); + } + private: + // Counter of constructions/destructions of objects of this class + // (just in case there are more than one of them). + static int count_; +}; + +int HeapLeakCheckerGlobalPrePost::count_ = 0; + +// The early-construction/late-destruction global object. 
+static const HeapLeakCheckerGlobalPrePost heap_leak_checker_global_pre_post; diff --git a/src/heap-checker.cc b/src/heap-checker.cc new file mode 100644 index 0000000..60701d3 --- /dev/null +++ b/src/heap-checker.cc @@ -0,0 +1,1153 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// All Rights Reserved. +// +// Author: Maxim Lifantsev +// + +// NOTE: We almost never use CHECK and LOG in this module +// because we might be running before/after the logging susbystem +// is set up correctly. + +#include "google/perftools/config.h" + +#include <string> +#include <vector> +#include <map> +#include <google/perftools/hash_set.h> +#include <algorithm> + +#include <errno.h> +#include <unistd.h> +#include <string.h> +#include <sys/stat.h> +#include <netinet/in.h> // inet_ntoa +#include <arpa/inet.h> // inet_ntoa +#include <execinfo.h> // backtrace +#include <sys/poll.h> +#include <sys/types.h> +#include <fcntl.h> +#include <assert.h> + +#include <google/stacktrace.h> +#include <google/heap-profiler.h> +#include <google/heap-checker.h> +#include "heap-profiler-inl.h" + +#include "base/commandlineflags.h" +#include "base/logging.h" + +#ifdef HAVE_INTTYPES_H +#define __STDC_FORMAT_MACROS +#include <inttypes.h> +// TODO: have both SCNd64 and PRId64. 
We don't bother since they're the same +#define LLX "%"SCNx64 // how to read 64-bit hex +#define LLD "%"SCNd64 // how to read 64-bit deciman +#else +#define LLX "%llx" // hope for the best +#define LLD "%lld" +#endif + +using std::string; +using std::map; +using std::vector; +using std::swap; +using std::make_pair; +using std::min; +using std::max; +using HASH_NAMESPACE::hash_set; + +//---------------------------------------------------------------------- +// Flags that control heap-checking +//---------------------------------------------------------------------- + +DEFINE_bool(heap_check_report, true, + "If overall heap check reports the found leaks via pprof"); + +// These are not so much flags as internal configuration parameters that +// are set based on the argument to StartFromMain(). +DEFINE_bool(heap_check_before_constructors, true, + "deprecated; pretty much always true now"); + +DEFINE_bool(heap_check_after_destructors, false, + "If overall heap check is to end after global destructors " + "or right after all REGISTER_HEAPCHECK_CLEANUP's"); + +DEFINE_bool(heap_check_strict_check, true, + "If overall heap check is to be done " + "via HeapLeakChecker::*SameHeap " + "or HeapLeakChecker::*NoLeaks call"); + // heap_check_strict_check == false + // is useful only when heap_check_before_constructors == false + +DEFINE_bool(heap_check_ignore_told_live, true, + "If overall heap check is to ignore heap objects reachable " + "from what was given to HeapLeakChecker::IgnoreObject"); + +DEFINE_bool(heap_check_ignore_global_live, true, + "If overall heap check is to ignore heap objects reachable " + "from the global data"); + +DEFINE_bool(heap_check_ignore_thread_live, true, + "If set to true, objects reachable from thread stacks " + "are not reported as leaks"); + +DEFINE_string(heap_profile_pprof, INSTALL_PREFIX "/bin/pprof", + "Path to pprof to call for full leaks checking."); + +// External accessors for the above +void HeapLeakChecker::set_heap_check_report(bool b) { + FLAGS_heap_check_report = b; +} +void HeapLeakChecker::set_pprof_path(const char* s) { + FLAGS_heap_profile_pprof = s; +} +void HeapLeakChecker::set_dump_directory(const char* s) { + dump_directory_ = s; +} + +bool HeapLeakChecker::heap_check_report() { + return FLAGS_heap_check_report; +} +const char* HeapLeakChecker::pprof_path() { + return FLAGS_heap_profile_pprof.c_str(); +} +const char* HeapLeakChecker::dump_directory() { + return dump_directory_.c_str(); +} + +//---------------------------------------------------------------------- + +DECLARE_string(heap_profile); // in heap-profiler.cc +DECLARE_int32(heap_profile_log); // in heap-profiler.cc + +//---------------------------------------------------------------------- +// HeapLeakChecker global data +//---------------------------------------------------------------------- + +// Global lock for the global data of this module +static pthread_mutex_t hc_lock = PTHREAD_MUTEX_INITIALIZER; + +// the disabled regexp accumulated +// via HeapLeakChecker::DisableChecksIn +static string* disabled_regexp = NULL; + +//---------------------------------------------------------------------- + +// whole-program heap leak checker +static HeapLeakChecker* main_heap_checker = NULL; +// if we are doing (or going to do) any kind of heap-checking +// heap_checker_on == true implies HeapProfiler::is_on_ == true +static bool heap_checker_on = false; +// pid of the process that does whole-program heap leak checking +static pid_t heap_checker_pid = 0; + +// if we did heap profiling during global 
constructors execution +static bool constructor_heap_profiling = false; + +//---------------------------------------------------------------------- +// HeapLeakChecker live object tracking components +//---------------------------------------------------------------------- + +// Cases of live object placement we distinguish +enum ObjectPlacement { + MUST_BE_ON_HEAP, // Must point to a live object of the matching size in the + // map of the heap in HeapProfiler when we get to it. + WAS_ON_HEAP, // Is a live object on heap, but now deleted from + // the map of the heap objects in HeapProfiler. + IN_GLOBAL_DATA, // Is part of global data region of the executable. + THREAD_STACK, // Part of a thread stack +}; + +// Information about an allocated object +struct AllocObject { + void* ptr; // the object + uintptr_t size; // its size + ObjectPlacement place; // where ptr points to + + AllocObject(void* p, size_t s, ObjectPlacement l) + : ptr(p), size(s), place(l) { } +}; + +// All objects (memory ranges) ignored via HeapLeakChecker::IgnoreObject +// Key is the object's address; value is its size. +typedef map<uintptr_t, size_t> IgnoredObjectsMap; +static IgnoredObjectsMap* ignored_objects = NULL; + +// All objects (memory ranges) that we consider to be the sources of pointers +// to live (not leaked) objects. +// At different times this holds (what can be reached from) global data regions +// and the objects we've been told to ignore. +// For any AllocObject::ptr "live_objects" is supposed to contain at most one +// record at any time. We maintain this by checking with HeapProfiler's map +// of the heap and removing the live heap objects we've handled from it. +// This vector is maintained as a stack and the frontier of reachable +// live heap objects in our flood traversal of them. +typedef vector<AllocObject> LiveObjectsStack; +static LiveObjectsStack* live_objects = NULL; + +// This variable is set to non-NULL by thread/thread.cc if it has +// threads whose stacks have to be scanned. 
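A sketch of how a threading library might install this hook. The "registry" here is
a single fake stack, and the return value is taken, per the use further below, to be
the number of threads whose stacks could not be found; the names are invented:

typedef void (*StackRangeIterator)(void*, void*);
extern int (*heap_checker_thread_stack_extractor)(StackRangeIterator);

static char fake_stack[64 * 1024];                         // stands in for a real thread stack

static int ExtractMyThreadStacks(StackRangeIterator iterate) {
  (*iterate)(fake_stack, fake_stack + sizeof(fake_stack)); // report base..top of each stack
  return 0;                                                // no stacks were missing
}

void InstallStackExtractor() {
  heap_checker_thread_stack_extractor = &ExtractMyThreadStacks;
}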
+typedef void (*StackRangeIterator)(void*, void*); +int (*heap_checker_thread_stack_extractor)(StackRangeIterator) = NULL; + + +// This routine is called by thread code for every thread stack it knows about +static void RegisterStackRange(void* base, void* top) { + char* p1 = min(reinterpret_cast<char*>(base), reinterpret_cast<char*>(top)); + char* p2 = max(reinterpret_cast<char*>(base), reinterpret_cast<char*>(top)); + HeapProfiler::MESSAGE(1, "HeapChecker: Thread stack %p..%p (%d bytes)\n", + p1, p2, int(p2-p1)); + live_objects->push_back(AllocObject(p1, uintptr_t(p2-p1), THREAD_STACK)); +} + +static int GetStatusOutput(const char* command, string* output) { + FILE* f = popen(command, "r"); + if (f == NULL) { + fprintf(stderr, "popen returned NULL!!!\n"); // This shouldn't happen + exit(1); + } + + const int kMaxOutputLine = 10000; + char line[kMaxOutputLine]; + while (fgets(line, sizeof(line), f) != NULL) { + if (output) + *output += line; + } + + return pclose(f); +} + +void HeapLeakChecker::IgnoreGlobalDataLocked(const char* library, + uint64 start_address, + uint64 file_offset) { + HeapProfiler::MESSAGE(2, "HeapChecker: Looking into %s\n", library); + string command("/usr/bin/objdump -hw "); + command.append(library); + string output; + if (GetStatusOutput(command.c_str(), &output) != 0) { + HeapProfiler::MESSAGE(-1, "HeapChecker: " + "Failed executing %s\n", command.c_str()); + abort(); + } + const char* output_start = output.c_str(); + + if (FLAGS_heap_profile_log >= 5) { + HeapProfiler::MESSAGE(5, "HeapChecker: Looking at objdump\n"); + write(STDERR_FILENO, output.data(), output.size()); + } + + while (1) { + char sec_name[11]; + uint64 sec_size, sec_vmaddr, sec_lmaddr, sec_offset; + if (sscanf(output_start, "%*d .%10s "LLX" "LLX" "LLX" "LLX" ", + sec_name, &sec_size, &sec_vmaddr, + &sec_lmaddr, &sec_offset) == 5) { + if (strcmp(sec_name, "data") == 0 || + strcmp(sec_name, "bss") == 0) { + uint64 real_start = start_address + sec_offset - file_offset; + HeapProfiler::MESSAGE(4, "HeapChecker: " + "Got section %s: %p of "LLX" bytes\n", + sec_name, + reinterpret_cast<void*>(real_start), + sec_size); + live_objects->push_back(AllocObject(reinterpret_cast<void*>(real_start), + sec_size, IN_GLOBAL_DATA)); + } + } + // skip to the next line + const char* next = strpbrk(output_start, "\n\r"); + if (next == NULL) break; + output_start = next + 1; + } + IgnoreLiveObjectsLocked("in globals of\n ", library); +} + +// See if 'library' from /proc/self/maps has base name 'library_base' +// i.e. contains it and has '.' or '-' after it. +static bool IsLibraryNamed(const char* library, const char* library_base) { + const char* p = strstr(library, library_base); + size_t sz = strlen(library_base); + return p != NULL && (p[sz] == '.' || p[sz] == '-'); +} + +void HeapLeakChecker::DisableLibraryAllocs(const char* library, + uint64 start_address, + uint64 end_address) { + // TODO(maxim): maybe this should be extended to also use objdump + // and pick the text portion of the library more precisely. 
+ if (IsLibraryNamed(library, "/libpthread") || + // pthread has a lot of small "system" leaks we don't care about + IsLibraryNamed(library, "/libdl") || + // library loaders leak some "system" heap that we don't care about + IsLibraryNamed(library, "/ld")) { + HeapProfiler::MESSAGE(1, "HeapChecker: " + "Disabling direct allocations from %s :\n", + library); + DisableChecksFromTo(reinterpret_cast<void*>(start_address), + reinterpret_cast<void*>(end_address), + 1); // only disable allocation calls directly + // from the library code + } +} + +void HeapLeakChecker::UseProcMaps(ProcMapsTask proc_maps_task) { + FILE* const fp = fopen("/proc/self/maps", "r"); + char proc_map_line[1024]; + while (fgets(proc_map_line, sizeof(proc_map_line), fp) != NULL) { + // All lines starting like + // "401dc000-4030f000 r??p 00132000 03:01 13991972 lib/bin" + // identify a data and code sections of a shared library or our binary + uint64 start_address, end_address, file_offset, inode; + int size; + char permissions[5]; + if (sscanf(proc_map_line, LLX"-"LLX" %4s "LLX" %*x:%*x "LLD" %n", + &start_address, &end_address, permissions, + &file_offset, &inode, &size) != 5) continue; + proc_map_line[strlen(proc_map_line) - 1] = '\0'; // zap the newline + HeapProfiler::MESSAGE(4, "HeapChecker: " + "Looking at /proc/self/maps line:\n %s\n", + proc_map_line); + if (proc_maps_task == DISABLE_LIBRARY_ALLOCS && + strncmp(permissions, "r-xp", 4) == 0 && inode != 0) { + if (start_address >= end_address) abort(); + DisableLibraryAllocs(proc_map_line + size, + start_address, end_address); + } + if (proc_maps_task == IGNORE_GLOBAL_DATA_LOCKED && + // grhat based on Red Hat Linux 9 + (strncmp(permissions, "rw-p", 4) == 0 || + // Fedora Core 3 + strncmp(permissions, "rwxp", 4) == 0) && + inode != 0) { + if (start_address >= end_address) abort(); + IgnoreGlobalDataLocked(proc_map_line + size, start_address, file_offset); + } + } + fclose(fp); +} + +// Total number and size of live objects dropped from the profile. +static int64 live_objects_total = 0; +static int64 live_bytes_total = 0; + +// This pointer needs to be outside, rather than inside, the function +// HeapLeakChecker::IgnoreAllLiveObjectsLocked() so that the compiler, in +// this case gcc 3.4.1, does not complain that it is an unused variable. +// Nevertheless, the value's not actually used elsewhere, just retained. 
+static IgnoredObjectsMap* reach_ignored_objects = NULL; + +void HeapLeakChecker::IgnoreAllLiveObjectsLocked() { + // the leaks of building live_objects below are ignored in our caller + CHECK(live_objects == NULL); + live_objects = new LiveObjectsStack; + if (FLAGS_heap_check_ignore_thread_live && + (heap_checker_thread_stack_extractor != NULL)) { + int drop = (*heap_checker_thread_stack_extractor)(&RegisterStackRange); + if (drop > 0) { + HeapProfiler::MESSAGE(0, "HeapChecker: Thread stacks not found " + "for %d threads; may get false leak reports\n", + drop); + } + } + if (FLAGS_heap_check_ignore_told_live && ignored_objects) { + HeapProfiler::AllocValue alloc_value; + for (IgnoredObjectsMap::const_iterator object = ignored_objects->begin(); + object != ignored_objects->end(); ++object) { + void* ptr = reinterpret_cast<void*>(object->first); + live_objects-> + push_back(AllocObject(ptr, object->second, MUST_BE_ON_HEAP)); + // we do this liveness check for ignored_objects before doing any + // live heap walking to make sure it does not fail needlessly: + bool have_on_heap = + HeapProfiler::HaveOnHeapLocked(&ptr, &alloc_value); + if (!(have_on_heap && object->second == alloc_value.bytes)) { + HeapProfiler::MESSAGE(-1, "HeapChecker: " + "%p of %"PRIuS" bytes " + "from an IgnoreObject() disappeared\n", + ptr, object->second); + abort(); + } + } + IgnoreLiveObjectsLocked("ignored", ""); + } + // Just a pointer for reachability of ignored_objects; + // we can't delete them here because the deletions won't be recorded + // by profiler, whereas the allocations might have been. + reach_ignored_objects = ignored_objects; + ignored_objects = NULL; + if (FLAGS_heap_check_ignore_global_live) { + UseProcMaps(IGNORE_GLOBAL_DATA_LOCKED); + } + if (live_objects_total) { + HeapProfiler::MESSAGE(0, "HeapChecker: " + "Not reporting "LLD" reachable " + "objects of "LLD" bytes\n", + live_objects_total, live_bytes_total); + } + // Free these: we made them here and heap profiler never saw them + delete live_objects; + live_objects = NULL; +} + +// This function irreparably changes HeapProfiler's state by dropping from it +// the objects we consider live here. +// But we don't care, since it is called only at program exit. +void HeapLeakChecker::IgnoreLiveObjectsLocked(const char* name, + const char* name2) { + int64 live_object_count = 0; + int64 live_byte_count = 0; + while (!live_objects->empty()) { + void* object = live_objects->back().ptr; + size_t size = live_objects->back().size; + const ObjectPlacement place = live_objects->back().place; + live_objects->pop_back(); + HeapProfiler::AllocValue alloc_value; + if (place == MUST_BE_ON_HEAP && + HeapProfiler::HaveOnHeapLocked(&object, &alloc_value)) { + HeapProfiler::RecordFreeLocked(object); // drop it from the profile + live_object_count += 1; + live_byte_count += size; + } + HeapProfiler::MESSAGE(5, "HeapChecker: " + "Looking for heap pointers " + "in %p of %"PRIuS" bytes\n", object, size); + // Try interpretting any byte sequence in object,size as a heap pointer + const size_t alignment = sizeof(void*); + // alignment at which we should consider pointer positions here + // use 1 if any alignment is ok + const size_t remainder = reinterpret_cast<uintptr_t>(object) % alignment; + if (remainder) { + reinterpret_cast<char*&>(object) += alignment - remainder; + if (size >= alignment - remainder) { + size -= alignment - remainder; + } else { + size = 0; + } + } + while (size >= sizeof(void*)) { +// TODO(jandrews): Make this part of the configure script. 
+#define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32 *>(_p)) + void* ptr = reinterpret_cast<void*>(UNALIGNED_LOAD32(object)); + void* current_object = object; + reinterpret_cast<char*&>(object) += alignment; + size -= alignment; + HeapProfiler::MESSAGE(6, "HeapChecker: " + "Trying pointer to %p at %p\n", + ptr, current_object); + // Do not need the following since the data for live_objects + // is not recorded by heap-profiler: + // if (ptr == live_objects) continue; + if (HeapProfiler::HaveOnHeapLocked(&ptr, &alloc_value)) { + // We take the (hopefully low) risk here of encountering by accident + // a byte sequence in memory that matches an address of + // a heap object which is in fact leaked. + // I.e. in very rare and probably not repeatable/lasting cases + // we might miss some real heap memory leaks. + HeapProfiler::MESSAGE(5, "HeapChecker: " + "Found pointer to %p" + " of %"PRIuS" bytes at %p\n", + ptr, alloc_value.bytes, current_object); + HeapProfiler::RecordFreeLocked(ptr); // drop it from the profile + live_object_count += 1; + live_byte_count += alloc_value.bytes; + live_objects->push_back(AllocObject(ptr, alloc_value.bytes, + WAS_ON_HEAP)); + } + } + } + live_objects_total += live_object_count; + live_bytes_total += live_byte_count; + if (live_object_count) { + HeapProfiler::MESSAGE(1, "HeapChecker: " + "Removed "LLD" live heap objects" + " of "LLD" bytes: %s%s\n", + live_object_count, live_byte_count, name, name2); + } +} + +//---------------------------------------------------------------------- +// HeapLeakChecker leak check disabling components +//---------------------------------------------------------------------- + +void HeapLeakChecker::DisableChecksUp(int stack_frames) { + if (!heap_checker_on) return; + if (stack_frames < 1) abort(); + void* stack[1]; + if (GetStackTrace(stack, 1, stack_frames) != 1) abort(); + DisableChecksAt(stack[0]); +} + +void HeapLeakChecker::DisableChecksAt(void* address) { + if (!heap_checker_on) return; + if (pthread_mutex_lock(&hc_lock) != 0) abort(); + DisableChecksAtLocked(address); + if (pthread_mutex_unlock(&hc_lock) != 0) abort(); +} + +void HeapLeakChecker::DisableChecksIn(const char* pattern) { + if (!heap_checker_on) return; + if (pthread_mutex_lock(&hc_lock) != 0) abort(); + DisableChecksInLocked(pattern); + if (pthread_mutex_unlock(&hc_lock) != 0) abort(); +} + +void* HeapLeakChecker::GetDisableChecksStart() { + if (!heap_checker_on) return NULL; + void* start_address; + if (GetStackTrace(&start_address, 1, 0) != 1) abort(); + return start_address; +} + +void HeapLeakChecker::DisableChecksToHereFrom(void* start_address) { + if (!heap_checker_on) return; + void* end_address; + if (GetStackTrace(&end_address, 1, 0) != 1) abort(); + if (start_address > end_address) swap(start_address, end_address); + DisableChecksFromTo(start_address, end_address, + 10000); // practically no stack depth limit: + // heap profiler keeps much shorter stack traces +} + +void HeapLeakChecker::IgnoreObject(void* ptr) { + if (!heap_checker_on) return; + HeapProfiler::AllocValue alloc_value; + if (pthread_mutex_lock(&hc_lock) != 0) abort(); + if (HeapProfiler::HaveOnHeap(&ptr, &alloc_value)) { + HeapProfiler::MESSAGE(1, "HeapChecker: " + "Going to ignore live object " + "at %p of %"PRIuS" bytes\n", + ptr, alloc_value.bytes); + if (ignored_objects == NULL) ignored_objects = new IgnoredObjectsMap; + if (!ignored_objects->insert(make_pair(reinterpret_cast<uintptr_t>(ptr), + alloc_value.bytes)).second) { + HeapProfiler::MESSAGE(-1, "HeapChecker: " + 
"%p is already being ignored\n", ptr); + abort(); + } + } + if (pthread_mutex_unlock(&hc_lock) != 0) abort(); +} + +void HeapLeakChecker::UnIgnoreObject(void* ptr) { + if (!heap_checker_on) return; + HeapProfiler::AllocValue alloc_value; + if (pthread_mutex_lock(&hc_lock) != 0) abort(); + bool ok = HeapProfiler::HaveOnHeap(&ptr, &alloc_value); + if (ok) { + ok = false; + if (ignored_objects) { + IgnoredObjectsMap::iterator object = + ignored_objects->find(reinterpret_cast<uintptr_t>(ptr)); + if (object != ignored_objects->end() && + alloc_value.bytes == object->second) { + ignored_objects->erase(object); + ok = true; + HeapProfiler::MESSAGE(1, "HeapChecker: " + "Now not going to ignore live object " + "at %p of %"PRIuS" bytes\n", + ptr, alloc_value.bytes); + } + } + } + if (pthread_mutex_unlock(&hc_lock) != 0) abort(); + if (!ok) { + HeapProfiler::MESSAGE(-1, "HeapChecker: " + "%p has not been ignored\n", ptr); + abort(); + } +} + +//---------------------------------------------------------------------- +// HeapLeakChecker non-static functions +//---------------------------------------------------------------------- + +void HeapLeakChecker::Create(const char *name) { + name_ = NULL; + if (!HeapProfiler::is_on_) return; // fast escape + name_length_ = strlen(name); + char* n = new char[name_length_ + 4 + 1]; + // Heap activity in other threads is paused for this whole function. + HeapProfiler::Lock(); + if (HeapProfiler::is_on_ && HeapProfiler::filename_prefix_) { + if (!heap_checker_on) { + HeapProfiler::MESSAGE(0, "HeapChecker: " + "Checking was not activated via " + "the heap_check command line flag. " + "You might hence get more false leak reports!\n"); + heap_checker_on = true; + } + assert(!HeapProfiler::dumping_); // not called from dumping code + assert(strchr(name, '/') == NULL); // must be a simple name + name_ = n; + memcpy(name_, name, name_length_); + memcpy(name_ + name_length_, "-beg", 4 + 1); + // To make the profile let our thread work with the heap + // without profiling this while we hold the lock. 
+ assert(!HeapProfiler::temp_disable_); + HeapProfiler::temp_disabled_tid_ = pthread_self(); + HeapProfiler::temp_disable_ = true; + HeapProfiler::dump_for_leaks_ = true; + HeapProfiler::DumpLocked("leak check start", name_); + HeapProfiler::dump_for_leaks_ = false; + HeapProfiler::temp_disable_ = false; + start_inuse_bytes_ = HeapProfiler::profile_.alloc_size_ - + HeapProfiler::profile_.free_size_; + start_inuse_allocs_ = HeapProfiler::profile_.allocs_ - + HeapProfiler::profile_.frees_; + } else { + HeapProfiler::MESSAGE(-1, "HeapChecker: " + "Heap profiler is not active, " + "hence checker \"%s\" will do nothing!\n", name); + } + HeapProfiler::Unlock(); + if (name_ == NULL) delete[] n; +} + +HeapLeakChecker::HeapLeakChecker(const char *name) { + assert(strcmp(name, "_main_") != 0); // reserved + Create(name); +} + +DECLARE_int64(heap_profile_allocation_interval); +DECLARE_int64(heap_profile_inuse_interval); + +// Save pid of main thread for using in naming dump files +int32 HeapLeakChecker::main_thread_pid_ = getpid(); +// Directory in which to dump profiles +string HeapLeakChecker::dump_directory_ = "/tmp"; +#ifdef HAVE_PROGRAM_INVOCATION_NAME +extern char* program_invocation_name; +extern char* program_invocation_short_name; +const char* HeapLeakChecker::invocation_name_ = program_invocation_short_name; +const char* HeapLeakChecker::invocation_path_ = program_invocation_name; +#else +const char* HeapLeakChecker::invocation_name_ = "heap-checker"; +const char* HeapLeakChecker::invocation_path_ = "heap-checker"; // I guess? +#endif + +HeapLeakChecker::HeapLeakChecker(Kind kind) { + if (!(kind == MAIN || kind == MAIN_DEBUG)) abort(); + bool start = true; + if (kind == MAIN_DEBUG) start = false; + if (start) { + if (FLAGS_heap_profile.empty()) { + // doing just leaks checking: no periodic dumps + FLAGS_heap_profile_allocation_interval = kint64max; + FLAGS_heap_profile_inuse_interval = kint64max; + } + char pid_buf[15]; + snprintf(pid_buf, sizeof(pid_buf), ".%d", main_thread_pid_); + HeapProfilerStart((dump_directory_ + "/" + + invocation_name_ + + pid_buf).c_str()); + } + Create("_main_"); +} + +// Copy of FLAGS_heap_profile_pprof. +// Need this since DoNoLeaks can happen +// after FLAGS_heap_profile_pprof is destroyed. +static string* flags_heap_profile_pprof = &FLAGS_heap_profile_pprof; + +// CAVEAT: Threads, liveness, and heap leak check: +// It might be possible for to have a race leak condition +// for a whole-program leak check due to heap activity in other threads +// when HeapLeakChecker::DoNoLeaks is called at program's exit. +// It can occur if after allocating a heap object a thread does not +// quickly make the object reachable from some global/static variable +// or from the thread's own stack variable. +// Good news is that the only way to achieve this for a thread seems to be +// to keep the only pointer to an allocated object in a CPU register +// (i.e. in particular not call any other functions). +// Probably thread context switching and thread stack boundary +// acquisition via heap_checker_thread_stack_extractor +// do not make the above in-CPU-pointer scenario possible. + +bool HeapLeakChecker::DoNoLeaks(bool same_heap, + bool do_full, + bool do_report) { + // Heap activity in other threads is paused for this function + // until we got all profile difference info. + HeapProfiler::Lock(); + if (HeapProfiler::is_on_ && this == main_heap_checker) { + // We do this only for the main atexit check + // not to distort the heap profile in the other cases. 
+ if (FLAGS_heap_check_ignore_told_live || + FLAGS_heap_check_ignore_thread_live || + FLAGS_heap_check_ignore_global_live) { + // Give other threads some time (just in case) + // to make live-reachable the objects that they just allocated + // before we got the HeapProfiler's lock: + poll(NULL, 0, 100); + if (pthread_mutex_lock(&hc_lock) != 0) abort(); + assert(!HeapProfiler::temp_disable_); + HeapProfiler::temp_disabled_tid_ = pthread_self(); + HeapProfiler::temp_disable_ = true; + // For this call we are free to call new/delete from this thread: + // heap profiler will ignore them without acquiring its lock: + IgnoreAllLiveObjectsLocked(); + HeapProfiler::temp_disable_ = false; + if (pthread_mutex_unlock(&hc_lock) != 0) abort(); + } + } + assert(!HeapProfiler::dumping_); // not called from dumping code + if (HeapProfiler::is_on_ && HeapProfiler::filename_prefix_) { + if (name_ == NULL) { + HeapProfiler::MESSAGE(-1, "HeapChecker: " + "*NoLeaks|SameHeap must be called only once" + " and profiling must be not turned on " + "after construction of a HeapLeakChecker\n"); + abort(); + } + memcpy(name_ + name_length_, "-end", 4 + 1); + // To make the profile let our thread work with the heap + // without profiling this while we hold the lock. + assert(!HeapProfiler::temp_disable_); + HeapProfiler::temp_disabled_tid_ = pthread_self(); + HeapProfiler::temp_disable_ = true; + HeapProfiler::dump_for_leaks_ = true; + HeapProfiler::DumpLocked("leak check end", name_); + HeapProfiler::dump_for_leaks_ = false; + HeapProfiler::temp_disable_ = false; + int64 disabled_bytes = HeapProfiler::disabled_.alloc_size_ - + HeapProfiler::disabled_.free_size_; + int64 disabled_allocs = HeapProfiler::disabled_.allocs_ - + HeapProfiler::disabled_.frees_; + if (disabled_bytes) { + HeapProfiler::MESSAGE(0, "HeapChecker: " + "Not reporting "LLD" disabled objects" + " of "LLD" bytes\n", + disabled_allocs, disabled_bytes); + } + if (FLAGS_heap_check_before_constructors && this == main_heap_checker) { + // compare against empty initial profile + start_inuse_bytes_ = 0; + start_inuse_allocs_ = 0; + } + int64 increase_bytes = + (HeapProfiler::profile_.alloc_size_ - + HeapProfiler::profile_.free_size_) - start_inuse_bytes_; + int64 increase_allocs = + (HeapProfiler::profile_.allocs_ - + HeapProfiler::profile_.frees_) - start_inuse_allocs_; + HeapProfiler::Unlock(); + bool see_leaks = + (same_heap ? (increase_bytes != 0 || increase_allocs != 0) + : (increase_bytes > 0 || increase_allocs > 0)); + if (see_leaks || do_full) { + name_[name_length_] = '\0'; + const char* gv_command_tail + = " --edgefraction=1e-10 --nodefraction=1e-10 --gv"; + string ignore_re; + if (disabled_regexp) { + ignore_re += " --ignore=\"^"; + ignore_re += *disabled_regexp; + ignore_re += "$\""; + } + // XXX(jandrews): This fix masks a bug where we detect STL leaks + // spuriously because the STL allocator allocates memory and never gives + // it back. This did not occur before because we overrode the STL + // allocator to use tcmalloc, which called our hooks appropriately. + // The solution is probably to find a way to ignore memory held by the + // STL allocator, which may cause leaks in local variables to be ignored. + char command[6 * PATH_MAX + 200]; + const char* drop_negative = same_heap ? 
"" : " --drop_negative"; + if (this != main_heap_checker || + !FLAGS_heap_check_before_constructors) { + // compare against initial profile only if need to + snprintf(command, sizeof(command), "%s --base=\"%s.%s-beg.heap\" %s ", + flags_heap_profile_pprof->c_str(), + HeapProfiler::filename_prefix_, + name_, drop_negative); + } else { + snprintf(command, sizeof(command), "%s", + flags_heap_profile_pprof->c_str()); + } + snprintf(command + strlen(command), sizeof(command) - strlen(command), + " %s \"%s.%s-end.heap\" %s --inuse_objects --lines", + invocation_path_, HeapProfiler::filename_prefix_, + name_, ignore_re.c_str()); + // --lines is important here to catch leaks when !see_leaks + char cwd[PATH_MAX+1]; + if (getcwd(cwd, PATH_MAX) != cwd) abort(); + if (see_leaks) { + HeapProfiler::MESSAGE(-1, "HeapChecker: " + "Heap memory leaks of "LLD" bytes and/or " + ""LLD" allocations detected by check \"%s\".\n\n" + "To investigate leaks manually use e.g.\n" + "cd %s; " // for proper symbol resolution + "%s%s\n\n", + increase_bytes, increase_allocs, name_, + cwd, command, gv_command_tail); + } + string output; + int checked_leaks = 0; + if ((see_leaks && do_report) || do_full) { + if (access(flags_heap_profile_pprof->c_str(), X_OK|R_OK) != 0) { + HeapProfiler::MESSAGE(-1, "HeapChecker: " + "WARNING: Skipping pprof check:" + " could not run it at %s\n", + flags_heap_profile_pprof->c_str()); + } else { + checked_leaks = GetStatusOutput(command, &output); + if (checked_leaks != 0) { + HeapProfiler::MESSAGE(-1, "ERROR: Could not run pprof at %s\n", + flags_heap_profile_pprof->c_str()); + abort(); + } + } + if (see_leaks && output.empty() && checked_leaks == 0) { + HeapProfiler::MESSAGE(-1, "HeapChecker: " + "These must be leaks that we disabled" + " (pprof succeded)!\n"); + see_leaks = false; + } + // do not fail the check just due to us being a stripped binary + if (!see_leaks && strstr(output.c_str(), "nm: ") != NULL && + strstr(output.c_str(), ": no symbols") != NULL) output.resize(0); + if (!(see_leaks || checked_leaks == 0)) abort(); + } + bool tricky_leaks = !output.empty(); + if (!see_leaks && tricky_leaks) { + HeapProfiler::MESSAGE(-1, "HeapChecker: " + "Tricky heap memory leaks of" + " no bytes and no allocations " + "detected by check \"%s\".\n" + "To investigate leaks manually uge e.g.\n" + "cd %s; " // for proper symbol resolution + "%s%s\n\n", + name_, cwd, command, gv_command_tail); + see_leaks = true; + } + if (see_leaks && do_report) { + if (checked_leaks == 0) { + HeapProfiler::MESSAGE(-1, "HeapChecker: " + "Below is this pprof's output:\n\n"); + write(STDERR_FILENO, output.data(), output.size()); + HeapProfiler::MESSAGE(-1, "\n\n"); + } else { + HeapProfiler::MESSAGE(-1, "HeapChecker: " + "pprof has failed\n\n"); + } + } + } else { + HeapProfiler::MESSAGE(0, "HeapChecker: No leaks found\n"); + } + delete [] name_; + name_ = NULL; + return !see_leaks; + } else { + if (name_ != NULL) { + HeapProfiler::MESSAGE(-1, "HeapChecker: " + "Profiling must stay enabled " + "during leak checking\n"); + abort(); + } + HeapProfiler::Unlock(); + return true; + } +} + +HeapLeakChecker::~HeapLeakChecker() { + if (name_ != NULL) { + HeapProfiler::MESSAGE(-1, "HeapChecker: " + "Some *NoLeaks|SameHeap method" + " must be called on the checker\n"); + abort(); + } +} + +//---------------------------------------------------------------------- +// HeapLeakChecker overall heap check components +//---------------------------------------------------------------------- + +vector<HeapCleaner::void_function>* 
HeapCleaner::heap_cleanups_ = NULL;
+
+// When a HeapCleaner object is initialized, add its function to the static list
+// of cleaners to be run before leaks checking.
+HeapCleaner::HeapCleaner(void_function f) {
+ if (heap_cleanups_ == NULL)
+ heap_cleanups_ = new vector<HeapCleaner::void_function>;
+ heap_cleanups_->push_back(f);
+}
+
+// Run all of the cleanup functions and delete the vector.
+void HeapCleaner::RunHeapCleanups() {
+ if (!heap_cleanups_)
+ return;
+ for (int i = 0; i < heap_cleanups_->size(); i++) {
+ void (*f)(void) = (*heap_cleanups_)[i];
+ f();
+ }
+ delete heap_cleanups_;
+ heap_cleanups_ = NULL;
+}
+
+// Program exit heap cleanup registered with atexit().
+// Will not get executed when we crash on a signal.
+void HeapLeakChecker::RunHeapCleanups(void) {
+ if (heap_checker_pid == getpid()) { // can get here (via forks?)
+ // with other pids
+ HeapCleaner::RunHeapCleanups();
+ if (!FLAGS_heap_check_after_destructors) {
+ DoMainHeapCheck();
+ // Disable further dumping
+ if (HeapProfiler::is_on_)
+ HeapProfilerStop();
+ }
+ }
+}
+
+void HeapLeakChecker::LibCPreallocate() {
+ // force various C library static allocations before we start leak-checking
+ strerror(errno);
+ struct in_addr addr;
+ addr.s_addr = INADDR_ANY;
+ inet_ntoa(addr);
+ const time_t now = time(NULL);
+ ctime(&now);
+ void *stack[1];
+ backtrace(stack, 0);
+}
+
+// Called from main() immediately after setting any requisite parameters
+// from HeapChecker and HeapProfiler.
+void HeapLeakChecker::StartFromMain(const string& heap_check_type) {
+ if (heap_check_type != "") {
+ if (!constructor_heap_profiling) {
+ HeapProfiler::MESSAGE(-1, "HeapChecker: Can not start so late. "
+ "You have to enable heap checking with\n"
+ " --heapcheck=..."
+ " or a dependency on //base:heapcheck\n");
+ abort();
+ }
+ // make an indestructible copy for heap leak checking
+ // happening after global variable destruction
+ flags_heap_profile_pprof = new string(FLAGS_heap_profile_pprof);
+ // Set all flags
+ if (heap_check_type == "minimal") {
+ // The least we can check.
+ FLAGS_heap_check_before_constructors = false; // (ignore more)
+ FLAGS_heap_check_after_destructors = false; // to after cleanup
+ // (most data is live)
+ FLAGS_heap_check_strict_check = false; // < profile check (ignore more)
+ FLAGS_heap_check_ignore_told_live = true; // ignore all live
+ FLAGS_heap_check_ignore_thread_live = true; // ignore all live
+ FLAGS_heap_check_ignore_global_live = true; // ignore all live
+ } else if (heap_check_type == "normal") {
+ // Faster than 'minimal' and not much stricter.
+ FLAGS_heap_check_before_constructors = true; // from no profile (fast)
+ FLAGS_heap_check_after_destructors = false; // to after cleanup
+ // (most data is live)
+ FLAGS_heap_check_strict_check = true; // == profile check (fast)
+ FLAGS_heap_check_ignore_told_live = true; // ignore all live
+ FLAGS_heap_check_ignore_thread_live = true; // ignore all live
+ FLAGS_heap_check_ignore_global_live = true; // ignore all live
+ } else if (heap_check_type == "strict") {
+ // A bit stricter than 'normal': global destructors must fully clean up
+ // after themselves if they are present.
+ FLAGS_heap_check_before_constructors = true; // from no profile (fast) + FLAGS_heap_check_after_destructors = true; // to after destructors + // (less data live) + FLAGS_heap_check_strict_check = true; // == profile check (fast) + FLAGS_heap_check_ignore_told_live = true; // ignore all live + FLAGS_heap_check_ignore_thread_live = true; // ignore all live + FLAGS_heap_check_ignore_global_live = true; // ignore all live + } else if (heap_check_type == "draconian") { + // Drop not very portable and not very exact live heap flooding. + FLAGS_heap_check_before_constructors = true; // from no profile (fast) + FLAGS_heap_check_after_destructors = true; // to after destructors + // (need them) + FLAGS_heap_check_strict_check = true; // == profile check (fast) + FLAGS_heap_check_ignore_told_live = false; // no live flood (stricter) + FLAGS_heap_check_ignore_thread_live = false; // no live flood (stricter) + FLAGS_heap_check_ignore_global_live = false; // no live flood (stricter) + } else if (heap_check_type == "as-is") { + // do nothing: use other flags as is + } else if (heap_check_type == "local") { + // do nothing + } else { + LogPrintf(FATAL, "Unsupported heap_check flag: %s", + heap_check_type.c_str()); + } + assert(heap_checker_pid == getpid()); + heap_checker_on = true; + assert(HeapProfiler::is_on_); + UseProcMaps(DISABLE_LIBRARY_ALLOCS); + if (heap_check_type != "local") { + // Schedule registered heap cleanup + atexit(RunHeapCleanups); + assert(main_heap_checker == NULL); + main_heap_checker = new HeapLeakChecker(MAIN); + // make sure new/delete hooks are installed properly: + IgnoreObject(main_heap_checker); + UnIgnoreObject(main_heap_checker); + // ** + // ** If we crash here, it's probably because the binary is not + // ** linked with an instrumented malloc, such as tcmalloc. + // ** "nm <this_binary> | grep new" to verify. An instrumented + // ** malloc is necessary for using heap-checker. 
+ // ** + } + } else { + heap_checker_on = false; + } + if (!heap_checker_on && constructor_heap_profiling) { + // turns out do not need checking in the end; stop profiling + HeapProfiler::MESSAGE(0, "HeapChecker: Turning itself off\n"); + HeapProfilerStop(); + } +} + +void HeapLeakChecker::DoMainHeapCheck() { + assert(heap_checker_pid == getpid()); + if (main_heap_checker) { + bool same_heap = FLAGS_heap_check_strict_check; + if (FLAGS_heap_check_before_constructors) same_heap = true; + // false here just would make it slower in this case + // (we don't use the starting profile anyway) + bool do_full = !same_heap; // do it if it can help ignore false leaks + bool do_report = FLAGS_heap_check_report; + HeapProfiler::MESSAGE(0, "HeapChecker: Checking for memory leaks\n"); + if (!main_heap_checker->DoNoLeaks(same_heap, do_full, do_report)) { + HeapProfiler::MESSAGE(-1, "ERROR: Leaks found in main heap check, aborting\n"); + abort(); + } + delete main_heap_checker; + main_heap_checker = NULL; + } +} + +//---------------------------------------------------------------------- +// HeapLeakChecker global constructor/destructor ordering components +//---------------------------------------------------------------------- + +void HeapLeakChecker::BeforeConstructors() { + if (constructor_heap_profiling) abort(); + constructor_heap_profiling = true; + LibCPreallocate(); + HeapProfiler::Lock(); + HeapProfiler::EarlyStartLocked(); // fire-up HeapProfiler hooks + heap_checker_on = true; + assert(HeapProfiler::is_on_); + HeapProfiler::Unlock(); +} + +extern bool heap_leak_checker_bcad_variable; // in heap-checker-bcad.cc + +// Whenever the heap checker library is linked in, this should be called before +// all global object constructors run. This can be tricky and depends on +// heap-checker-bcad.o being the last file linked in. +void HeapLeakChecker_BeforeConstructors() { + heap_checker_pid = getpid(); // set it always + // just to reference it, so that heap-checker-bcad.o is linked in + heap_leak_checker_bcad_variable = true; + HeapLeakChecker::BeforeConstructors(); +} + +// This function is executed after all global object destructors run. +void HeapLeakChecker_AfterDestructors() { + if (heap_checker_pid == getpid()) { // can get here (via forks?) + // with other pids + if (FLAGS_heap_check_after_destructors && main_heap_checker) { + HeapLeakChecker::DoMainHeapCheck(); + poll(NULL, 0, 500); + // Need this hack to wait for other pthreads to exit. + // Otherwise tcmalloc or debugallocation find errors + // on a free() call from pthreads. 
+ }
+ if (main_heap_checker) abort();
+ }
+}
+
+//----------------------------------------------------------------------
+// HeapLeakChecker disabling helpers
+//----------------------------------------------------------------------
+
+// These functions are at the end of the file to prevent their inlining:
+
+void HeapLeakChecker::DisableChecksInLocked(const char* pattern) {
+ // disable our leaks below for growing disabled_regexp
+ void* stack[1];
+ if (GetStackTrace(stack, 1, 1) != 1) abort();
+ DisableChecksAtLocked(stack[0]);
+ // make disabled_regexp
+ if (disabled_regexp == NULL) disabled_regexp = new string;
+ HeapProfiler::MESSAGE(1, "HeapChecker: "
+ "Disabling leaks checking in stack traces "
+ "under frames matching \"%s\"\n", pattern);
+ if (disabled_regexp->size()) *disabled_regexp += '|';
+ *disabled_regexp += pattern;
+}
+
+void HeapLeakChecker::DisableChecksFromTo(void* start_address,
+ void* end_address,
+ int max_depth) {
+ assert(start_address < end_address);
+ // disable our leaks for constructing disabled_ranges_
+ DisableChecksUp(1);
+ if (pthread_mutex_lock(&hc_lock) != 0) abort();
+ if (HeapProfiler::disabled_ranges_ == NULL) {
+ HeapProfiler::disabled_ranges_ = new HeapProfiler::DisabledRangeMap;
+ }
+ HeapProfiler::RangeValue value;
+ value.start_address = reinterpret_cast<uintptr_t>(start_address);
+ value.max_depth = max_depth;
+ if (HeapProfiler::disabled_ranges_->
+ insert(make_pair(reinterpret_cast<uintptr_t>(end_address),
+ value)).second) {
+ HeapProfiler::MESSAGE(1, "HeapChecker: "
+ "Disabling leaks checking in stack traces "
+ "under frame addresses between %p..%p\n",
+ start_address, end_address);
+ }
+ if (pthread_mutex_unlock(&hc_lock) != 0) abort();
+}
+
+void HeapLeakChecker::DisableChecksAtLocked(void* address) {
+ if (HeapProfiler::disabled_addresses_ == NULL) {
+ HeapProfiler::disabled_addresses_ = new HeapProfiler::DisabledAddressesSet;
+ }
+ // disable our leaks for constructing disabled_addresses_
+ void* stack[1];
+ if (GetStackTrace(stack, 1, 1) != 1) abort();
+ HeapProfiler::disabled_addresses_->
+ insert(reinterpret_cast<uintptr_t>(stack[0]));
+ // disable the requested address
+ if (HeapProfiler::disabled_addresses_->
+ insert(reinterpret_cast<uintptr_t>(address)).second) {
+ HeapProfiler::MESSAGE(1, "HeapChecker: "
+ "Disabling leaks checking in stack traces "
+ "under frame address %p\n",
+ address);
+ }
+}
diff --git a/src/heap-profiler-inl.h b/src/heap-profiler-inl.h
new file mode 100644
index 0000000..c42eaba
--- /dev/null
+++ b/src/heap-profiler-inl.h
@@ -0,0 +1,213 @@
+// Copyright (c) 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// All Rights Reserved. +// +// Author: Maxim Lifantsev +// +// Some hooks into heap-profiler.cc +// that are needed by heap-checker.cc +// + +#ifndef BASE_HEAP_PROFILER_INL_H__ +#define BASE_HEAP_PROFILER_INL_H__ + +#include <google/perftools/config.h> + +#if defined HAVE_STDINT_H +#include <stdint.h> // to get uint16_t (ISO naming madness) +#elif defined HAVE_INTTYPES_H +#include <inttypes.h> // another place uint16_t might be defined +#else +#include <sys/types.h> // our last best hope +#endif +#include <pthread.h> +#include <google/perftools/basictypes.h> +#include <google/heap-profiler.h> +#include <map> +#include <google/perftools/hash_set.h> + +template<class T> class AddressMap; // in addressmap-inl.h +class HeapLeakChecker; // in heap-checker.h + +// namespace for heap profiler components +class HeapProfiler { + public: // data types + + // Profile entry + struct Bucket { + uintptr_t hash_; // Hash value + int depth_; // Depth of stack trace + void** stack_; // Stack trace + int32 allocs_; // Number of allocs + int32 frees_; // Number of frees + int64 alloc_size_; // Total size of all allocated objects + int64 free_size_; // Total size of all freed objects + Bucket* next_; // Next entry in hash-table + }; + + // Info stored in the address map + struct AllocValue { + Bucket* bucket; // The stack-trace bucket + size_t bytes; // Number of allocated bytes + }; + typedef AddressMap<AllocValue> AllocationMap; + + // Value stored in the map of disabled address ranges; + // its key is the end of the address range. + // We'll ignore allocations with a return address in a disabled range + // if the address occurs at 'max_depth' or less in the stack trace. + struct RangeValue { + uintptr_t start_address; // the start of the range + int max_depth; // the maximal stack depth to disable at + }; + typedef STL_NAMESPACE::map<uintptr_t, RangeValue> DisabledRangeMap; + typedef HASH_NAMESPACE::hash_set<uintptr_t> DisabledAddressesSet; + + private: // state variables + // NOTE: None of these have destructors that change their state. + // Keep it this way: heap-checker depends on it. + + // Is heap-profiling on as a subsytem + static bool is_on_; + // If we are disabling heap-profiling recording for incoming + // (de)allocation calls from the thread specified by temp_disabled_tid_. + // This is done for (de)allocations that are internal + // to heap profiler or heap checker, so that we can hold the global + // profiler's lock and pause heap activity from other threads. 
+ static bool temp_disable_; + static pthread_t temp_disabled_tid_; + // The disabled addresses registered + // with HeapLeakChecker::DisableChecksUp + static DisabledAddressesSet* disabled_addresses_; + // The disabled address ranges registered + // with HeapLeakChecker::DisableChecksFromTo. + static DisabledRangeMap* disabled_ranges_; + // Flag if we are doing heap dump for leaks checking vs. + // for general memory profiling + static bool dump_for_leaks_; + // Prevents recursive dumping + static bool dumping_; + // Overall profile stats + static Bucket total_; + // Last dumped profile stats + static Bucket profile_; + // Stats for the disabled part of the last dumped profile + static Bucket disabled_; + // Prefix used for profile file names (NULL if not ready for dumping yet) + static char* filename_prefix_; + // Map of all currently allocated object we know about + static AllocationMap* allocation_; + // Number of frames to skip in stack traces. This is the number of functions + // that are called between malloc() and RecordAlloc(). This can differ + // depending on the compiler and level of optimization under which we are + // running. + static int strip_frames_; + // Whether we have recorded our first allocation. This is used to + // distinguish the magic first call of RecordAlloc that sets strip_frames_ + static bool done_first_alloc_; + // Location of stack pointer in Init(). Also used to help determine + // strip_frames_. + static void* recordalloc_reference_stack_position_; + + // Global lock for profile structure + static void Lock(); + static void Unlock(); + + private: // functions + + // Own heap profiler's internal allocation mechanism + static void* Malloc(size_t bytes); + static void Free(void* p); + // Helper for HeapProfilerDump: + // second_prefix is not NULL when the dumped profile + // is to be named differently for leaks checking + static void DumpLocked(const char *reason, const char* second_prefix); + + private: // helpers of heap-checker.cc + + // If "ptr" points to a heap object; + // we also fill alloc_value for this object then. + // If yes, we might move "ptr" to point to the very start of the object + // (this needs to happen for C++ class array allocations + // and for basic_string-s of C++ library that comes with gcc 3.4). + static bool HaveOnHeap(void** ptr, AllocValue* alloc_value); + static bool HaveOnHeapLocked(void** ptr, AllocValue* alloc_value); + + private: // helpers of heap-profiler.cc + + // Get bucket for current stack trace (skip "skip_count" most recent frames) + static Bucket* GetBucket(int skip_count); + static int UnparseBucket(char* buf, int buflen, int bufsize, Bucket* b); + static void RecordAlloc(void* ptr, size_t bytes, int skip_count); + static void RecordFree(void* ptr); + static void RecordFreeLocked(void* ptr); + // Activates profile collection before profile dumping. + // Can be called before global object constructors. 
+ static void EarlyStartLocked(); + static void StartLocked(const char* prefix); + static void StopLocked(); + static void NewHook(void* ptr, size_t size); + static void DeleteHook(void* ptr); + static void MmapHook(void* result, + void* start, size_t size, + int prot, int flags, + int fd, off_t offset); + static void MunmapHook(void* ptr, size_t size); + + private: // intended users + + friend class HeapLeakChecker; + friend void HeapProfilerStart(const char* prefix); + friend void HeapProfilerStop(); + friend void HeapProfilerDump(const char *reason); + friend char* GetHeapProfile(); + + public: + + // printing messages without using malloc + // Message levels (levels <= 0 are printed by default): + // -1 Errors + // 0 Normal informational reports + // 1 Stuff users won't usually care about + static void MESSAGE(int logging_level, const char* format, ...) +#ifdef _HAVE___ATTRIBUTE__ + __attribute__ ((__format__ (__printf__, 2, 3))) +#endif +; + + // Module initialization + static void Init(); + + // Are we running? + static bool IsOn() { return is_on_; } +}; + +#endif // BASE_HEAP_PROFILER_INL_H__ diff --git a/src/heap-profiler.cc b/src/heap-profiler.cc new file mode 100644 index 0000000..2f476a8 --- /dev/null +++ b/src/heap-profiler.cc @@ -0,0 +1,905 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// --- +// Author: Sanjay Ghemawat +// +// TODO: Log large allocations + +#include <google/perftools/config.h> + +#include <malloc.h> +#include <unistd.h> +#include <fcntl.h> +#include <glob.h> +#include <stdarg.h> +#include <sys/mman.h> +#include <stdarg.h> +#include <errno.h> +#include <assert.h> + +#include <algorithm> +#include <string> +#include <iostream> +#include <map> +#include <google/perftools/hash_set.h> + +#include <google/heap-profiler.h> +#include <google/stacktrace.h> +#include <google/malloc_hook.h> +#include <google/perftools/basictypes.h> + +#include "heap-profiler-inl.h" +#include "internal_spinlock.h" +#include "addressmap-inl.h" + +#include "base/logging.h" +#include "base/googleinit.h" +#include "base/commandlineflags.h" + +#ifdef HAVE_INTTYPES_H +#define __STDC_FORMAT_MACROS +#include <inttypes.h> +#define LLD PRId64 // how to write 64-bit numbers +#else +#define LLD "lld" // hope for the best +#endif + +#define LOGF STL_NAMESPACE::cout // where we log to; LOGF is a historical name + +using HASH_NAMESPACE::hash_set; +using std::string; +using std::sort; + +//---------------------------------------------------------------------- +// Flags that control heap-profiling +//---------------------------------------------------------------------- + +DEFINE_string(heap_profile, "", + "If non-empty, turn heap-profiling on, and dump heap " + "profiles to a sequence of files prefixed with the " + "specified --heap_profile string."); +DEFINE_int64(heap_profile_allocation_interval, 1 << 30 /*1GB*/, + "Dump heap profiling information once every specified " + "number of bytes allocated by the program."); +DEFINE_int64(heap_profile_inuse_interval, 100 << 20 /*100MB*/, + "Dump heap profiling information whenever the high-water " + "memory usage mark increases by the specified number of " + "bytes."); +DEFINE_bool(mmap_log, false, "Should mmap/munmap calls be logged?"); +DEFINE_bool(mmap_profile, false, "If heap-profiling on, also profile mmaps"); +DEFINE_int32(heap_profile_log, 0, + "Logging level for heap profiler/checker messages"); + +// Prefix to which we dump heap profiles. If empty, we do not dump. +// Default: empty +void HeapProfilerSetDumpPath(const char* path) { + if (HeapProfiler::IsOn()) { + HeapProfiler::MESSAGE(-1, + "Cannot set dump path to %s, heap profiler is already running!\n", + path); + } else { + FLAGS_heap_profile = path; + } +} + +// Level of logging used by the heap profiler and heap checker (if applicable) +// Default: 0 +void HeapProfilerSetLogLevel(int level) { + FLAGS_heap_profile_log = level; +} + +// Dump heap profiling information once every specified number of bytes +// allocated by the program. Default: 1GB +void HeapProfilerSetAllocationInterval(int64 interval) { + FLAGS_heap_profile_allocation_interval = interval; +} + +// Dump heap profiling information whenever the high-water +// memory usage mark increases by the specified number of +// bytes. Default: 100MB +void HeapProfilerSetInuseInterval(int64 interval) { + FLAGS_heap_profile_inuse_interval = interval; +} + +//---------------------------------------------------------------------- +// For printing messages without using malloc +//---------------------------------------------------------------------- + +void HeapProfiler::MESSAGE(int level, const char* format, ...) 
{ + if (FLAGS_heap_profile_log < level) return; + + // We write directly to the stderr file descriptor and avoid FILE + // buffering because that may invoke malloc() + va_list ap; + va_start(ap, format); + char buf[500]; + vsnprintf(buf, sizeof(buf), format, ap); + write(STDERR_FILENO, buf, strlen(buf)); +} + +//---------------------------------------------------------------------- +// Simple allocator +//---------------------------------------------------------------------- + +class HeapProfilerMemory { + private: + // Default unit of allocation from system + static const int kBlockSize = 1 << 20; + + // Maximum number of blocks we can allocate + static const int kMaxBlocks = 1024; + + // Info kept per allocated block + struct Block { + void* ptr; + size_t size; + }; + + // Alignment + union AlignUnion { double d; void* p; int64 i; size_t s; }; + static const int kAlignment = sizeof(AlignUnion); + + Block blocks_[kMaxBlocks]; // List of allocated blocks + int nblocks_; // # of allocated blocks + char* current_; // Current block + int pos_; // Position in current block + + // Allocate a block with the specified size + void* AllocBlock(size_t size) { + // Round size upto a multiple of the page size + const size_t pagesize = getpagesize(); + size = ((size + pagesize -1 ) / pagesize) * pagesize; + + HeapProfiler::MESSAGE(0, "HeapProfiler: allocating %"PRIuS + " bytes for internal use\n", size); + if (nblocks_ == kMaxBlocks) { + HeapProfiler::MESSAGE(-1, "HeapProfilerMemory: Alloc out of memory\n"); + abort(); + } + + // Disable mmap hooks while calling mmap here to avoid recursive calls + MallocHook::MmapHook saved = MallocHook::SetMmapHook(NULL); + void* ptr = mmap(NULL, size, + PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, + -1, 0); + MallocHook::SetMmapHook(saved); + + if (ptr == reinterpret_cast<void*>(MAP_FAILED)) { + HeapProfiler::MESSAGE(-1, "HeapProfilerMemory: mmap %"PRIuS": %s\n", + size, strerror(errno)); + abort(); + } + blocks_[nblocks_].ptr = ptr; + blocks_[nblocks_].size = size; + return ptr; + } + + public: + void Init() { + nblocks_ = 0; + current_ = NULL; + pos_ = kBlockSize; + } + + void Clear() { + // Disable munmap hooks while calling mmap here to avoid recursive calls + MallocHook::MunmapHook saved = MallocHook::SetMunmapHook(NULL); + for (int i = 0; i < nblocks_; ++i) { + if (munmap(blocks_[i].ptr, blocks_[i].size) != 0) { + HeapProfiler::MESSAGE(-1, "HeapProfilerMemory: munmap: %s\n", + strerror(errno)); + abort(); + } + } + MallocHook::SetMunmapHook(saved); + + nblocks_ = 0; + current_ = NULL; + pos_ = kBlockSize; + } + + void* Alloc(size_t bytes) { + if (bytes >= kBlockSize / 8) { + // Too big for piecemeal allocation + return AllocBlock(bytes); + } else { + if (pos_ + bytes > kBlockSize) { + current_ = reinterpret_cast<char*>(AllocBlock(kBlockSize)); + pos_ = 0; + } + void* result = current_ + pos_; + pos_ = (pos_ + bytes + kAlignment - 1) & ~(kAlignment-1); + return result; + } + } +}; +static HeapProfilerMemory heap_profiler_memory; +void* HeapProfiler::Malloc(size_t bytes) { + return heap_profiler_memory.Alloc(bytes); +} +void HeapProfiler::Free(void* p) { + // Do nothing -- all memory is released in one shot +} + +//---------------------------------------------------------------------- +// Locking code +//---------------------------------------------------------------------- + +// A pthread_mutex has way too much lock contention to be used here. +// In some applications we've run, pthread_mutex took >75% of the running +// time. 
+// I would like to roll our own mutex wrapper, but the obvious +// solutions can call malloc(), which can lead to infinite recursion. +// +// So we use a simple spinlock (just like the spinlocks used in tcmalloc) + +static TCMalloc_SpinLock heap_lock; +static struct timespec delay = { 0, 5000000 }; // Five milliseconds + +void HeapProfiler::Lock() { + heap_lock.Lock(); +} + +void HeapProfiler::Unlock() { + heap_lock.Unlock(); +} + + +//---------------------------------------------------------------------- +// Profile-maintenance code +//---------------------------------------------------------------------- + +typedef HeapProfiler::Bucket Bucket; + +bool HeapProfiler::is_on_ = false; +bool HeapProfiler::temp_disable_ = false; +pthread_t HeapProfiler::temp_disabled_tid_; +HeapProfiler::DisabledAddressesSet* HeapProfiler::disabled_addresses_ = NULL; +HeapProfiler::DisabledRangeMap* HeapProfiler::disabled_ranges_ = NULL; +bool HeapProfiler::dump_for_leaks_ = false; +bool HeapProfiler::dumping_ = false; +Bucket HeapProfiler::total_; +Bucket HeapProfiler::disabled_; +Bucket HeapProfiler::profile_; +char* HeapProfiler::filename_prefix_ = NULL; + +// Hash-table: we hand-craft one instead of using one of the pre-written +// ones because we do not want to use malloc when operating on the table. +// It is only five lines of code, so no big deal. +static const int kHashTableSize = 179999; +static Bucket** table = NULL; +HeapProfiler::AllocationMap* HeapProfiler::allocation_ = NULL; + +static int num_buckets = 0; +static int total_stack_depth = 0; +static int dump_count = 0; // How many dumps so far +static int64 last_dump = 0; // When did we last dump +static int64 high_water_mark = 0; // In-use-bytes at last high-water dump + +int HeapProfiler::strip_frames_ = 0; +bool HeapProfiler::done_first_alloc_ = false; +void* HeapProfiler::recordalloc_reference_stack_position_ = NULL; + +// For sorting buckets by in-use space +static bool ByAllocatedSpace(Bucket* a, Bucket* b) { + // Return true iff "a" has more allocated space than "b" + return (a->alloc_size_ - a->free_size_) > (b->alloc_size_ - b->free_size_); +} + +// We return the amount of space in buf that we use. We start printing +// at buf + buflen, and promise not to go beyond buf + bufsize. 
+int HeapProfiler::UnparseBucket(char* buf, int buflen, int bufsize, Bucket* b) { + // do not dump the address-disabled allocations + if (dump_for_leaks_ && (disabled_addresses_ || disabled_ranges_)) { + bool disable = false; + for (int depth = 0; !disable && depth < b->depth_; depth++) { + uintptr_t addr = reinterpret_cast<uintptr_t>(b->stack_[depth]); + if (disabled_addresses_ && + disabled_addresses_->find(addr) != disabled_addresses_->end()) { + disable = true; // found; dropping + } + if (disabled_ranges_) { + DisabledRangeMap::const_iterator iter + = disabled_ranges_->lower_bound(addr); + if (iter != disabled_ranges_->end()) { + assert(iter->first > addr); + if (iter->second.start_address < addr && + iter->second.max_depth > depth) { + disable = true; // in range; dropping + } + } + } + } + if (disable) { + disabled_.allocs_ += b->allocs_; + disabled_.alloc_size_ += b->alloc_size_; + disabled_.frees_ += b->frees_; + disabled_.free_size_ += b->free_size_; + return buflen; + } + } + // count non-disabled allocations for leaks checking + profile_.allocs_ += b->allocs_; + profile_.alloc_size_ += b->alloc_size_; + profile_.frees_ += b->frees_; + profile_.free_size_ += b->free_size_; + int printed = + snprintf(buf + buflen, bufsize - buflen, "%6d: %8"LLD" [%6d: %8"LLD"] @", + b->allocs_ - b->frees_, + b->alloc_size_ - b->free_size_, + b->allocs_, + b->alloc_size_); + // If it looks like the snprintf failed, ignore the fact we printed anything + if (printed < 0 || printed >= bufsize - buflen) return buflen; + buflen += printed; + for (int d = 0; d < b->depth_; d++) { + printed = snprintf(buf + buflen, bufsize - buflen, " 0x%08lx", + (unsigned long)b->stack_[d]); + if (printed < 0 || printed >= bufsize - buflen) return buflen; + buflen += printed; + } + printed = snprintf(buf + buflen, bufsize - buflen, "\n"); + if (printed < 0 || printed >= bufsize - buflen) return buflen; + buflen += printed; + return buflen; +} + +char* GetHeapProfile() { + // We used to be smarter about estimating the required memory and + // then capping it to 1MB and generating the profile into that. + // However it should not cost us much to allocate 1MB every time. + static const int size = 1 << 20; + char* buf = reinterpret_cast<char*>(malloc(size)); + if (buf == NULL) { + return NULL; + } + + // Grab the lock and generate the profile + // (for leak checking the lock is acquired higher up). + if (!HeapProfiler::dump_for_leaks_) HeapProfiler::Lock(); + if (HeapProfiler::is_on_) { + // Get all buckets and sort + assert(table != NULL); + Bucket* list[num_buckets]; + int n = 0; + for (int b = 0; b < kHashTableSize; b++) { + for (Bucket* x = table[b]; x != 0; x = x->next_) { + list[n++] = x; + } + } + assert(n == num_buckets); + sort(list, list + num_buckets, ByAllocatedSpace); + + int buflen = snprintf(buf, size-1, "heap profile: "); + buflen = + HeapProfiler::UnparseBucket(buf, buflen, size-1, &HeapProfiler::total_); + memset(&HeapProfiler::profile_, 0, sizeof(HeapProfiler::profile_)); + memset(&HeapProfiler::disabled_, 0, sizeof(HeapProfiler::disabled_)); + for (int i = 0; i < num_buckets; i++) { + Bucket* b = list[i]; + buflen = HeapProfiler::UnparseBucket(buf, buflen, size-1, b); + } + assert(buflen < size); + buf[buflen] = '\0'; + } + if (!HeapProfiler::dump_for_leaks_) HeapProfiler::Unlock(); + + return buf; +} + +// We keep HeapProfile() as a backwards-compatible name for GetHeapProfile(), +// but don't export the symbol, so you probably won't be able to call this. 
+extern char* HeapProfile() { + return GetHeapProfile(); +} + +// second_prefix is not NULL when the dumped profile +// is to be named differently for leaks checking +void HeapProfiler::DumpLocked(const char *reason, const char* second_prefix) { + assert(is_on_); + + if (filename_prefix_ == NULL) return; + // we are not yet ready for dumping + + dumping_ = true; + + // Make file name + char fname[1000]; + if (second_prefix == NULL) { + dump_count++; + snprintf(fname, sizeof(fname), "%s.%04d.heap", + filename_prefix_, dump_count); + } else { + snprintf(fname, sizeof(fname), "%s.%s.heap", + filename_prefix_, second_prefix); + } + + // Release allocation lock around the meat of this routine + // when not leak checking thus not blocking other threads too much, + // but for leak checking we want to hold the lock to prevent heap activity. + if (!dump_for_leaks_) HeapProfiler::Unlock(); + { + // Dump the profile + HeapProfiler::MESSAGE(dump_for_leaks_ ? 1 : 0, + "HeapProfiler: " + "Dumping heap profile to %s (%s)\n", + fname, reason); + FILE* f = fopen(fname, "w"); + if (f != NULL) { + const char* profile = HeapProfile(); + fputs(profile, f); + free(const_cast<char*>(profile)); + + // Dump "/proc/self/maps" so we get list of mapped shared libraries + fputs("\nMAPPED_LIBRARIES:\n", f); + int maps = open("/proc/self/maps", O_RDONLY); + if (maps >= 0) { + char buf[100]; + ssize_t r; + while ((r = read(maps, buf, sizeof(buf))) > 0) { + fwrite(buf, 1, r, f); + } + close(maps); + } + + fclose(f); + f = NULL; + } else { + HeapProfiler::MESSAGE(0, "HeapProfiler: " + "FAILED Dumping heap profile to %s (%s)\n", + fname, reason); + if (dump_for_leaks_) abort(); // no sense to continue + } + } + + if (!dump_for_leaks_) HeapProfiler::Lock(); + + dumping_ = false; +} + +void HeapProfilerDump(const char *reason) { + if (HeapProfiler::is_on_ && (num_buckets > 0)) { + + HeapProfiler::Lock(); + if(!HeapProfiler::dumping_) { + HeapProfiler::DumpLocked(reason, NULL); + } + HeapProfiler::Unlock(); + } +} + +// This is the number of bytes allocated by the first call to malloc() after +// registering this handler. We want to sanity check that our first call is +// actually for this number of bytes. +static const int kFirstAllocationNumBytes = 23; + +void HeapProfiler::RecordAlloc(void* ptr, size_t bytes, int skip_count) { + // Our first allocation is triggered in EarlyStartLocked and is intended + // solely to calibrate strip_frames_, which may be greater or smaller + // depending on the degree of optimization with which we were compiled. + if (!done_first_alloc_) { + done_first_alloc_ = true; + assert(bytes == kFirstAllocationNumBytes); + assert(strip_frames_ == 0); + + static const int kMaxStackTrace = 32; + void* stack[kMaxStackTrace]; + // We skip one frame here so that it's as if we are running from NewHook, + // which is where strip_frames_ is used. + int depth = GetStackTrace(stack, kMaxStackTrace, 1); + + int i; + for (i = 0; i < depth; i++) { + if (stack[i] == recordalloc_reference_stack_position_) { + MESSAGE(-1, "Determined strip_frames_ to be %d\n", i - 1); + // Subtract one to offset the fact that + // recordalloc_reference_stack_position_ actually records the stack + // position one frame above the spot in EarlyStartLocked where we are + // called from. + strip_frames_ = i - 1; + } + } + // Fell through the loop without finding our parent + if (strip_frames_ == 0) { + MESSAGE(0, "Could not determine strip_frames_, aborting"); + abort(); + } + + // Return without recording the allocation. 
We will free the memory before + // registering a DeleteHook. + return; + } + + // this locking before if (is_on_ ...) + // is not an overhead because with profiling off + // this hook is not called at all. + + // Uncomment for debugging: + // HeapProfiler::MESSAGE(7, "HeapProfiler: Alloc %p : %"PRIuS"\n", + // ptr, bytes); + + if (temp_disable_ && temp_disabled_tid_ == pthread_self()) return; + HeapProfiler::Lock(); + if (is_on_) { + Bucket* b = GetBucket(skip_count+1); + b->allocs_++; + b->alloc_size_ += bytes; + total_.allocs_++; + total_.alloc_size_ += bytes; + + AllocValue v; + v.bucket = b; + v.bytes = bytes; + allocation_->Insert(ptr, v); + + const int64 inuse_bytes = total_.alloc_size_ - total_.free_size_; + if (!dumping_) { + bool need_dump = false; + char buf[128]; + if(total_.alloc_size_ >= + last_dump + FLAGS_heap_profile_allocation_interval) { + snprintf(buf, sizeof(buf), "%"LLD" MB allocated", + total_.alloc_size_ >> 20); + // Track that we made a "total allocation size" dump + last_dump = total_.alloc_size_; + need_dump = true; + } else if(inuse_bytes > + high_water_mark + FLAGS_heap_profile_inuse_interval) { + sprintf(buf, "%"LLD" MB in use", inuse_bytes >> 20); + // Track that we made a "high water mark" dump + high_water_mark = inuse_bytes; + need_dump = true; + } + + if (need_dump) { + // Dump profile + DumpLocked(buf, NULL); + } + } + } + HeapProfiler::Unlock(); +} + +void HeapProfiler::RecordFreeLocked(void* ptr) { + assert(is_on_); + AllocValue v; + if (allocation_->FindAndRemove(ptr, &v)) { + Bucket* b = v.bucket; + b->frees_++; + b->free_size_ += v.bytes; + total_.frees_++; + total_.free_size_ += v.bytes; + } +} + +void HeapProfiler::RecordFree(void* ptr) { + // All activity before if (is_on_) + // is not an overhead because with profiling turned off this hook + // is not called at all. + + // Uncomment for debugging: + // HeapProfiler::MESSAGE(7, "HeapProfiler: Free %p\n", ptr); + + if (temp_disable_ && temp_disabled_tid_ == pthread_self()) return; + HeapProfiler::Lock(); + if (is_on_) RecordFreeLocked(ptr); + HeapProfiler::Unlock(); +} + +bool HeapProfiler::HaveOnHeapLocked(void** ptr, AllocValue* alloc_value) { + assert(is_on_); + // Size of the C++ object array size integer + // (potentially compiler/dependent; 4 on i386 and gcc) + const int kArraySizeOffset = sizeof(int); + // sizeof(basic_string<...>::_Rep) for C++ library of gcc 3.4 + // (basically three integer counters; + // library/compiler dependent; 12 on i386 and gcc) + const int kStringOffset = sizeof(int) * 3; + // NOTE: One can add more similar offset cases below + // even when they do not happen for the used compiler/library; + // all that's impacted is + // - HeapLeakChecker's performace during live heap walking + // - and a slightly greater chance to mistake random memory bytes + // for a pointer and miss a leak in a particular run of a binary. 
+ bool result = true; + if (allocation_->Find(*ptr, alloc_value)) { + // done + } else if (allocation_->Find(reinterpret_cast<char*>(*ptr) + - kArraySizeOffset, + alloc_value) && + alloc_value->bytes > kArraySizeOffset) { + // this case is to account for the array size stored inside of + // the memory allocated by new FooClass[size] for classes with destructors + *ptr = reinterpret_cast<char*>(*ptr) - kArraySizeOffset; + } else if (allocation_->Find(reinterpret_cast<char*>(*ptr) + - kStringOffset, + alloc_value) && + alloc_value->bytes > kStringOffset) { + // this case is to account for basic_string<> representation in + // newer C++ library versions when the kept pointer points to inside of + // the allocated region + *ptr = reinterpret_cast<char*>(*ptr) - kStringOffset; + } else { + result = false; + } + return result; +} + +bool HeapProfiler::HaveOnHeap(void** ptr, AllocValue* alloc_value) { + HeapProfiler::Lock(); + bool result = is_on_ && HaveOnHeapLocked(ptr, alloc_value); + HeapProfiler::Unlock(); + return result; +} + +//---------------------------------------------------------------------- +// Allocation/deallocation hooks +//---------------------------------------------------------------------- + +void HeapProfiler::NewHook(void* ptr, size_t size) { + if (ptr != NULL) RecordAlloc(ptr, size, strip_frames_); +} + +void HeapProfiler::DeleteHook(void* ptr) { + if (ptr != NULL) RecordFree(ptr); +} + +void HeapProfiler::MmapHook(void* result, + void* start, size_t size, + int prot, int flags, + int fd, off_t offset) { + // Log the mmap if necessary + if (FLAGS_mmap_log) { + char buf[200]; + snprintf(buf, sizeof(buf), + "mmap(start=%p, len=%"PRIuS", prot=0x%x, flags=0x%x, " + "fd=%d, offset=0x%x) = %p", + start, size, prot, flags, fd, (unsigned int) offset, + result); + LOGF << buf; + // TODO(jandrews): Re-enable stack tracing + //DumpStackTrace(1, DebugWriteToStream, &LOG(INFO)); + } + + // Record mmap in profile if appropriate + if (result != (void*) MAP_FAILED && + FLAGS_mmap_profile && + is_on_) { + + RecordAlloc(result, size, strip_frames_); + } +} + +void HeapProfiler::MunmapHook(void* ptr, size_t size) { + if (FLAGS_mmap_profile && is_on_) { + RecordFree(ptr); + } + if (FLAGS_mmap_log) { + char buf[200]; + snprintf(buf, sizeof(buf), "munmap(start=%p, len=%"PRIuS")", ptr, size); + LOGF << buf; + } +} + +//---------------------------------------------------------------------- +// Profiler maintenance +//---------------------------------------------------------------------- + +Bucket* HeapProfiler::GetBucket(int skip_count) { + // Get raw stack trace + static const int kMaxStackTrace = 32; + void* key[kMaxStackTrace]; + int depth = GetStackTrace(key, kMaxStackTrace, skip_count+1); + + // Make hash-value + uintptr_t h = 0; + for (int i = 0; i < depth; i++) { + uintptr_t pc = reinterpret_cast<uintptr_t>(key[i]); + h = (h << 8) | (h >> (8*(sizeof(h)-1))); + h += (pc * 31) + (pc * 7) + (pc * 3); + } + + // Lookup stack trace in table + const size_t key_size = sizeof(key[0]) * depth; + unsigned int buck = ((unsigned int) h) % kHashTableSize; + for (Bucket* b = table[buck]; b != 0; b = b->next_) { + if ((b->hash_ == h) && + (b->depth_ == depth) && + (memcmp(b->stack_, key, key_size) == 0)) { + return b; + } + } + + // Create new bucket + void** kcopy = reinterpret_cast<void**>(Malloc(key_size)); + memcpy(kcopy, key, key_size); + Bucket* b = reinterpret_cast<Bucket*>(Malloc(sizeof(Bucket))); + memset(b, 0, sizeof(*b)); + b->hash_ = h; + b->depth_ = depth; + b->stack_ = kcopy; + b->next_ = 
table[buck];
+ table[buck] = b;
+ num_buckets++;
+ total_stack_depth += depth;
+ return b;
+}
+
+void HeapProfiler::EarlyStartLocked() {
+ assert(!is_on_);
+
+ // GNU libc++ versions 3.3 and 3.4 obey the environment variables
+ // GLIBCPP_FORCE_NEW and GLIBCXX_FORCE_NEW respectively. Setting one of
+ // these variables forces the STL default allocator to call new() or delete()
+ // for each allocation or deletion. Otherwise the STL allocator tries to
+ // avoid the high cost of doing allocations by pooling memory internally.
+ // This STL pool makes it impossible to get an accurate heap profile.
+ // Luckily, our tcmalloc implementation gives us similar performance
+ // characteristics *and* allows us to profile accurately.
+ setenv("GLIBCPP_FORCE_NEW", "1", false /* no overwrite*/);
+ setenv("GLIBCXX_FORCE_NEW", "1", false /* no overwrite*/);
+
+ heap_profiler_memory.Init();
+
+ is_on_ = true;
+ if (temp_disable_) abort();
+ filename_prefix_ = NULL;
+
+ // Make the table
+ const int table_bytes = kHashTableSize * sizeof(Bucket*);
+ table = reinterpret_cast<Bucket**>(Malloc(table_bytes));
+ memset(table, 0, table_bytes);
+
+ // Make allocation map
+ void* aptr = Malloc(sizeof(AllocationMap));
+ allocation_ = new (aptr) AllocationMap(Malloc, Free);
+
+ memset(&total_, 0, sizeof(total_));
+ num_buckets = 0;
+ total_stack_depth = 0;
+ last_dump = 0;
+ // We do not reset dump_count so if the user does a sequence of
+ // HeapProfilerStart/HeapProfilerStop, we will get a continuous
+ // sequence of profiles.
+
+ // Now set the hooks that capture mallocs/frees
+ MallocHook::SetNewHook(NewHook);
+
+ // Our first allocation after registering our hook is treated specially by
+ // RecordAlloc(); it looks at the stack and counts how many frames up we
+ // are. First we record the current stack pointer.
+ void* here[1];
+ GetStackTrace(here, 1, 0);
+ // This actually records the frame above this one. We take this into account
+ // in RecordAlloc.
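For reference, a standalone sketch of the stack-trace hashing scheme that GetBucket() above uses to index its hand-built bucket table; HashStackTrace and the dummy frames are hypothetical names, while the rotate-and-mix arithmetic and the 179999-entry table size are taken from the code.

#include <stdint.h>
#include <stdio.h>

// Hash a raw stack trace the way GetBucket() does: rotate the accumulator
// left by one byte, then mix in each program counter.
static uintptr_t HashStackTrace(void* const* stack, int depth) {
  uintptr_t h = 0;
  for (int i = 0; i < depth; i++) {
    uintptr_t pc = reinterpret_cast<uintptr_t>(stack[i]);
    h = (h << 8) | (h >> (8 * (sizeof(h) - 1)));
    h += (pc * 31) + (pc * 7) + (pc * 3);
  }
  return h;
}

static void DummyFrameA() {}
static void DummyFrameB() {}

int main() {
  // Stand-ins for the PCs that GetStackTrace() would return.
  void* trace[2] = { reinterpret_cast<void*>(&DummyFrameA),
                     reinterpret_cast<void*>(&DummyFrameB) };
  printf("bucket index: %u\n",
         (unsigned int)(HashStackTrace(trace, 2) % 179999));
  return 0;
}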
+ recordalloc_reference_stack_position_ = here[0]; + done_first_alloc_ = false; // Initialization has not occured yet + void* first_alloc = malloc(kFirstAllocationNumBytes); + free(first_alloc); + + MallocHook::SetDeleteHook(DeleteHook); + + HeapProfiler::MESSAGE(0, "HeapProfiler: Starting heap tracking\n"); +} + +void HeapProfiler::StartLocked(const char* prefix) { + assert(filename_prefix_ == NULL); + + if (!is_on_) EarlyStartLocked(); + + // Copy filename prefix + const int prefix_length = strlen(prefix); + filename_prefix_ = reinterpret_cast<char*>(Malloc(prefix_length + 1)); + memcpy(filename_prefix_, prefix, prefix_length); + filename_prefix_[prefix_length] = '\0'; +} + +void HeapProfiler::StopLocked() { + assert(is_on_); + MallocHook::SetNewHook(NULL); + MallocHook::SetDeleteHook(NULL); + + // Get rid of all memory we allocated + heap_profiler_memory.Clear(); + + table = NULL; + filename_prefix_ = NULL; + allocation_ = NULL; + is_on_ = false; +} + +void HeapProfilerStart(const char* prefix) { + HeapProfiler::Lock(); + if (HeapProfiler::filename_prefix_ == NULL) { + HeapProfiler::StartLocked(prefix); + } + HeapProfiler::Unlock(); +} + +void HeapProfilerStop() { + HeapProfiler::Lock(); + if (HeapProfiler::is_on_) HeapProfiler::StopLocked(); + HeapProfiler::Unlock(); +} + +//---------------------------------------------------------------------- +// Initialization/finalization code +//---------------------------------------------------------------------- + +// helper function for HeapProfiler::Init() +inline static bool GlobOk(int r) { + return r == 0 || r == GLOB_NOMATCH; +} + +// Initialization code +void HeapProfiler::Init() { + if (FLAGS_mmap_profile || FLAGS_mmap_log) { + MallocHook::SetMmapHook(MmapHook); + MallocHook::SetMunmapHook(MunmapHook); + } + + if (FLAGS_heap_profile.empty()) return; + + // Cleanup any old profile files + string pattern = FLAGS_heap_profile + ".[0-9][0-9][0-9][0-9].heap"; + glob_t g; + const int r = glob(pattern.c_str(), GLOB_ERR, NULL, &g); + pattern = FLAGS_heap_profile + ".*-beg.heap"; + const int r2 = glob(pattern.c_str(), GLOB_ERR|GLOB_APPEND, NULL, &g); + pattern = FLAGS_heap_profile + ".*-end.heap"; + const int r3 = glob(pattern.c_str(), GLOB_ERR|GLOB_APPEND, NULL, &g); + if (GlobOk(r) && GlobOk(r2) && GlobOk(r3)) { + const int prefix_length = FLAGS_heap_profile.size(); + for (int i = 0; i < g.gl_pathc; i++) { + const char* fname = g.gl_pathv[i]; + if ((strlen(fname) >= prefix_length) && + (memcmp(fname, FLAGS_heap_profile.data(), prefix_length) == 0)) { + HeapProfiler::MESSAGE(0, "HeapProfiler: " + "Removing old profile %s\n", fname); + unlink(fname); + } + } + } + globfree(&g); + + HeapProfilerStart(FLAGS_heap_profile.c_str()); +} + +// class used for finalization -- dumps the heap-profile at program exit +class HeapProfileEndWriter { + public: + ~HeapProfileEndWriter() { + HeapProfilerDump("Exiting"); + } +}; + +REGISTER_MODULE_INITIALIZER(heapprofile, HeapProfiler::Init()); +static HeapProfileEndWriter heap_profile_end_writer; diff --git a/src/internal_logging.cc b/src/internal_logging.cc new file mode 100644 index 0000000..16b040e --- /dev/null +++ b/src/internal_logging.cc @@ -0,0 +1,64 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Sanjay Ghemawat <opensource@google.com> + +#include <stdio.h> +#include <stdarg.h> +#include <string.h> +#include "internal_logging.h" + +int TCMallocDebug::level; + +void TCMalloc_MESSAGE(const char* format, ...) { + va_list ap; + va_start(ap, format); + char buf[800]; + vsnprintf(buf, sizeof(buf), format, ap); + write(STDERR_FILENO, buf, strlen(buf)); +} + +void TCMalloc_Printer::printf(const char* format, ...) { + if (left_ > 0) { + va_list ap; + va_start(ap, format); + const int r = vsnprintf(buf_, left_, format, ap); + if (r < 0) { + // Perhaps an old glibc that returns -1 on truncation? + left_ = 0; + } else if (r > left_) { + // Truncation + left_ = 0; + } else { + left_ -= r; + buf_ += r; + } + } +} diff --git a/src/internal_logging.h b/src/internal_logging.h new file mode 100644 index 0000000..b5a721e --- /dev/null +++ b/src/internal_logging.h @@ -0,0 +1,102 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat <opensource@google.com> +// +// Internal logging and related utility routines. + +#ifndef TCMALLOC_INTERNAL_LOGGING_H__ +#define TCMALLOC_INTERNAL_LOGGING_H__ + +#include "google/perftools/config.h" +#include <stdarg.h> +#include <stdlib.h> +#include <unistd.h> + +//------------------------------------------------------------------- +// Utility routines +//------------------------------------------------------------------- + +struct TCMallocDebug { + static int level; + + enum { kNone, kInfo, kVerbose }; +}; + +// Safe debugging routine: we write directly to the stderr file +// descriptor and avoid FILE buffering because that may invoke +// malloc() +extern void TCMalloc_MESSAGE(const char* format, ...) +#ifdef HAVE___ATTRIBUTE__ + __attribute__ ((__format__ (__printf__, 1, 2))) +#endif +; + +// Short form for convenience +#define MESSAGE TCMalloc_MESSAGE + +// Like assert(), but executed even in NDEBUG mode +#undef CHECK_CONDITION +#define CHECK_CONDITION(cond) \ +do { \ + if (!(cond)) { \ + MESSAGE("%s:%d: assertion failed: %s\n", __FILE__, __LINE__, #cond); \ + abort(); \ + } \ +} while (0) + +// Our own version of assert() so we can avoid hanging by trying to do +// all kinds of goofy printing while holding the malloc lock. +#ifndef NDEBUG +#define ASSERT(cond) CHECK_CONDITION(cond) +#else +#define ASSERT(cond) ((void) 0) +#endif + +// Print into buffer +class TCMalloc_Printer { + private: + char* buf_; // Where should we write next + int left_; // Space left in buffer (including space for \0) + + public: + // REQUIRES: "length > 0" + TCMalloc_Printer(char* buf, int length) : buf_(buf), left_(length) { + buf[0] = '\0'; + } + + void printf(const char* format, ...) +#ifdef HAVE___ATTRIBUTE__ + __attribute__ ((__format__ (__printf__, 2, 3))) +#endif +; +}; + +#endif // TCMALLOC_INTERNAL_LOGGING_H__ diff --git a/src/internal_spinlock.h b/src/internal_spinlock.h new file mode 100644 index 0000000..75e3bba --- /dev/null +++ b/src/internal_spinlock.h @@ -0,0 +1,151 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat <opensource@google.com> + +#ifndef TCMALLOC_INTERNAL_SPINLOCK_H__ +#define TCMALLOC_INTERNAL_SPINLOCK_H__ + +#include "google/perftools/config.h" +#include <time.h> /* For nanosleep() */ +#include <sched.h> /* For sched_yield() */ +#if defined HAVE_STDINT_H +#include <stdint.h> +#elif defined HAVE_INTTYPES_H +#include <inttypes.h> +#else +#include <sys/types.h> +#endif +#include <stdlib.h> /* for abort() */ + +#if defined __i386__ && defined __GNUC__ + +static void TCMalloc_SlowLock(volatile unsigned int* lockword); + +// The following is a struct so that it can be initialized at compile time +struct TCMalloc_SpinLock { + volatile unsigned int private_lockword_; + + inline void Init() { private_lockword_ = 0; } + inline void Finalize() { } + + inline void Lock() { + int r; + __asm__ __volatile__ + ("xchgl %0, %1" + : "=r"(r), "=m"(private_lockword_) + : "0"(1), "m"(private_lockword_) + : "memory"); + if (r) TCMalloc_SlowLock(&private_lockword_); + } + + inline void Unlock() { + __asm__ __volatile__ + ("movl $0, %0" + : "=m"(private_lockword_) + : "m" (private_lockword_) + : "memory"); + } +}; + +#define SPINLOCK_INITIALIZER { 0 } + +static void TCMalloc_SlowLock(volatile unsigned int* lockword) { + sched_yield(); // Yield immediately since fast path failed + while (true) { + int r; + __asm__ __volatile__ + ("xchgl %0, %1" + : "=r"(r), "=m"(*lockword) + : "0"(1), "m"(*lockword) + : "memory"); + if (!r) { + return; + } + + // This code was adapted from the ptmalloc2 implementation of + // spinlocks which would sched_yield() upto 50 times before + // sleeping once for a few milliseconds. Mike Burrows suggested + // just doing one sched_yield() outside the loop and always + // sleeping after that. This change helped a great deal on the + // performance of spinlocks under high contention. A test program + // with 10 threads on a dual Xeon (four virtual processors) went + // from taking 30 seconds to 16 seconds. 
+ + // Sleep for a few milliseconds + struct timespec tm; + tm.tv_sec = 0; + tm.tv_nsec = 2000001; + nanosleep(&tm, NULL); + } +} + +#else + +#include <pthread.h> + +// Portable version +struct TCMalloc_SpinLock { + pthread_mutex_t private_lock_; + + inline void Init() { + if (pthread_mutex_init(&private_lock_, NULL) != 0) abort(); + } + inline void Finalize() { + if (pthread_mutex_destroy(&private_lock_) != 0) abort(); + } + inline void Lock() { + if (pthread_mutex_lock(&private_lock_) != 0) abort(); + } + inline void Unlock() { + if (pthread_mutex_unlock(&private_lock_) != 0) abort(); + } +}; + +#define SPINLOCK_INITIALIZER { PTHREAD_MUTEX_INITIALIZER } + +#endif + +// Corresponding locker object that arranges to acquire a spinlock for +// the duration of a C++ scope. +class TCMalloc_SpinLockHolder { + private: + TCMalloc_SpinLock* lock_; + public: + inline explicit TCMalloc_SpinLockHolder(TCMalloc_SpinLock* l) + : lock_(l) { l->Lock(); } + inline ~TCMalloc_SpinLockHolder() { lock_->Unlock(); } +}; + +// Short-hands for convenient use by tcmalloc.cc +typedef TCMalloc_SpinLock SpinLock; +typedef TCMalloc_SpinLockHolder SpinLockHolder; + +#endif // TCMALLOC_INTERNAL_SPINLOCK_H__ diff --git a/src/malloc_hook.cc b/src/malloc_hook.cc new file mode 100644 index 0000000..16710ee --- /dev/null +++ b/src/malloc_hook.cc @@ -0,0 +1,72 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat <opensource@google.com> + +#include <google/malloc_hook.h> +#include <google/perftools/basictypes.h> + +MallocHook::NewHook MallocHook::new_hook_ = NULL; +MallocHook::DeleteHook MallocHook::delete_hook_ = NULL; +MallocHook::MmapHook MallocHook::mmap_hook_ = NULL; +MallocHook::MunmapHook MallocHook::munmap_hook_ = NULL; + +// On Linux/x86, we override mmap/munmap and provide support for +// calling the related hooks. 
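The hook-invocation pattern used by these overrides can be shown in isolation. The sketch below is not part of this change and does not use the perftools API (whose setter functions live in google/malloc_hook.h and are not visible in this diff); all names in it are hypothetical. It illustrates the same idea the mmap() override below applies: a wrapper does the real work, then notifies an optional observer, just as the override calls MallocHook::InvokeMmapHook() after the raw system call.

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

// Hook type and slot: NULL means "no observer installed".
typedef void (*AllocObserver)(void* ptr, size_t size);
static AllocObserver g_alloc_observer = NULL;

// Wrapper: perform the real allocation, then invoke the hook if one is set.
static void* ObservedMalloc(size_t size) {
  void* p = malloc(size);
  if (g_alloc_observer != NULL) g_alloc_observer(p, size);
  return p;
}

static void PrintAllocation(void* ptr, size_t size) {
  fprintf(stderr, "allocated %lu bytes at %p\n", (unsigned long)size, ptr);
}

int main() {
  g_alloc_observer = PrintAllocation;   // install the observer
  void* p = ObservedMalloc(64);
  free(p);
  return 0;
}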
+#if defined(__i386__) && defined(__linux) + +#include <unistd.h> +#include <syscall.h> +#include <sys/mman.h> +#include <errno.h> + +extern "C" void* mmap(void *start, size_t length, + int prot, int flags, + int fd, off_t offset) __THROW { + // Old syscall interface cannot handle six args, so pass in an array + int32 args[6] = { (int32) start, length, prot, flags, fd, (off_t) offset }; + void* result = (void *)syscall(SYS_mmap, args); + MallocHook::InvokeMmapHook(result, start, length, prot, flags, fd, offset); + return result; +} + +extern "C" void* mmap64(void *start, size_t length, + int prot, int flags, + int fd, __off64_t offset) __THROW { + // TODO: Use 64 bit mmap2 system call if kernel is new enough + return mmap(start, length, prot, flags, fd, static_cast<off_t>(offset)); +} + +extern "C" int munmap(void* start, size_t length) __THROW { + MallocHook::InvokeMunmapHook(start, length); + return syscall(SYS_munmap, start, length); +} + +#endif diff --git a/src/malloc_interface.cc b/src/malloc_interface.cc new file mode 100644 index 0000000..eca3459 --- /dev/null +++ b/src/malloc_interface.cc @@ -0,0 +1,208 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
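The next file, malloc_interface.cc, supplies the default (do-nothing) MallocInterface implementation plus the Register()/instance() machinery that lets a concrete allocator install itself. A hypothetical caller could query whichever implementation is currently registered as sketched here; the property name is invented, and real implementations define their own, so the lookup may simply return false.

#include <stdio.h>
#include <google/malloc_interface.h>

void DumpMallocStats() {
  MallocInterface* mi = MallocInterface::instance();  // default or registered impl

  char buf[1024];
  mi->GetStats(buf, sizeof(buf));        // human-readable statistics
  printf("%s\n", buf);

  size_t value;
  if (mi->GetNumericProperty("some.property", &value)) {   // name is hypothetical
    printf("some.property = %lu\n", (unsigned long)value);
  }
}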
+ +// --- +// Author: Sanjay Ghemawat <opensource@google.com> + +#include "google/perftools/config.h" +#include <assert.h> +#include <string.h> +#include <pthread.h> +#include <stdio.h> +#if defined HAVE_STDINT_H +#include <stdint.h> +#elif defined HAVE_INTTYPES_H +#include <inttypes.h> +#else +#include <sys/types.h> +#endif +#include <string> +#include "google/perftools/hash_set.h" +#include "google/malloc_interface.h" + +using STL_NAMESPACE::string; + +// Default implementation -- does nothing +MallocInterface::~MallocInterface() { } +bool MallocInterface::VerifyAllMemory() { return true; } +bool MallocInterface::VerifyNewMemory(void* p) { return true; } +bool MallocInterface::VerifyArrayNewMemory(void* p) { return true; } +bool MallocInterface::VerifyMallocMemory(void* p) { return true; } + +bool MallocInterface::GetNumericProperty(const char* property, size_t* value) { + return false; +} + +bool MallocInterface::SetNumericProperty(const char* property, size_t value) { + return false; +} + +void MallocInterface::GetStats(char* buffer, int length) { + assert(length > 0); + buffer[0] = '\0'; +} + +bool MallocInterface::MallocMemoryStats(int* blocks, size_t* total, + int histogram[kMallocHistogramSize]) { + *blocks = 0; + *total = 0; + memset(histogram, sizeof(histogram), 0); + return true; +} + +void** MallocInterface::ReadStackTraces() { + return NULL; +} + +// The current malloc interface object. We also keep a pointer to +// the default implementation so that the heap-leak checker does not +// complain about a memory leak. + +static pthread_once_t module_init = PTHREAD_ONCE_INIT; +static MallocInterface* default_instance = NULL; +static MallocInterface* current_instance = NULL; + +static void InitModule() { + default_instance = new MallocInterface; + current_instance = default_instance; +} + +MallocInterface* MallocInterface::instance() { + pthread_once(&module_init, InitModule); + return current_instance; +} + +void MallocInterface::Register(MallocInterface* implementation) { + pthread_once(&module_init, InitModule); + current_instance = implementation; +} + +// ----------------------------------------------------------------------- +// Heap sampling support +// ----------------------------------------------------------------------- + +namespace { + +// Accessors +uintptr_t Count(void** entry) { + return reinterpret_cast<uintptr_t>(entry[0]); +} +uintptr_t Size(void** entry) { + return reinterpret_cast<uintptr_t>(entry[1]); +} +uintptr_t Depth(void** entry) { + return reinterpret_cast<uintptr_t>(entry[2]); +} +void* PC(void** entry, int i) { + return entry[3+i]; +} + +// Hash table routines for grouping all entries with same stack trace +struct StackTraceHash { + size_t operator()(void** entry) const { + uintptr_t h = 0; + for (int i = 0; i < Depth(entry); i++) { + uintptr_t pc = reinterpret_cast<uintptr_t>(PC(entry, i)); + h = (h << 8) | (h >> (8*(sizeof(h)-1))); + h += (pc * 31) + (pc * 7) + (pc * 3); + } + return h; + } +}; + +struct StackTraceEqual { + bool operator()(void** entry1, void** entry2) const { + if (Depth(entry1) != Depth(entry2)) return false; + for (int i = 0; i < Depth(entry1); i++) { + if (PC(entry1, i) != PC(entry2, i)) { + return false; + } + } + return true; + } +}; + +typedef HASH_NAMESPACE::hash_set<void**, StackTraceHash, StackTraceEqual> StackTraceTable; + +void DebugStringWriter(const char* str, void* arg) { + string* result = reinterpret_cast<string*>(arg); + *result += str; +} + +} + +void MallocInterface::GetHeapSample(string* result) { + void** entries 
= ReadStackTraces(); + if (entries == NULL) { + *result += "this malloc implementation does not support sampling\n"; + return; + } + + // Group together all entries with same stack trace + StackTraceTable table; + int total_count = 0; + int total_size = 0; + for (void** entry = entries; Count(entry) != 0; entry += 3 + Depth(entry)) { + StackTraceTable::iterator iter = table.find(entry); + total_count += Count(entry); + total_size += Size(entry); + if (iter == table.end()) { + // New occurrence + table.insert(entry); + } else { + void** canonical = *iter; + canonical[0] = reinterpret_cast<void*>(Count(canonical) + Count(entry)); + canonical[1] = reinterpret_cast<void*>(Size(canonical) + Size(entry)); + } + } + + char buf[100]; + snprintf(buf, sizeof(buf), "heap profile: %6d: %8d [%6d: %8d] @\n", + total_count, total_size, total_count, total_size); + *result += buf; + for (StackTraceTable::iterator iter = table.begin(); + iter != table.end(); + ++iter) { + void** entry = *iter; + snprintf(buf, sizeof(buf), "%6d: %8d [%6d: %8d] @", + int(Count(entry)), int(Size(entry)), + int(Count(entry)), int(Size(entry))); + *result += buf; + for (int i = 0; i < Depth(entry); i++) { + snprintf(buf, sizeof(buf), " %p", PC(entry, i)); + *result += buf; + } + *result += "\n"; + } + + // TODO(menage) Get this working in google-perftools + //DumpAddressMap(DebugStringWriter, result); + + delete[] entries; +} diff --git a/src/pagemap.h b/src/pagemap.h new file mode 100644 index 0000000..6fcddd7 --- /dev/null +++ b/src/pagemap.h @@ -0,0 +1,181 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat <opensource@google.com> +// +// A data structure used by the caching malloc. It maps from page# to +// a pointer that contains info about that page. We use two +// representations: one for 32-bit addresses, and another for 64 bit +// addresses. Both representations provide the same interface. 
The +// first representation is implemented as a flat array, the seconds as +// a three-level radix tree that strips away approximately 1/3rd of +// the bits every time. +// +// The BITS parameter should be the number of bits required to hold +// a page number. E.g., with 32 bit pointers and 4K pages (i.e., +// page offset fits in lower 12 bits), BITS == 20. + +#ifndef TCMALLOC_PAGEMAP_H__ +#define TCMALLOC_PAGEMAP_H__ + +#include "google/perftools/config.h" +#if defined HAVE_STDINT_H +#include <stdint.h> +#elif defined HAVE_INTTYPES_H +#include <inttypes.h> +#else +#include <sys/types.h> +#endif +#include "internal_logging.h" + +// Single-level array +template <int BITS> +class TCMalloc_PageMap1 { + private: + void** array_; + + public: + typedef uintptr_t Number; + + explicit TCMalloc_PageMap1(void* (*allocator)(size_t)) { + array_ = reinterpret_cast<void**>((*allocator)(sizeof(void*) << BITS)); + memset(array_, 0, sizeof(void*) << BITS); + } + + // Ensure that the map contains initialized entries "x .. x+n-1". + // Returns true if successful, false if we could not allocate memory. + bool Ensure(Number x, size_t n) { + // Nothing to do since flat array was allocate at start + return true; + } + + // REQUIRES "k" is in range "[0,2^BITS-1]". + // REQUIRES "k" has been ensured before. + // + // Return the current value for KEY. Returns "Value()" if not + // yet set. + void* get(Number k) const { + return array_[k]; + } + + // REQUIRES "k" is in range "[0,2^BITS-1]". + // REQUIRES "k" has been ensured before. + // + // Sets the value for KEY. + void set(Number k, void* v) { + array_[k] = v; + } +}; + +// Three-level radix tree +template <int BITS> +class TCMalloc_PageMap3 { + private: + // How many bits should we consume at each interior level + static const int INTERIOR_BITS = (BITS + 2) / 3; // Round-up + static const int INTERIOR_LENGTH = 1 << INTERIOR_BITS; + + // How many bits should we consume at leaf level + static const int LEAF_BITS = BITS - 2*INTERIOR_BITS; + static const int LEAF_LENGTH = 1 << LEAF_BITS; + + // Interior node + struct Node { + Node* ptrs[INTERIOR_LENGTH]; + }; + + // Leaf node + struct Leaf { + void* values[LEAF_LENGTH]; + }; + + Node* root_; // Root of radix tree + void* (*allocator_)(size_t); // Memory allocator + + Node* NewNode() { + Node* result = reinterpret_cast<Node*>((*allocator_)(sizeof(Node))); + if (result != NULL) { + memset(result, 0, sizeof(*result)); + } + return result; + } + + public: + typedef uintptr_t Number; + + explicit TCMalloc_PageMap3(void* (*allocator)(size_t)) { + allocator_ = allocator; + root_ = NewNode(); + } + + void* get(Number k) const { + ASSERT(k >> BITS == 0); + const Number i1 = k >> (LEAF_BITS + INTERIOR_BITS); + const Number i2 = (k >> LEAF_BITS) & (INTERIOR_LENGTH-1); + const Number i3 = k & (LEAF_LENGTH-1); + return reinterpret_cast<Leaf*>(root_->ptrs[i1]->ptrs[i2])->values[i3]; + } + + void set(Number k, void* v) { + ASSERT(k >> BITS == 0); + const Number i1 = k >> (LEAF_BITS + INTERIOR_BITS); + const Number i2 = (k >> LEAF_BITS) & (INTERIOR_LENGTH-1); + const Number i3 = k & (LEAF_LENGTH-1); + reinterpret_cast<Leaf*>(root_->ptrs[i1]->ptrs[i2])->values[i3] = v; + } + + bool Ensure(Number start, size_t n) { + for (Number key = start; key <= start + n - 1; ) { + const Number i1 = key >> (LEAF_BITS + INTERIOR_BITS); + const Number i2 = (key >> LEAF_BITS) & (INTERIOR_LENGTH-1); + + // Make 2nd level node if necessary + if (root_->ptrs[i1] == NULL) { + Node* n = NewNode(); + if (n == NULL) return false; + root_->ptrs[i1] = n; + 
} + + // Make leaf node if necessary + if (root_->ptrs[i1]->ptrs[i2] == NULL) { + Leaf* leaf = reinterpret_cast<Leaf*>((*allocator_)(sizeof(Leaf))); + if (leaf == NULL) return false; + memset(leaf, 0, sizeof(*leaf)); + root_->ptrs[i1]->ptrs[i2] = reinterpret_cast<Node*>(leaf); + } + + // Advance key past whatever is covered by this leaf node + key = ((key >> LEAF_BITS) + 1) << LEAF_BITS; + } + return true; + } +}; + +#endif // TCMALLOC_PAGEMAP_H__ diff --git a/src/pprof b/src/pprof new file mode 100755 index 0000000..1aa5544 --- /dev/null +++ b/src/pprof @@ -0,0 +1,1448 @@ +#! /usr/bin/perl -w + +# Copyright (c) 2005, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# --- +# Program for printing the profile generated by common/profiler.cc, +# or by the heap profiler (common/debugallocation.cc) +# +# The profile contains a sequence of entries of the form: +# <count> <stack trace> +# This program parses the profile, and generates user-readable +# output. +# +# Examples: +# +# % tools/pprof "program" "profile" +# Generates one line per procedure +# +# % tools/pprof --gv "program" "profile" +# Generates annotated call-graph and displays via "gv" +# +# % tools/pprof --gv --focus=Mutex "program" "profile" +# Restrict to code paths that involve an entry that matches "Mutex" +# +# % tools/pprof --gv --focus=Mutex --ignore=string "program" "profile" +# Restrict to code paths that involve an entry that matches "Mutex" +# and does not match "string" +# +# % tools/pprof --list=IBF_CheckDocid "program" "profile" +# Generates disassembly listing of all routines with at least one +# sample that match the --list=<regexp> pattern. The listing is +# annotated with the flat and cumulative sample counts at each line. +# +# % tools/pprof --disasm=IBF_CheckDocid "program" "profile" +# Generates disassembly listing of all routines with at least one +# sample that match the --disasm=<regexp> pattern. The listing is +# annotated with the flat and cumulative sample counts at each PC value. +# +# TODO: Use color to indicate files? 
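Heap profiles are plain text: after the "heap profile:" header written by MallocInterface::GetHeapSample() (earlier in this change), each sample line has the shape "<in-use objects>: <in-use bytes> [<allocated objects>: <allocated bytes>] @ <pc> <pc> ...", which is the same pattern ReadHeapProfile() below matches with a regexp. As a reference point, here is a minimal C++ sketch of parsing one such line; the struct and function names are invented for illustration.

#include <sstream>
#include <string>
#include <vector>

struct HeapSample {                       // names invented for this sketch
  long inuse_objects, inuse_bytes;
  long alloc_objects, alloc_bytes;
  std::vector<unsigned long> pcs;         // stack trace for this sample
};

bool ParseHeapSampleLine(const std::string& line, HeapSample* s) {
  std::istringstream in(line);
  char c1, c2, lb, rb, at;
  if (!(in >> s->inuse_objects >> c1 >> s->inuse_bytes
           >> lb >> s->alloc_objects >> c2 >> s->alloc_bytes >> rb >> at) ||
      c1 != ':' || c2 != ':' || lb != '[' || rb != ']' || at != '@') {
    return false;                         // not a sample line
  }
  unsigned long pc;
  while (in >> std::hex >> pc) {          // addresses are written as 0x...
    s->pcs.push_back(pc);
  }
  return true;
}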
+ +use strict; +use Getopt::Long; + +# These are the external binaries we use. We hard-code in the path +# because some people have colorizing versions of these binaries which +# can cause trouble. +my $OBJDUMP = "/usr/bin/objdump"; +my $NM = "/usr/bin/nm"; +my $ADDR2LINE = "/usr/bin/addr2line"; +my $DOT = "dot"; # leave non-absolute, since it may be in /usr/local + +##### Argument parsing ##### + +sub usage_string { + return <<'EOF'; +Usage: pprof [options] <program> <profile> + Prints specified cpu- or heap-profile + +Options: + --cum Sort by cumulative data + --base=<base> Subtract <base> from <profile> before display + +Reporting Granularity: + --addresses Report at address level + --lines Report at source line level + --functions Report at function level [default] + --files Report at source file level + +Output type: + --text Generate text report [default] + --gv Generate Postscript and display + --list=<regexp> Generate source listing of matching routines + --disasm=<regexp> Generate disassembly of matching routines + --dot Generate DOT file to stdout + --ps Generate Postcript to stdout + --pdf Generate PDF to stdout + --gif Generate GIF to stdout + +Heap-Profile Options: + --inuse_space Display in-use (mega)bytes [default] + --inuse_objects Display in-use objects + --alloc_space Display allocated (mega)bytes + --alloc_objects Display allocated objects + --show_bytes Display space in bytes + --drop_negative Ignore negaive differences + +Call-graph Options: + --nodecount=<n> Show at most so many nodes [default=80] + --nodefraction=<f> Hide nodes below <f>*total [default=.005] + --edgefraction=<f> Hide edges below <f>*total [default=.001] + --focus=<regexp> Focus on nodes matching <regexp> + --ignore=<regexp> Ignore nodes matching <regexp> + --scale=<n> Set GV scaling [default=0] + +Examples: + +pprof /bin/ls ls.prof + Outputs one line per procedure +pprof --gv /bin/ls ls.prof + Displays annotated call-graph via 'gv' +pprof --gv --focus=Mutex /bin/ls ls.prof + Restricts to code paths including a .*Mutex.* entry +pprof --gv --focus=Mutex --ignore=string /bin/ls ls.prof + Code paths including Mutex but not string +pprof --list=getdir /bin/ls ls.prof + (Per-line) annotated source listing for getdir() +pprof --disasm=getdir /bin/ls ls.prof + (Per-PC) annotated disassembly for getdir() +EOF +} + +sub version_string { + return <<'EOF' +pprof (part of google-perftools) + +Copyright (c) 2005 Google Inc. +EOF +} + +sub fatal { + my $msg = shift; + print STDERR "$msg\n\n"; + print STDERR usage_string(); + print STDERR "\nFATAL ERROR: $msg\n"; # just as a reminder + exit(1); +} + + +$main::opt_help = 0; +$main::opt_version = 0; + +$main::opt_cum = 0; +$main::opt_base = ''; +$main::opt_addresses = 0; +$main::opt_lines = 0; +$main::opt_functions = 0; +$main::opt_files = 0; + +$main::opt_text = 0; +$main::opt_list = ""; +$main::opt_disasm = ""; +$main::opt_gv = 0; +$main::opt_dot = 0; +$main::opt_ps = 0; +$main::opt_pdf = 0; +$main::opt_gif = 0; + +$main::opt_nodecount = 80; +$main::opt_nodefraction = 0.005; +$main::opt_edgefraction = 0.001; +$main::opt_focus = ''; +$main::opt_ignore = ''; +$main::opt_scale = 0; + +$main::opt_inuse_space = 0; +$main::opt_inuse_objects = 0; +$main::opt_alloc_space = 0; +$main::opt_alloc_objects = 0; +$main::opt_show_bytes = 0; +$main::opt_drop_negative = 0; + +# Are we printing a heap profile? +$main::heap_profile = 0; + +GetOptions("help!" => \$main::opt_help, + "version!" => \$main::opt_version, + "cum!" 
=> \$main::opt_cum, + "base=s" => \$main::opt_base, + "functions!" => \$main::opt_functions, + "lines!" => \$main::opt_lines, + "addresses!" => \$main::opt_addresses, + "files!" => \$main::opt_files, + "text!" => \$main::opt_text, + "list=s" => \$main::opt_list, + "disasm=s" => \$main::opt_disasm, + "gv!" => \$main::opt_gv, + "dot!" => \$main::opt_dot, + "ps!" => \$main::opt_ps, + "pdf!" => \$main::opt_pdf, + "gif!" => \$main::opt_gif, + "nodecount=i" => \$main::opt_nodecount, + "nodefraction=f" => \$main::opt_nodefraction, + "edgefraction=f" => \$main::opt_edgefraction, + "focus=s" => \$main::opt_focus, + "ignore=s" => \$main::opt_ignore, + "scale=i" => \$main::opt_scale, + "inuse_space!" => \$main::opt_inuse_space, + "inuse_objects!" => \$main::opt_inuse_objects, + "alloc_space!" => \$main::opt_alloc_space, + "alloc_objects!" => \$main::opt_alloc_objects, + "show_bytes!" => \$main::opt_show_bytes, + "drop_negative!" => \$main::opt_drop_negative, + ) || fatal("Invalid option(s)"); + +# Deal with the standard --help and --version +if ($main::opt_help) { + print usage_string(); + exit(0); +} + +if ($main::opt_version) { + print version_string(); + exit(0); +} + +# Disassembly/listing mode requires address-level info +if ($main::opt_disasm || $main::opt_list) { + $main::opt_functions = 0; + $main::opt_lines = 0; + $main::opt_addresses = 1; + $main::opt_files = 0; +} + +# Check heap-profiling flags +if ($main::opt_inuse_space + + $main::opt_inuse_objects + + $main::opt_alloc_space + + $main::opt_alloc_objects > 1) { + fatal("Specify at most on of --inuse/--alloc options"); +} + +# Check output granularities +my $grains = + $main::opt_functions + + $main::opt_lines + + $main::opt_addresses + + $main::opt_files + + 0; +if ($grains > 1) { + fatal("Only specify one output granularity option"); +} +if ($grains == 0) { + $main::opt_functions = 1; +} + +# Check output modes +my $modes = + $main::opt_text + + $main::opt_gv + + $main::opt_dot + + $main::opt_ps + + $main::opt_pdf + + $main::opt_gif + + 0; +if ($modes > 1) { + fatal("Only specify one output mode"); +} +if ($modes == 0) { + $main::opt_text = 1; +} + +my $prog = shift || fatal("Did not specify program"); +my $pfile = shift || fatal("Did not specify profile file"); + +##### Main section ##### + +# Setup tmp-file name and handler to clean it up +$main::tmpfile_sym = "/tmp/pprof$$.sym"; +$main::tmpfile_ps = "/tmp/pprof$$.ps"; +$SIG{'INT'} = \&sighandler; + +# Read profile data +my $data = ReadProfile($prog, $pfile); +my $profile = $data->{profile}; +my $libs = $data->{libs}; # Info about main program and shared libraries + +# List of function names to skip +$main::skip = (); +if ($main::heap_profile) { + foreach my $name ('calloc', + 'cfree', + 'malloc', + 'free', + 'memalign', + 'pvalloc', + 'valloc', + 'realloc', + '__builtin_delete', + '__builtin_new', + '__builtin_vec_delete', + '__builtin_vec_new') { + $main::skip{$name} = 1; + } +} + +# Subtract base from profile, if specified +if ($main::opt_base ne '') { + my $base = ReadProfile($prog, $main::opt_base)->{profile}; + $profile = SubtractProfile($profile, $base); +} + +# Get total data in profile +my $total = TotalProfile($profile); + +# Extract symbols +my $symbols = ExtractSymbols($libs, $profile, $data->{pcs}); + +# Focus? +if ($main::opt_focus ne '') { + $profile = FocusProfile($symbols, $profile, $main::opt_focus); +} + +# Ignore? 
+if ($main::opt_ignore ne '') { + $profile = IgnoreProfile($symbols, $profile, $main::opt_ignore); +} + +# Reduce profiles to required output granularity, and also clean +# each stack trace so a given entry exists at most once. +my $reduced = ReduceProfile($symbols, $profile); + +# Get derived profiles +my $flat = FlatProfile($reduced); +my $cumulative = CumulativeProfile($reduced); + +# Print +if ($main::opt_disasm) { + PrintDisassembly($libs, $flat, $cumulative); +} elsif ($main::opt_list) { + PrintListing($libs, $flat, $cumulative); +} elsif ($main::opt_text) { + PrintText($symbols, $flat, $cumulative, $total); +} else { + PrintDot($prog, $symbols, $profile, $flat, $cumulative, $total); + if ($main::opt_gv) { + system("gv -scale $main::opt_scale $main::tmpfile_ps"); + } +} + +cleanup(); +exit(0); + +##### Output code ##### + +# Print text output +sub PrintText { + my $symbols = shift; + my $flat = shift; + my $cumulative = shift; + my $total = shift; + + # Which profile to sort by? + my $s = $main::opt_cum ? $cumulative : $flat; + + my $running_sum = 0; + foreach my $k (sort { GetEntry($s,$b) <=> GetEntry($s, $a) } + keys(%{$cumulative})) { + my $f = GetEntry($flat, $k); + my $c = GetEntry($cumulative, $k); + $running_sum += $f; + + my $sym = $k; + if (exists($symbols->{$k})) { + $sym = $symbols->{$k}->[0] . " " . $symbols->{$k}->[1]; + if ($main::opt_addresses) { + $sym = $k . " " . $sym; + } + } + + if ($f != 0 || $c != 0) { + printf("%8s %6s %6s %8s %6s %s\n", + Unparse($f), + Percent($f, $total), + Percent($running_sum, $total), + Unparse($c), + Percent($c, $total), + $sym); + } + } +} + +# Print disassembly for all all routines that match $main::opt_disasm +sub PrintDisassembly { + my $libs = shift; + my $flat = shift; + my $cumulative = shift; + + foreach my $lib (@{$libs}) { + my $symbol_table = GetProcedureBoundaries($lib->[0], $main::opt_disasm); + my $offset = $lib->[1] - $lib->[3]; + foreach my $routine (keys(%{$symbol_table})) { + my $start_addr = $symbol_table->{$routine}->[0]; + my $end_addr = $symbol_table->{$routine}->[1]; + # See if there are any samples in this routine + my $total_flat = 0; + my $total_cum = 0; + for (my $addr = $start_addr; $addr < $end_addr; $addr++) { + $total_flat += GetEntry($flat, sprintf("0x%x", $addr+$offset)); + $total_cum += GetEntry($cumulative, sprintf("0x%x", $addr+$offset)); + } + + # Skip disassembly if there are no samples in routine + next if ($total_cum == 0); + + print "ROUTINE ====================== $routine\n"; + printf "%6s %6s Total samples (flat / cumulative)\n", + Unparse($total_flat), Unparse($total_cum); + + my @instructions = Disassemble($lib->[0], $offset, + $start_addr, $end_addr); + foreach my $e (@instructions) { + my $location = ($e->[2] >= 0) ? 
"$e->[1]:$e->[2]" : ""; + $location =~ s|.*/||; # Remove directory portion, if any + if (length($location) >= 20) { + # For long locations, just show the last 20 characters + $location = substr($location, -20); + } + my $f = GetEntry($flat, $e->[0]); + my $c = GetEntry($cumulative, $e->[0]); + my $address = $e->[0]; $address =~ s/^0x//; + printf("%6s %6s %-20s %8s: %6s\n", + UnparseAlt($f), + UnparseAlt($c), + $location, + $address, + $e->[3]); + } + close(OBJDUMP); + } + } +} + +# Return reference to array of tuples of the form: +# [address, filename, linenumber, instruction] +# E.g., +# ["0x806c43d", "/foo/bar.cc", 131, "ret"] +sub Disassemble { + my $prog = shift; + my $offset = shift; + my $start_addr = shift; + my $end_addr = shift; + + my $cmd = sprintf("$OBJDUMP -d -l --no-show-raw-insn " . + "--start-address=%d --stop-address=%d $prog", + $start_addr, $end_addr); + open(OBJDUMP, "$cmd |") || error("$OBJDUMP: $!\n"); + my @result = (); + my $filename = ""; + my $linenumber = -1; + while (<OBJDUMP>) { + chop; + if (m|\s*([^:\s]+):(\d+)\s*$|) { + # Location line of the form: + # <filename>:<linenumber> + $filename = $1; + $linenumber = $2; + } elsif (m/^ +([0-9a-f]+):\s*(.*)/) { + # Disassembly line + my $k = sprintf("0x%x", hex($1) + $offset); + push(@result, [$k, $filename, $linenumber, $2]); + } + } + close(OBJDUMP); + return @result; +} + +# For sorting functions by name +sub ByName { + return ShortFunctionName($a) cmp ShortFunctionName($b); +} + +# Print source-listing for all all routines that match $main::opt_list +sub PrintListing { + my $libs = shift; + my $flat = shift; + my $cumulative = shift; + + foreach my $lib (@{$libs}) { + my $symbol_table = GetProcedureBoundaries($lib->[0], $main::opt_list); + my $offset = $lib->[1] - $lib->[3]; + foreach my $routine (sort ByName keys(%{$symbol_table})) { + # Print if there are any samples in this routine + my $start_addr = $symbol_table->{$routine}->[0]; + my $end_addr = $symbol_table->{$routine}->[1]; + for (my $addr = $start_addr; $addr < $end_addr; $addr++) { + if (defined($cumulative->{sprintf("0x%x", $addr+$offset)})) { + PrintSource($lib->[0], $offset, + $routine, $flat, $cumulative, + $start_addr, $end_addr); + last; + } + } + } + } +} + +# Print source-listing for one routine +sub PrintSource { + my $prog = shift; + my $offset = shift; + my $routine = shift; + my $flat = shift; + my $cumulative = shift; + my $start_addr = shift; + my $end_addr = shift; + + # Disassemble all instructions (just to get line numbers) + my @instructions = Disassemble($prog, $offset, $start_addr, $end_addr); + + # Hack 1: assume that the last source location mentioned in the + # disassembly is the end of the source code. + my $filename = undef; + my $lastline = -1; + for (my $i = $#instructions; $i >= 0; $i--) { + if ($instructions[$i]->[2] >= 0) { + $filename = $instructions[$i]->[1]; + $lastline = $instructions[$i]->[2]; + last; + } + } + if (!defined($filename)) { + print STDERR "no filename found in $routine\n"; + return; + } + + # Hack 2: assume the first source location from "filename" is the start of + # the source code. + my $firstline = 1; + for (my $i = 0; $i <= $#instructions; $i++) { + if ($instructions[$i]->[1] eq $filename) { + $firstline = $instructions[$i]->[2]; + last; + } + } + + # Assign all samples to the range $firstline,$lastline, + # Hack 3: If an instruction does not occur in the range, its samples + # are moved to the next instruction that occurs in the range. 
+ my $samples1 = {}; + my $samples2 = {}; + my $running1 = 0; # Unassigned flat counts + my $running2 = 0; # Unassigned cumulative counts + my $total1 = 0; # Total flat counts + my $total2 = 0; # Total cumulative counts + foreach my $e (@instructions) { + my $c1 = GetEntry($flat, $e->[0]); + my $c2 = GetEntry($cumulative, $e->[0]); + $running1 += $c1; + $running2 += $c2; + $total1 += $c1; + $total2 += $c2; + my $file = $e->[1]; + my $line = $e->[2]; + if (($file eq $filename) && + ($line >= $firstline) && + ($line <= $lastline)) { + # Assign all accumulated samples to this line + AddEntry($samples1, $line, $running1); + AddEntry($samples2, $line, $running2); + $running1 = 0; + $running2 = 0; + } + } + + # Assign any leftover samples to $lastline + AddEntry($samples1, $lastline, $running1); + AddEntry($samples2, $lastline, $running2); + + printf("ROUTINE ====================== %s in %s\n" . + "%6s %6s Total %s (flat / cumulative)\n", + ShortFunctionName($routine), + $filename, + Units(), + Unparse($total1), + Unparse($total2)); + if (!open(FILE, "<$filename")) { + print STDERR "$filename: $!\n"; + return; + } + my $l = 0; + while (<FILE>) { + $l++; + if ($l >= $firstline - 5 && $l <= $lastline + 5) { + chop; + my $text = $_; + printf("%6s %6s %4d: %s\n", + UnparseAlt(GetEntry($samples1, $l)), + UnparseAlt(GetEntry($samples2, $l)), + $l, + $text); + }; + } + close(FILE); +} + +# Print DOT graph +sub PrintDot { + my $prog = shift; + my $symbols = shift; + my $raw = shift; + my $flat = shift; + my $cumulative = shift; + my $overall_total = shift; + + # Get total + my $local_total = TotalProfile($flat); + my $nodelimit = int($main::opt_nodefraction * $local_total); + my $edgelimit = int($main::opt_edgefraction * $local_total); + my $nodecount = $main::opt_nodecount; + + # Find nodes to include + my @list = (sort { abs(GetEntry($cumulative, $b)) <=> + abs(GetEntry($cumulative, $a)) } + keys(%{$cumulative})); + my $last = $nodecount - 1; + if ($last > $#list) { + $last = $#list; + } + while (($last >= 0) && + (abs(GetEntry($cumulative, $list[$last])) <= $nodelimit)) { + $last--; + } + if ($last < 0) { + print STDERR "No nodes to print\n"; + exit(1); + } + + printf STDERR ("Dropping nodes with <= %s %s; edges with <= %s abs(%s)\n", + Unparse($nodelimit), Units(), + Unparse($edgelimit), Units()); + + # Open DOT output file + my $output; + if ($main::opt_gv) { + $output = "| $DOT -Tps >$main::tmpfile_ps"; + } elsif ($main::opt_ps) { + $output = "| $DOT -Tps"; + } elsif ($main::opt_pdf) { + $output = "| $DOT -Tps | ps2pdf - -"; + } elsif ($main::opt_gif) { + $output = "| $DOT -Tgif"; + } else { + $output = ">&STDOUT"; + } + open(DOT, $output) || error("$output: $!\n"); + + # Title + printf DOT ("digraph \"%s; %s %s\" {\n", + $prog, + Unparse($overall_total), + Units()); + if ($main::opt_pdf) { + # The output is more printable if we set the page size for dot. + printf DOT ("size=\"8,11\"\n"); + } + printf DOT ("node [width=0.375,height=0.25];\n"); + + # Print legend + printf DOT ("Legend [shape=box,fontsize=24,shape=plaintext," . 
+ "label=\"%s\\l%s\\l%s\\l%s\\l%s\\l\"];\n", + $prog, + sprintf("Total %s: %s", Units(), Unparse($overall_total)), + sprintf("Focusing on: %s", Unparse($local_total)), + sprintf("Dropped nodes with <= %s abs(%s)", + Unparse($nodelimit), Units()), + sprintf("Dropped edges with <= %s %s", + Unparse($edgelimit), Units()) + ); + + # Print nodes + my %node = (); + my $nextnode = 1; + foreach my $a (@list[0..$last]) { + # Pick font size + my $f = GetEntry($flat, $a); + my $c = GetEntry($cumulative, $a); + + my $fs = 8; + if ($local_total > 0) { + $fs = 8 + (50.0 * sqrt(abs($f * 1.0 / $local_total))); + } + + $node{$a} = $nextnode++; + my $sym = $a; + $sym =~ s/\s+/\\n/g; + $sym =~ s/::/\\n/g; + + # Extra cumulative info to print for non-leaves + my $extra = ""; + if ($f != $c) { + $extra = sprintf("\\rof %s (%s)", + Unparse($c), + Percent($c, $overall_total)); + } + printf DOT ("N%d [label=\"%s\\n%s (%s)%s\\r" . + "\",shape=box,fontsize=%.1f];\n", + $node{$a}, + $sym, + Unparse($f), + Percent($f, $overall_total), + $extra, + $fs, + ); + } + + # Get edges and counts per edge + my %edge = (); + foreach my $k (keys(%{$raw})) { + # TODO: omit low %age edges + my $n = $raw->{$k}; + my @addrs = split(/\n/, $k); + for (my $i = 1; $i <= $#addrs; $i++) { + my $src = OutputKey($symbols, $addrs[$i]); + my $dst = OutputKey($symbols, $addrs[$i-1]); + #next if ($src eq $dst); # Avoid self-edges? + if (exists($node{$src}) && exists($node{$dst})) { + my $e = "$src\001$dst"; + if (!exists($edge{$e})) { + $edge{$e} = 0; + } + $edge{$e} += $n; + } + } + } + + # Print edges + foreach my $e (keys(%edge)) { + my @x = split(/\001/, $e); + my $n = $edge{$e}; + + if (abs($n) > $edgelimit) { + # Compute line width based on edge count + my $fraction = $local_total ? (3 * ($n / $local_total)) : 0; + if ($fraction > 1) { $fraction = 1; } + my $w = $fraction * 2; + #if ($w < 1) { $w = 1; } + + # Use a slightly squashed function of the edge count as the weight + printf DOT ("N%s -> N%s [label=%s, weight=%d, " . + "style=\"setlinewidth(%f)\"];\n", + $node{$x[0]}, + $node{$x[1]}, + Unparse($n), + int($n ** 0.7), + $w); + } + } + + print DOT ("}\n"); + + close(DOT); +} + +# Generate the key under which a given address should be counted +# based on the user-specified output granularity. +sub OutputKey { + my $symbols = shift; + my $a = shift; + + # Skip large addresses since they sometimes show up as fake entries on RH9 + if (hex($a) > 0x7fffffff) { + return ''; + } + + # Extract symbolic info for address + my $func = $a; + my $fullfunc = $a; + my $fileline = ""; + if (exists($symbols->{$a})) { + $func = $symbols->{$a}->[0]; + $fullfunc = $symbols->{$a}->[2]; + $fileline = $symbols->{$a}->[1]; + } + + # We drop a few well-known names + if ($main::skip{$func}) { + return ''; + } + + if ($main::opt_disasm || $main::opt_list) { + return $a; # We want just the address for the key + } elsif ($main::opt_addresses) { + return "$a $func $fileline"; + } elsif ($main::opt_lines) { + return "$func $fileline"; + } elsif ($main::opt_functions) { + return $func; + } elsif ($main::opt_files) { + my $f = ($fileline eq '') ? $a : $fileline; + $f =~ s/:\d+$//; + return $f; + } else { + return $a; + } +} + +# Generate percent string for a number and a total +sub Percent { + my $num = shift; + my $tot = shift; + if ($tot != 0) { + return sprintf("%.1f%%", $num * 100.0 / $tot); + } else { + return ($num == 0) ? "nan" : (($num > 0) ? 
"+inf" : "-inf"); + } +} + +# Generate pretty-printed form of number +sub Unparse { + my $num = shift; + if ($main::heap_profile) { + if ($main::opt_inuse_objects || $main::opt_alloc_objects) { + return sprintf("%d", $num); + } else { + if ($main::opt_show_bytes) { + return sprintf("%d", $num); + } else { + return sprintf("%.1f", $num / 1048576.0); + } + } + } else { + return sprintf("%d", $num); + } +} + +# Alternate pretty-printed form: 0 maps to "." +sub UnparseAlt { + my $num = shift; + if ($num == 0) { + return "."; + } else { + return Unparse($num); + } +} + +# Return output units +sub Units { + if ($main::heap_profile) { + if ($main::opt_inuse_objects || $main::opt_alloc_objects) { + return "objects"; + } else { + if ($main::opt_show_bytes) { + return "B"; + } else { + return "MB"; + } + } + } else { + return "samples"; + } +} + +##### Profile manipulation code ##### + +# Generate flattened profile: +# If count is charged to stack [a,b,c,d], in generated profile, +# it will be charged to [a] +sub FlatProfile { + my $profile = shift; + my $result = {}; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + AddEntry($result, $addrs[0], $count); + } + return $result; +} + +# Generate cumulative profile: +# If count is charged to stack [a,b,c,d], in generated profile, +# it will be charged to [a], [b], [c], [d] +sub CumulativeProfile { + my $profile = shift; + my $result = {}; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + foreach my $a (@addrs) { + AddEntry($result, $a, $count); + } + } + return $result; +} + +# Reduce profile to granularity given by user +sub ReduceProfile { + my $symbols = shift; + my $profile = shift; + my $result = {}; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + my @path = (); + my %seen = (); + $seen{''} = 1; # So that empty keys are skipped + foreach my $a (@addrs) { + # To avoid double-counting due to recursion, skip a stack-trace + # entry if it has already been seen + my $key = OutputKey($symbols, $a); + if (!$seen{$key}) { + $seen{$key} = 1; + push(@path, $key); + } + } + my $reduced_path = join("\n", @path); + AddEntry($result, $reduced_path, $count); + } + return $result; +} + +# Focus only on paths involving specified regexps +sub FocusProfile { + my $symbols = shift; + my $profile = shift; + my $focus = shift; + my $result = {}; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + foreach my $a (@addrs) { + # Reply if it matches either the address/shortname/fileline + if (($a =~ m/$focus/o) || + (exists($symbols->{$a}) && + (($symbols->{$a}->[0] =~ m/$focus/o) || + ($symbols->{$a}->[1] =~ m/$focus/o)))) { + AddEntry($result, $k, $count); + last; + } + } + } + return $result; +} + +# Focus only on paths not involving specified regexps +sub IgnoreProfile { + my $symbols = shift; + my $profile = shift; + my $ignore = shift; + my $result = {}; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + my $matched = 0; + foreach my $a (@addrs) { + # Reply if it matches either the address/shortname/fileline + if (($a =~ m/$ignore/o) || + (exists($symbols->{$a}) && + (($symbols->{$a}->[0] =~ m/$ignore/o) || + ($symbols->{$a}->[1] =~ m/$ignore/o)))) { + $matched = 1; + last; + } + } + if (!$matched) { + AddEntry($result, $k, $count); + } + } + return $result; +} + +# Get total count in profile +sub TotalProfile { + my 
$profile = shift; + my $result = 0; + foreach my $k (keys(%{$profile})) { + $result += $profile->{$k}; + } + return $result; +} + +# Subtract B from A +sub SubtractProfile { + my $A = shift; + my $B = shift; + + my $R = {}; + foreach my $k (keys(%{$A})) { + my $v = $A->{$k} - GetEntry($B, $k); + if ($v < 0 && $main::opt_drop_negative) { + $v = 0; + } + AddEntry($R, $k, $v); + } + if (!$main::opt_drop_negative) { + # take care of when substracted profile has more things + foreach my $k (keys(%{$B})) { + if (!exists($A->{$k})) { + AddEntry($R, $k, 0 - $B->{$k}); + } + } + } + return $R; +} + +# Get entry from profile; zero if not present +sub GetEntry { + my $profile = shift; + my $k = shift; + if (exists($profile->{$k})) { + return $profile->{$k}; + } else { + return 0; + } +} + +# Add entry to specified profile +sub AddEntry { + my $profile = shift; + my $k = shift; + my $n = shift; + if (!exists($profile->{$k})) { + $profile->{$k} = 0; + } + $profile->{$k} += $n; +} + +##### Parsing code ##### + +# Parse profile generated by common/profiler.cc and return a reference +# to a map: +# $result->{version} Version number of profile file +# $result->{period} Sampling period (in microseconds) +# $result->{profile} Profile object +# $result->{map} Memory map info from profile +# $result->{pcs} List of all PC values seen +sub ReadProfile { + my $prog = shift; + my $fname = shift; + + # Look at first line to see if it is a heap or a CPU profile + open(PROFILE, "<$fname") || error("$fname: $!\n"); + binmode PROFILE; # New perls do UTF-8 processing + my $header = <PROFILE>; + if ($header =~ m/^heap profile:/) { + $main::heap_profile = 1; + return ReadHeapProfile($prog, $fname); + } else { + # Need to unread the line we just read + close(PROFILE); + open(PROFILE, "<$fname") || error("$fname: $!\n"); + binmode PROFILE; # New perls do UTF-8 processing + $main::heap_profile = 0; + return ReadCPUProfile($prog, $fname); + } +} + +# CPU profile reader +sub ReadCPUProfile { + my $prog = shift; + my $fname = shift; + + # Read entire profile into a string + my $str; + my $nbytes = read(PROFILE, $str, 100000000); + close(PROFILE); + + # Parse string into array of slots. + # L! 
is needed for 64-bit # platforms, but not supported on 5.005 + # (despite the manpage claims) + + my $format; + if ($] >= 5.008) { + $format = "L!*"; + } else { + $format = "L*"; + } + + my @slots = unpack($format, $str); + + # Read header + if ($#slots < 1 || $slots[0] != 0 || $slots[1] < 3) { + error("$fname: not a profile file, or old format profile file\n"); + } + my $version = $slots[2]; + my $period = $slots[3]; + my $i = 2 + $slots[1]; + + # Parse profile + my $profile = {}; + my $pcs = {}; + while ($i <= $#slots) { + my $n = $slots[$i++]; + my $d = $slots[$i++]; + if ($slots[$i] == 0) { + # End of profile data marker + $i += $d; + last; + } + + # Make key out of the stack entries + my $k = ""; + for (my $j = 0; $j < $d; $j++) { + my $pc = $slots[$i+$j]; + $pcs->{$pc} = 1; + $k .= sprintf("\n0x%x", $pc); + } + $k =~ s/^\n//; + + AddEntry($profile, $k, $n); + $i += $d; + } + + # Parse map + my $map = substr($str, $i * 4); + + my $r = {}; + $r->{version} = $version; + $r->{period} = $period; + $r->{profile} = $profile; + $r->{libs} = ParseLibraries($prog, $map, $pcs); + $r->{pcs} = $pcs; + + return $r; +} + +sub ReadHeapProfile { + my $prog = shift; + my $fname = shift; + + my $index = 1; + if ($main::opt_inuse_space) { + $index = 1; + } elsif ($main::opt_inuse_objects) { + $index = 0; + } elsif ($main::opt_alloc_space) { + $index = 3; + } elsif ($main::opt_alloc_objects) { + $index = 2; + } + + my $profile = {}; + my $pcs = {}; + my $map = ""; + while (<PROFILE>) { + if (/^MAPPED_LIBRARIES:/) { + # Read the /proc/self/maps data + while (<PROFILE>) { + $map .= $_; + } + last; + } + + if (/^--- Memory map:/) { + # Read /proc/self/maps data as formatted by DumpAddressMap() + while (<PROFILE>) { + $map .= $_; + } + last; + } + + # Read entry of the form: + # <count1>: <bytes1> [<count2>: <bytes2>] @ a1 a2 a3 ... an + s/^\s*//; + s/\s*$//; + if (m/^\s*(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\]\s+@\s+(.*)$/) { + my $stack = $5; + my @counts = ($1, $2, $3, $4); + my $n = $counts[$index]; + my $k = ""; + foreach my $e (split(/\s+/, $stack)) { + my $pc = hex($e); + $pcs->{$pc} = 1; + $k .= sprintf("\n0x%x", $pc); + } + $k =~ s/^\n//; + AddEntry($profile, $k, $n); + } + } + + my $r = {}; + $r->{version} = "heap"; + $r->{period} = 1; + $r->{profile} = $profile; + $r->{libs} = ParseLibraries($prog, $map, $pcs); + $r->{pcs} = $pcs; + return $r; +} + +##### Symbol extraction ##### + +# Split /proc/pid/maps dump into a list of libraries +sub ParseLibraries { + my $prog = shift; + my $map = shift; + my $pcs = shift; + + my $result = []; + my $h = "[a-f0-9]+"; + foreach my $l (split("\n", $map)) { + my $start; + my $finish; + my $offset; + my $lib; + if ($l =~ /^($h)-($h)\s+..x.\s+($h)\s+\S+:\S+\s+\d+\s+(\S+\.so(\.\d+)*)/) { + # Full line from /proc/self/maps. Example: + # 40000000-40015000 r-xp 00000000 03:01 12845071 /lib/ld-2.3.2.so + $start = hex($1); + $finish = hex($2); + $offset = hex($3); + $lib = $4; + } elsif ($l =~ /^\s*($h)-($h):\s*(\S+\.so(\.\d+)*)/) { + # Cooked line from DumpAddressMap. Example: + # 40000000-40015000: /lib/ld-2.3.2.so + $start = hex($1); + $finish = hex($2); + $offset = 0; + $lib = $3; + } else { + next; + } + + # Get objdump output from the library file to figure out how to + # map between mapped addresses and addresses in the library. 
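Restated outside the Perl, the adjustment computed here is: the per-library offset is the file offset from /proc/<pid>/maps plus (.text VMA minus .text file offset) from objdump, and a profile PC is turned into an address that nm/addr2line/objdump understand by subtracting (mapped start minus that offset). A small C++ restatement with hypothetical names, matching how ExtractSymbols()/GetLineNumbers() use the [lib, start, finish, offset] tuples built below:

#include <stdint.h>

// tool_addr is what gets passed to nm/addr2line/objdump for this library.
uint64_t ToolAddress(uint64_t profile_pc,        // PC recorded in the profile
                     uint64_t mapped_start,      // start address from /proc/<pid>/maps
                     uint64_t maps_file_offset,  // file offset from /proc/<pid>/maps
                     uint64_t text_vma,          // .text VMA from "objdump -h"
                     uint64_t text_file_offset)  // .text file offset from "objdump -h"
{
  const uint64_t lib_offset = maps_file_offset + (text_vma - text_file_offset);
  return profile_pc - (mapped_start - lib_offset);
}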
+ open(OBJDUMP, "$OBJDUMP -h $lib |") || error("$OBJDUMP $lib: $!\n"); + while (<OBJDUMP>) { + # Idx Name Size VMA LMA File off Algn + # 10 .text 00104b2c 420156f0 420156f0 000156f0 2**4 + my @x = split; + if (($#x >= 6) && ($x[1] eq '.text')) { + my $vma = hex($x[3]); + my $file_offset = hex($x[5]); + $offset += $vma - $file_offset; + last; + } + } + close(OBJDUMP); + + push(@{$result}, [$lib, $start, $finish, $offset]); + } + + # Append special entry for the main program + my $max_pc = 0; + foreach my $pc (keys(%{$pcs})) { + if ($pc > $max_pc) { $max_pc = $pc; } + } + push(@{$result}, [$prog, 0, $max_pc, 0]); + + return $result; +} + +# Extract symbols for all PC values found in profile +sub ExtractSymbols { + my $libs = shift; + my $profile = shift; + my $pcset = shift; + + my $symbols = {}; + + # Map each PC value to the containing library + my %seen = (); + foreach my $lib (@{$libs}) { + my $libname = $lib->[0]; + my $start = $lib->[1]; + my $finish = $lib->[2]; + my $offset = $lib->[3]; + + # Get list of pcs that belong in this library. + my $contained = []; + foreach my $pc (keys(%{$pcset})) { + if (!$seen{$pc} && ($pc >= $start) && ($pc <= $finish)) { + $seen{$pc} = 1; + push(@{$contained}, $pc); + } + } + # Map to symbols + MapToSymbols($libname, $start - $offset, $contained, $symbols); + } + + return $symbols; +} + +# Map list of PC values to symbols for a given image +sub MapToSymbols { + my $image = shift; + my $offset = shift; + my $pclist = shift; + my $symbols = shift; + + # Ignore empty binaries + if ($#{$pclist} < 0) { return; } + + MapSymbolsWithNM($image, $offset, $pclist, $symbols); + if ($main::opt_lines || $main::opt_files || $main::opt_list) { + GetLineNumbers($image, $offset, $pclist, $symbols); + } +} + +sub GetLineNumbers { + my $image = shift; + my $offset = shift; + my $pclist = shift; + my $symbols = shift; + + # Make file with all PC values + open(ADDRESSES, ">$main::tmpfile_sym") || error("$main::tmpfile_sym: $!\n"); + for (my $i = 0; $i <= $#{$pclist}; $i++) { + printf ADDRESSES ("0x%x\n", $pclist->[$i] - $offset); + } + close(ADDRESSES); + + # Pass to addr2line + open(SYMBOLS, "$ADDR2LINE -f -C -e $image <$main::tmpfile_sym |") + || error("$ADDR2LINE: $!\n"); + my $count = 0; + while (<SYMBOLS>) { + chop; + my $fullfunction = $_; + + $_ = <SYMBOLS>; + chop; + my $filelinenum = $_; + if (!$main::opt_list) { + $filelinenum =~ s|^.*/([^/]+:\d+)$|$1|; # Remove directory name + } + + my $pcstr = sprintf("0x%x", $pclist->[$count]); + if (defined($symbols->{$pcstr})) { + # Override just the line-number portion. The function name portion + # is less buggy when computed using nm instead of addr2line. 
+ $symbols->{$pcstr}->[1] = $filelinenum; + } else { + my $function = ShortFunctionName($fullfunction); + $symbols->{$pcstr} = [$function, $filelinenum, $fullfunction]; + } + $count++; + } + close(SYMBOLS); +} + +# Alternate implementation +sub MapSymbolsWithNM { + my $image = shift; + my $offset = shift; + my $pclist = shift; + my $symbols = shift; + + # Get nm output sorted by increasing address + my $symbol_table = GetProcedureBoundaries($image, "."); + my @names = sort { $symbol_table->{$a}->[0] <=> $symbol_table->{$b}->[0] } + keys(%{$symbol_table}); + + if ($#names < 0) { + # No symbols: just use address + foreach my $pc (@{$pclist}) { + my $pcstr = sprintf("0x%x", $pc); + $symbols->{$pcstr} = [$pcstr, "?", $pcstr]; + } + return; + } + + # Sort addresses so we can do a join against nm output + my $index = 0; + my $fullname = $names[0]; + my $name = ShortFunctionName($fullname); + foreach my $pc (sort { $a <=> $b } @{$pclist}) { + # Adjust for mapped offset + my $mpc = $pc - $offset; + while (($index < $#names) && ($mpc >= $symbol_table->{$fullname}->[1])){ + $index++; + $fullname = $names[$index]; + $name = ShortFunctionName($fullname); + } + my $pcstr = sprintf("0x%x", $pc); + $symbols->{$pcstr} = [$name, "?", $fullname]; + } +} + +sub ShortFunctionName { + my $function = shift; + while ($function =~ s/\([^()]*\)//g) { } # Remove argument types + while ($function =~ s/<[^<>]*>//g) { } # Remove template arguments + $function =~ s/^.*\s+(\w+::)/$1/; # Remove leading type + return $function; +} + +##### Miscellaneous ##### + +sub cleanup { + unlink($main::tmpfile_sym); + unlink($main::tmpfile_ps); +} + +sub sighandler { + cleanup(); + exit(1); +} + +sub error { + my $msg = shift; + print STDERR $msg; + cleanup(); + exit(1); +} + +# Return a list of all routines that match $regexp. +# For each routine, the following list is returned: +# $result->[i]->[0] Routine name +# $result->[i]->[1] Start address +# $result->[i]->[2] Finish address +# $result->[i]->[3] Image file name (program or shared library) +# $result->[i]->[4] Offset for image in address space +sub GetMatchingRoutines { +} + + +# Gets the procedure boundaries for all routines in "$image" whose names +# match "$regexp" and returns them in a hashtable mapping from procedure +# name to a two-element vector of [start address, end address] +sub GetProcedureBoundaries { + my $image = shift; + my $regexp = shift; + + my $symbol_table = {}; + open(NM, "$NM -C -n $image |") || error("$NM: $!\n"); + my $last_start = "0x0"; + my $routine = ""; + while (<NM>) { + if (m/^([0-9a-f]+) . (..*)/) { + my $start_val = $1; + my $this_routine = $2; + if (defined($routine) && $routine =~ m/$regexp/) { + $symbol_table->{$routine} = [hex($last_start), hex($start_val)]; + } + $last_start = $start_val; + $routine = $this_routine; + } + } + close(NM); + + return $symbol_table; +} diff --git a/src/profiler.cc b/src/profiler.cc new file mode 100644 index 0000000..1622795 --- /dev/null +++ b/src/profiler.cc @@ -0,0 +1,577 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// +// Profile current program by sampling stack-trace every so often + +#include "google/perftools/config.h" +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> // for getuid() and geteuid() +#if defined HAVE_STDINT_H +#include <stdint.h> +#elif defined HAVE_INTTYPES_H +#include <inttypes.h> +#else +#include <sys/types.h> +#endif +#include <errno.h> +#include <signal.h> +#include <unistd.h> +#include <sys/time.h> +#include <string.h> +#include <fcntl.h> +#include "google/profiler.h" +#include "google/stacktrace.h" +#include "base/commandlineflags.h" +#include "base/googleinit.h" +#ifdef HAVE_CONFLICT_SIGNAL_H +#include "conflict-signal.h" /* used on msvc machines */ +#endif +#include "base/logging.h" + +#if HAVE_PTHREAD +# include <pthread.h> +# define LOCK(m) pthread_mutex_lock(m) +# define UNLOCK(m) pthread_mutex_unlock(m) +// Macro for easily checking return values from pthread routines +# define PCALL(f) do { int __r = f; if (__r != 0) { fprintf(stderr, "%s: %s\n", #f, strerror(__r)); abort(); } } while (0) +#else +# define LOCK(m) +# define UNLOCK(m) +# define PCALL(f) +#endif + +// For now, keep logging as a noop. TODO: do something better? 
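As the file comment above says, this profiler works by sampling the stack trace on a timer interrupt: a CPU-time interval timer delivers a signal, and the handler (prof_handler below) records the interrupted PC via GetPC() plus a stack trace. Below is a stripped-down, self-contained sketch of just that timer/signal plumbing, assuming the conventional ITIMER_PROF/SIGPROF pairing; the real handler registration and timer arming are not part of this excerpt, and the sketch only counts ticks instead of recording stacks.

#include <signal.h>
#include <stdio.h>
#include <sys/time.h>

static volatile sig_atomic_t ticks = 0;

// Keep the handler trivial and async-signal-safe: just count interrupts.
// A real profiler would capture the interrupted PC and stack here instead.
static void OnProfTick(int /*sig*/) { ticks = ticks + 1; }

int main() {
  signal(SIGPROF, OnProfTick);

  struct itimerval timer;
  timer.it_interval.tv_sec = 0;
  timer.it_interval.tv_usec = 1000000 / 100;   // 100 ticks/sec, cf. kDefaultFrequency
  timer.it_value = timer.it_interval;
  setitimer(ITIMER_PROF, &timer, NULL);        // fires while the process uses CPU time

  // Burn some CPU so the profiling timer actually expires a few times.
  double x = 0;
  for (long i = 0; i < 200000000; i++) x += i * 1e-9;

  printf("ticks=%d (x=%g)\n", (int)ticks, x);
  return 0;
}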
+#undef LOG
+#define LOG(msg)
+
+DEFINE_string(cpu_profile, "",
+              "Profile file name (used if CPUPROFILE env var not specified)");
+
+// Figure out how to get the PC for our architecture
+#if defined HAVE_STRUCT_SIGINFO_SI_FADDR
+typedef struct siginfo SigStructure;
+inline void* GetPC(const SigStructure& sig_structure ) {
+  return (void*)sig_structure.si_faddr; // maybe not correct
+}
+
+#elif defined HAVE_STRUCT_SIGCONTEXT_SC_EIP
+typedef struct sigcontext SigStructure;
+inline void* GetPC(const SigStructure& sig_structure ) {
+  return (void*)sig_structure.sc_eip;
+}
+
+#elif defined HAVE_STRUCT_SIGCONTEXT_EIP
+typedef struct sigcontext SigStructure;
+inline void* GetPC(const SigStructure& sig_structure ) {
+  return (void*)sig_structure.eip;
+}
+
+#elif defined HAVE_STRUCT_SIGCONTEXT_SC_IP
+typedef struct sigcontext SigStructure;
+inline void* GetPC(const SigStructure& sig_structure ) {
+  return (void*)sig_structure.ip;
+}
+
+#elif defined HAVE_STRUCT_UCONTEXT_UC_MCONTEXT
+typedef struct ucontext SigStructure;
+inline void* GetPC(const SigStructure& sig_structure ) {
+  return (void*)sig_structure.uc_mcontext.gregs[REG_RIP];
+}
+
+#else
+#error I dont know what your PC is
+
+#endif
+
+
+// Collects up all profile data
+class ProfileData {
+ public:
+  ProfileData();
+  ~ProfileData();
+
+  // Is profiling turned on at all
+  inline bool enabled() { return out_ >= 0; }
+
+  // Should we automatically profile all threads
+  inline bool profile_all() { return (out_ >= 0) && profile_all_; }
+
+  // What is the frequency of interrupts (ticks per second)
+  inline int frequency() { return frequency_; }
+
+  // Record an interrupt at "pc"
+  void Add(unsigned long pc);
+
+  void FlushTable();
+
+  // Start profiler to write profile info into fname
+  bool Start(const char* fname);
+  // Stop profiling and flush the data
+  void Stop();
+
+ private:
+  static const int kMaxStackDepth = 64;      // Max stack depth profiled
+  static const int kMaxFrequency = 4000;     // Largest allowed frequency
+  static const int kDefaultFrequency = 100;  // Default frequency
+  static const int kAssociativity = 4;       // For hashtable
+  static const int kBuckets = 1 << 10;       // For hashtable
+  static const int kBufferLength = 1 << 18;  // For eviction buffer
+
+  // Type of slots: each slot can be either a count, or a PC value
+  typedef uintptr_t Slot;
+
+  // Hash-table/eviction-buffer entry
+  struct Entry {
+    Slot count;                  // Number of hits
+    Slot depth;                  // Stack depth
+    Slot stack[kMaxStackDepth];  // Stack contents
+  };
+
+  // Hash table bucket
+  struct Bucket {
+    Entry entry[kAssociativity];
+  };
+
+#ifdef HAVE_PTHREAD
+  pthread_mutex_t lock_;        // Cannot use "Mutex" in signal handlers
+  pthread_mutex_t flush_lock_;  // Acquired during explicit flushes
+#endif
+  Bucket* hash_;                // hash table
+
+  Slot* evict_;                 // evicted entries
+  int num_evicted_;             // how many evicted entries?
+  int out_;                     // fd for output file
+  bool profile_all_;            // profile all threads automatically?
+ int count_; // How many interrupts recorded + int evictions_; // How many evictions + size_t total_bytes_; // How much output + char* fname_; // Profile file name + int frequency_; // Interrupts per second + + // Add "pc -> count" to eviction buffer + void Evict(const Entry& entry); + + // Write contents of eviction buffer to disk + void FlushEvicted(); + + // Handler that records the interrupted pc in the profile data + static void prof_handler(int sig, SigStructure sig_structure ); + + // Sets the timer interrupt signal handler to the specified routine + static void SetHandler(void (*handler)(int)); +}; + +// Evict the specified entry to the evicted-entry buffer +inline void ProfileData::Evict(const Entry& entry) { + const int d = entry.depth; + const int nslots = d + 2; // Number of slots needed in eviction buffer + if (num_evicted_ + nslots > kBufferLength) { + FlushEvicted(); + assert(num_evicted_ == 0); + assert(nslots <= kBufferLength); + } + evict_[num_evicted_++] = entry.count; + evict_[num_evicted_++] = d; + memcpy(&evict_[num_evicted_], entry.stack, d * sizeof(Slot)); + num_evicted_ += d; +} + +// Initialize profiling: activated if getenv("CPUPROFILE") exists. +ProfileData::ProfileData() : + hash_(0), + evict_(0), + num_evicted_(0), + out_(-1), + profile_all_(false), + count_(0), + evictions_(0), + total_bytes_(0), + fname_(0), + frequency_(0) { + + PCALL(pthread_mutex_init(&lock_, NULL)); + PCALL(pthread_mutex_init(&flush_lock_, NULL)); + + if (getenv("PROFILESELECTED") == NULL) { + profile_all_ = true; + } + + // Get frequency of interrupts (if specified) + char junk; + const char* fr = getenv("FREQUENCY"); + if (fr != NULL && (sscanf(fr, "%d%c", &frequency_, &junk) == 1) && + (frequency_ > 0)) { + // Limit to kMaxFrequency + frequency_ = (frequency_ > kMaxFrequency) ? kMaxFrequency : frequency_; + } else { + frequency_ = kDefaultFrequency; + } + + // Should profiling be enabled? 
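+  // In short, three environment variables control what happens below:
+  //   CPUPROFILE      - name of the output profile file; if unset, the
+  //                     constructor does not start profiling at all
+  //   PROFILESELECTED - if set, threads are not profiled automatically and
+  //                     must opt in (profile_all_ stays false)
+  //   FREQUENCY       - sampling interrupts per second; unparsable or
+  //                     non-positive values fall back to kDefaultFrequency,
+  //                     and values above kMaxFrequency are clamped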
+ const char* fname = getenv("CPUPROFILE"); + if (fname == 0) { + return; + } + // We don't enable profiling if setuid -- it's a security risk + if (getuid() != geteuid()) + return; + + if (!Start(fname)) { + fprintf(stderr, "Can't turn on cpu profiling: "); + perror(fname); + exit(1); + } +} + +bool ProfileData::Start(const char* fname) { + LOCK(&lock_); + if (enabled()) { + // profiling is already enabled + UNLOCK(&lock_); + return false; + } + + // Open output file and initialize various data structures + int fd = open(fname, O_CREAT | O_WRONLY | O_TRUNC, 0666); + if (fd < 0) { + // Can't open outfile for write + UNLOCK(&lock_); + return false; + } + + // Reset counters + num_evicted_ = 0; + count_ = 0; + evictions_ = 0; + total_bytes_ = 0; + // But leave profile_all_ and frequency_ alone (i.e., ProfilerStart() + // doesn't affect their values originally set in the constructor) + + fname_ = strdup(fname); + out_ = fd; + + hash_ = new Bucket[kBuckets]; + evict_ = new Slot[kBufferLength]; + memset(hash_, 0, sizeof(hash_[0]) * kBuckets); + + // Record special entries + evict_[num_evicted_++] = 0; // count for header + evict_[num_evicted_++] = 3; // depth for header + evict_[num_evicted_++] = 0; // Version number + evict_[num_evicted_++] = 1000000 / frequency_; // Period (microseconds) + evict_[num_evicted_++] = 0; // Padding + + // Setup handler for SIGPROF interrupts + SetHandler((void (*)(int)) prof_handler); + + // Start profiling on this thread if automatic profiling is on + ProfilerRegisterThread(); + + UNLOCK(&lock_); + return true; +} + +// Write out any collected profile data +ProfileData::~ProfileData() { + Stop(); +} + +// Stop profiling and write out any collected profile data +void ProfileData::Stop() { + // Prevent handler from running anymore + SetHandler(SIG_IGN); + + // This lock prevents interference with signal handlers in other threads + LOCK(&lock_); + + if (out_ < 0) { + // Profiling is not enabled + UNLOCK(&lock_); + return; + } + + // Move data from hash table to eviction buffer + for (int b = 0; b < kBuckets; b++) { + Bucket* bucket = &hash_[b]; + for (int a = 0; a < kAssociativity; a++) { + if (bucket->entry[a].count > 0) { + Evict(bucket->entry[a]); + } + } + } + + if (num_evicted_ + 3 > kBufferLength) { + // Ensure there is enough room for end of data marker + FlushEvicted(); + } + + // Write end of data marker + evict_[num_evicted_++] = 0; // count + evict_[num_evicted_++] = 1; // depth + evict_[num_evicted_++] = 0; // end of data marker + FlushEvicted(); + + // Dump "/proc/self/maps" so we get list of mapped shared libraries + int maps = open("/proc/self/maps", O_RDONLY); + if (maps >= 0) { + char buf[100]; + ssize_t r; + while ((r = read(maps, buf, sizeof(buf))) > 0) { + write(out_, buf, r); + } + close(maps); + } + + close(out_); + fprintf(stderr, "PROFILE: interrupts/evictions/bytes = %d/%d/%" PRIuS "\n", + count_, evictions_, total_bytes_); + delete[] hash_; + hash_ = 0; + delete[] evict_; + evict_ = 0; + free(fname_); + fname_ = 0; + + out_ = -1; + UNLOCK(&lock_); +} + +void ProfileData::SetHandler(void (*handler)(int)) { + struct sigaction sa; + sa.sa_handler = handler; + sa.sa_flags = 0; + sigemptyset(&sa.sa_mask); + if (sigaction(SIGPROF, &sa, NULL) != 0) { + perror("sigaction(SIGPROF)"); + exit(1); + } +} + +void ProfileData::FlushTable() { + if (out_ < 0) { + // Profiling is not enabled + return; + } + + LOCK(&flush_lock_); { + SetHandler(SIG_IGN); // Disable timer interrupts while we're flushing + LOCK(&lock_); { + // Move data from hash table to 
eviction buffer + for (int b = 0; b < kBuckets; b++) { + Bucket* bucket = &hash_[b]; + for (int a = 0; a < kAssociativity; a++) { + if (bucket->entry[a].count > 0) { + Evict(bucket->entry[a]); + bucket->entry[a].depth = 0; + bucket->entry[a].count = 0; + } + } + } + + // Write out all pending data + FlushEvicted(); + } UNLOCK(&lock_); + SetHandler((void (*)(int)) prof_handler); + } UNLOCK(&flush_lock_); +} + +// Record the specified "pc" in the profile data +void ProfileData::Add(unsigned long pc) { + void* stack[kMaxStackDepth]; + stack[0] = (void*)pc; + int depth = GetStackTrace(stack+1, kMaxStackDepth-1, + 3/*Removes sighandlers*/); + depth++; // To account for pc value + + // Make hash-value + Slot h = 0; + for (int i = 0; i < depth; i++) { + Slot pc = reinterpret_cast<Slot>(stack[i]); + h = (h << 8) | (h >> (8*(sizeof(h)-1))); + h += (pc * 31) + (pc * 7) + (pc * 3); + } + + LOCK(&lock_); + count_++; + + // See if table already has an entry for this stack trace + bool done = false; + Bucket* bucket = &hash_[h % kBuckets]; + for (int a = 0; a < kAssociativity; a++) { + Entry* e = &bucket->entry[a]; + if (e->depth == depth) { + bool match = true; + for (int i = 0; i < depth; i++) { + if (e->stack[i] != reinterpret_cast<Slot>(stack[i])) { + match = false; + break; + } + } + if (match) { + e->count++; + done = true; + break; + } + } + } + + if (!done) { + // Evict entry with smallest count + Entry* e = &bucket->entry[0]; + for (int a = 1; a < kAssociativity; a++) { + if (bucket->entry[a].count < e->count) { + e = &bucket->entry[a]; + } + } + if (e->count > 0) { + evictions_++; + Evict(*e); + } + + // Use the newly evicted entry + e->depth = depth; + e->count = 1; + for (int i = 0; i < depth; i++) { + e->stack[i] = reinterpret_cast<Slot>(stack[i]); + } + } + UNLOCK(&lock_); +} + +// Write all evicted data to the profile file +void ProfileData::FlushEvicted() { + if (num_evicted_ > 0) { + const char* buf = reinterpret_cast<char*>(evict_); + size_t bytes = sizeof(evict_[0]) * num_evicted_; + total_bytes_ += bytes; + while (bytes > 0) { + ssize_t r = write(out_, buf, bytes); + if (r < 0) { + perror("write"); + exit(1); + } + buf += r; + bytes -= r; + } + } + num_evicted_ = 0; +} + +// Profile data structure: Constructor will check to see if profiling +// should be enabled. Destructor will write profile data out to disk. +static ProfileData pdata; + +// Signal handler that records the pc in the profile-data structure +void ProfileData::prof_handler(int sig, SigStructure sig_structure) { + int saved_errno = errno; + pdata.Add( (unsigned long int)GetPC( sig_structure ) ); + errno = saved_errno; +} + +// Start interval timer for the current thread +void ProfilerEnable() { + // Generate periodic interrupts + if (pdata.enabled()) { + // TODO: Randomize the initial interrupt value? + // TODO: Randmize the inter-interrupt period on every interrupt? 
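+    // With the default frequency of 100 this arms ITIMER_PROF with a
+    // 10000-microsecond (10 ms) period, so SIGPROF is delivered roughly
+    // 100 times per second of consumed CPU time and prof_handler records
+    // a stack trace on each tick.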
+ struct itimerval timer; + timer.it_interval.tv_sec = 0; + timer.it_interval.tv_usec = 1000000 / pdata.frequency(); + timer.it_value = timer.it_interval; + setitimer(ITIMER_PROF, &timer, 0); + } +} + +static void ProfilerTurnOffIntervalTimer() { + struct itimerval timer; + timer.it_interval.tv_sec = 0; + timer.it_interval.tv_usec = 0; + timer.it_value = timer.it_interval; + setitimer(ITIMER_PROF, &timer, 0); +} + +// Stop interval timer for the current thread +void ProfilerDisable() { + if (pdata.enabled()) { + ProfilerTurnOffIntervalTimer(); + } +} + +void ProfilerFlush() { + if (pdata.enabled()) { + pdata.FlushTable(); + } +} + +void ProfilerRegisterThread() { + if (pdata.profile_all()) { + ProfilerEnable(); + } +} + +bool ProfilingIsEnabledForAllThreads() { + return pdata.profile_all(); +} + +bool ProfilerStart(const char* fname) { + return pdata.Start(fname); +} + +void ProfilerStop() { + pdata.Stop(); +} + + +ProfilerThreadState::ProfilerThreadState() { + was_enabled_ = pdata.profile_all(); +} + +void ProfilerThreadState::ThreadCheck() { + bool is_enabled = pdata.profile_all(); + if (was_enabled_ != is_enabled) { + if (is_enabled) { + LOG("Enabling profiling in thread"); + ProfilerRegisterThread(); + } else { + LOG("Profiling disabled in thread"); + ProfilerTurnOffIntervalTimer(); + } + was_enabled_ = is_enabled; + } +} + +REGISTER_MODULE_INITIALIZER(profiler, { + if (!FLAGS_cpu_profile.empty()) { + ProfilerStart(FLAGS_cpu_profile.c_str()); + } +}); diff --git a/src/stacktrace.cc b/src/stacktrace.cc new file mode 100644 index 0000000..66a47a5 --- /dev/null +++ b/src/stacktrace.cc @@ -0,0 +1,110 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// --- +// Author: Sanjay Ghemawat +// +// Produce stack trace + +#include "google/perftools/config.h" +#include "google/stacktrace.h" + +#undef IMPLEMENTED_STACK_TRACE + +// Linux/x86 implementation (requires the binary to be compiled with +// frame pointers) +#if (defined(__i386__) || defined(__x86_64)) && \ + defined(__linux) && !defined(NO_FRAME_POINTER) && !defined(_LP64) +#define IMPLEMENTED_STACK_TRACE + +int GetStackTrace(void** result, int max_depth, int skip_count) { + void **sp; +#ifdef __i386__ + // Stack frame format: + // sp[0] pointer to previous frame + // sp[1] caller address + // sp[2] first argument + // ... + sp = (void **)&result - 2; +#endif + +#ifdef __x86_64__ + // Arguments are passed in registers on x86-64, so we can't just + // offset from &result + sp = (void **) __builtin_frame_address(0); +#endif + + int n = 0; + skip_count++; // Do not include the "GetStackTrace" frame + while (sp && n < max_depth) { + if (skip_count > 0) { + skip_count--; + } else { + result[n++] = *(sp+1); + } + void** new_sp = (void**) *sp; + + // A little bit of sanity checking to avoid crashes + if (new_sp < sp || new_sp > sp + 100000) { + break; + } + sp = new_sp; + } + return n; +} +#endif + +// Portable implementation - just use glibc +#if !defined(IMPLEMENTED_STACK_TRACE) && defined(HAVE_EXECINFO_H) +#include <stdlib.h> +#include <execinfo.h> + +int GetStackTrace(void** result, int max_depth, int skip_count) { + static const int kStackLength = 64; + void * stack[kStackLength]; + int size; + + size = backtrace(stack, kStackLength); + skip_count++; // we want to skip the current frame as well + int result_count = size - skip_count; + if ( result_count < 0 ) + result_count = 0; + else if ( result_count > max_depth ) + result_count = max_depth; + + for (int i = 0; i < result_count; i++) + result[i] = stack[i + skip_count]; + + return result_count; +} +#endif + +#if !defined(IMPLEMENTED_STACK_TRACE) && !defined(HAVE_EXECINFO_H) +#error Cannot calculate stack trace: will need to write for your environment +#endif diff --git a/src/system-alloc.cc b/src/system-alloc.cc new file mode 100644 index 0000000..2476d83 --- /dev/null +++ b/src/system-alloc.cc @@ -0,0 +1,273 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat + +#include "google/perftools/config.h" +#if defined HAVE_STDINT_H +#include <stdint.h> +#elif defined HAVE_INTTYPES_H +#include <inttypes.h> +#else +#include <sys/types.h> +#endif +#include <unistd.h> +#include <fcntl.h> +#include <sys/mman.h> +#include "system-alloc.h" +#include "internal_spinlock.h" +#include "internal_logging.h" +#include "base/commandlineflags.h" + +// Structure for discovering alignment +union MemoryAligner { + void* p; + double d; + size_t s; +}; + +static SpinLock spinlock = SPINLOCK_INITIALIZER; + +// Page size is initialized on demand +static size_t pagesize = 0; + +// Configuration parameters. +// +// if use_devmem is true, either use_sbrk or use_mmap must also be true. +// For 2.2 kernels, it looks like the sbrk address space (500MBish) and +// the mmap address space (1300MBish) are disjoint, so we need both allocators +// to get as much virtual memory as possible. +static bool use_devmem = true; +static bool use_sbrk = true; +static bool use_mmap = true; + +// Flags to keep us from retrying allocators that failed. +static bool devmem_failure = false; +static bool sbrk_failure = false; +static bool mmap_failure = false; + +DEFINE_int32(malloc_devmem_start, 0, + "Physical memory starting location in MB for /dev/mem allocation." + " Setting this to 0 disables /dev/mem allocation"); +DEFINE_int32(malloc_devmem_limit, 0, + "Physical memory limit location in MB for /dev/mem allocation." + " Setting this to 0 means no limit."); + +#ifdef HAVE_SBRK + +static void* TrySbrk(size_t size, size_t alignment) { + size = ((size + alignment - 1) / alignment) * alignment; + void* result = sbrk(size); + if (result == reinterpret_cast<void*>(-1)) { + sbrk_failure = true; + return NULL; + } + + // Is it aligned? + uintptr_t ptr = reinterpret_cast<uintptr_t>(result); + if ((ptr & (alignment-1)) == 0) return result; + + // Try to get more memory for alignment + size_t extra = alignment - (ptr & (alignment-1)); + void* r2 = sbrk(extra); + if (reinterpret_cast<uintptr_t>(r2) == (ptr + size)) { + // Contiguous with previous result + return reinterpret_cast<void*>(ptr + extra); + } + + // Give up and ask for "size + alignment - 1" bytes so + // that we can find an aligned region within it. 
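+  // For example, with size = 64KB and alignment = 4KB the worst case is a
+  // break address one byte past a 4KB boundary: we skip 4095 bytes to reach
+  // the next boundary and the over-sized request still leaves the full 64KB
+  // available after the aligned pointer.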
+ result = sbrk(size + alignment - 1); + if (result == reinterpret_cast<void*>(-1)) { + sbrk_failure = true; + return NULL; + } + ptr = reinterpret_cast<uintptr_t>(result); + if ((ptr & (alignment-1)) != 0) { + ptr += alignment - (ptr & (alignment-1)); + } + return reinterpret_cast<void*>(ptr); +} + +#endif /* HAVE_SBRK */ + +#ifdef HAVE_MMAP + +static void* TryMmap(size_t size, size_t alignment) { + // Enforce page alignment + if (pagesize == 0) pagesize = getpagesize(); + if (alignment < pagesize) alignment = pagesize; + size = ((size + alignment - 1) / alignment) * alignment; + + // Ask for extra memory if alignment > pagesize + size_t extra = 0; + if (alignment > pagesize) { + extra = alignment - pagesize; + } + void* result = mmap(NULL, size + extra, + PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, + -1, 0); + if (result == reinterpret_cast<void*>(MAP_FAILED)) { + mmap_failure = true; + return NULL; + } + + // Adjust the return memory so it is aligned + uintptr_t ptr = reinterpret_cast<uintptr_t>(result); + size_t adjust = 0; + if ((ptr & (alignment - 1)) != 0) { + adjust = alignment - (ptr & (alignment - 1)); + } + + // Return the unused memory to the system + if (adjust > 0) { + munmap(reinterpret_cast<void*>(ptr), adjust); + } + if (adjust < extra) { + munmap(reinterpret_cast<void*>(ptr + adjust + size), extra - adjust); + } + + ptr += adjust; + return reinterpret_cast<void*>(ptr); +} + +#endif /* HAVE_MMAP */ + +static void* TryDevMem(size_t size, size_t alignment) { + static bool initialized = false; + static off_t physmem_base; // next physical memory address to allocate + static off_t physmem_limit; // maximum physical address allowed + static int physmem_fd; // file descriptor for /dev/mem + + // Check if we should use /dev/mem allocation. Note that it may take + // a while to get this flag initialized, so meanwhile we fall back to + // the next allocator. (It looks like 7MB gets allocated before + // this flag gets initialized -khr.) + if (FLAGS_malloc_devmem_start == 0) { + // NOTE: not a devmem_failure - we'd like TCMalloc_SystemAlloc to + // try us again next time. 
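+    // (TCMalloc_SystemAlloc only skips an allocator whose *_failure flag is
+    // set, so returning NULL here without setting devmem_failure keeps
+    // /dev/mem in the rotation for later calls.)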
+ return NULL; + } + + if (!initialized) { + physmem_fd = open("/dev/mem", O_RDWR); + if (physmem_fd < 0) { + devmem_failure = true; + return NULL; + } + physmem_base = FLAGS_malloc_devmem_start*1024LL*1024LL; + physmem_limit = FLAGS_malloc_devmem_limit*1024LL*1024LL; + initialized = true; + } + + // Enforce page alignment + if (pagesize == 0) pagesize = getpagesize(); + if (alignment < pagesize) alignment = pagesize; + size = ((size + alignment - 1) / alignment) * alignment; + + // Ask for extra memory if alignment > pagesize + size_t extra = 0; + if (alignment > pagesize) { + extra = alignment - pagesize; + } + + // check to see if we have any memory left + if (physmem_limit != 0 && physmem_base + size + extra > physmem_limit) { + devmem_failure = true; + return NULL; + } + void *result = mmap(0, size + extra, PROT_WRITE|PROT_READ, + MAP_SHARED, physmem_fd, physmem_base); + if (result == reinterpret_cast<void*>(MAP_FAILED)) { + devmem_failure = true; + return NULL; + } + uintptr_t ptr = reinterpret_cast<uintptr_t>(result); + + // Adjust the return memory so it is aligned + size_t adjust = 0; + if ((ptr & (alignment - 1)) != 0) { + adjust = alignment - (ptr & (alignment - 1)); + } + + // Return the unused virtual memory to the system + if (adjust > 0) { + munmap(reinterpret_cast<void*>(ptr), adjust); + } + if (adjust < extra) { + munmap(reinterpret_cast<void*>(ptr + adjust + size), extra - adjust); + } + + ptr += adjust; + physmem_base += adjust + size; + + return reinterpret_cast<void*>(ptr); +} + +void* TCMalloc_SystemAlloc(size_t size, size_t alignment) { + if (TCMallocDebug::level >= TCMallocDebug::kVerbose) { + MESSAGE("TCMalloc_SystemAlloc(%" PRIuS ", %" PRIuS")\n", + size, alignment); + } + SpinLockHolder lock_holder(&spinlock); + + // Enforce minimum alignment + if (alignment < sizeof(MemoryAligner)) alignment = sizeof(MemoryAligner); + + // Try twice, once avoiding allocators that failed before, and once + // more trying all allocators even if they failed before. + for (int i = 0; i < 2; i++) { + if (use_devmem && !devmem_failure) { + void* result = TryDevMem(size, alignment); + if (result != NULL) return result; + } + +#ifdef HAVE_SBRK + if (use_sbrk && !sbrk_failure) { + void* result = TrySbrk(size, alignment); + if (result != NULL) return result; + } +#endif + +#ifdef HAVE_MMAP + if (use_mmap && !mmap_failure) { + void* result = TryMmap(size, alignment); + if (result != NULL) return result; + } +#endif + + // nothing worked - reset failure flags and try again + devmem_failure = false; + sbrk_failure = false; + mmap_failure = false; + } + return NULL; +} diff --git a/src/system-alloc.h b/src/system-alloc.h new file mode 100644 index 0000000..7cb4bd4 --- /dev/null +++ b/src/system-alloc.h @@ -0,0 +1,46 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// +// Routine that uses sbrk/mmap to allocate memory from the system. +// Useful for implementing malloc. + +#ifndef TCMALLOC_SYSTEM_ALLOC_H__ +#define TCMALLOC_SYSTEM_ALLOC_H__ + +// REQUIRES: "alignment" is a power of two or "0" to indicate default alignment +// +// Allocate and return "N" bytes of zeroed memory. The returned +// pointer is a multiple of "alignment" if non-zero. Returns NULL +// when out of memory. +extern void* TCMalloc_SystemAlloc(size_t bytes, size_t alignment = 0); + +#endif /* TCMALLOC_SYSTEM_ALLOC_H__ */ diff --git a/src/tcmalloc.cc b/src/tcmalloc.cc new file mode 100644 index 0000000..d34d477 --- /dev/null +++ b/src/tcmalloc.cc @@ -0,0 +1,2053 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat <opensource@google.com> +// +// A malloc that uses a per-thread cache to satisfy small malloc requests. +// (The time for malloc/free of a small object drops from 300 ns to 50 ns.) +// +// See doc/tcmalloc.html for a high-level +// description of how this malloc works. +// +// SYNCHRONIZATION +// 1. The thread-specific lists are accessed without acquiring any locks. 
+// This is safe because each such list is only accessed by one thread. +// 2. We have a lock per central free-list, and hold it while manipulating +// the central free list for a particular size. +// 3. The central page allocator is protected by "pageheap_lock". +// 4. The pagemap (which maps from page-number to descriptor), +// can be read without holding any locks, and written while holding +// the "pageheap_lock". +// +// This multi-threaded access to the pagemap is safe for fairly +// subtle reasons. We basically assume that when an object X is +// allocated by thread A and deallocated by thread B, there must +// have been appropriate synchronization in the handoff of object +// X from thread A to thread B. +// +// TODO: Bias reclamation to larger addresses +// TODO: implement mallinfo/mallopt +// TODO: Better testing +// TODO: Return memory to system +// +// 9/28/2003 (new page-level allocator replaces ptmalloc2): +// * malloc/free of small objects goes from ~300 ns to ~50 ns. +// * allocation of a reasonably complicated struct +// goes from about 1100 ns to about 300 ns. + +#include "google/perftools/config.h" +#include <new> +#include <stdio.h> +#include <stddef.h> +#if defined HAVE_STDINT_H +#include <stdint.h> +#elif defined HAVE_INTTYPES_H +#include <inttypes.h> +#else +#include <sys/types.h> +#endif +#include <malloc.h> +#include <string.h> +#include <pthread.h> +#include <unistd.h> +#include <errno.h> +#include <stdarg.h> +#include "google/malloc_hook.h" +#include "google/malloc_interface.h" +#include "google/stacktrace.h" +#include "internal_logging.h" +#include "internal_spinlock.h" +#include "pagemap.h" +#include "system-alloc.h" + +#if defined HAVE_INTTYPES_H +#define __STDC_FORMAT_MACROS +#include <inttypes.h> +#define LLU PRIu64 +#else +#define LLU "llu" // hope for the best +#endif + +//------------------------------------------------------------------- +// Configuration +//------------------------------------------------------------------- + +// Not all possible combinations of the following parameters make +// sense. In particular, if kMaxSize increases, you may have to +// increase kNumClasses as well. +static const size_t kPageShift = 12; +static const size_t kPageSize = 1 << kPageShift; +static const size_t kMaxSize = 8u * kPageSize; +static const size_t kAlignShift = 3; +static const size_t kAlignment = 1 << kAlignShift; +static const size_t kNumClasses = 170; + +// Minimum number of pages to fetch from system at a time. Must be +// significantly bigger than kBlockSize to amortize system-call +// overhead, and also to reduce external fragementation. Also, we +// should keep this value big because various incarnations of Linux +// have small limits on the number of mmap() regions per +// address-space. +static const int kMinSystemAlloc = 1 << (20 - kPageShift); + +// Number of objects to move between a per-thread list and a central +// list in one shot. We want this to be not too small so we can +// amortize the lock overhead for accessing the central list. Making +// it too big may temporarily cause unnecessary memory wastage in the +// per-thread free list until the scavenger cleans up the list. +static const int kNumObjectsToMove = 32; + +// Maximum length we allow a per-thread free-list to have before we +// move objects from it into the corresponding central free-list. We +// want this big to avoid locking the central free-list too often. 
It +// should not hurt to make this list somewhat big because the +// scavenging code will shrink it down when its contents are not in use. +static const int kMaxFreeListLength = 256; + +// Lower and upper bounds on the per-thread cache sizes +static const size_t kMinThreadCacheSize = kMaxSize * 2; +static const size_t kMaxThreadCacheSize = 2 << 20; + +// Default bound on the total amount of thread caches +static const size_t kDefaultOverallThreadCacheSize = 16 << 20; + +// For all span-lengths < kMaxPages we keep an exact-size list. +// REQUIRED: kMaxPages >= kMinSystemAlloc; +static const size_t kMaxPages = kMinSystemAlloc; + +// Twice the approximate gap between sampling actions. +// I.e., we take one sample approximately once every +// kSampleParameter/2 +// bytes of allocation, i.e., ~ once every 128KB. +// Must be a prime number. +static const size_t kSampleParameter = 266053; + +//------------------------------------------------------------------- +// Mapping from size to size_class and vice versa +//------------------------------------------------------------------- + +// A pair of arrays we use for implementing the mapping from a size to +// its size class. Indexed by "floor(lg(size))". +static const int kSizeBits = 8 * sizeof(size_t); +static unsigned char size_base[kSizeBits]; +static unsigned char size_shift[kSizeBits]; + +// Mapping from size class to size +static size_t class_to_size[kNumClasses]; + +// Mapping from size class to number of pages to allocate at a time +static size_t class_to_pages[kNumClasses]; + +// Return floor(log2(n)) for n > 0. +#if defined __i386__ && defined __GNUC__ +static inline int LgFloor(size_t n) { + // "ro" for the input spec means the input can come from either a + // register ("r") or offsetable memory ("o"). + int result; + __asm__("bsrl %1, %0" + : "=r" (result) // Output spec + : "ro" (n) // Input spec + : "cc" // Clobbers condition-codes + ); + return result; +} +#else +// Note: the following only works for "n"s that fit in 32-bits, but +// that is fine since we only use it for small sizes. +static inline int LgFloor(size_t n) { + int log = 0; + for (int i = 4; i >= 0; --i) { + int shift = (1 << i); + size_t x = n >> shift; + if (x != 0) { + n = x; + log += shift; + } + } + ASSERT(n == 1); + return log; +} +#endif + +static inline int SizeClass(size_t size) { + if (size == 0) size = 1; + const int lg = LgFloor(size); + const int align = size_shift[lg]; + return static_cast<int>(size_base[lg]) + ((size-1) >> align); +} + +// Get the byte-size for a specified class +static inline size_t ByteSizeForClass(size_t cl) { + return class_to_size[cl]; +} + +// Initialize the mapping arrays +static void InitSizeClasses() { + // Special initialization for small sizes + for (int lg = 0; lg < kAlignShift; lg++) { + size_base[lg] = 1; + size_shift[lg] = kAlignShift; + } + + int next_class = 1; + int alignshift = kAlignShift; + int last_lg = -1; + for (size_t size = kAlignment; size <= kMaxSize; size += (1 << alignshift)) { + int lg = LgFloor(size); + if (lg > last_lg) { + // Increase alignment every so often. + // + // Since we double the alignment every time size doubles and + // size >= 128, this means that space wasted due to alignment is + // at most 16/128 i.e., 12.5%. Plus we cap the alignment at 256 + // bytes, so the space wasted as a percentage starts falling for + // sizes > 2K. 
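+      // Rough example with these constants: a 130-byte request maps to the
+      // 144-byte class (16-byte spacing once size >= 128), wasting about
+      // 10%, while a 2100-byte request is rounded up by at most 255 bytes
+      // because the spacing is capped at 256.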
+ if ((lg >= 7) && (alignshift < 8)) { + alignshift++; + } + size_base[lg] = next_class - ((size-1) >> alignshift); + size_shift[lg] = alignshift; + } + + class_to_size[next_class] = size; + last_lg = lg; + + next_class++; + } + if (next_class >= kNumClasses) { + MESSAGE("used up too many size classes: %d\n", next_class); + abort(); + } + + // Initialize the number of pages we should allocate to split into + // small objects for a given class. + for (size_t cl = 1; cl < next_class; cl++) { + // Allocate enough pages so leftover is less than 1/8 of total. + // This bounds wasted space to at most 12.5%. + size_t psize = kPageSize; + const size_t s = class_to_size[cl]; + while ((psize % s) > (psize >> 3)) { + psize += kPageSize; + } + class_to_pages[cl] = psize >> kPageShift; + } + + // Double-check sizes just to be safe + for (size_t size = 0; size <= kMaxSize; size++) { + const int sc = SizeClass(size); + if (sc == 0) { + MESSAGE("Bad size class %d for %" PRIuS "\n", sc, size); + abort(); + } + if (sc > 1 && size <= class_to_size[sc-1]) { + MESSAGE("Allocating unnecessarily large class %d for %" PRIuS + "\n", sc, size); + abort(); + } + if (sc >= kNumClasses) { + MESSAGE("Bad size class %d for %" PRIuS "\n", sc, size); + abort(); + } + const size_t s = class_to_size[sc]; + if (size > s) { + MESSAGE("Bad size %" PRIuS " for %" PRIuS " (sc = %d)\n", s, size, sc); + abort(); + } + if (s == 0) { + MESSAGE("Bad size %" PRIuS " for %" PRIuS " (sc = %d)\n", s, size, sc); + abort(); + } + } +} + +// ------------------------------------------------------------------------- +// Simple allocator for objects of a specified type. External locking +// is required before accessing one of these objects. +// ------------------------------------------------------------------------- + +// Metadata allocator -- keeps stats about how many bytes allocated +static uint64_t metadata_system_bytes = 0; +static void* MetaDataAlloc(size_t bytes) { + void* result = TCMalloc_SystemAlloc(bytes); + if (result != NULL) { + metadata_system_bytes += bytes; + } + return result; +} + +template <class T> +class PageHeapAllocator { + private: + // How much to allocate from system at a time + static const int kAllocIncrement = 256 << 10; + + // Aligned size of T + static const size_t kAlignedSize + = (((sizeof(T) + kAlignment - 1) / kAlignment) * kAlignment); + + // Free area from which to carve new objects + char* free_area_; + size_t free_avail_; + + // Free list of already carved objects + void* free_list_; + + // Number of allocated but unfreed objects + int inuse_; + + public: + void Init() { + ASSERT(kAlignedSize <= kAllocIncrement); + inuse_ = 0; + free_area_ = NULL; + free_avail_ = 0; + free_list_ = NULL; + New(); New(); // Reduces cache conflicts? 
+ } + + T* New() { + // Consult free list + void* result; + if (free_list_ != NULL) { + result = free_list_; + free_list_ = *(reinterpret_cast<void**>(result)); + } else { + if (free_avail_ < kAlignedSize) { + // Need more room + free_area_ = reinterpret_cast<char*>(MetaDataAlloc(kAllocIncrement)); + if (free_area_ == NULL) abort(); + free_avail_ = kAllocIncrement; + } + result = free_area_; + free_area_ += kAlignedSize; + free_avail_ -= kAlignedSize; + } + inuse_++; + return reinterpret_cast<T*>(result); + } + + void Delete(T* p) { + *(reinterpret_cast<void**>(p)) = free_list_; + free_list_ = p; + inuse_--; + } + + int inuse() const { return inuse_; } +}; + +// ------------------------------------------------------------------------- +// Span - a contiguous run of pages +// ------------------------------------------------------------------------- + +// Type that can hold a page number +typedef uintptr_t PageID; + +// Type that can hold the length of a run of pages +typedef uintptr_t Length; + +// Convert byte size into pages +static inline Length pages(size_t bytes) { + return ((bytes + kPageSize - 1) >> kPageShift); +} + +// Convert a user size into the number of bytes that will actually be +// allocated +static size_t AllocationSize(size_t bytes) { + if (bytes > kMaxSize) { + // Large object: we allocate an integral number of pages + return pages(bytes) << kPageShift; + } else { + // Small object: find the size class to which it belongs + return ByteSizeForClass(SizeClass(bytes)); + } +} + +// Information kept for a span (a contiguous run of pages). +struct Span { + PageID start; // Starting page number + Length length; // Number of pages in span + Span* next; // Used when in link list + Span* prev; // Used when in link list + void* objects; // Linked list of free objects + unsigned int free : 1; // Is the span free + unsigned int sample : 1; // Sampled object? + unsigned int sizeclass : 8; // Size-class for small objects (or 0) + unsigned int refcount : 11; // Number of non-free objects + +#undef SPAN_HISTORY +#ifdef SPAN_HISTORY + // For debugging, we can keep a log events per span + int nexthistory; + char history[64]; + int value[64]; +#endif +}; + +#ifdef SPAN_HISTORY +void Event(Span* span, char op, int v = 0) { + span->history[span->nexthistory] = op; + span->value[span->nexthistory] = v; + span->nexthistory++; + if (span->nexthistory == sizeof(span->history)) span->nexthistory = 0; +} +#else +#define Event(s,o,v) ((void) 0) +#endif + +// Allocator/deallocator for spans +static PageHeapAllocator<Span> span_allocator; +static Span* NewSpan(PageID p, Length len) { + Span* result = span_allocator.New(); + memset(result, 0, sizeof(*result)); + result->start = p; + result->length = len; +#ifdef SPAN_HISTORY + result->nexthistory = 0; +#endif + return result; +} + +static void DeleteSpan(Span* span) { +#ifndef NDEBUG + // In debug mode, trash the contents of deleted Spans + memset(span, 0x3f, sizeof(*span)); +#endif + span_allocator.Delete(span); +} + +// ------------------------------------------------------------------------- +// Doubly linked list of spans. 
+// ------------------------------------------------------------------------- + +static void DLL_Init(Span* list) { + list->next = list; + list->prev = list; +} + +static void DLL_Remove(Span* span) { + span->prev->next = span->next; + span->next->prev = span->prev; + span->prev = NULL; + span->next = NULL; +} + +static inline bool DLL_IsEmpty(const Span* list) { + return list->next == list; +} + +static int DLL_Length(const Span* list) { + int result = 0; + for (Span* s = list->next; s != list; s = s->next) { + result++; + } + return result; +} + +#if 0 /* Not needed at the moment -- causes compiler warnings if not used */ +static void DLL_Print(const char* label, const Span* list) { + MESSAGE("%-10s %p:", label, list); + for (const Span* s = list->next; s != list; s = s->next) { + MESSAGE(" <%p,%u,%u>", s, s->start, s->length); + } + MESSAGE("\n"); +} +#endif + +static void DLL_Prepend(Span* list, Span* span) { + ASSERT(span->next == NULL); + ASSERT(span->prev == NULL); + span->next = list->next; + span->prev = list; + list->next->prev = span; + list->next = span; +} + +static void DLL_InsertOrdered(Span* list, Span* span) { + ASSERT(span->next == NULL); + ASSERT(span->prev == NULL); + // Look for appropriate place to insert + Span* x = list; + while ((x->next != list) && (x->next->start < span->start)) { + x = x->next; + } + span->next = x->next; + span->prev = x; + x->next->prev = span; + x->next = span; +} + +// ------------------------------------------------------------------------- +// Stack traces kept for sampled allocations +// The following state is protected by pageheap_lock_. +// ------------------------------------------------------------------------- + +static const int kMaxStackDepth = 31; +struct StackTrace { + uintptr_t size; // Size of object + int depth; // Number of PC values stored in array below + void* stack[kMaxStackDepth]; +}; +static PageHeapAllocator<StackTrace> stacktrace_allocator; +static Span sampled_objects; + +// ------------------------------------------------------------------------- +// Map from page-id to per-page data +// ------------------------------------------------------------------------- + +// We use PageMap1<> for 32-bit and PageMap3<> for 64-bit machines. + +// Selector class -- general selector uses 3-level map +template <int BITS> class MapSelector { + public: + typedef TCMalloc_PageMap3<BITS-kPageShift> Type; +}; + +// A single-level map for 32-bit machines +template <> class MapSelector<32> { + public: + typedef TCMalloc_PageMap1<32-kPageShift> Type; +}; + +// ------------------------------------------------------------------------- +// Page-level allocator +// * Eager coalescing +// +// Heap for page-level allocation. We allow allocating and freeing a +// contiguous runs of pages (called a "span"). +// ------------------------------------------------------------------------- + +class TCMalloc_PageHeap { + public: + TCMalloc_PageHeap(); + + // Allocate a run of "n" pages. Returns zero if out of memory. + Span* New(Length n); + + // Delete the span "[p, p+n-1]". + // REQUIRES: span was returned by earlier call to New() and + // has not yet been deleted. + void Delete(Span* span); + + // Mark an allocated span as being used for small objects of the + // specified size-class. + // REQUIRES: span was returned by an earlier call to New() + // and has not yet been deleted. 
+ void RegisterSizeClass(Span* span, size_t sc); + + // Split an allocated span into two spans: one of length "n" pages + // followed by another span of length "span->length - n" pages. + // Modifies "*span" to point to the first span of length "n" pages. + // Returns a pointer to the second span. + // + // REQUIRES: "0 < n < span->length" + // REQUIRES: !span->free + // REQUIRES: span->sizeclass == 0 + Span* Split(Span* span, Length n); + + // Return the descriptor for the specified page. + inline Span* GetDescriptor(PageID p) const { + return reinterpret_cast<Span*>(pagemap_.get(p)); + } + + // Dump state to stderr + void Dump(TCMalloc_Printer* out); + + // Return number of bytes allocated from system + inline uint64_t SystemBytes() const { return system_bytes_; } + + // Return number of free bytes in heap + uint64_t FreeBytes() const { + Length pages = 0; + for (int length = 0; length < kMaxPages; length++) { + pages += length * DLL_Length(&free_[length]); + } + for (Span* s = large_.next; s != &large_; s = s->next) { + pages += s->length; + } + return (static_cast<uint64_t>(pages) << kPageShift); + } + + bool Check(); + bool CheckList(Span* list, Length min_pages, Length max_pages); + + private: + // Pick the appropriate map type based on pointer size + typedef MapSelector<8*sizeof(uintptr_t)>::Type PageMap; + PageMap pagemap_; + + // List of free spans of length >= kMaxPages + Span large_; + + // Array mapping from span length to a doubly linked list of free spans + Span free_[kMaxPages]; + + // Bytes allocated from system + uint64_t system_bytes_; + + bool GrowHeap(Length n); + + // REQUIRES span->length >= n + // Remove span from its free list, and move any leftover part of + // span into appropriate free lists. Also update "span" to have + // length exactly "n" and mark it as non-free so it can be returned + // to the client. + void Carve(Span* span, Length n); + + void RecordSpan(Span* span) { + pagemap_.set(span->start, span); + if (span->length > 1) { + pagemap_.set(span->start + span->length - 1, span); + } + } +}; + +TCMalloc_PageHeap::TCMalloc_PageHeap() : pagemap_(MetaDataAlloc), + system_bytes_(0) { + DLL_Init(&large_); + for (int i = 0; i < kMaxPages; i++) { + DLL_Init(&free_[i]); + } +} + +Span* TCMalloc_PageHeap::New(Length n) { + ASSERT(Check()); + if (n == 0) n = 1; + + // Find first size >= n that has a non-empty list + for (int s = n; s < kMaxPages; s++) { + if (!DLL_IsEmpty(&free_[s])) { + Span* result = free_[s].next; + Carve(result, n); + ASSERT(Check()); + return result; + } + } + + // Look in large list. If we first do not find something, we try to + // grow the heap and try again. + for (int i = 0; i < 2; i++) { + // find the best span (closest to n in size) + Span *best = NULL; + for (Span* span = large_.next; span != &large_; span = span->next) { + if (span->length >= n && + (best == NULL || span->length < best->length)) { + best = span; + } + } + if (best != NULL) { + Carve(best, n); + ASSERT(Check()); + return best; + } + if (i == 0) { + // Nothing suitable in large list. Grow the heap and look again. 
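+      // GrowHeap normally asks the system for at least kMinSystemAlloc
+      // pages (1MB with the 4KB pages configured above), falling back to
+      // exactly n pages if that fails, so the retry pass usually finds a
+      // span large enough to carve.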
+ if (!GrowHeap(n)) { + ASSERT(Check()); + return NULL; + } + } + } + return NULL; +} + +Span* TCMalloc_PageHeap::Split(Span* span, Length n) { + ASSERT(0 < n); + ASSERT(n < span->length); + ASSERT(!span->free); + ASSERT(span->sizeclass == 0); + Event(span, 'T', n); + + const int extra = span->length - n; + Span* leftover = NewSpan(span->start + n, extra); + Event(leftover, 'U', extra); + RecordSpan(leftover); + pagemap_.set(span->start + n - 1, span); // Update map from pageid to span + span->length = n; + + return leftover; +} + +void TCMalloc_PageHeap::Carve(Span* span, Length n) { + ASSERT(n > 0); + DLL_Remove(span); + span->free = 0; + Event(span, 'A', n); + + const int extra = span->length - n; + ASSERT(extra >= 0); + if (extra > 0) { + Span* leftover = NewSpan(span->start + n, extra); + leftover->free = 1; + Event(leftover, 'S', extra); + RecordSpan(leftover); + if (extra < kMaxPages) { + DLL_Prepend(&free_[extra], leftover); + } else { + DLL_InsertOrdered(&large_, leftover); + } + span->length = n; + pagemap_.set(span->start + n - 1, span); + } +} + +void TCMalloc_PageHeap::Delete(Span* span) { + ASSERT(Check()); + ASSERT(!span->free); + ASSERT(span->length > 0); + ASSERT(GetDescriptor(span->start) == span); + ASSERT(GetDescriptor(span->start + span->length - 1) == span); + span->sizeclass = 0; + span->sample = 0; + + // Coalesce -- we guarantee that "p" != 0, so no bounds checking + // necessary. We do not bother resetting the stale pagemap + // entries for the pieces we are merging together because we only + // care about the pagemap entries for the boundaries. + const PageID p = span->start; + const Length n = span->length; + Span* prev = GetDescriptor(p-1); + if (prev != NULL && prev->free) { + // Merge preceding span into this span + ASSERT(prev->start + prev->length == p); + const Length len = prev->length; + DLL_Remove(prev); + DeleteSpan(prev); + span->start -= len; + span->length += len; + pagemap_.set(span->start, span); + Event(span, 'L', len); + } + Span* next = GetDescriptor(p+n); + if (next != NULL && next->free) { + // Merge next span into this span + ASSERT(next->start == p+n); + const Length len = next->length; + DLL_Remove(next); + DeleteSpan(next); + span->length += len; + pagemap_.set(span->start + span->length - 1, span); + Event(span, 'R', len); + } + + Event(span, 'D', span->length); + span->free = 1; + if (span->length < kMaxPages) { + DLL_Prepend(&free_[span->length], span); + } else { + DLL_InsertOrdered(&large_, span); + } + + ASSERT(Check()); +} + +void TCMalloc_PageHeap::RegisterSizeClass(Span* span, size_t sc) { + // Associate span object with all interior pages as well + ASSERT(!span->free); + ASSERT(GetDescriptor(span->start) == span); + ASSERT(GetDescriptor(span->start+span->length-1) == span); + Event(span, 'C', sc); + span->sizeclass = sc; + for (Length i = 1; i < span->length-1; i++) { + pagemap_.set(span->start+i, span); + } +} + +void TCMalloc_PageHeap::Dump(TCMalloc_Printer* out) { + int nonempty_sizes = 0; + for (int s = 0; s < kMaxPages; s++) { + if (!DLL_IsEmpty(&free_[s])) nonempty_sizes++; + } + out->printf("------------------------------------------------\n"); + out->printf("PageHeap: %d sizes\n", nonempty_sizes); + out->printf("------------------------------------------------\n"); + uint64_t cumulative = 0; + for (int s = 0; s < kMaxPages; s++) { + if (!DLL_IsEmpty(&free_[s])) { + const int list_length = DLL_Length(&free_[s]); + uint64_t s_pages = s * list_length; + cumulative += s_pages; + out->printf("%6u pages * %6u spans ~ %6.1f MB; 
%6.1f MB cum\n", + s, list_length, + (s_pages << kPageShift) / 1048576.0, + (cumulative << kPageShift) / 1048576.0); + } + } + + uint64_t large_pages = 0; + int large_spans = 0; + for (Span* s = large_.next; s != &large_; s = s->next) { + out->printf(" [ %6" PRIuS " spans ]\n", s->length); + large_pages += s->length; + large_spans++; + } + cumulative += large_pages; + out->printf(">255 large * %6u spans ~ %6.1f MB; %6.1f MB cum\n", + large_spans, + (large_pages << kPageShift) / 1048576.0, + (cumulative << kPageShift) / 1048576.0); +} + +bool TCMalloc_PageHeap::GrowHeap(Length n) { + ASSERT(kMaxPages >= kMinSystemAlloc); + Length ask = (n>kMinSystemAlloc) ? n : static_cast<Length>(kMinSystemAlloc); + void* ptr = TCMalloc_SystemAlloc(ask << kPageShift, kPageSize); + if (ptr == NULL) { + if (n < ask) { + // Try growing just "n" pages + ask = n; + ptr = TCMalloc_SystemAlloc(ask << kPageShift, kPageSize); + } + if (ptr == NULL) return false; + } + system_bytes_ += (ask << kPageShift); + const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; + ASSERT(p > 0); + + // Make sure pagemap_ has entries for all of the new pages. + // Plus ensure one before and one after so coalescing code + // does not need bounds-checking. + if (pagemap_.Ensure(p-1, ask+2)) { + // Pretend the new area is allocated and then Delete() it to + // cause any necessary coalescing to occur. + Span* span = NewSpan(p, ask); + RecordSpan(span); + Delete(span); + ASSERT(Check()); + return true; + } else { + // We could not allocate memory within "pagemap_" + // TODO: Once we can return memory to the system, return the new span + return false; + } +} + +bool TCMalloc_PageHeap::Check() { + ASSERT(free_[0].next == &free_[0]); + CheckList(&large_, kMaxPages, 1000000000); + for (Length s = 1; s < kMaxPages; s++) { + CheckList(&free_[s], s, s); + } + return true; +} + +bool TCMalloc_PageHeap::CheckList(Span* list, Length min_pages, Length max_pages) { + for (Span* s = list->next; s != list; s = s->next) { + CHECK_CONDITION(s->free); + CHECK_CONDITION(s->length >= min_pages); + CHECK_CONDITION(s->length <= max_pages); + CHECK_CONDITION(GetDescriptor(s->start) == s); + CHECK_CONDITION(GetDescriptor(s->start+s->length-1) == s); + } + return true; +} + +//------------------------------------------------------------------- +// Free list +//------------------------------------------------------------------- + +class TCMalloc_ThreadCache_FreeList { + private: + void* list_; // Linked list of nodes + uint16_t length_; // Current length + uint16_t lowater_; // Low water mark for list length + + public: + void Init() { + list_ = NULL; + length_ = 0; + lowater_ = 0; + } + + // Return current length of list + int length() const { + return length_; + } + + // Is list empty? 
+ bool empty() const { + return list_ == NULL; + } + + // Low-water mark management + int lowwatermark() const { return lowater_; } + void clear_lowwatermark() { lowater_ = length_; } + + void Push(void* ptr) { + *(reinterpret_cast<void**>(ptr)) = list_; + list_ = ptr; + length_++; + } + + void* Pop() { + ASSERT(list_ != NULL); + void* result = list_; + list_ = *(reinterpret_cast<void**>(result)); + length_--; + if (length_ < lowater_) lowater_ = length_; + return result; + } +}; + +//------------------------------------------------------------------- +// Data kept per thread +//------------------------------------------------------------------- + +class TCMalloc_ThreadCache { + private: + typedef TCMalloc_ThreadCache_FreeList FreeList; + + size_t size_; // Combined size of data + pthread_t tid_; // Which thread owns it + bool setspecific_; // Called pthread_setspecific? + FreeList list_[kNumClasses]; // Array indexed by size-class + + // We sample allocations, biased by the size of the allocation + uint32_t rnd_; // Cheap random number generator + size_t bytes_until_sample_; // Bytes until we sample next + + public: + // All ThreadCache objects are kept in a linked list (for stats collection) + TCMalloc_ThreadCache* next_; + TCMalloc_ThreadCache* prev_; + + void Init(pthread_t tid); + void Cleanup(); + + // Accessors (mostly just for printing stats) + int freelist_length(size_t cl) const { return list_[cl].length(); } + + // Total byte size in cache + size_t Size() const { return size_; } + + void* Allocate(size_t size); + void Deallocate(void* ptr, size_t size_class); + + void FetchFromCentralCache(size_t cl); + void ReleaseToCentralCache(size_t cl, int N); + void Scavenge(); + void Print() const; + + // Record allocation of "k" bytes. Return true iff allocation + // should be sampled + bool SampleAllocation(size_t k); + + // Pick next sampling point + void PickNextSample(); + + static void InitModule(); + static void InitTSD(); + static TCMalloc_ThreadCache* GetCache(); + static TCMalloc_ThreadCache* GetCacheIfPresent(); + static void* CreateCacheIfNecessary(); + static void DeleteCache(void* ptr); + static void RecomputeThreadCacheSize(); +}; + +//------------------------------------------------------------------- +// Data kept per size-class in central cache +//------------------------------------------------------------------- + +class TCMalloc_Central_FreeList { + public: + void Init(size_t cl); + + // REQUIRES: lock_ is held + // Insert object. + // May temporarily release lock_. + void Insert(void* object); + + // REQUIRES: lock_ is held + // Remove object from cache and return. + // Return NULL if no free entries in cache. + void* Remove(); + + // REQUIRES: lock_ is held + // Populate cache by fetching from the page heap. + // May temporarily release lock_. + void Populate(); + + // REQUIRES: lock_ is held + // Number of free objects in cache + int length() const { return counter_; } + + // Lock -- exposed because caller grabs it before touching this object + SpinLock lock_; + + private: + // We keep linked lists of empty and non-emoty spans. 
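+  // A span sits on nonempty_ while it still has free objects to hand out;
+  // Remove() moves it to empty_ once its object list is drained, and
+  // Insert() moves it back to nonempty_ when an object is returned.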
+ size_t size_class_; // My size class
+ Span empty_; // Dummy header for list of empty spans
+ Span nonempty_; // Dummy header for list of non-empty spans
+ size_t counter_; // Number of free objects in cache entry
+};
+
+// Pad each CentralCache object to multiple of 64 bytes
+class TCMalloc_Central_FreeListPadded : public TCMalloc_Central_FreeList {
+ private:
+ char pad_[(64 - (sizeof(TCMalloc_Central_FreeList) % 64)) % 64];
+};
+
+//-------------------------------------------------------------------
+// Global variables
+//-------------------------------------------------------------------
+
+// Central cache -- a collection of free-lists, one per size-class.
+// We have a separate lock per free-list to reduce contention.
+static TCMalloc_Central_FreeListPadded central_cache[kNumClasses];
+
+// Page-level allocator
+static SpinLock pageheap_lock = SPINLOCK_INITIALIZER;
+static TCMalloc_PageHeap* pageheap = NULL;
+static char pageheap_memory[sizeof(TCMalloc_PageHeap)];
+
+// Thread-specific key. Initialization here is somewhat tricky
+// because some Linux startup code invokes malloc() before it
+// is in a good enough state to handle pthread_key_create().
+// Therefore, we use TSD keys only after tsd_inited is set to true.
+// Until then, we use a slow path to get the heap object.
+static bool tsd_inited = false;
+static pthread_key_t heap_key;
+
+// Allocator for thread heaps
+static PageHeapAllocator<TCMalloc_ThreadCache> threadheap_allocator;
+
+// Linked list of heap objects. Protected by pageheap_lock.
+static TCMalloc_ThreadCache* thread_heaps = NULL;
+static int thread_heap_count = 0;
+
+// Overall thread cache size. Protected by pageheap_lock.
+static size_t overall_thread_cache_size = kDefaultOverallThreadCacheSize;
+
+// Global per-thread cache size. Writes are protected by
+// pageheap_lock. Reads are done without any locking, which should be
+// fine as long as size_t can be written atomically and we don't place
+// invariants between this variable and other pieces of state.
+static volatile size_t per_thread_cache_size = kMaxThreadCacheSize; + +//------------------------------------------------------------------- +// Central cache implementation +//------------------------------------------------------------------- + +void TCMalloc_Central_FreeList::Init(size_t cl) { + lock_.Init(); + size_class_ = cl; + DLL_Init(&empty_); + DLL_Init(&nonempty_); + counter_ = 0; +} + +void TCMalloc_Central_FreeList::Insert(void* object) { + const PageID p = reinterpret_cast<uintptr_t>(object) >> kPageShift; + Span* span = pageheap->GetDescriptor(p); + ASSERT(span != NULL); + ASSERT(span->refcount > 0); + + // If span is empty, move it to non-empty list + if (span->objects == NULL) { + DLL_Remove(span); + DLL_Prepend(&nonempty_, span); + Event(span, 'N', 0); + } + + // The following check is expensive, so it is disabled by default + if (false) { + // Check that object does not occur in list + int got = 0; + for (void* p = span->objects; p != NULL; p = *((void**) p)) { + ASSERT(p != object); + got++; + } + ASSERT(got + span->refcount == + (span->length<<kPageShift)/ByteSizeForClass(span->sizeclass)); + } + + counter_++; + span->refcount--; + if (span->refcount == 0) { + Event(span, '#', 0); + counter_ -= (span->length<<kPageShift) / ByteSizeForClass(span->sizeclass); + DLL_Remove(span); + + // Release central list lock while operating on pageheap + lock_.Unlock(); + { + SpinLockHolder h(&pageheap_lock); + pageheap->Delete(span); + } + lock_.Lock(); + } else { + *(reinterpret_cast<void**>(object)) = span->objects; + span->objects = object; + } +} + +void* TCMalloc_Central_FreeList::Remove() { + if (DLL_IsEmpty(&nonempty_)) return NULL; + Span* span = nonempty_.next; + + ASSERT(span->objects != NULL); + span->refcount++; + void* result = span->objects; + span->objects = *(reinterpret_cast<void**>(result)); + if (span->objects == NULL) { + // Move to empty list + DLL_Remove(span); + DLL_Prepend(&empty_, span); + Event(span, 'E', 0); + } + counter_--; + return result; +} + +// Fetch memory from the system and add to the central cache freelist. +void TCMalloc_Central_FreeList::Populate() { + // Release central list lock while operating on pageheap + lock_.Unlock(); + const size_t npages = class_to_pages[size_class_]; + + Span* span; + { + SpinLockHolder h(&pageheap_lock); + span = pageheap->New(npages); + if (span) pageheap->RegisterSizeClass(span, size_class_); + } + if (span == NULL) { + MESSAGE("allocation failed: %d\n", errno); + lock_.Lock(); + return; + } + + // Split the block into pieces and add to the free-list + // TODO: coloring of objects to avoid cache conflicts? 
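// A worked trace of the threading loop below -- the figures assume a 4 KiB
// page (kPageShift == 12) and a hypothetical 256-byte size class spanning a
// single page; the real values come from the size-class tables:
//   ptr = 0x1000, limit = 0x2000, size = 0x100
//   1st pass:  span->objects = (void*)0x1000,  tail = (void**)0x1000
//   2nd pass:  *(void**)0x1000 = (void*)0x1100, tail = (void**)0x1100
//   ...
//   16th pass: *(void**)0x1E00 = (void*)0x1F00, then the loop stops at 0x2000
//   finally *tail = NULL terminates the chain at object 0x1F00
// Net effect: span->objects heads a singly-linked list of num = 16 objects,
// each object's first word pointing at the next, and counter_ later grows
// by that num once the central lock is re-acquired.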
+ void** tail = &span->objects; + char* ptr = reinterpret_cast<char*>(span->start << kPageShift); + char* limit = ptr + (npages << kPageShift); + const size_t size = ByteSizeForClass(size_class_); + int num = 0; + while (ptr + size <= limit) { + *tail = ptr; + tail = reinterpret_cast<void**>(ptr); + ptr += size; + num++; + } + ASSERT(ptr <= limit); + *tail = NULL; + span->refcount = 0; // No sub-object in use yet + + // Add span to list of non-empty spans + lock_.Lock(); + DLL_Prepend(&nonempty_, span); + counter_ += num; +} + +//------------------------------------------------------------------- +// TCMalloc_ThreadCache implementation +//------------------------------------------------------------------- + +inline bool TCMalloc_ThreadCache::SampleAllocation(size_t k) { + if (bytes_until_sample_ < k) { + PickNextSample(); + return true; + } else { + bytes_until_sample_ -= k; + return false; + } +} + +void TCMalloc_ThreadCache::Init(pthread_t tid) { + size_ = 0; + next_ = NULL; + prev_ = NULL; + tid_ = tid; + setspecific_ = false; + for (size_t cl = 0; cl < kNumClasses; ++cl) { + list_[cl].Init(); + } + + // Initialize RNG -- run it for a bit to get to good values + rnd_ = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(this)); + for (int i = 0; i < 100; i++) { + PickNextSample(); + } +} + +void TCMalloc_ThreadCache::Cleanup() { + // Put unused memory back into central cache + for (int cl = 0; cl < kNumClasses; ++cl) { + FreeList* src = &list_[cl]; + TCMalloc_Central_FreeList* dst = ¢ral_cache[cl]; + SpinLockHolder h(&dst->lock_); + while (!src->empty()) { + dst->Insert(src->Pop()); + } + } +} + +inline void* TCMalloc_ThreadCache::Allocate(size_t size) { + ASSERT(size <= kMaxSize); + const size_t cl = SizeClass(size); + FreeList* list = &list_[cl]; + if (list->empty()) { + FetchFromCentralCache(cl); + if (list->empty()) return NULL; + } + size_ -= ByteSizeForClass(cl); + return list->Pop(); +} + +inline void TCMalloc_ThreadCache::Deallocate(void* ptr, size_t cl) { + size_ += ByteSizeForClass(cl); + FreeList* list = &list_[cl]; + list->Push(ptr); + // If enough data is free, put back into central cache + if (list->length() > kMaxFreeListLength) { + ReleaseToCentralCache(cl, kNumObjectsToMove); + } + if (size_ >= per_thread_cache_size) Scavenge(); +} + +// Remove some objects of class "cl" from central cache and add to thread heap +void TCMalloc_ThreadCache::FetchFromCentralCache(size_t cl) { + TCMalloc_Central_FreeList* src = ¢ral_cache[cl]; + FreeList* dst = &list_[cl]; + SpinLockHolder h(&src->lock_); + for (int i = 0; i < kNumObjectsToMove; i++) { + void* object = src->Remove(); + if (object == NULL) { + if (i == 0) { + src->Populate(); // Temporarily releases src->lock_ + object = src->Remove(); + } + if (object == NULL) { + break; + } + } + dst->Push(object); + size_ += ByteSizeForClass(cl); + } +} + +// Remove some objects of class "cl" from thread heap and add to central cache +void TCMalloc_ThreadCache::ReleaseToCentralCache(size_t cl, int N) { + FreeList* src = &list_[cl]; + TCMalloc_Central_FreeList* dst = ¢ral_cache[cl]; + SpinLockHolder h(&dst->lock_); + if (N > src->length()) N = src->length(); + size_ -= N*ByteSizeForClass(cl); + while (N-- > 0) { + void* ptr = src->Pop(); + dst->Insert(ptr); + } +} + +// Release idle memory to the central cache +void TCMalloc_ThreadCache::Scavenge() { + // If the low-water mark for the free list is L, it means we would + // not have had to allocate anything from the central cache even if + // we had reduced the free list size by L. 
We aim to get closer to + // that situation by dropping L/2 nodes from the free list. This + // may not release much memory, but if so we will call scavenge again + // pretty soon and the low-water marks will be high on that call. + //int64 start = CycleClock::Now(); + + for (int cl = 0; cl < kNumClasses; cl++) { + FreeList* list = &list_[cl]; + const int lowmark = list->lowwatermark(); + if (lowmark > 0) { + const int drop = (lowmark > 1) ? lowmark/2 : 1; + ReleaseToCentralCache(cl, drop); + } + list->clear_lowwatermark(); + } + + //int64 finish = CycleClock::Now(); + //CycleTimer ct; + //MESSAGE("GC: %.0f ns\n", ct.CyclesToUsec(finish-start)*1000.0); +} + +inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCache() { + void* ptr = NULL; + if (!tsd_inited) { + InitModule(); + } else { + ptr = pthread_getspecific(heap_key); + } + if (ptr == NULL) ptr = CreateCacheIfNecessary(); + return reinterpret_cast<TCMalloc_ThreadCache*>(ptr); +} + +// In deletion paths, we do not try to create a thread-cache. This is +// because we may be in the thread destruction code and may have +// already cleaned up the cache for this thread. +inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCacheIfPresent() { + if (!tsd_inited) return NULL; + return reinterpret_cast<TCMalloc_ThreadCache*>(pthread_getspecific(heap_key)); +} + +void TCMalloc_ThreadCache::PickNextSample() { + // Make next "random" number + // x^32+x^22+x^2+x^1+1 is a primitive polynomial for random numbers + static const uint32_t kPoly = (1 << 22) | (1 << 2) | (1 << 1) | (1 << 0); + uint32_t r = rnd_; + rnd_ = (r << 1) ^ ((static_cast<int32_t>(r) >> 31) & kPoly); + + // Next point is "rnd_ % (2*sample_period)". I.e., average + // increment is "sample_period". + bytes_until_sample_ = rnd_ % kSampleParameter; +} + +void TCMalloc_ThreadCache::InitModule() { + // There is a slight potential race here because of double-checked + // locking idiom. However, as long as the program does a small + // allocation before switching to multi-threaded mode, we will be + // fine. We increase the chances of doing such a small allocation + // by doing one in the constructor of the module_enter_exit_hook + // object declared below. + SpinLockHolder h(&pageheap_lock); + if (pageheap == NULL) { + InitSizeClasses(); + threadheap_allocator.Init(); + span_allocator.Init(); + stacktrace_allocator.Init(); + DLL_Init(&sampled_objects); + for (int i = 0; i < kNumClasses; ++i) { + central_cache[i].Init(i); + } + pageheap = new ((void*)pageheap_memory) TCMalloc_PageHeap; + } +} + + + +void TCMalloc_ThreadCache::InitTSD() { + ASSERT(!tsd_inited); + pthread_key_create(&heap_key, DeleteCache); + tsd_inited = true; + + // We may have used a fake pthread_t for the main thread. Fix it. + pthread_t zero; + memset(&zero, 0, sizeof(zero)); + SpinLockHolder h(&pageheap_lock); + for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) { + if (h->tid_ == zero) { + h->tid_ = pthread_self(); + } + } +} + +void* TCMalloc_ThreadCache::CreateCacheIfNecessary() { + // Initialize per-thread data if necessary + TCMalloc_ThreadCache* heap = NULL; + { + SpinLockHolder h(&pageheap_lock); + + // Early on in glibc's life, we cannot even call pthread_self() + pthread_t me; + if (!tsd_inited) { + memset(&me, 0, sizeof(me)); + } else { + me = pthread_self(); + } + + // This may be a recursive malloc call from pthread_setspecific() + // In that case, the heap for this thread has already been created + // and added to the linked list. So we search for that first. 
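// A sketch of the re-entrancy this search guards against (the exact glibc
// path that re-enters malloc is an assumption; the structure is what the
// comments below describe):
//   CreateCacheIfNecessary              // 1st call: allocates heap, links it
//     -> pthread_setspecific(heap_key)  // done below, outside pageheap_lock
//       -> malloc()                     // glibc may allocate TSD storage
//         -> GetCache()
//           -> CreateCacheIfNecessary   // 2nd call: the loop below finds the
//                                       // already-linked heap for `me` and
//                                       // returns it; setspecific_ is already
//                                       // true, so setspecific is not re-run.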
+ for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) { + if (h->tid_ == me) { + heap = h; + break; + } + } + + if (heap == NULL) { + // Create the heap and add it to the linked list + heap = threadheap_allocator.New(); + heap->Init(me); + heap->next_ = thread_heaps; + heap->prev_ = NULL; + if (thread_heaps != NULL) thread_heaps->prev_ = heap; + thread_heaps = heap; + thread_heap_count++; + RecomputeThreadCacheSize(); + } + } + + // We call pthread_setspecific() outside the lock because it may + // call malloc() recursively. The recursive call will never get + // here again because it will find the already allocated heap in the + // linked list of heaps. + if (!heap->setspecific_ && tsd_inited) { + heap->setspecific_ = true; + pthread_setspecific(heap_key, heap); + } + return heap; +} + +void TCMalloc_ThreadCache::DeleteCache(void* ptr) { + // Remove all memory from heap + TCMalloc_ThreadCache* heap; + heap = reinterpret_cast<TCMalloc_ThreadCache*>(ptr); + heap->Cleanup(); + + // Remove from linked list + SpinLockHolder h(&pageheap_lock); + if (heap->next_ != NULL) heap->next_->prev_ = heap->prev_; + if (heap->prev_ != NULL) heap->prev_->next_ = heap->next_; + if (thread_heaps == heap) thread_heaps = heap->next_; + thread_heap_count--; + RecomputeThreadCacheSize(); + + threadheap_allocator.Delete(heap); +} + +void TCMalloc_ThreadCache::RecomputeThreadCacheSize() { + // Divide available space across threads + int n = thread_heap_count > 0 ? thread_heap_count : 1; + size_t space = overall_thread_cache_size / n; + + // Limit to allowed range + if (space < kMinThreadCacheSize) space = kMinThreadCacheSize; + if (space > kMaxThreadCacheSize) space = kMaxThreadCacheSize; + + per_thread_cache_size = space; +} + +void TCMalloc_ThreadCache::Print() const { + for (int cl = 0; cl < kNumClasses; ++cl) { + MESSAGE(" %5" PRIuS " : %4d len; %4d lo\n", + ByteSizeForClass(cl), + list_[cl].length(), + list_[cl].lowwatermark()); + } +} + +// Extract interesting stats +struct TCMallocStats { + uint64_t system_bytes; // Bytes alloced from system + uint64_t thread_bytes; // Bytes in thread caches + uint64_t central_bytes; // Bytes in central cache + uint64_t pageheap_bytes; // Bytes in page heap + uint64_t metadata_bytes; // Bytes alloced for metadata +}; + +// Get stats into "r". Also get per-size-class counts if class_count != NULL +static void ExtractStats(TCMallocStats* r, uint64_t* class_count) { + r->central_bytes = 0; + for (int cl = 0; cl < kNumClasses; ++cl) { + SpinLockHolder h(¢ral_cache[cl].lock_); + const int length = central_cache[cl].length(); + r->central_bytes += static_cast<uint64_t>(ByteSizeForClass(cl)) * length; + if (class_count) class_count[cl] = length; + } + + // Add stats from per-thread heaps + r->thread_bytes = 0; + { // scope + SpinLockHolder h(&pageheap_lock); + for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) { + r->thread_bytes += h->Size(); + if (class_count) { + for (int cl = 0; cl < kNumClasses; ++cl) { + class_count[cl] += h->freelist_length(cl); + } + } + } + } + + { //scope + SpinLockHolder h(&pageheap_lock); + r->system_bytes = pageheap->SystemBytes(); + r->metadata_bytes = metadata_system_bytes; + r->pageheap_bytes = pageheap->FreeBytes(); + } +} + +// WRITE stats to "out" +static void DumpStats(TCMalloc_Printer* out, int level) { + TCMallocStats stats; + uint64_t class_count[kNumClasses]; + ExtractStats(&stats, (level >= 2 ? 
class_count : NULL)); + + if (level >= 2) { + out->printf("------------------------------------------------\n"); + uint64_t cumulative = 0; + for (int cl = 0; cl < kNumClasses; ++cl) { + if (class_count[cl] > 0) { + uint64_t class_bytes = class_count[cl] * ByteSizeForClass(cl); + cumulative += class_bytes; + out->printf("class %3d [ %8" PRIuS " bytes ] : " + "%8" LLU " objs; %5.1f MB; %5.1f cum MB\n", + cl, ByteSizeForClass(cl), + class_count[cl], + class_bytes / 1048576.0, + cumulative / 1048576.0); + } + } + + SpinLockHolder h(&pageheap_lock); + pageheap->Dump(out); + } + + const uint64_t bytes_in_use = stats.system_bytes + - stats.pageheap_bytes + - stats.central_bytes + - stats.thread_bytes; + + out->printf("------------------------------------------------\n" + "MALLOC: %12" LLU " Heap size\n" + "MALLOC: %12" LLU " Bytes in use by application\n" + "MALLOC: %12" LLU " Bytes free in page heap\n" + "MALLOC: %12" LLU " Bytes free in central cache\n" + "MALLOC: %12" LLU " Bytes free in thread caches\n" + "MALLOC: %12" LLU " Spans in use\n" + "MALLOC: %12" LLU " Thread heaps in use\n" + "MALLOC: %12" LLU " Metadata allocated\n" + "------------------------------------------------\n", + stats.system_bytes, + bytes_in_use, + stats.pageheap_bytes, + stats.central_bytes, + stats.thread_bytes, + uint64_t(span_allocator.inuse()), + uint64_t(threadheap_allocator.inuse()), + stats.metadata_bytes); +} + +static void PrintStats(int level) { + const int kBufferSize = 16 << 10; + char* buffer = new char[kBufferSize]; + TCMalloc_Printer printer(buffer, kBufferSize); + DumpStats(&printer, level); + write(STDERR_FILENO, buffer, strlen(buffer)); + delete[] buffer; +} + +static void** DumpStackTraces() { + // Count how much space we need + int needed_slots = 0; + { + SpinLockHolder h(&pageheap_lock); + for (Span* s = sampled_objects.next; s != &sampled_objects; s = s->next) { + StackTrace* stack = reinterpret_cast<StackTrace*>(s->objects); + needed_slots += 3 + stack->depth; + } + needed_slots += 100; // Slop in case sample grows + needed_slots += needed_slots/8; // An extra 12.5% slop + } + + void** result = new void*[needed_slots]; + if (result == NULL) { + MESSAGE("tcmalloc: could not allocate %d slots for stack traces\n", + needed_slots); + return NULL; + } + + SpinLockHolder h(&pageheap_lock); + int used_slots = 0; + for (Span* s = sampled_objects.next; s != &sampled_objects; s = s->next) { + ASSERT(used_slots < needed_slots); // Need to leave room for terminator + StackTrace* stack = reinterpret_cast<StackTrace*>(s->objects); + if (used_slots + 3 + stack->depth >= needed_slots) { + // No more room + break; + } + + result[used_slots+0] = reinterpret_cast<void*>(1); + result[used_slots+1] = reinterpret_cast<void*>(stack->size); + result[used_slots+2] = reinterpret_cast<void*>(stack->depth); + for (int d = 0; d < stack->depth; d++) { + result[used_slots+3+d] = stack->stack[d]; + } + used_slots += 3 + stack->depth; + } + result[used_slots] = reinterpret_cast<void*>(0); + return result; +} + +// TCMalloc's support for extra malloc interfaces +class TCMallocImplementation : public MallocInterface { + public: + virtual void GetStats(char* buffer, int buffer_length) { + ASSERT(buffer_length > 0); + TCMalloc_Printer printer(buffer, buffer_length); + DumpStats(&printer, 2); + } + + virtual void** ReadStackTraces() { + return DumpStackTraces(); + } + + virtual bool GetNumericProperty(const char* name, size_t* value) { + ASSERT(name != NULL); + + if (strcmp(name, "generic.current_allocated_bytes") == 0) { + 
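// "Current allocated" is accounted by subtraction: everything obtained from
// the system minus whatever sits idle in the thread caches, the central
// free-lists, and the page heap -- the same arithmetic DumpStats() uses
// above for its "Bytes in use by application" line.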
TCMallocStats stats; + ExtractStats(&stats, NULL); + *value = stats.system_bytes + - stats.thread_bytes + - stats.central_bytes + - stats.pageheap_bytes; + return true; + } + + if (strcmp(name, "generic.heap_size") == 0) { + TCMallocStats stats; + ExtractStats(&stats, NULL); + *value = stats.system_bytes; + return true; + } + + if (strcmp(name, "tcmalloc.slack_bytes") == 0) { + // We assume that bytes in the page heap are not fragmented too + // badly, and are therefore available for allocation. + TCMallocStats stats; + ExtractStats(&stats, NULL); + *value = stats.pageheap_bytes; + return true; + } + + if (strcmp(name, "tcmalloc.max_total_thread_cache_bytes") == 0) { + SpinLockHolder l(&pageheap_lock); + *value = overall_thread_cache_size; + return true; + } + + if (strcmp(name, "tcmalloc.current_total_thread_cache_bytes") == 0) { + TCMallocStats stats; + ExtractStats(&stats, NULL); + *value = stats.thread_bytes; + return true; + } + + return false; + } + + virtual bool SetNumericProperty(const char* name, size_t value) { + ASSERT(name != NULL); + + if (strcmp(name, "tcmalloc.max_total_thread_cache_bytes") == 0) { + // Clip the value to a reasonable range + if (value < kMinThreadCacheSize) value = kMinThreadCacheSize; + if (value > (1<<30)) value = (1<<30); // Limit to 1GB + + SpinLockHolder l(&pageheap_lock); + overall_thread_cache_size = static_cast<size_t>(value); + TCMalloc_ThreadCache::RecomputeThreadCacheSize(); + return true; + } + + return false; + } +}; + + + +// The constructor allocates an object to ensure that initialization +// runs before main(), and therefore we do not have a chance to become +// multi-threaded before initialization. We also create the TSD key +// here. Presumably by the time this constructor runs, glibc is in +// good enough shape to handle pthread_key_create(). +// +// The destructor prints stats when the program exits. + +class TCMallocGuard { + public: + TCMallocGuard() { + char *envval; + if ((envval = getenv("TCMALLOC_DEBUG"))) { + TCMallocDebug::level = atoi(envval); + MESSAGE("Set tcmalloc debugging level to %d\n", TCMallocDebug::level); + } + free(malloc(1)); + TCMalloc_ThreadCache::InitTSD(); + free(malloc(1)); + MallocInterface::Register(new TCMallocImplementation); + } + + ~TCMallocGuard() { + const char* env = getenv("MALLOCSTATS"); + if (env != NULL) { + int level = atoi(env); + if (level < 1) level = 1; + PrintStats(level); + } + } +}; +static TCMallocGuard module_enter_exit_hook; + +//------------------------------------------------------------------- +// Helpers for the exported routines below +//------------------------------------------------------------------- + +static Span* DoSampledAllocation(size_t size) { + SpinLockHolder h(&pageheap_lock); + + // Allocate span + Span* span = pageheap->New(pages(size == 0 ? 
1 : size)); + if (span == NULL) { + return NULL; + } + + // Allocate stack trace + StackTrace* stack = stacktrace_allocator.New(); + if (stack == NULL) { + // Sampling failed because of lack of memory + return span; + } + + // Fill stack trace and record properly + stack->depth = GetStackTrace(stack->stack, kMaxStackDepth, 2); + stack->size = size; + span->sample = 1; + span->objects = stack; + DLL_Prepend(&sampled_objects, span); + + return span; +} + +static inline void* do_malloc(size_t size) { + + if (TCMallocDebug::level >= TCMallocDebug::kVerbose) + MESSAGE("In tcmalloc do_malloc(%" PRIuS")\n", size); + // The following call forces module initialization + TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCache(); + if (heap->SampleAllocation(size)) { + Span* span = DoSampledAllocation(size); + if (span == NULL) return NULL; + return reinterpret_cast<void*>(span->start << kPageShift); + } else if (size > kMaxSize) { + // Use page-level allocator + SpinLockHolder h(&pageheap_lock); + Span* span = pageheap->New(pages(size)); + if (span == NULL) return NULL; + return reinterpret_cast<void*>(span->start << kPageShift); + } else { + return heap->Allocate(size); + } +} + +static inline void do_free(void* ptr) { + if (TCMallocDebug::level >= TCMallocDebug::kVerbose) + MESSAGE("In tcmalloc do_free(%p)\n", ptr); + if (ptr == NULL) return; + ASSERT(pageheap != NULL); // Should not call free() before malloc() + const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; + Span* span = pageheap->GetDescriptor(p); + ASSERT(span != NULL); + ASSERT(!span->free); + const size_t cl = span->sizeclass; + if (cl != 0) { + ASSERT(!span->sample); + TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCacheIfPresent(); + if (heap != NULL) { + heap->Deallocate(ptr, cl); + } else { + // Delete directly into central cache + SpinLockHolder h(¢ral_cache[cl].lock_); + central_cache[cl].Insert(ptr); + } + } else { + SpinLockHolder h(&pageheap_lock); + ASSERT(reinterpret_cast<uintptr_t>(ptr) % kPageSize == 0); + ASSERT(span->start == p); + if (span->sample) { + DLL_Remove(span); + stacktrace_allocator.Delete(reinterpret_cast<StackTrace*>(span->objects)); + span->objects = NULL; + } + pageheap->Delete(span); + } +} + +// For use by exported routines below that want specific alignments +// +// Note: this code can be slow, and can significantly fragment memory. +// The expectation is that memalign/posix_memalign/valloc/pvalloc will +// not be invoked very often. This requirement simplifies our +// implementation and allows us to tune for expected allocation +// patterns. +static void* do_memalign(size_t align, size_t size) { + ASSERT((align & (align - 1)) == 0); + ASSERT(align > 0); + if (pageheap == NULL) TCMalloc_ThreadCache::InitModule(); + + // Allocate at least one byte to avoid boundary conditions below + if (size == 0) size = 1; + + if (size <= kMaxSize && align < kPageSize) { + // Search through acceptable size classes looking for one with + // enough alignment. This depends on the fact that + // InitSizeClasses() currently produces several size classes that + // are aligned at powers of two. We will waste time and space if + // we miss in the size class array, but that is deemed acceptable + // since memalign() should be used rarely. 
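// A worked example with made-up class sizes (the real table is whatever
// InitSizeClasses() produced): for do_memalign(32, 40), suppose SizeClass(40)
// lands on a 48-byte class.  48 & 31 != 0, so the loop advances past it and
// stops at the next class whose size is a multiple of 32 -- say 64 bytes --
// and the request is served from that class.  The extra bytes are internal
// fragmentation accepted in exchange for the alignment guarantee.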
+ int cl = SizeClass(size); + while (cl < kNumClasses && ((class_to_size[cl] & (align - 1)) != 0)) { + cl++; + } + if (cl < kNumClasses) { + TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCache(); + return heap->Allocate(class_to_size[cl]); + } + } + + // We will allocate directly from the page heap + SpinLockHolder h(&pageheap_lock); + + if (align <= kPageSize) { + // Any page-level allocation will be fine + // TODO: We could put the rest of this page in the appropriate + // TODO: cache but it does not seem worth it. + Span* span = pageheap->New(pages(size)); + if (span == NULL) return NULL; + return reinterpret_cast<void*>(span->start << kPageShift); + } + + // Allocate extra pages and carve off an aligned portion + const int alloc = pages(size + align); + Span* span = pageheap->New(alloc); + if (span == NULL) return NULL; + + // Skip starting portion so that we end up aligned + int skip = 0; + while ((((span->start+skip) << kPageShift) & (align - 1)) != 0) { + skip++; + } + ASSERT(skip < alloc); + if (skip > 0) { + Span* rest = pageheap->Split(span, skip); + pageheap->Delete(span); + span = rest; + } + + // Skip trailing portion that we do not need to return + const int needed = pages(size); + ASSERT(span->length >= needed); + if (span->length > needed) { + Span* trailer = pageheap->Split(span, needed); + pageheap->Delete(trailer); + } + return reinterpret_cast<void*>(span->start << kPageShift); +} + +//------------------------------------------------------------------- +// Exported routines +//------------------------------------------------------------------- + +// CAVEAT: The code structure below ensures that MallocHook methods are always +// called from the stack frame of the invoked allocation function. +// heap-checker.cc depends on this to start a stack trace from +// the call to the (de)allocation function. + +extern "C" void* malloc(size_t size) { + void* result = do_malloc(size); + MallocHook::InvokeNewHook(result, size); + return result; +} + +extern "C" void free(void* ptr) { + MallocHook::InvokeDeleteHook(ptr); + do_free(ptr); +} + +extern "C" void* calloc(size_t n, size_t elem_size) { + void* result = do_malloc(n * elem_size); + if (result != NULL) { + memset(result, 0, n * elem_size); + } + MallocHook::InvokeNewHook(result, n * elem_size); + return result; +} + +extern "C" void cfree(void* ptr) { + MallocHook::InvokeDeleteHook(ptr); + do_free(ptr); +} + +extern "C" void* realloc(void* old_ptr, size_t new_size) { + if (old_ptr == NULL) { + void* result = do_malloc(new_size); + MallocHook::InvokeNewHook(result, new_size); + return result; + } + if (new_size == 0) { + MallocHook::InvokeDeleteHook(old_ptr); + do_free(old_ptr); + return NULL; + } + + // Get the size of the old entry + const PageID p = reinterpret_cast<uintptr_t>(old_ptr) >> kPageShift; + Span* span = pageheap->GetDescriptor(p); + size_t old_size; + if (span->sizeclass != 0) { + old_size = ByteSizeForClass(span->sizeclass); + } else { + old_size = span->length << kPageShift; + } + + // Reallocate if the new size is larger than the old size, + // or if the new size is significantly smaller than the old size. + if ((new_size > old_size) || (AllocationSize(new_size) < old_size)) { + // Need to reallocate + void* new_ptr = do_malloc(new_size); + if (new_ptr == NULL) { + return NULL; + } + MallocHook::InvokeNewHook(new_ptr, new_size); + memcpy(new_ptr, old_ptr, ((old_size < new_size) ? 
old_size : new_size)); + MallocHook::InvokeDeleteHook(old_ptr); + do_free(old_ptr); + return new_ptr; + } else { + return old_ptr; + } +} + +#ifndef COMPILER_INTEL +#define OPNEW_THROW +#define OPDELETE_THROW +#else +#define OPNEW_THROW throw(std::bad_alloc) +#define OPDELETE_THROW throw() +#endif + +void* operator new(size_t size) OPNEW_THROW { + void* p = do_malloc(size); + if (p == NULL) { + MESSAGE("Unable to allocate %" PRIuS " bytes: new failed\n", size); + abort(); + } + MallocHook::InvokeNewHook(p, size); + return p; +} + +void operator delete(void* p) OPDELETE_THROW { + MallocHook::InvokeDeleteHook(p); + do_free(p); +} + +void* operator new[](size_t size) OPNEW_THROW { + void* p = do_malloc(size); + if (p == NULL) { + MESSAGE("Unable to allocate %" PRIuS " bytes: new failed\n", size); + abort(); + } + MallocHook::InvokeNewHook(p, size); + return p; +} + +void operator delete[](void* p) OPDELETE_THROW { + MallocHook::InvokeDeleteHook(p); + do_free(p); +} + +extern "C" void* memalign(size_t align, size_t size) { + void* result = do_memalign(align, size); + MallocHook::InvokeNewHook(result, size); + return result; +} + +extern "C" int posix_memalign(void** result_ptr, size_t align, size_t size) { + if (((align % sizeof(void*)) != 0) || + ((align & (align - 1)) != 0) || + (align == 0)) { + return EINVAL; + } + + void* result = do_memalign(align, size); + MallocHook::InvokeNewHook(result, size); + if (result == NULL) { + return ENOMEM; + } else { + *result_ptr = result; + return 0; + } +} + +static size_t pagesize = 0; + +extern "C" void* valloc(size_t size) { + // Allocate page-aligned object of length >= size bytes + if (pagesize == 0) pagesize = getpagesize(); + void* result = do_memalign(pagesize, size); + MallocHook::InvokeNewHook(result, size); + return result; +} + +extern "C" void* pvalloc(size_t size) { + // Round up size to a multiple of pagesize + if (pagesize == 0) pagesize = getpagesize(); + size = (size + pagesize - 1) & ~(pagesize - 1); + void* result = do_memalign(pagesize, size); + MallocHook::InvokeNewHook(result, size); + return result; +} + +extern "C" void malloc_stats(void) { + PrintStats(1); +} + +//------------------------------------------------------------------- +// Some library routines on RedHat 9 allocate memory using malloc() +// and free it using __libc_free() (or vice-versa). Since we provide +// our own implementations of malloc/free, we need to make sure that +// the __libc_XXX variants also point to the same implementations. 
+//------------------------------------------------------------------- + +extern "C" { +#if defined(__GNUC__) && defined(HAVE___ATTRIBUTE__) + // Potentially faster variants that use the gcc alias extension +#define ALIAS(x) __attribute__ ((weak, alias (x))) + void* __libc_malloc(size_t size) ALIAS("malloc"); + void __libc_free(void* ptr) ALIAS("free"); + void* __libc_realloc(void* ptr, size_t size) ALIAS("realloc"); + void* __libc_calloc(size_t n, size_t size) ALIAS("calloc"); + void __libc_cfree(void* ptr) ALIAS("cfree"); + void* __libc_memalign(size_t align, size_t s) ALIAS("memalign"); + void* __libc_valloc(size_t size) ALIAS("valloc"); + void* __libc_pvalloc(size_t size) ALIAS("pvalloc"); + void* __posix_memalign(void** r, size_t a, size_t s) ALIAS("posix_memalign"); +#undef ALIAS +#else + // Portable wrappers + void* __libc_malloc(size_t size) { return malloc(size); } + void __libc_free(void* ptr) { free(ptr); } + void* __libc_realloc(void* ptr, size_t size) { return realloc(ptr, size); } + void* __libc_calloc(size_t n, size_t size) { return calloc(n, size); } + void __libc_cfree(void* ptr) { cfree(ptr); } + void* __libc_memalign(size_t align, size_t s) { return memalign(align, s); } + void* __libc_valloc(size_t size) { return valloc(size); } + void* __libc_pvalloc(size_t size) { return pvalloc(size); } + void* __posix_memalign(void** r, size_t a, size_t s) { + return posix_memalign(r, a, s); + } +#endif +} diff --git a/src/tests/addressmap_unittest.cc b/src/tests/addressmap_unittest.cc new file mode 100644 index 0000000..555ce9c --- /dev/null +++ b/src/tests/addressmap_unittest.cc @@ -0,0 +1,113 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// --- +// Author: Sanjay Ghemawat + +#include <vector> +#include <algorithm> +#include "addressmap-inl.h" +#include "base/logging.h" +#include "base/commandlineflags.h" + +DEFINE_int32(iters, 20, "Number of test iterations"); +DEFINE_int32(N, 100000, "Number of elements to test per iteration"); + +using std::vector; +using std::random_shuffle; + +int main(int argc, char** argv) { + // Get a bunch of pointers + const int N = FLAGS_N; + static const int kObjectLength = 19; + vector<char*> ptrs; + for (int i = 0; i < N; ++i) { + ptrs.push_back(new char[kObjectLength]); + } + + for (int x = 0; x < FLAGS_iters; ++x) { + // Permute pointers to get rid of allocation order issues + random_shuffle(ptrs.begin(), ptrs.end()); + + AddressMap<int> map(malloc, free); + int result; + + // Insert a bunch of entries + for (int i = 0; i < N; ++i) { + void* p = ptrs[i]; + CHECK(!map.Find(p, &result)); + map.Insert(p, i); + CHECK(map.Find(p, &result)); + CHECK_EQ(result, i); + map.Insert(p, i + N); + CHECK(map.Find(p, &result)); + CHECK_EQ(result, i + N); + } + + // Delete the even entries + for (int i = 0; i < N; i += 2) { + void* p = ptrs[i]; + CHECK(map.FindAndRemove(p, &result)); + CHECK_EQ(result, i + N); + } + + // Lookup the odd entries and adjust them + for (int i = 1; i < N; i += 2) { + void* p = ptrs[i]; + CHECK(map.Find(p, &result)); + CHECK_EQ(result, i + N); + map.Insert(p, i + 2*N); + CHECK(map.Find(p, &result)); + CHECK_EQ(result, i + 2*N); + } + + // Insert even entries back + for (int i = 0; i < N; i += 2) { + void* p = ptrs[i]; + map.Insert(p, i + 2*N); + CHECK(map.Find(p, &result)); + CHECK_EQ(result, i + 2*N); + } + + // Check all entries + for (int i = 0; i < N; ++i) { + void* p = ptrs[i]; + CHECK(map.Find(p, &result)); + CHECK_EQ(result, i + 2*N); + } + + } + + for (int i = 0; i < N; ++i) { + delete[] ptrs[i]; + } + + printf("PASS\n"); + return 0; +} diff --git a/src/tests/heap-checker-death_unittest.sh b/src/tests/heap-checker-death_unittest.sh new file mode 100755 index 0000000..1f58173 --- /dev/null +++ b/src/tests/heap-checker-death_unittest.sh @@ -0,0 +1,55 @@ +#!/bin/sh + +# Copyright (c) 2005, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# --- +# Run the heap checker unittest in a mode where it is supposed to crash and +# return an error if it doesn't + +# When the environment variable HEAP_CHECKER_TEST_LEAK is set, +# heap-checker_unittest should leak some memory and then crash on exit. +HEAPCHECK_TEST_LEAK=1 ./heap-checker_unittest + +if [ "$?" == 0 ] ; then + echo >&2 "Heap checker unittest did not crash when it was supposed to."; + exit 1; +fi + +# When the environment variable HEAP_CHECKER_TEST_LOOP_LEAK is set, +# heap-checker_unittest should allocate two pointers that form a loop, leak +# them, and crash on exit. +HEAPCHECK_TEST_LOOP_LEAK=1 ./heap-checker_unittest + +if [ "$?" == 0 ] ; then + echo >&2 "Heap checker unittest did not crash when it was supposed to."; + exit 1; +fi + +echo PASS diff --git a/src/tests/heap-checker_unittest.cc b/src/tests/heap-checker_unittest.cc new file mode 100644 index 0000000..b8893f4 --- /dev/null +++ b/src/tests/heap-checker_unittest.cc @@ -0,0 +1,526 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// --- +// Author: Maxim Lifantsev +// +// Running: +// ./heap-checker_unittest +// +// If the unittest crashes because it can't find pprof, try: +// PPROF_PATH=/usr/local/someplace/bin/pprof ./heap-checker_unittest +// +// To test that the whole-program heap checker will actually cause a leak, try: +// HEAPCHECK_TEST_LEAK= ./heap-checker_unittest +// HEAPCHECK_TEST_LOOP_LEAK= ./heap-checker_unittest +// +// Note: Both of the above commands *should* abort with an error message. + +#include "google/perftools/config.h" +#include "base/logging.h" +#include "base/googleinit.h" + +#include <google/heap-profiler.h> +#include <google/heap-checker.h> + +#include <stdlib.h> +#include <vector> +#include <string> + +using namespace std; + +// Use an int* variable so that the compiler does not complain. +static void Use(int* foo) { CHECK(foo == foo); } + +// not deallocates +static void TestHeapLeakCheckerDeathSimple() { + HeapLeakChecker check("death_simple"); + int* foo = new int[100]; + void* bar = malloc(300); + Use(foo); + CHECK_EQ(check.BriefSameHeap(), false); + delete [] foo; + free(bar); +} + +// deallocates more than allocates +static void TestHeapLeakCheckerDeathInverse() { + int* bar = new int[250]; + Use(bar); + HeapLeakChecker check("death_inverse"); + int* foo = new int[100]; + Use(foo); + delete [] bar; + CHECK_EQ(check.BriefSameHeap(), false); + delete [] foo; +} + +// deallocates more than allocates +static void TestHeapLeakCheckerDeathNoLeaks() { + int* foo = new int[100]; + int* bar = new int[250]; + Use(foo); + Use(bar); + HeapLeakChecker check("death_noleaks"); + delete [] bar; + CHECK_EQ(check.NoLeaks(), true); + delete [] foo; +} + +// have less objecs +static void TestHeapLeakCheckerDeathCountLess() { + int* bar1 = new int[50]; + int* bar2 = new int[50]; + Use(bar1); + Use(bar2); + HeapLeakChecker check("death_count_less"); + int* foo = new int[100]; + Use(foo); + delete [] bar1; + delete [] bar2; + CHECK_EQ(check.BriefSameHeap(), false); + delete [] foo; +} + +// have more objecs +static void TestHeapLeakCheckerDeathCountMore() { + int* foo = new int[100]; + Use(foo); + HeapLeakChecker check("death_count_more"); + int* bar1 = new int[50]; + int* bar2 = new int[50]; + Use(bar1); + Use(bar2); + delete [] foo; + CHECK_EQ(check.BriefSameHeap(), false); + delete [] bar1; + delete [] bar2; +} + +static void TestHeapLeakChecker() { + { HeapLeakChecker check("trivial"); + int foo = 5; + Use(&foo); + CHECK(check.BriefSameHeap()); + } + { HeapLeakChecker check("simple"); + int* foo = new int[100]; + int* bar = new int[200]; + Use(foo); + Use(bar); + delete [] foo; + delete [] bar; + CHECK(check.BriefSameHeap()); + } +} + +// no false positives from pprof +static void TestHeapLeakCheckerPProf() { + { HeapLeakChecker check("trivial_p"); + int foo = 5; + Use(&foo); + CHECK(check.SameHeap()); + } + { HeapLeakChecker check("simple_p"); + int* foo = new int[100]; + int* bar = new int[200]; + Use(foo); + Use(bar); + delete [] foo; + delete [] bar; + CHECK(check.SameHeap()); + } +} + +static void TestHeapLeakCheckerTrick() { + int* bar1 = new int[60]; + int* bar2 = new int[40]; + Use(bar1); + Use(bar2); + HeapLeakChecker check("trick"); + int* foo1 = new int[70]; + int* foo2 = new int[30]; + Use(foo1); + Use(foo2); + delete [] bar1; + delete [] bar2; + CHECK(check.BriefSameHeap()); + delete [] foo1; + delete [] foo2; +} + +// no false negatives from pprof +static void TestHeapLeakCheckerDeathTrick() { + int* bar1 = new int[60]; + int* bar2 = new int[40]; + Use(bar1); + Use(bar2); + 
HeapLeakChecker check("death_trick"); + int* foo1 = new int[70]; + int* foo2 = new int[30]; + Use(foo1); + Use(foo2); + delete [] bar1; + delete [] bar2; + // If this check fails, you are probably running a stripped binary + CHECK_EQ(check.SameHeap(), false); // pprof checking should catch it + delete [] foo1; + delete [] foo2; +} + +static void TransLeaks() { + new char; +} + +static void DisabledLeaks() { + HeapLeakChecker::DisableChecksUp(1); + TransLeaks(); + new int[3]; +} + +static void RangeDisabledLeaks() { + void* start_address = HeapLeakChecker::GetDisableChecksStart(); + new int[3]; + TransLeaks(); + HeapLeakChecker::DisableChecksToHereFrom(start_address); +} + +static void* RunDisabledLeaks(void* a) { + DisabledLeaks(); + RangeDisabledLeaks(); + return a; +} + +static void ThreadDisabledLeaks() { + pthread_t tid; + pthread_attr_t attr; + CHECK(pthread_attr_init(&attr) == 0); + CHECK(pthread_create(&tid, &attr, RunDisabledLeaks, NULL) == 0); + void* res; + CHECK(pthread_join(tid, &res) == 0); +} + +static void TestHeapLeakCheckerDisabling() { + HeapLeakChecker check("disabling"); + + RunDisabledLeaks(NULL); + RunDisabledLeaks(NULL); + ThreadDisabledLeaks(); + RunDisabledLeaks(NULL); + ThreadDisabledLeaks(); + ThreadDisabledLeaks(); + + CHECK_EQ(check.SameHeap(), true); +} + + +REGISTER_MODULE_INITIALIZER(heap_checker_unittest, { + HeapLeakChecker::DisableChecksIn("NamedDisabledLeaks"); +}); + +static void NamedDisabledLeaks() { + // We are testing two cases in this function: calling new[] directly and + // calling it at one level deep (inside TransLeaks). We want to always call + // TransLeaks() first, because otherwise the compiler may turn this into a + // tail recursion when compiling in optimized mode. This messes up the stack + // trace. + // TODO: Is there any way to prevent this from happening in the general case + // (i.e. user code)? + TransLeaks(); + new float[5]; +} + +static void NamedTwoDisabledLeaks() { + static bool first = true; + if (first) { + HeapLeakChecker::DisableChecksIn("NamedTwoDisabledLeaks"); + first = false; + } + TransLeaks(); + new double[5]; +} + +static void NamedThreeDisabledLeaks() { + TransLeaks(); + new float[5]; +} + +static void* RunNamedDisabledLeaks(void* a) { + void* start_address = NULL; + if (a) start_address = HeapLeakChecker::GetDisableChecksStart(); + + NamedDisabledLeaks(); + NamedTwoDisabledLeaks(); + NamedThreeDisabledLeaks(); + + // TODO(maxim): do not need this if we make pprof work in automated test runs + if (a) HeapLeakChecker::DisableChecksToHereFrom(start_address); + + return a; +} + +static void ThreadNamedDisabledLeaks(void* a = NULL) { + pthread_t tid; + pthread_attr_t attr; + CHECK(pthread_attr_init(&attr) == 0); + CHECK(pthread_create(&tid, &attr, RunNamedDisabledLeaks, a) == 0); + void* res; + CHECK(pthread_join(tid, &res) == 0); +} + +static void TestHeapLeakCheckerNamedDisabling() { + HeapLeakChecker::DisableChecksIn("NamedThreeDisabledLeaks"); + + HeapLeakChecker check("named_disabling"); + + RunNamedDisabledLeaks(NULL); + RunNamedDisabledLeaks(NULL); + ThreadNamedDisabledLeaks(); + RunNamedDisabledLeaks(NULL); + ThreadNamedDisabledLeaks(); + ThreadNamedDisabledLeaks(); + + // If this check fails, you are probably be running a stripped binary. + CHECK_EQ(check.SameHeap(), true); // pprof checking should allow it +} + +// The code from here to main() +// is to test that objects that are reachable from global +// variables are not reported as leaks, +// with the few exceptions like multiple-inherited objects. 
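// Why multiply-inherited objects are the exception: converting a ClassD*
// (defined below) to its second base adjusts the pointer so that it lands
// in the middle of the allocation, and a liveness scan keyed on object start
// addresses will not recognize such an interior pointer as a reference to
// the whole object.  A minimal illustration -- the non-zero offset is
// typical ABI behaviour, not something this test relies on numerically:
//
//   ClassD*  d  = new ClassD;
//   ClassB2* b2 = d;               // adjusted past the ClassD1 subobject
//   // (void*)b2 != (void*)d under the usual object layouts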
+ +string* live_leak = NULL; +string* live_leak2 = new string("ss"); +vector<int>* live_leak3 = new vector<int>(10,10); +const char* live_leak4 = new char[5]; +vector<int> live_leak5(20,10); +const vector<int> live_leak6(30,10); +const string* live_leak_arr1 = new string[5]; + +class ClassA { + public: + ClassA(int a) : ptr(NULL) { } + mutable char* ptr; +}; + +const ClassA live_leak7(1); + +template<class C> +class TClass { + public: + TClass(int a) : ptr(NULL) { } + mutable C val; + mutable C* ptr; +}; + +const TClass<string> live_leak8(1); + +class ClassB { + public: + ClassB() { } + int b[10]; + virtual void f() { } + virtual ~ClassB() { } +}; + +class ClassB2 { + public: + ClassB2() { } + int b2[10]; + virtual void f2() { } + virtual ~ClassB2() { } +}; + +class ClassD1 : public ClassB { + int d1[10]; + virtual void f() { } +}; + +class ClassD2 : public ClassB2 { + int d2[10]; + virtual void f2() { } +}; + +class ClassD : public ClassD1, public ClassD2 { + int d[10]; + virtual void f() { } + virtual void f2() { } +}; + +ClassB* live_leak_b; +ClassD1* live_leak_d1; +ClassD2* live_leak_d2; +ClassD* live_leak_d; + +ClassB* live_leak_b_d1; +ClassB2* live_leak_b2_d2; +ClassB* live_leak_b_d; +ClassB2* live_leak_b2_d; + +ClassD1* live_leak_d1_d; +ClassD2* live_leak_d2_d; + +static void IgnoredLeaks() { + int* p = new int; + HeapLeakChecker::IgnoreObject(p); + int** leak = new int*; + HeapLeakChecker::IgnoreObject(leak); + *leak = new int; + HeapLeakChecker::UnIgnoreObject(p); + delete p; +} + +static void TestHeapLeakCheckerLiveness() { + live_leak_b = new ClassB; + live_leak_d1 = new ClassD1; + live_leak_d2 = new ClassD2; + live_leak_d = new ClassD; + + live_leak_b_d1 = new ClassD1; + live_leak_b2_d2 = new ClassD2; + live_leak_b_d = new ClassD; + live_leak_b2_d = new ClassD; + + live_leak_d1_d = new ClassD; + live_leak_d2_d = new ClassD; + + +#ifndef NDEBUG + HeapLeakChecker::IgnoreObject((ClassD*)live_leak_b2_d); + HeapLeakChecker::IgnoreObject((ClassD*)live_leak_d2_d); + // These two do not get deleted with liveness flood + // because the base class pointer points inside of the objects + // in such cases of multiple inheritance. + // Luckily google code does not use multiple inheritance almost at all. + // Somehow this does not happen in optimized mode. +#endif + + live_leak = new string("live_leak"); + live_leak3->insert(live_leak3->begin(), 20, 20); + live_leak2->append(*live_leak); + live_leak7.ptr = new char [77]; + live_leak8.ptr = new string("aaa"); + live_leak8.val = string("bbbbbb"); + + IgnoredLeaks(); + IgnoredLeaks(); + IgnoredLeaks(); +} + +// Check that we don't give false negatives or positives on leaks from the STL +// allocator. +void TestHeapLeakCheckerSTL() { + HeapLeakChecker stl_check("stl"); + { + string x = "banana"; + for (int i = 0; i < 10000; i++) + x += "na"; + } + CHECK(stl_check.SameHeap()); +} + +void TestHeapLeakCheckerSTLInverse() { + HeapLeakChecker inverse_stl_checker("inverse_stl"); + string x = "queue"; + for (int i = 0; i < 1000; i++) + x += "ue"; + CHECK_EQ(inverse_stl_checker.SameHeap(), false); +} + +int main(int argc, char** argv) { + // This needs to be set before InternalInitStart(), which makes a local copy + if (getenv("PPROF_PATH")) + HeapLeakChecker::set_pprof_path(getenv("PPROF_PATH")); + + // This needs to be set early because it determines the behaviour of + // InternalInitStart(). 
+ string heap_check_type; + if (getenv("HEAPCHECK_MODE")) + heap_check_type = getenv("HEAPCHECK_MODE"); + else + heap_check_type = "strict"; + + HeapLeakChecker::StartFromMain(heap_check_type); + + LogPrintf(INFO, "In main()"); + + // The following two modes test whether the whole-program leak checker + // appropriately detects leaks on exit. + if (getenv("HEAPCHECK_TEST_LEAK")) { + void* arr = new vector<int>(10, 10); + LogPrintf(INFO, "Leaking %p", arr); + fprintf(stdout, "PASS\n"); + return 0; + } + + if (getenv("HEAPCHECK_TEST_LOOP_LEAK")) { + void** arr1 = new void*[2]; + void** arr2 = new void*[2]; + arr1[1] = (void*)arr2; + arr2[1] = (void*)arr1; + LogPrintf(INFO, "Loop leaking %p and %p", arr1, arr2); + fprintf(stdout, "PASS\n"); + return 0; + } + + TestHeapLeakCheckerLiveness(); + + HeapProfilerStart("/tmp/leaks"); + HeapLeakChecker heap_check("all"); + + TestHeapLeakChecker(); + TestHeapLeakCheckerTrick(); + + TestHeapLeakCheckerDeathSimple(); + TestHeapLeakCheckerDeathInverse(); + TestHeapLeakCheckerDeathNoLeaks(); + TestHeapLeakCheckerDeathCountLess(); + TestHeapLeakCheckerDeathCountMore(); + + TestHeapLeakCheckerDeathTrick(); + TestHeapLeakCheckerPProf(); + + TestHeapLeakCheckerDisabling(); + TestHeapLeakCheckerNamedDisabling(); + + TestHeapLeakCheckerSTL(); + TestHeapLeakCheckerSTLInverse(); + + int a; + ThreadNamedDisabledLeaks(&a); + + CHECK(heap_check.SameHeap()); + + HeapLeakChecker::IgnoreObject(new vector<int>(10, 10)); + // This checks both that IgnoreObject works, and + // and the fact that we don't drop such leaks as live for some reason. + + fprintf(stdout, "PASS\n"); + return 0; +} diff --git a/src/tests/profiler_unittest.cc b/src/tests/profiler_unittest.cc new file mode 100644 index 0000000..9a8aac8 --- /dev/null +++ b/src/tests/profiler_unittest.cc @@ -0,0 +1,142 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Craig Silverstein +// +// Does some simple arithmetic and a few libc routines, so we can profile it. 
+// Define WITH_THREADS to add pthread functionality as well (otherwise, btw, +// the num_threads argument to this program is ingored). + +#include "google/perftools/config.h" +#include <stdio.h> +#include <stdlib.h> +#include "google/profiler.h" + +static int result = 0; + +#ifdef WITH_THREADS +#include <pthread.h> + +pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; + +#define LOCK pthread_mutex_lock(&mutex) /* ignore errors; oh well */ +#define UNLOCK pthread_mutex_unlock(&mutex) /* ignore errors; oh well */ + +void* test_other_thread(void* data) { + ProfilerRegisterThread(); + + int iters = *(int*)data; + int i, m; + char b[128]; + for (m = 0; m < 1000000; ++m) { // run millions of times + for (i = 0; i < iters; ++i ) { + LOCK; + result ^= i; + UNLOCK; + } + LOCK; + snprintf(b, sizeof(b), "%d", result); // get some libc action + UNLOCK; + } + + return NULL; // success +} + +#else /* WITH_THREADS */ + +#define LOCK +#define UNLOCK + +#endif /* WITH_THREADS */ + +static int test_main_thread(int iters) { + int i, m; + char b[128]; + for (m = 0; m < 1000000; ++m) { // run millions of times + for (i = 0; i < iters; ++i ) { + LOCK; + result ^= i; + UNLOCK; + } + LOCK; + snprintf(b, sizeof(b), "%d", result); // get some libc action + UNLOCK; + } + return result; +} + +int main(int argc, char** argv) { + if ( argc <= 1 ) { + fprintf(stderr, "USAGE: %s <iters> [num_threads] [filename]\n", argv[0]); + fprintf(stderr, " iters: How many million times to run the XOR test.\n"); + fprintf(stderr, " num_threads: how many concurrent threads.\n"); + fprintf(stderr, " 0 or 1 for single-threaded mode.\n"); + fprintf(stderr, " filename: The name of the output profile.\n"); + fprintf(stderr, (" If you don't specify, set CPUPROFILE " + "in the environment instead!\n")); + return 1; + } + + int iters = atoi(argv[1]); + int num_threads = 1; + const char* filename = NULL; + if (argc > 2) { + num_threads = atoi(argv[2]); + } + if (argc > 3) { + filename = argv[3]; + } + + if (filename) { + ProfilerStart(filename); + } + + test_main_thread(iters); + + ProfilerFlush(); // just because we can + + // The other threads, if any, will run only half as long as the main thread +#ifdef WITH_THREADS + for (; num_threads > 1; --num_threads) { + int thread_id; + pthread_t thr; + thread_id = pthread_create(&thr, NULL, &test_other_thread, &iters); + } +#endif + + int r = test_main_thread(iters); + printf("The XOR test returns %d\n", r); + + if (filename) { + ProfilerStop(); + } + + return 0; +} diff --git a/src/tests/profiler_unittest.sh b/src/tests/profiler_unittest.sh new file mode 100755 index 0000000..91869c9 --- /dev/null +++ b/src/tests/profiler_unittest.sh @@ -0,0 +1,176 @@ +#!/bin/sh + +# Copyright (c) 2005, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# --- +# Author: Craig Silverstein +# +# Runs the 4 profiler unittests and makes sure their profiles look +# appropriate. Takes three arguments: directory holding profilerX_unittest +# scripts, directory holding profilerX_unittest executables, and directory +# holding pprof. +# +# We expect two commandline args, as described below. +# +# We run under the assumption that if $PROFILER1 is run with no +# arguments, it prints a usage line of the form +# USAGE: <actual executable being run> [...] + +if [ -z "$2" ] +then + echo "USAGE: $0 <unittest dir> <pprof dir>" + exit 1 +fi + +UNITTEST_DIR=$1 +PPROF=$2/pprof + +PROFILER1=$UNITTEST_DIR/profiler1_unittest +PROFILER2=$UNITTEST_DIR/profiler2_unittest +PROFILER3=$UNITTEST_DIR/profiler3_unittest +PROFILER4=$UNITTEST_DIR/profiler4_unittest + +TMPDIR=/tmp/profile_info + +# It's meaningful to the profiler, so make sure we know its state +unset CPUPROFILE + +rm -rf $TMPDIR +mkdir $TMPDIR || exit 2 + +num_failures=0 + +# Takes two filenames representing profiles, with their executable scripts, +# and a multiplier, and verifies that the 'contentful' functions in +# each profile take the same time (possibly scaled by the given +# multiplier). "Same" means within 50%, after adding an noise-reducing +# X units to each value -- we're pretty forgiving. +VerifySimilar() { + prof1=$TMPDIR/$1 + # We need to run the script with no args to get the actual exe name + exec1=`$2 2>&1 | awk '{print $2; exit;}'` + prof2=$TMPDIR/$3 + exec2=`$4 2>&1 | awk '{print $2; exit;}'` + mult=$5 + + mthread1=`$PPROF $exec1 $prof1 | grep test_main_thread | awk '{print $1}'` + mthread2=`$PPROF $exec2 $prof2 | grep test_main_thread | awk '{print $1}'` + mthread1_plus=`expr $mthread1 + 5` + mthread2_plus=`expr $mthread2 + 5` + if [ -z "$mthread1" ] || [ -z "$mthread2" ] || \ + [ `expr $mthread1_plus \* $mult` -gt `expr $mthread2_plus \* 2` -o \ + `expr $mthread1_plus \* $mult \* 2` -lt `expr $mthread2_plus` ] + then + echo + echo ">>> profile on $exec1 vs $exec2 with multiplier $mult failed:" + echo "Actual times (in profiling units) were '$mthread1' vs. '$mthread2'" + echo + num_failures=`expr $num_failures + 1` + fi +} + +# Takes a filenames representing a profile, with its executables, +# and a multiplier, and verifies that the main-thread function takes +# the same amount of time as the other-threads function (possibly scaled +# by the given multiplier). Figuring out the multiplier can be tricky, +# since by design the main thread runs twice as long as each of the +# 'other' threads! In any case, "same" means within 70% -- we're pretty +# forgiving. 
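#
# For instance, with mult=2 the comparison below accepts any pair whose ratio
# (main*mult : other) falls between 3:10 and 10:3. Using made-up profiling
# units: main=60, other=100 passes (60*2*3=360 <= 100*10=1000 and
# 60*2*10=1200 >= 100*3=300), whereas main=400, other=100 trips the first
# comparison (400*2*3=2400 > 1000) and counts as a failure.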
+VerifyAcrossThreads() { + prof1=$TMPDIR/$1 + # We need to run the script with no args to get the actual exe name + exec1=`$2 2>&1 | awk '{print $2; exit;}'` + mult=$3 + + mthread=`$PPROF $exec1 $prof1 | grep test_main_thread | awk '{print $1}'` + othread=`$PPROF $exec2 $prof2 | grep test_other_thread | awk '{print $1}'` + if [ -z "$mthread" ] || [ -z "$othread" ] || \ + [ `expr $mthread \* $mult \* 3` -gt `expr $othread \* 10` -o \ + `expr $mthread \* $mult \* 10` -lt `expr $othread \* 3` ] + then + echo + echo ">>> profile on $exec1 vs $exec2 with multiplier $mult failed:" + echo "Actual times (in profiling units) were '$mthread1' vs. '$mthread2'" + echo + num_failures=`expr $num_failures + 1` + fi +} + +echo +echo ">>> WARNING <<<" +echo "This test looks at timing information to determine correctness." +echo "If your system is loaded, the test may spuriously fail." +echo "If the test does fail with an 'Actual times' error, try running again." +echo + +# profiler1 is a non-threaded version +$PROFILER1 50 1 $TMPDIR/p1 +$PROFILER1 100 1 $TMPDIR/p2 +VerifySimilar p1 $PROFILER1 p2 $PROFILER1 2 + +# Verify the same thing works if we statically link +$PROFILER2 50 1 $TMPDIR/p3 +$PROFILER2 100 1 $TMPDIR/p4 +VerifySimilar p3 $PROFILER2 p4 $PROFILER2 2 + +# Verify the same thing works if we specify via CPUPROFILE +CPUPROFILE=$TMPDIR/p5 $PROFILER2 50 +CPUPROFILE=$TMPDIR/p6 $PROFILER2 100 +VerifySimilar p5 $PROFILER2 p6 $PROFILER2 2 + +# When we compile with threads, things take a lot longer even when we only use 1 +CPUPROFILE=$TMPDIR/p5b $PROFILER3 10 +CPUPROFILE=$TMPDIR/p5c $PROFILER3 20 +VerifySimilar p5b $PROFILER3 p5c $PROFILER3 2 + +# Now try what happens when we use threads +$PROFILER3 5 2 $TMPDIR/p7 +$PROFILER3 10 2 $TMPDIR/p8 +VerifySimilar p7 $PROFILER3 p8 $PROFILER3 2 + +$PROFILER4 5 2 $TMPDIR/p9 +$PROFILER4 10 2 $TMPDIR/p10 +VerifySimilar p9 $PROFILER4 p10 $PROFILER4 2 + +# More threads! +$PROFILER4 2 3 $TMPDIR/p9 +$PROFILER4 4 3 $TMPDIR/p10 +VerifySimilar p9 $PROFILER4 p10 $PROFILER4 2 + +# Compare how much time the main thread takes compared to the other threads +# Recall the main thread runs twice as long as the other threads, by design. +$PROFILER4 2 4 $TMPDIR/p11 +VerifyAcrossThreads p11 $PROFILER4 2 + + +rm -rf $TMPDIR # clean up + +echo "Tests finished with $num_failures failures" +exit $num_failures diff --git a/src/tests/ptmalloc/COPYRIGHT b/src/tests/ptmalloc/COPYRIGHT new file mode 100644 index 0000000..4615c26 --- /dev/null +++ b/src/tests/ptmalloc/COPYRIGHT @@ -0,0 +1,19 @@ +Copyright (c) 2001-2004 Wolfram Gloger + +Permission to use, copy, modify, distribute, and sell this software +and its documentation for any purpose is hereby granted without fee, +provided that (i) the above copyright notices and this permission +notice appear in all copies of the software and related documentation, +and (ii) the name of Wolfram Gloger may not be used in any advertising +or publicity relating to the software. + +THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, +EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY +WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + +IN NO EVENT SHALL WOLFRAM GLOGER BE LIABLE FOR ANY SPECIAL, +INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, OR ANY +DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY +OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +PERFORMANCE OF THIS SOFTWARE. 
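The next header, lran2.h, is the tiny linear-congruential generator that
drives all of the ptmalloc stress tests further down. A minimal usage sketch,
assuming only what that header and the tests' own RANDOM wrapper macro
provide (the seed value here is arbitrary):

#include <stdio.h>
#include "lran2.h"

#define RANDOM(d, s) (lran2(d) % (s))    // same wrapper macro as t-test1.c/t-test2.c

int main() {
  struct lran2_st rng;
  lran2_init(&rng, 12345);               // arbitrary seed; the tests derive one per thread
  for (int i = 0; i < 5; ++i)
    printf("%ld\n", RANDOM(&rng, 1000)); // pseudo-random value in [0, 999]
  return 0;
}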
diff --git a/src/tests/ptmalloc/lran2.h b/src/tests/ptmalloc/lran2.h new file mode 100644 index 0000000..cea9920 --- /dev/null +++ b/src/tests/ptmalloc/lran2.h @@ -0,0 +1,51 @@ +/* lran2.h + * by Wolfram Gloger 1996. + * + * A small, portable pseudo-random number generator. + */ + +#ifndef _LRAN2_H +#define _LRAN2_H + +#define LRAN2_MAX 714025l /* constants for portable */ +#define IA 1366l /* random number generator */ +#define IC 150889l /* (see e.g. `Numerical Recipes') */ + +struct lran2_st { + long x, y, v[97]; +}; + +static void +lran2_init(struct lran2_st* d, long seed) +{ + long x; + int j; + + x = (IC - seed) % LRAN2_MAX; + if(x < 0) x = -x; + for(j=0; j<97; j++) { + x = (IA*x + IC) % LRAN2_MAX; + d->v[j] = x; + } + d->x = (IA*x + IC) % LRAN2_MAX; + d->y = d->x; +} + +#ifdef __GNUC__ +__inline__ +#endif +static long +lran2(struct lran2_st* d) +{ + int j = (d->y % 97); + + d->y = d->v[j]; + d->x = (IA*d->x + IC) % LRAN2_MAX; + d->v[j] = d->x; + return d->y; +} + +#undef IA +#undef IC + +#endif diff --git a/src/tests/ptmalloc/malloc-machine.h b/src/tests/ptmalloc/malloc-machine.h new file mode 100644 index 0000000..f32ca35 --- /dev/null +++ b/src/tests/ptmalloc/malloc-machine.h @@ -0,0 +1,132 @@ +/* Basic platform-independent macro definitions for mutexes, + thread-specific data and parameters for malloc. + Posix threads (pthreads) version. + Copyright (C) 2004 Wolfram Gloger <wg@malloc.de>. + +Permission to use, copy, modify, distribute, and sell this software +and its documentation for any purpose is hereby granted without fee, +provided that (i) the above copyright notices and this permission +notice appear in all copies of the software and related documentation, +and (ii) the name of Wolfram Gloger may not be used in any advertising +or publicity relating to the software. + +THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, +EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY +WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + +IN NO EVENT SHALL WOLFRAM GLOGER BE LIABLE FOR ANY SPECIAL, +INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, OR ANY +DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY +OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef _PTHREAD_MALLOC_MACHINE_H +#define _PTHREAD_MALLOC_MACHINE_H + +#include <pthread.h> + +#undef thread_atfork_static + +/* Use fast inline spinlocks with gcc. 
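   Either variant is driven through the same four operations; a typical
   caller (bump() is a hypothetical helper; mutex_t, MUTEX_INITIALIZER,
   mutex_lock and mutex_unlock are the names defined just below) looks like:

     static mutex_t lock = MUTEX_INITIALIZER;
     static long counter;

     void bump(void) {
       mutex_lock(&lock);
       counter++;             // critical section
       mutex_unlock(&lock);
     }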
*/ +#if (defined __i386__ || defined __x86_64__) && defined __GNUC__ && \ + !defined USE_NO_SPINLOCKS + +#include <time.h> +#include <sched.h> + +typedef struct { + volatile unsigned int lock; + int pad0_; +} mutex_t; + +#define MUTEX_INITIALIZER { 0 } +#define mutex_init(m) ((m)->lock = 0) +static inline int mutex_lock(mutex_t *m) { + int cnt = 0, r; + struct timespec tm; + + for(;;) { + __asm__ __volatile__ + ("xchgl %0, %1" + : "=r"(r), "=m"(m->lock) + : "0"(1), "m"(m->lock) + : "memory"); + if(!r) + return 0; + if(cnt < 50) { + sched_yield(); + cnt++; + } else { + tm.tv_sec = 0; + tm.tv_nsec = 2000001; + nanosleep(&tm, NULL); + cnt = 0; + } + } +} +static inline int mutex_trylock(mutex_t *m) { + int r; + + __asm__ __volatile__ + ("xchgl %0, %1" + : "=r"(r), "=m"(m->lock) + : "0"(1), "m"(m->lock) + : "memory"); + return r; +} +static inline int mutex_unlock(mutex_t *m) { + m->lock = 0; + __asm __volatile ("" : "=m" (m->lock) : "0" (m->lock)); + return 0; +} + +#else + +/* Normal pthread mutex. */ +typedef pthread_mutex_t mutex_t; + +#define MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER +#define mutex_init(m) pthread_mutex_init(m, NULL) +#define mutex_lock(m) pthread_mutex_lock(m) +#define mutex_trylock(m) pthread_mutex_trylock(m) +#define mutex_unlock(m) pthread_mutex_unlock(m) + +#endif /* (__i386__ || __x86_64__) && __GNUC__ && !USE_NO_SPINLOCKS */ + +/* thread specific data */ +#if defined(__sgi) || defined(USE_TSD_DATA_HACK) + +/* Hack for thread-specific data, e.g. on Irix 6.x. We can't use + pthread_setspecific because that function calls malloc() itself. + The hack only works when pthread_t can be converted to an integral + type. */ + +typedef void *tsd_key_t[256]; +#define tsd_key_create(key, destr) do { \ + int i; \ + for(i=0; i<256; i++) (*key)[i] = 0; \ +} while(0) +#define tsd_setspecific(key, data) \ + (key[(unsigned)pthread_self() % 256] = (data)) +#define tsd_getspecific(key, vptr) \ + (vptr = key[(unsigned)pthread_self() % 256]) + +#else + +typedef pthread_key_t tsd_key_t; + +#define tsd_key_create(key, destr) pthread_key_create(key, destr) +#define tsd_setspecific(key, data) pthread_setspecific(key, data) +#define tsd_getspecific(key, vptr) (vptr = pthread_getspecific(key)) + +#endif + +/* at fork */ +#define thread_atfork(prepare, parent, child) \ + pthread_atfork(prepare, parent, child) + +//#include <sysdeps/generic/malloc-machine.h> + +#endif /* !defined(_MALLOC_MACHINE_H) */ diff --git a/src/tests/ptmalloc/t-test.h b/src/tests/ptmalloc/t-test.h new file mode 100644 index 0000000..a52829a --- /dev/null +++ b/src/tests/ptmalloc/t-test.h @@ -0,0 +1,143 @@ +/* + * $Id: t-test.h,v 1.1 2004/11/04 14:32:21 wg Exp $ + * by Wolfram Gloger 1996. + * Common data structures and functions for testing malloc performance. + */ + +/* Testing level */ +#ifndef TEST +#define TEST 0 +#endif + +/* For large allocation sizes, the time required by copying in + realloc() can dwarf all other execution times. Avoid this with a + size threshold. 
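   For example, with the default REALLOC_MAX of 2000 defined below, the
   realloc() branch of bin_alloc() further down is only eligible while a
   bin's current size is under the threshold; bins that have grown past it
   are released and re-allocated from scratch instead, keeping copy costs
   out of the timing.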
*/ +#ifndef REALLOC_MAX +#define REALLOC_MAX 2000 +#endif + +struct bin { + unsigned char *ptr; + unsigned long size; +}; + +#if TEST > 0 + +static void +mem_init(unsigned char *ptr, unsigned long size) +{ + unsigned long i, j; + + if(size == 0) return; + for(i=0; i<size; i+=2047) { + j = (unsigned long)ptr ^ i; + ptr[i] = ((j ^ (j>>8)) & 0xFF); + } + j = (unsigned long)ptr ^ (size-1); + ptr[size-1] = ((j ^ (j>>8)) & 0xFF); +} + +static int +mem_check(unsigned char *ptr, unsigned long size) +{ + unsigned long i, j; + + if(size == 0) return 0; + for(i=0; i<size; i+=2047) { + j = (unsigned long)ptr ^ i; + if(ptr[i] != ((j ^ (j>>8)) & 0xFF)) return 1; + } + j = (unsigned long)ptr ^ (size-1); + if(ptr[size-1] != ((j ^ (j>>8)) & 0xFF)) return 2; + return 0; +} + +static int +zero_check(unsigned* ptr, unsigned long size) +{ + unsigned char* ptr2; + + while(size >= sizeof(*ptr)) { + if(*ptr++ != 0) + return -1; + size -= sizeof(*ptr); + } + ptr2 = (unsigned char*)ptr; + while(size > 0) { + if(*ptr2++ != 0) + return -1; + --size; + } + return 0; +} + +#endif /* TEST > 0 */ + +/* Allocate a bin with malloc(), realloc() or memalign(). r must be a + random number >= 1024. */ + +static void +bin_alloc(struct bin *m, unsigned long size, int r) +{ +#if TEST > 0 + if(mem_check(m->ptr, m->size)) { + printf("memory corrupt!\n"); + exit(1); + } +#endif + r %= 1024; + /*printf("%d ", r);*/ + if(r < 4) { /* memalign */ + if(m->size > 0) free(m->ptr); + m->ptr = (unsigned char *)memalign(sizeof(int) << r, size); + } else if(r < 20) { /* calloc */ + if(m->size > 0) free(m->ptr); + m->ptr = (unsigned char *)calloc(size, 1); +#if TEST > 0 + if(zero_check((unsigned*)m->ptr, size)) { + long i; + for(i=0; i<size; i++) + if(m->ptr[i] != 0) + break; + printf("calloc'ed memory non-zero (ptr=%p, i=%ld)!\n", m->ptr, i); + exit(1); + } +#endif + } else if(r < 100 && m->size < REALLOC_MAX) { /* realloc */ + if(m->size == 0) m->ptr = NULL; + m->ptr = realloc(m->ptr, size); + } else { /* plain malloc */ + if(m->size > 0) free(m->ptr); + m->ptr = (unsigned char *)malloc(size); + } + if(!m->ptr) { + printf("out of memory (r=%d, size=%ld)!\n", r, (long)size); + exit(1); + } + m->size = size; +#if TEST > 0 + mem_init(m->ptr, m->size); +#endif +} + +/* Free a bin. */ + +static void +bin_free(struct bin *m) +{ + if(m->size == 0) return; +#if TEST > 0 + if(mem_check(m->ptr, m->size)) { + printf("memory corrupt!\n"); + exit(1); + } +#endif + free(m->ptr); + m->size = 0; +} + +/* + * Local variables: + * tab-width: 4 + * End: + */ diff --git a/src/tests/ptmalloc/t-test1.c b/src/tests/ptmalloc/t-test1.c new file mode 100644 index 0000000..15dc7c6 --- /dev/null +++ b/src/tests/ptmalloc/t-test1.c @@ -0,0 +1,285 @@ +/* + * $Id: t-test1.c,v 1.2 2004/11/04 14:58:45 wg Exp $ + * by Wolfram Gloger 1996-1999, 2001, 2004 + * A multi-thread test for malloc performance, maintaining one pool of + * allocated bins per thread. 
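 *
 * All command-line arguments are optional (parsed in main() below); a
 * typical invocation is
 *
 *   t-test1 50 4 10000 400 1000
 *
 * i.e. 50 thread lifetimes in total, 4 threads running concurrently, about
 * 10000 malloc/free actions per thread, objects of up to 400 bytes, and
 * 1000 bins maintained per thread.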
+ */ + +#if (defined __STDC__ && __STDC__) || defined __cplusplus +# include <stdlib.h> +#endif +#include <stdio.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <sys/wait.h> +#include <sys/mman.h> + +#if !USE_MALLOC +#include <malloc.h> +#else +#include "malloc.h" +#endif + +#include "lran2.h" +#include "t-test.h" + +struct user_data { + int bins, max; + unsigned long size; + long seed; +}; +#include "thread-st.h" + +#define N_TOTAL 10 +#ifndef N_THREADS +#define N_THREADS 2 +#endif +#ifndef N_TOTAL_PRINT +#define N_TOTAL_PRINT 50 +#endif +#ifndef MEMORY +#define MEMORY 8000000l +#endif +#define SIZE 10000 +#define I_MAX 10000 +#define ACTIONS_MAX 30 +#ifndef TEST_FORK +#define TEST_FORK 0 +#endif + +#define RANDOM(d,s) (lran2(d) % (s)) + +struct bin_info { + struct bin *m; + unsigned long size, bins; +}; + +#if TEST > 0 + +void +bin_test(struct bin_info *p) +{ + int b; + + for(b=0; b<p->bins; b++) { + if(mem_check(p->m[b].ptr, p->m[b].size)) { + printf("memory corrupt!\n"); + abort(); + } + } +} + +#endif + +void +malloc_test(struct thread_st *st) +{ + int b, i, j, actions, pid = 1; + struct bin_info p; + struct lran2_st ld; /* data for random number generator */ + + lran2_init(&ld, st->u.seed); +#if TEST_FORK>0 + if(RANDOM(&ld, TEST_FORK) == 0) { + int status; + +#if !USE_THR + pid = fork(); +#else + pid = fork1(); +#endif + if(pid > 0) { + /*printf("forked, waiting for %d...\n", pid);*/ + waitpid(pid, &status, 0); + printf("done with %d...\n", pid); + if(!WIFEXITED(status)) { + printf("child term with signal %d\n", WTERMSIG(status)); + exit(1); + } + return; + } + exit(0); + } +#endif + p.m = (struct bin *)malloc(st->u.bins*sizeof(*p.m)); + p.bins = st->u.bins; + p.size = st->u.size; + for(b=0; b<p.bins; b++) { + p.m[b].size = 0; + p.m[b].ptr = NULL; + if(RANDOM(&ld, 2) == 0) + bin_alloc(&p.m[b], RANDOM(&ld, p.size) + 1, lran2(&ld)); + } + for(i=0; i<=st->u.max;) { +#if TEST > 1 + bin_test(&p); +#endif + actions = RANDOM(&ld, ACTIONS_MAX); +#if USE_MALLOC && MALLOC_DEBUG + if(actions < 2) { mallinfo(); } +#endif + for(j=0; j<actions; j++) { + b = RANDOM(&ld, p.bins); + bin_free(&p.m[b]); + } + i += actions; + actions = RANDOM(&ld, ACTIONS_MAX); + for(j=0; j<actions; j++) { + b = RANDOM(&ld, p.bins); + bin_alloc(&p.m[b], RANDOM(&ld, p.size) + 1, lran2(&ld)); +#if TEST > 2 + bin_test(&p); +#endif + } +#if 0 /* Test illegal free()s while setting MALLOC_CHECK_ */ + for(j=0; j<8; j++) { + b = RANDOM(&ld, p.bins); + if(p.m[b].ptr) { + int offset = (RANDOM(&ld, 11) - 5)*8; + char *rogue = (char*)(p.m[b].ptr) + offset; + /*printf("p=%p rogue=%p\n", p.m[b].ptr, rogue);*/ + free(rogue); + } + } +#endif + i += actions; + } + for(b=0; b<p.bins; b++) + bin_free(&p.m[b]); + free(p.m); + if(pid == 0) + exit(0); +} + +int n_total=0, n_total_max=N_TOTAL, n_running; + +int +my_end_thread(struct thread_st *st) +{ + /* Thread st has finished. Start a new one. */ +#if 0 + printf("Thread %lx terminated.\n", (long)st->id); +#endif + if(n_total >= n_total_max) { + n_running--; + } else if(st->u.seed++, thread_create(st)) { + printf("Creating thread #%d failed.\n", n_total); + } else { + n_total++; + if(n_total%N_TOTAL_PRINT == 0) + printf("n_total = %d\n", n_total); + } + return 0; +} + +#if 0 +/* Protect address space for allocation of n threads by LinuxThreads. 
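   (Currently compiled out.)  It mmap()s a PROT_NONE region of
   2 * 2 MB * (n+2) bytes immediately below the caller's stack, so any
   access into that range (for instance a thread stack placed there at
   LinuxThreads' fixed 2 MB spacing) faults loudly instead of silently
   corrupting memory.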
*/ +static void +protect_stack(int n) +{ + char buf[2048*1024]; + char* guard; + size_t guard_size = 2*2048*1024UL*(n+2); + + buf[0] = '\0'; + guard = (char*)(((unsigned long)buf - 4096)& ~4095UL) - guard_size; + printf("Setting up stack guard at %p\n", guard); + if(mmap(guard, guard_size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, + -1, 0) + != guard) + printf("failed!\n"); +} +#endif + +int +main(int argc, char *argv[]) +{ + int i, bins; + int n_thr=N_THREADS; + int i_max=I_MAX; + unsigned long size=SIZE; + struct thread_st *st; + +#if USE_MALLOC && USE_STARTER==2 + ptmalloc_init(); + printf("ptmalloc_init\n"); +#endif + + if(argc > 1) n_total_max = atoi(argv[1]); + if(n_total_max < 1) n_thr = 1; + if(argc > 2) n_thr = atoi(argv[2]); + if(n_thr < 1) n_thr = 1; + if(n_thr > 100) n_thr = 100; + if(argc > 3) i_max = atoi(argv[3]); + + if(argc > 4) size = atol(argv[4]); + if(size < 2) size = 2; + + bins = MEMORY/(size*n_thr); + if(argc > 5) bins = atoi(argv[5]); + if(bins < 4) bins = 4; + + /*protect_stack(n_thr);*/ + + thread_init(); + printf("total=%d threads=%d i_max=%d size=%ld bins=%d\n", + n_total_max, n_thr, i_max, size, bins); + + st = (struct thread_st *)malloc(n_thr*sizeof(*st)); + if(!st) exit(-1); + +#if !defined NO_THREADS && (defined __sun__ || defined sun) + /* I know of no other way to achieve proper concurrency with Solaris. */ + thr_setconcurrency(n_thr); +#endif + + /* Start all n_thr threads. */ + for(i=0; i<n_thr; i++) { + st[i].u.bins = bins; + st[i].u.max = i_max; + st[i].u.size = size; + st[i].u.seed = ((long)i_max*size + i) ^ bins; + st[i].sp = 0; + st[i].func = malloc_test; + if(thread_create(&st[i])) { + printf("Creating thread #%d failed.\n", i); + n_thr = i; + break; + } + printf("Created thread %lx.\n", (long)st[i].id); + } + + /* Start an extra thread so we don't run out of stacks. */ + if(0) { + struct thread_st lst; + lst.u.bins = 10; lst.u.max = 20; lst.u.size = 8000; lst.u.seed = 8999; + lst.sp = 0; + lst.func = malloc_test; + if(thread_create(&lst)) { + printf("Creating thread #%d failed.\n", i); + } else { + wait_for_thread(&lst, 1, NULL); + } + } + + for(n_running=n_total=n_thr; n_running>0;) { + wait_for_thread(st, n_thr, my_end_thread); + } + for(i=0; i<n_thr; i++) { + free(st[i].sp); + } + free(st); +#if USE_MALLOC + malloc_stats(); +#endif + printf("Done.\n"); + return 0; +} + +/* + * Local variables: + * tab-width: 4 + * End: + */ diff --git a/src/tests/ptmalloc/t-test2.c b/src/tests/ptmalloc/t-test2.c new file mode 100644 index 0000000..9620563 --- /dev/null +++ b/src/tests/ptmalloc/t-test2.c @@ -0,0 +1,231 @@ +/* + * $Id: t-test2.c,v 1.3 2004/11/04 15:01:05 wg Exp $ + * by Wolfram Gloger 1996-1999, 2001, 2004 + * A multi-thread test for malloc performance, maintaining a single + * global pool of allocated bins. 
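 *
 * Unlike t-test1.c above, where each thread owns its bins outright, here
 * every thread draws from one shared array of blocks; a block groups
 * BINS_PER_BLOCK bins behind its own mutex_t (from malloc-machine.h), so
 * the allocator is also exercised while block locks are being contended.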
+ */ + +#if (defined __STDC__ && __STDC__) || defined __cplusplus +# include <stdlib.h> +#endif +#include <stdio.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <sys/wait.h> + +#if !USE_MALLOC +#include <malloc.h> +#else +#include "malloc.h" +#endif + +#include "lran2.h" +#include "t-test.h" + +struct user_data { + int max; + unsigned long size; + long seed; +}; +#include "thread-st.h" +#include "malloc-machine.h" /* for mutex */ + +#define N_TOTAL 10 +#ifndef N_THREADS +#define N_THREADS 2 +#endif +#ifndef N_TOTAL_PRINT +#define N_TOTAL_PRINT 50 +#endif +#define STACKSIZE 32768 +#ifndef MEMORY +#define MEMORY 8000000l +#endif +#define SIZE 10000 +#define I_MAX 10000 +#define BINS_PER_BLOCK 20 + +#define RANDOM(d,s) (lran2(d) % (s)) + +struct block { + struct bin b[BINS_PER_BLOCK]; + mutex_t mutex; +} *blocks; + +int n_blocks; + +#if TEST > 0 + +void +bin_test(void) +{ + int b, i; + + for(b=0; b<n_blocks; b++) { + mutex_lock(&blocks[b].mutex); + for(i=0; i<BINS_PER_BLOCK; i++) { + if(mem_check(blocks[b].b[i].ptr, blocks[b].b[i].size)) { + printf("memory corrupt!\n"); + exit(1); + } + } + mutex_unlock(&blocks[b].mutex); + } +} + +#endif + +void +malloc_test(struct thread_st *st) +{ + struct block *bl; + int i, b, r; + struct lran2_st ld; /* data for random number generator */ + unsigned long rsize[BINS_PER_BLOCK]; + int rnum[BINS_PER_BLOCK]; + + lran2_init(&ld, st->u.seed); + for(i=0; i<=st->u.max;) { +#if TEST > 1 + bin_test(); +#endif + bl = &blocks[RANDOM(&ld, n_blocks)]; + r = RANDOM(&ld, 1024); + if(r < 200) { /* free only */ + mutex_lock(&bl->mutex); + for(b=0; b<BINS_PER_BLOCK; b++) + bin_free(&bl->b[b]); + mutex_unlock(&bl->mutex); + i += BINS_PER_BLOCK; + } else { /* alloc/realloc */ + /* Generate random numbers in advance. */ + for(b=0; b<BINS_PER_BLOCK; b++) { + rsize[b] = RANDOM(&ld, st->u.size) + 1; + rnum[b] = lran2(&ld); + } + mutex_lock(&bl->mutex); + for(b=0; b<BINS_PER_BLOCK; b++) + bin_alloc(&bl->b[b], rsize[b], rnum[b]); + mutex_unlock(&bl->mutex); + i += BINS_PER_BLOCK; + } +#if TEST > 2 + bin_test(); +#endif + } +} + +int n_total=0, n_total_max=N_TOTAL, n_running; + +int +my_end_thread(struct thread_st *st) +{ + /* Thread st has finished. Start a new one. 
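   wait_for_thread() (see thread-st.h) calls this hook after joining a
   worker that has flagged itself finished: once n_total_max lifetimes have
   been started we just let n_running drop, otherwise the same thread_st
   slot is reseeded and relaunched, so (barring creation failures) n_thr
   workers stay busy until the quota is reached.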
*/ +#if 0 + printf("Thread %lx terminated.\n", (long)st->id); +#endif + if(n_total >= n_total_max) { + n_running--; + } else if(st->u.seed++, thread_create(st)) { + printf("Creating thread #%d failed.\n", n_total); + } else { + n_total++; + if(n_total%N_TOTAL_PRINT == 0) + printf("n_total = %d\n", n_total); + } + return 0; +} + +int +main(int argc, char *argv[]) +{ + int i, j, bins; + int n_thr=N_THREADS; + int i_max=I_MAX; + unsigned long size=SIZE; + struct thread_st *st; + +#if USE_MALLOC && USE_STARTER==2 + ptmalloc_init(); + printf("ptmalloc_init\n"); +#endif + + if(argc > 1) n_total_max = atoi(argv[1]); + if(n_total_max < 1) n_thr = 1; + if(argc > 2) n_thr = atoi(argv[2]); + if(n_thr < 1) n_thr = 1; + if(n_thr > 100) n_thr = 100; + if(argc > 3) i_max = atoi(argv[3]); + + if(argc > 4) size = atol(argv[4]); + if(size < 2) size = 2; + + bins = MEMORY/size; + if(argc > 5) bins = atoi(argv[5]); + if(bins < BINS_PER_BLOCK) bins = BINS_PER_BLOCK; + + n_blocks = bins/BINS_PER_BLOCK; + blocks = (struct block *)malloc(n_blocks*sizeof(*blocks)); + if(!blocks) + exit(1); + + thread_init(); + printf("total=%d threads=%d i_max=%d size=%ld bins=%d\n", + n_total_max, n_thr, i_max, size, n_blocks*BINS_PER_BLOCK); + + for(i=0; i<n_blocks; i++) { + mutex_init(&blocks[i].mutex); + for(j=0; j<BINS_PER_BLOCK; j++) blocks[i].b[j].size = 0; + } + + st = (struct thread_st *)malloc(n_thr*sizeof(*st)); + if(!st) exit(-1); + +#if !defined NO_THREADS && (defined __sun__ || defined sun) + /* I know of no other way to achieve proper concurrency with Solaris. */ + thr_setconcurrency(n_thr); +#endif + + /* Start all n_thr threads. */ + for(i=0; i<n_thr; i++) { + st[i].u.max = i_max; + st[i].u.size = size; + st[i].u.seed = ((long)i_max*size + i) ^ n_blocks; + st[i].sp = 0; + st[i].func = malloc_test; + if(thread_create(&st[i])) { + printf("Creating thread #%d failed.\n", i); + n_thr = i; + break; + } + printf("Created thread %lx.\n", (long)st[i].id); + } + + for(n_running=n_total=n_thr; n_running>0;) { + wait_for_thread(st, n_thr, my_end_thread); + } + + for(i=0; i<n_blocks; i++) { + for(j=0; j<BINS_PER_BLOCK; j++) + bin_free(&blocks[i].b[j]); + } + + for(i=0; i<n_thr; i++) { + free(st[i].sp); + } + free(st); + free(blocks); +#if USE_MALLOC + malloc_stats(); +#endif + printf("Done.\n"); + return 0; +} + +/* + * Local variables: + * tab-width: 4 + * End: + */ diff --git a/src/tests/ptmalloc/thread-m.h b/src/tests/ptmalloc/thread-m.h new file mode 100644 index 0000000..f1bf680 --- /dev/null +++ b/src/tests/ptmalloc/thread-m.h @@ -0,0 +1,233 @@ +/* Basic platform-independent macro definitions for mutexes and + thread-specific data. + Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Wolfram Gloger <wmglo@dent.med.uni-muenchen.de>, 1996. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. 
If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* $Id: thread-m.h,v 1.1.1.4 1999/11/13 15:44:12 wg Exp $ + One out of _LIBC, USE_PTHREADS, USE_THR or USE_SPROC should be + defined, otherwise the token NO_THREADS and dummy implementations + of the macros will be defined. */ + +#ifndef _THREAD_M_H +#define _THREAD_M_H + +#undef thread_atfork_static + +#if defined(_LIBC) /* The GNU C library, a special case of Posix threads */ + +#include <bits/libc-lock.h> + +#ifdef PTHREAD_MUTEX_INITIALIZER + +typedef pthread_t thread_id; + +/* mutex */ +typedef pthread_mutex_t mutex_t; + +#define MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER + +#define mutex_init(m) \ + (__pthread_mutex_init != NULL ? __pthread_mutex_init (m, NULL) : 0) +#define mutex_lock(m) \ + (__pthread_mutex_lock != NULL ? __pthread_mutex_lock (m) : 0) +#define mutex_trylock(m) \ + (__pthread_mutex_trylock != NULL ? __pthread_mutex_trylock (m) : 0) +#define mutex_unlock(m) \ + (__pthread_mutex_unlock != NULL ? __pthread_mutex_unlock (m) : 0) + +#define thread_atfork(prepare, parent, child) \ + (__pthread_atfork != NULL ? __pthread_atfork(prepare, parent, child) : 0) + +#elif defined(MUTEX_INITIALIZER) +/* Assume hurd, with cthreads */ + +/* Cthreads `mutex_t' is a pointer to a mutex, and malloc wants just the + mutex itself. */ +#undef mutex_t +#define mutex_t struct mutex + +#undef mutex_init +#define mutex_init(m) (__mutex_init(m), 0) + +#undef mutex_lock +#define mutex_lock(m) (__mutex_lock(m), 0) + +#undef mutex_unlock +#define mutex_unlock(m) (__mutex_unlock(m), 0) + +#define mutex_trylock(m) (!__mutex_trylock(m)) + +#define thread_atfork(prepare, parent, child) do {} while(0) +#define thread_atfork_static(prepare, parent, child) \ + text_set_element(_hurd_fork_prepare_hook, prepare); \ + text_set_element(_hurd_fork_parent_hook, parent); \ + text_set_element(_hurd_fork_child_hook, child); + +/* No we're *not* using pthreads. */ +#define __pthread_initialize ((void (*)(void))0) + +#else + +#define NO_THREADS + +#endif /* MUTEX_INITIALIZER && PTHREAD_MUTEX_INITIALIZER */ + +#ifndef NO_THREADS + +/* thread specific data for glibc */ + +#include <bits/libc-tsd.h> + +typedef int tsd_key_t[0]; /* no key data structure, libc magic does it */ +__libc_tsd_define (, MALLOC) /* declaration/common definition */ +#define tsd_key_create(key, destr) ((void) (key)) +#define tsd_setspecific(key, data) __libc_tsd_set (MALLOC, (data)) +#define tsd_getspecific(key, vptr) ((vptr) = __libc_tsd_get (MALLOC)) + +#endif + +#elif defined(USE_PTHREADS) /* Posix threads */ + +#include <pthread.h> + +typedef pthread_t thread_id; + +/* mutex */ +typedef pthread_mutex_t mutex_t; + +#define MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER +#define mutex_init(m) pthread_mutex_init(m, NULL) +#define mutex_lock(m) pthread_mutex_lock(m) +#define mutex_trylock(m) pthread_mutex_trylock(m) +#define mutex_unlock(m) pthread_mutex_unlock(m) + +/* thread specific data */ +#if defined(__sgi) || defined(USE_TSD_DATA_HACK) + +/* Hack for thread-specific data, e.g. on Irix 6.x. We can't use + pthread_setspecific because that function calls malloc() itself. + The hack only works when pthread_t can be converted to an integral + type. 
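   Two caveats follow from the 256-entry table: threads whose ids collide
   modulo 256 silently overwrite each other's slot, and the destr argument
   to tsd_key_create is ignored, so per-thread destructors never run.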
*/ + +typedef void *tsd_key_t[256]; +#define tsd_key_create(key, destr) do { \ + int i; \ + for(i=0; i<256; i++) (*key)[i] = 0; \ +} while(0) +#define tsd_setspecific(key, data) \ + (key[(unsigned)pthread_self() % 256] = (data)) +#define tsd_getspecific(key, vptr) \ + (vptr = key[(unsigned)pthread_self() % 256]) + +#else + +typedef pthread_key_t tsd_key_t; + +#define tsd_key_create(key, destr) pthread_key_create(key, destr) +#define tsd_setspecific(key, data) pthread_setspecific(key, data) +#define tsd_getspecific(key, vptr) (vptr = pthread_getspecific(key)) + +#endif + +/* at fork */ +#define thread_atfork(prepare, parent, child) \ + pthread_atfork(prepare, parent, child) + +#elif USE_THR /* Solaris threads */ + +#include <thread.h> + +typedef thread_t thread_id; + +#define MUTEX_INITIALIZER { 0 } +#define mutex_init(m) mutex_init(m, USYNC_THREAD, NULL) + +/* + * Hack for thread-specific data on Solaris. We can't use thr_setspecific + * because that function calls malloc() itself. + */ +typedef void *tsd_key_t[256]; +#define tsd_key_create(key, destr) do { \ + int i; \ + for(i=0; i<256; i++) (*key)[i] = 0; \ +} while(0) +#define tsd_setspecific(key, data) (key[(unsigned)thr_self() % 256] = (data)) +#define tsd_getspecific(key, vptr) (vptr = key[(unsigned)thr_self() % 256]) + +#define thread_atfork(prepare, parent, child) do {} while(0) + +#elif USE_SPROC /* SGI sproc() threads */ + +#include <sys/wait.h> +#include <sys/types.h> +#include <sys/prctl.h> +#include <abi_mutex.h> + +typedef int thread_id; + +typedef abilock_t mutex_t; + +#define MUTEX_INITIALIZER { 0 } +#define mutex_init(m) init_lock(m) +#define mutex_lock(m) (spin_lock(m), 0) +#define mutex_trylock(m) acquire_lock(m) +#define mutex_unlock(m) release_lock(m) + +typedef int tsd_key_t; +int tsd_key_next; +#define tsd_key_create(key, destr) ((*key) = tsd_key_next++) +#define tsd_setspecific(key, data) (((void **)(&PRDA->usr_prda))[key] = data) +#define tsd_getspecific(key, vptr) (vptr = ((void **)(&PRDA->usr_prda))[key]) + +#define thread_atfork(prepare, parent, child) do {} while(0) + +#else /* no _LIBC or USE_... are defined */ + +#define NO_THREADS + +#endif /* defined(_LIBC) */ + +#ifdef NO_THREADS /* No threads, provide dummy macros */ + +typedef int thread_id; + +/* The mutex functions used to do absolutely nothing, i.e. lock, + trylock and unlock would always just return 0. However, even + without any concurrently active threads, a mutex can be used + legitimately as an `in use' flag. To make the code that is + protected by a mutex async-signal safe, these macros would have to + be based on atomic test-and-set operations, for example. */ +typedef int mutex_t; + +#define MUTEX_INITIALIZER 0 +#define mutex_init(m) (*(m) = 0) +#define mutex_lock(m) ((*(m) = 1), 0) +#define mutex_trylock(m) (*(m) ? 
1 : ((*(m) = 1), 0)) +#define mutex_unlock(m) (*(m) = 0) + +typedef void *tsd_key_t; +#define tsd_key_create(key, destr) do {} while(0) +#define tsd_setspecific(key, data) ((key) = (data)) +#define tsd_getspecific(key, vptr) (vptr = (key)) + +#define thread_atfork(prepare, parent, child) do {} while(0) + +#endif /* defined(NO_THREADS) */ + +#endif /* !defined(_THREAD_M_H) */ diff --git a/src/tests/ptmalloc/thread-st.h b/src/tests/ptmalloc/thread-st.h new file mode 100644 index 0000000..f97a0a3 --- /dev/null +++ b/src/tests/ptmalloc/thread-st.h @@ -0,0 +1,111 @@ +/* + * $Id: thread-st.h$ + * pthread version + * by Wolfram Gloger 2004 + */ + +#include <pthread.h> +#include <stdio.h> + +pthread_cond_t finish_cond = PTHREAD_COND_INITIALIZER; +pthread_mutex_t finish_mutex = PTHREAD_MUTEX_INITIALIZER; + +#ifndef USE_PTHREADS_STACKS +#define USE_PTHREADS_STACKS 0 +#endif + +#ifndef STACKSIZE +#define STACKSIZE 32768 +#endif + +struct thread_st { + char *sp; /* stack pointer, can be 0 */ + void (*func)(struct thread_st* st); /* must be set by user */ + pthread_t id; + int flags; + struct user_data u; +}; + +static void +thread_init(void) +{ + printf("Using posix threads.\n"); + pthread_cond_init(&finish_cond, NULL); + pthread_mutex_init(&finish_mutex, NULL); +} + +static void * +thread_wrapper(void *ptr) +{ + struct thread_st *st = (struct thread_st*)ptr; + + /*printf("begin %p\n", st->sp);*/ + st->func(st); + pthread_mutex_lock(&finish_mutex); + st->flags = 1; + pthread_mutex_unlock(&finish_mutex); + pthread_cond_signal(&finish_cond); + /*printf("end %p\n", st->sp);*/ + return NULL; +} + +/* Create a thread. */ +static int +thread_create(struct thread_st *st) +{ + st->flags = 0; + { + pthread_attr_t* attr_p = 0; +#if USE_PTHREADS_STACKS + pthread_attr_t attr; + + pthread_attr_init (&attr); + if(!st->sp) + st->sp = malloc(STACKSIZE+16); + if(!st->sp) + return -1; + if(pthread_attr_setstacksize(&attr, STACKSIZE)) + fprintf(stderr, "error setting stacksize"); + else + pthread_attr_setstackaddr(&attr, st->sp + STACKSIZE); + /*printf("create %p\n", st->sp);*/ + attr_p = &attr; +#endif + return pthread_create(&st->id, attr_p, thread_wrapper, st); + } + return 0; +} + +/* Wait for one of several subthreads to finish. */ +static void +wait_for_thread(struct thread_st st[], int n_thr, + int (*end_thr)(struct thread_st*)) +{ + int i; + + pthread_mutex_lock(&finish_mutex); + for(;;) { + int term = 0; + for(i=0; i<n_thr; i++) + if(st[i].flags) { + /*printf("joining %p\n", st[i].sp);*/ + if(pthread_join(st[i].id, NULL) == 0) { + st[i].flags = 0; + if(end_thr) + end_thr(&st[i]); + } else + fprintf(stderr, "can't join\n"); + ++term; + } + if(term > 0) + break; + pthread_cond_wait(&finish_cond, &finish_mutex); + } + pthread_mutex_unlock(&finish_mutex); +} + +/* + * Local variables: + * tab-width: 4 + * End: + */ diff --git a/src/tests/stacktrace_unittest.cc b/src/tests/stacktrace_unittest.cc new file mode 100644 index 0000000..ee2f126 --- /dev/null +++ b/src/tests/stacktrace_unittest.cc @@ -0,0 +1,91 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "google/perftools/config.h" +#include <stdio.h> +#include <stdlib.h> +#include "base/commandlineflags.h" +#include "base/logging.h" +#include "google/stacktrace.h" + +#ifdef HAVE_EXECINFO_H +#include <execinfo.h> +#endif + +void CheckStackTrace(int i); + +/* Obtain a backtrace, verify that we are the great-great-grandchild of + * CheckStackTrace, and maybe print the backtrace to stdout. + */ +void CheckStackTraceLeaf(void) { + const int STACK_LEN = 10; + void *stack[STACK_LEN]; + int size; + + size = GetStackTrace(stack, STACK_LEN, 0); + printf("Obtained %d stack frames.\n", size); + CHECK_LE(size, STACK_LEN); + + // for some reason, CheckStackTraceLeaf doesn't show up in the backtrace + // stack[size - 1] is in CheckStackTrace4 + // stack[size - 2] is in CheckStackTrace3 + // stack[size - 3] is in CheckStackTrace2 + // stack[size - 4] is in CheckStackTrace1 + // stack[size - 5] is in CheckStackTrace + CHECK_GE(stack[size - 4], (void*) &CheckStackTrace); + CHECK_LE(stack[size - 4], (char*) &CheckStackTrace + 0x40); // assume function is only 0x40 bytes long + + +#ifdef HAVE_EXECINFO_H + { + char **strings = backtrace_symbols(stack, size); + + for (int i = 0; i < size; i++) + printf("%s\n", strings[i]); + printf("CheckStackTrace() addr: %p\n", &CheckStackTrace); + free(strings); + } +#endif + +} + +/* Dummy functions to make the backtrace more interesting. */ +void CheckStackTrace4(int i) { for (int j = i; j >= 0; j--) CheckStackTraceLeaf(); } +void CheckStackTrace3(int i) { for (int j = i; j >= 0; j--) CheckStackTrace4(j); } +void CheckStackTrace2(int i) { for (int j = i; j >= 0; j--) CheckStackTrace3(j); } +void CheckStackTrace1(int i) { for (int j = i; j >= 0; j--) CheckStackTrace2(j); } +void CheckStackTrace(int i) { for (int j = i; j >= 0; j--) CheckStackTrace1(j); } + +int main(int argc, char ** argv) { + + CheckStackTrace(0); + + printf("PASS\n"); + return 0; +} diff --git a/src/tests/tcmalloc_unittest.cc b/src/tests/tcmalloc_unittest.cc new file mode 100644 index 0000000..d823c13 --- /dev/null +++ b/src/tests/tcmalloc_unittest.cc @@ -0,0 +1,61 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Paul Menage +// +// TODO(menage) Turn this into a real unittest ... + +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include "google/malloc_interface.h" + +#define BUFSIZE (100 << 10) + +int main(int argc, char **argv) { + + + char *buf1 = (char *)malloc(BUFSIZE); + memset(buf1, 0, BUFSIZE); + printf("Allocated buf1 via malloc() at %p\n", buf1); + + char *buf2 = new char[BUFSIZE]; + memset(buf2, 0, BUFSIZE); + printf("Allocated buf2 via new at %p\n", buf2); + + free(buf1); + delete[] buf2; + + char buffer[10 << 10]; + MallocInterface::instance()->GetStats(buffer, sizeof(buffer)); + printf("Malloc stats:\n%s\n", buffer); + + return 0; +} |
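For reference, the stack-capture routine exercised by stacktrace_unittest.cc
above can also be used on its own. A minimal sketch: the buffer depth of 32
and the skip count of 0 are arbitrary choices, and backtrace_symbols() is
only used for pretty-printing where <execinfo.h> is available:

#include <stdio.h>
#include <stdlib.h>
#include <execinfo.h>            // backtrace_symbols(); glibc-specific
#include "google/stacktrace.h"   // GetStackTrace()

int main() {
  void* frames[32];
  // Capture up to 32 return addresses, skipping 0 frames above this call.
  int depth = GetStackTrace(frames, 32, 0);
  char** names = backtrace_symbols(frames, depth);
  for (int i = 0; i < depth; ++i)
    printf("%2d: %s\n", i, names ? names[i] : "(no symbol)");
  free(names);   // backtrace_symbols() returns a single malloc()ed block
  return 0;
}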