summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRaphael Moreira Zinsly <rzinsly@linux.vnet.ibm.com>2014-12-11 17:04:52 -0200
committerRaphael Moreira Zinsly <rzinsly@linux.vnet.ibm.com>2014-12-11 17:04:52 -0200
commit0d9b42839b2f58417004bed2d3ee53ddf1faa9ba (patch)
treeae81574111a60b8ccb9bb416e11ba35d2844ca76
parent1ecc068be96b3483c68bbd918430e7704a7bc89f (diff)
downloadgperftools-0d9b42839b2f58417004bed2d3ee53ddf1faa9ba.tar.gz
New compiler flags to set the size and alignment of tcmalloc pages
Added two new configure options, --with-tcmalloc-pagesize and --with-tcmalloc-alignment, in order to set the tcmalloc internal page size and alignment without the need for a compiler directive and to make the choice of the page size independent of the alignment.
-rw-r--r--INSTALL20
-rw-r--r--configure.ac37
-rw-r--r--src/common.h36
3 files changed, 65 insertions, 28 deletions
diff --git a/INSTALL b/INSTALL
index f448076..ade4641 100644
--- a/INSTALL
+++ b/INSTALL
@@ -102,19 +102,19 @@ cost of using more space (due to internal fragmentation).
Internally, tcmalloc divides its memory into "pages." The default
page size is chosen to minimize memory use by reducing fragmentation.
The cost is that keeping track of these pages can cost tcmalloc time.
-We've added a new, experimental flag to tcmalloc that enables a larger
-page size. In general, this will increase the memory needs of
-applications using tcmalloc. However, in many cases it will speed up
-the applications as well, particularly if they allocate and free a lot
-of memory. We've seen average speedups of 3-5% on Google
-applications.
+We've added a new flag to tcmalloc that enables a larger page size.
+In general, this will increase the memory needs of applications using
+tcmalloc. However, in many cases it will speed up the applications
+as well, particularly if they allocate and free a lot of memory. We've
+seen average speedups of 3-5% on Google applications.
-This feature is still very experimental; it's not even a configure
-flag yet. To build libtcmalloc with large pages, run
+To build libtcmalloc with large pages you need to use the
+--with-tcmalloc-pagesize=ARG configure option, e.g.:
- ./configure <normal flags> CXXFLAGS=-DTCMALLOC_LARGE_PAGES
+ ./configure <other flags> --with-tcmalloc-pagesize=32
-(or add -DTCMALLOC_LARGE_PAGES to your existing CXXFLAGS argument).
+The ARG argument can be 8, 32 or 64, which sets the internal page size to
+8K, 32K or 64K respectively. The default is 8K.
*** SMALL TCMALLOC CACHES: TRADING SPACE FOR TIME
diff --git a/configure.ac b/configure.ac
index 0d7f551..33f8ded 100644
--- a/configure.ac
+++ b/configure.ac
@@ -22,7 +22,6 @@ AM_INIT_AUTOMAKE([dist-zip])
AC_CONFIG_HEADERS([src/config.h])
AM_MAINTAINER_MODE()
-
# Export the version information (for tc_version and friends)
TC_VERSION_MAJOR=`expr "$PACKAGE_VERSION" : '\([[0-9]]*\)'`
TC_VERSION_MINOR=`expr "$PACKAGE_VERSION" : '[[0-9]]*\.\([[0-9]]*\)'`
@@ -42,6 +41,8 @@ default_enable_heap_profiler=yes
default_enable_heap_checker=yes
default_enable_debugalloc=yes
default_enable_minimal=no
+default_tcmalloc_pagesize=8
+default_tcmalloc_alignment=16
need_nanosleep=yes # Used later, to decide if to run ACX_NANOSLEEP
case "$host" in
*-mingw*) default_enable_minimal=yes; default_enable_debugalloc=no;
@@ -95,6 +96,40 @@ AC_ARG_ENABLE([libunwind],
[enable libunwind linking])],
[],
[enable_libunwind="$default_enable_libunwind"])
+AC_ARG_WITH([tcmalloc-pagesize],
+ [AS_HELP_STRING([--with-tcmalloc-pagesize],
+ [Set the tcmalloc internal page size to 8K, 32K or 64K])],
+ [],
+ [with_tcmalloc_pagesize=$default_tcmalloc_pagesize])
+AC_ARG_WITH([tcmalloc-alignment],
+ [AS_HELP_STRING([--with-tcmalloc-alignment],
+ [Set the tcmalloc internal page alignment to 8 or 16 bytes])],
+ [],
+ [with_tcmalloc_alignment=$default_tcmalloc_alignment])
+
+case "$with_tcmalloc_pagesize" in
+ 8)
+ #Default tcmalloc page size.
+ ;;
+ 32)
+ AC_DEFINE(TCMALLOC_32K_PAGES, 1,
+ [Define 32K of internal pages size for tcmalloc]);;
+ 64)
+ AC_DEFINE(TCMALLOC_64K_PAGES, 1,
+ [Define 64K of internal pages size for tcmalloc]);;
+ *)
+ AC_MSG_WARN([${with_tcmalloc_pagesize}K size not supported, using default tcmalloc page size.])
+esac
+case "$with_tcmalloc_alignment" in
+ 8)
+ AC_DEFINE(TCMALLOC_ALIGN_8BYTES, 1,
+ [Define 8 bytes of internal pages alignment for tcmalloc]);;
+ 16)
+ #Default tcmalloc page alignment.
+ ;;
+ *)
+ AC_MSG_WARN([${with_tcmalloc_alignment} bytes not supported, using default tcmalloc page alignment.])
+esac
# Checks for programs.
AC_PROG_CXX
diff --git a/src/common.h b/src/common.h
index 18bcad8..c3484d3 100644
--- a/src/common.h
+++ b/src/common.h
@@ -62,6 +62,19 @@ typedef uintptr_t Length;
// Configuration
//-------------------------------------------------------------------
+#if defined(TCMALLOC_ALIGN_8BYTES)
+// Unless we force to use 8 bytes alignment we use an alignment of
+// at least 16 bytes to satisfy requirements for some SSE types.
+// Keep in mind when using the 16 bytes alignment you can have a space
+// waste due to alignment of 25%. (eg malloc of 24 bytes will get 32 bytes)
+static const size_t kMinAlign = 8;
+// Number of classes created until reaching page size 128.
+static const size_t kBaseClasses = 16;
+#else
+static const size_t kMinAlign = 16;
+static const size_t kBaseClasses = 9;
+#endif
+
// Using large pages speeds up the execution at a cost of larger memory use.
// Deallocation may speed up by a factor as the page map gets 8x smaller, so
// lookups in the page map result in fewer L2 cache misses, which translates to
@@ -70,28 +83,17 @@ typedef uintptr_t Length;
// the thread cache allowance to avoid passing more free ranges to and from
// central lists. Also, larger pages are less likely to get freed.
// These two factors cause a bounded increase in memory use.
-
-#if defined(TCMALLOC_LARGE_PAGES)
+#if defined(TCMALLOC_32K_PAGES)
static const size_t kPageShift = 15;
-static const size_t kNumClasses = 78;
-static const size_t kMinAlign = 16;
-#elif defined(TCMALLOC_LARGE_PAGES64K)
+static const size_t kNumClasses = kBaseClasses + 69;
+#elif defined(TCMALLOC_64K_PAGES)
static const size_t kPageShift = 16;
-static const size_t kNumClasses = 82;
-static const size_t kMinAlign = 16;
-#elif defined(TCMALLOC_ALIGN_8BYTES)
-static const size_t kPageShift = 13;
-static const size_t kNumClasses = 95;
-// Unless we force to use 8 bytes alignment we use an alignment of
-// at least 16 bytes to statisfy requirements for some SSE types.
-// Keep in mind when using the 16 bytes alignment you can have a space
-// waste due alignment of 25%. (eg malloc of 24 bytes will get 32 bytes)
-static const size_t kMinAlign = 8;
+static const size_t kNumClasses = kBaseClasses + 73;
#else
static const size_t kPageShift = 13;
-static const size_t kNumClasses = 88;
-static const size_t kMinAlign = 16;
+static const size_t kNumClasses = kBaseClasses + 79;
#endif
+
static const size_t kMaxThreadCacheSize = 4 << 20;
static const size_t kPageSize = 1 << kPageShift;