summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorcsilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50>2010-04-12 21:18:48 +0000
committercsilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50>2010-04-12 21:18:48 +0000
commit21c65ecb037d7d41dcd574c70cd7b7e9d5045462 (patch)
treea2678ebcd7088a366ad8fb8c8def66746d28bcb9
parent92beff88437b31f4a618640b88487e0f8dfb7017 (diff)
downloadgperftools-21c65ecb037d7d41dcd574c70cd7b7e9d5045462.tar.gz
* Speed up IsSymbolizedProfile by checking for NUL (csilvers)
* Fix heap sampling to use correct alloc size (bmaurer) * Make pprof ignore tc_new{,array}_nothrow (csilvers) * PORTING: Have IsHeapProfilerRunning return an int, for C (csilvers) * Avoid false-sharing of memory between caches (bmaurer) * Fix some variable shadowing (rt) * SVG-based output in pprof; also, wget->curl (rsc) * Allow arbitrary prefix before obvious handler (rsc) * Advertise when using libunwind (ppluzhnikov) Also, the install file seems to have reverted back to the default at some previous point in time (autotools will do that occasionally). Change that back to have the perftools-specific text in there. git-svn-id: http://gperftools.googlecode.com/svn/trunk@92 6b5cf1ce-ec42-a296-1ba9-69fdba395a50
-rw-r--r--INSTALL310
-rw-r--r--src/base/basictypes.h8
-rw-r--r--src/google/heap-profiler.h5
-rw-r--r--src/heap-checker.cc4
-rw-r--r--src/heap-profiler.cc4
-rw-r--r--src/page_heap_allocator.h15
-rwxr-xr-xsrc/pprof613
-rw-r--r--src/stacktrace_config.h1
-rw-r--r--src/system-alloc.cc2
-rw-r--r--src/system-alloc.h6
-rw-r--r--src/tcmalloc.cc107
-rw-r--r--src/thread_cache.h34
12 files changed, 888 insertions, 221 deletions
diff --git a/INSTALL b/INSTALL
index 23e5f25..48a4fa6 100644
--- a/INSTALL
+++ b/INSTALL
@@ -1,16 +1,253 @@
-Installation Instructions
-*************************
+Copyright 1994, 1995, 1996, 1999, 2000, 2001, 2002 Free Software
+Foundation, Inc.
-Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005 Free
-Software Foundation, Inc.
-
-This file is free documentation; the Free Software Foundation gives
+ This file is free documentation; the Free Software Foundation gives
unlimited permission to copy, distribute and modify it.
+
+Perftools-Specific Install Notes
+================================
+
+*** NOTE FOR 64-BIT LINUX SYSTEMS
+
+The glibc built-in stack-unwinder on 64-bit systems has some problems
+with the perftools libraries. (In particular, the cpu/heap profiler
+may be in the middle of malloc, holding some malloc-related locks when
+they invoke the stack unwinder. The built-in stack unwinder may call
+malloc recursively, which may require the thread to acquire a lock it
+already holds: deadlock.)
+
+For that reason, if you use a 64-bit system, we strongly recommend you
+install libunwind before trying to configure or install google
+perftools. libunwind can be found at
+
+ http://download.savannah.gnu.org/releases/libunwind/libunwind-0.99-beta.tar.gz
+
+Even if you already have libunwind installed, you should check the
+version. Versions older than this will not work properly; too-new
+versions introduce new code that does not work well with perftools
+(because libunwind can call malloc, which will lead to deadlock).
+
+CAUTION: if you install libunwind from the url above, be aware that
+you may have trouble if you try to statically link your binary with
+perftools: that is, if you link with 'gcc -static -lgcc_eh ...'. This
+is because both libunwind and libgcc implement the same C++ exception
+handling APIs, but they implement them differently on some platforms.
+This is not likely to be a problem on ia64, but may be on x86-64.
+
+Also, if you link binaries statically, make sure that you add
+-Wl,--eh-frame-hdr to your linker options. This is required so that
+libunwind can find the information generated by the compiler required
+for stack unwinding.
+
+Using -static is rare, though, so unless you know this will affect you
+it probably won't.
+
+If you cannot or do not wish to install libunwind, you can still try
+to use the built-in stack unwinder. The built-in stack unwinder
+requires that your application, the tcmalloc library, and system
+libraries like libc, all be compiled with a frame pointer. This is
+*not* the default for x86-64.
+
+If you are on an x86-64 system, know that you have a set of system
+libraries with frame-pointers enabled, and compile all your
+applications with -fno-omit-frame-pointer, then you can enable the
+built-in perftools stack unwinder by passing the
+--enable-frame-pointers flag to configure.
+
+Even with the use of libunwind, there are still known problems with
+stack unwinding on 64-bit systems, particularly x86-64. See the
+"64-BIT ISSUES" section in README.
+
+If you encounter problems, try compiling perftools with './configure
+--enable-frame-pointers'. Note you will need to compile your
+application with frame pointers (via 'gcc -fno-omit-frame-pointer
+...') in this case.
+
+
+*** NOTE FOR ___tls_get_addr ERROR
+
+When compiling perftools on some old systems, like RedHat 8, you may
+get an error like this:
+ ___tls_get_addr: symbol not found
+
+This means that you have a system where some parts are updated enough
+to support Thread Local Storage, but others are not. The perftools
+configure script can't always detect this kind of case, leading to
+that error. To fix it, just comment out the line
+ #define HAVE_TLS 1
+in your config.h file before building.
+
+
+*** TCMALLOC AND DLOPEN
+
+To improve performance, we use the "initial exec" model of Thread
+Local Storage in tcmalloc. The price for this is the library will not
+work correctly if it is loaded via dlopen(). This should not be a
+problem, since loading a malloc-replacement library via dlopen is
+asking for trouble in any case: some data will be allocated with one
+malloc, some with another. If, for some reason, you *do* need to use
+dlopen on tcmalloc, the easiest way is to use a version of tcmalloc
+with TLS turned off; see the ___tls_get_addr note above.
+
+
+*** COMPILING ON NON-LINUX SYSTEMS
+
+Perftools has been tested on the following systems:
+ FreeBSD 6.0 (x86)
+ Linux Fedora Core 3 (x86)
+ Linux Fedora Core 4 (x86)
+ Linux Fedora Core 5 (x86)
+ Linux Fedora Core 6 (x86)
+ Linux Ubuntu 6.06.1 (x86)
+ Linux Ubuntu 6.06.1 (x86_64)
+ Linux RedHat 9 (x86)
+ Linux Debian 4.0 (PPC)
+ Mac OS X 10.3.9 (Panther) (PowerPC)
+ Mac OS X 10.4.8 (Tiger) (PowerPC)
+ Mac OS X 10.4.8 (Tiger) (x86)
+ Mac OS X 10.5 (Leopard) (x86)
+ Solaris 10 (x86)
+ Windows XP, Visual Studio 2003 (VC++ 7) (x86)
+ Windows XP, Visual Studio 2005 (VC++ 8) (x86)
+ Windows XP, MinGW 5.1.3 (x86)
+ Windows XP, Cygwin 5.1 (x86)
+
+It works in its full generality on the Linux systems
+tested (though see 64-bit notes above). Portions of perftools work on
+the other systems. The basic memory-allocation library,
+tcmalloc_minimal, works on all systems. The cpu-profiler also works
+fairly widely. However, the heap-profiler and heap-checker are not
+yet as widely supported. In general, the 'configure' script will
+detect what OS you are building for, and only build the components
+that work on that OS.
+
+Note that tcmalloc_minimal is perfectly usable as a malloc/new
+replacement, so it is possible to use tcmalloc on all the systems
+above, by linking in libtcmalloc_minimal.
+
+** FreeBSD:
+
+ The following binaries build and run successfully (creating
+ libtcmalloc_minimal.so and libprofile.so in the process):
+ % ./configure
+ % make tcmalloc_minimal_unittest tcmalloc_minimal_large_unittest \
+ addressmap_unittest atomicops_unittest frag_unittest \
+ low_level_alloc_unittest markidle_unittest memalign_unittest \
+ packed_cache_test stacktrace_unittest system_alloc_unittest \
+ thread_dealloc_unittest profiler_unittest.sh
+ % ./tcmalloc_minimal_unittest # to run this test
+ % [etc] # to run other tests
+
+ Four caveats: first, frag_unittest tries to allocate 400M of memory,
+ and if you have less virtual memory on your system, the test may
+ fail with a bad_alloc exception.
+
+ Second, profiler_unittest.sh sometimes fails in the "fork" test.
+ This is because stray SIGPROF signals from the parent process are
+ making their way into the child process. (This may be a kernel
+ bug that only exists in older kernels.) The profiling code itself
+ is working fine. This only affects programs that call fork(); for
+ most programs, the cpu profiler is entirely safe to use.
+
+ Third, perftools depends on /proc to get shared library
+ information. If you are running a FreeBSD system without proc,
+ perftools will not be able to map addresses to functions. Some
+ unittests will fail as a result.
+
+ Finally, the new test introduced in perftools-1.2,
+ profile_handler_unittest, fails on FreeBSD. It has something to do
+ with how the itimer works. The cpu profiler test passes, so I
+ believe the functionality is correct and the issue is with the test
+ somehow. If anybody is an expert on itimers and SIGPROF in
+ FreeBSD, and would like to debug this, I'd be glad to hear the
+ results!
+
+ libtcmalloc.so successfully builds, and the "advanced" tcmalloc
+ functionality all works except for the leak-checker, which has
+ Linux-specific code:
+ % make heap-profiler_unittest.sh maybe_threads_unittest.sh \
+ tcmalloc_unittest tcmalloc_both_unittest \
+ tcmalloc_large_unittest # THESE WORK
+ % make -k heap-checker_unittest.sh \
+ heap-checker-death_unittest.sh # THESE DO NOT
+
+ Note that unless you specify --enable-heap-checker explicitly,
+ 'make' will not build the heap-checker unittests on a FreeBSD
+ system.
+
+ I have not tested other *BSD systems, but they are probably similar.
+
+** Mac OS X:
+
+ I've tested OS X 10.5 [Leopard], OS X 10.4 [Tiger] and OS X 10.3
+ [Panther] on both intel (x86) and PowerPC systems. For Panther
+ systems, perftools does not work at all: it depends on a header
+ file, OSAtomic.h, which is new in 10.4. (It's possible to get the
+ code working for Panther/i386 without too much work; if you're
+ interested in exploring this, drop an e-mail.)
+
+ For the other seven systems, the binaries and libraries that
+ successfully build are exactly the same as for FreeBSD. See that
+ section for a list of binaries and instructions on building them.
+
+** Solaris 10 x86:
+
+ I've only tested using the GNU C++ compiler, not the Sun C++
+ compiler. Using g++ requires setting the PATH appropriately when
+ configuring.
+
+ % PATH=${PATH}:/usr/sfw/bin/:/usr/ccs/bin ./configure
+ % PATH=${PATH}:/usr/sfw/bin/:/usr/ccs/bin make [...]
+
+ Again, the binaries and libraries that successfully build are
+ exactly the same as for FreeBSD. (However, while libprofiler.so can
+ be used to generate profiles, pprof is not very successful at
+ reading them -- necessary helper programs like nm don't seem
+ to be installed by default on Solaris, or perhaps are only
+ installed as part of the Sun C++ compiler package.) See that
+ section for a list of binaries, and instructions on building them.
+
+** Windows:
+
+ Work on Windows is rather preliminary: we haven't found a good way
+ to get stack traces in release mode on windows (that is, when FPO
+ is enabled), so the heap profiling may not be reliable in that
+ case. Also, heap-checking and CPU profiling do not yet work at
+ all. But as in other ports, the basic tcmalloc library
+ functionality, overriding malloc and new and such (and even
+ windows-specific functions like _aligned_malloc!), is working fine,
+ at least with VC++ 7.1 (Visual Studio 2003) and VC++ 8.0
+ (Visual Studio 2005), in both debug and release modes. See
+ README.windows for instructions on how to install on Windows using
+ Visual Studio.
+
+ Cygwin can compile some but not all of perftools. Furthermore,
+ there is a problem with exception-unwinding in cygwin (it can call
+ malloc, which can call the exception-unwinding-setup code, which
+ can lead to an infinite loop). I've committed a workaround to the
+ exception unwinding problem, but it only works in debug mode and
+ when statically linking in tcmalloc. I hope to have a more proper
+ fix in a later release. To configure under cygwin, run
+
+ ./configure --disable-shared CXXFLAGS=-g && make
+
+ Most of cygwin will compile (cygwin doesn't allow weak symbols, so
+ the heap-checker and a few other pieces of functionality will not
+ compile). 'make' will compile those libraries and tests that can
+ be compiled. You can run 'make check' to make sure the basic
+ functionality is working.
+
+ This Windows functionality is also available using MinGW and Msys.
+ In this case, you can use the regular './configure && make'
+ process. 'make install' should also work. The Makefile will limit
+ itself to those libraries and binaries that work on windows.
+
+
Basic Installation
==================
-These are generic installation instructions.
+ These are generic installation instructions.
The `configure' shell script attempts to guess correct values for
various system-dependent variables used during compilation. It uses
@@ -70,9 +307,9 @@ The simplest way to compile this package is:
Compilers and Options
=====================
-Some systems require unusual options for compilation or linking that the
-`configure' script does not know about. Run `./configure --help' for
-details on some of the pertinent environment variables.
+ Some systems require unusual options for compilation or linking that
+the `configure' script does not know about. Run `./configure --help'
+for details on some of the pertinent environment variables.
You can give `configure' initial values for configuration parameters
by setting variables in the command line or in the environment. Here
@@ -85,7 +322,7 @@ is an example:
Compiling For Multiple Architectures
====================================
-You can compile the package for more than one kind of computer at the
+ You can compile the package for more than one kind of computer at the
same time, by placing the object files for each architecture in their
own directory. To do this, you must use a version of `make' that
supports the `VPATH' variable, such as GNU `make'. `cd' to the
@@ -102,19 +339,19 @@ for another architecture.
Installation Names
==================
-By default, `make install' installs the package's commands under
-`/usr/local/bin', include files under `/usr/local/include', etc. You
-can specify an installation prefix other than `/usr/local' by giving
-`configure' the option `--prefix=PREFIX'.
+ By default, `make install' will install the package's files in
+`/usr/local/bin', `/usr/local/man', etc. You can specify an
+installation prefix other than `/usr/local' by giving `configure' the
+option `--prefix=PATH'.
You can specify separate installation prefixes for
architecture-specific files and architecture-independent files. If you
-pass the option `--exec-prefix=PREFIX' to `configure', the package uses
-PREFIX as the prefix for installing programs and libraries.
-Documentation and other data files still use the regular prefix.
+give `configure' the option `--exec-prefix=PATH', the package will use
+PATH as the prefix for installing programs and libraries.
+Documentation and other data files will still use the regular prefix.
In addition, if you use an unusual directory layout you can give
-options like `--bindir=DIR' to specify different values for particular
+options like `--bindir=PATH' to specify different values for particular
kinds of files. Run `configure --help' for a list of the directories
you can set and what kinds of files go in them.
@@ -125,7 +362,7 @@ option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'.
Optional Features
=================
-Some packages pay attention to `--enable-FEATURE' options to
+ Some packages pay attention to `--enable-FEATURE' options to
`configure', where FEATURE indicates an optional part of the package.
They may also pay attention to `--with-PACKAGE' options, where PACKAGE
is something like `gnu-as' or `x' (for the X Window System). The
@@ -140,11 +377,11 @@ you can use the `configure' options `--x-includes=DIR' and
Specifying the System Type
==========================
-There may be some features `configure' cannot figure out automatically,
-but needs to determine by the type of machine the package will run on.
-Usually, assuming the package is built to be run on the _same_
-architectures, `configure' can figure that out, but if it prints a
-message saying it cannot guess the machine type, give it the
+ There may be some features `configure' cannot figure out
+automatically, but needs to determine by the type of machine the package
+will run on. Usually, assuming the package is built to be run on the
+_same_ architectures, `configure' can figure that out, but if it prints
+a message saying it cannot guess the machine type, give it the
`--build=TYPE' option. TYPE can either be a short name for the system
type, such as `sun4', or a canonical name which has the form:
@@ -159,7 +396,7 @@ where SYSTEM can have one of these forms:
need to know the machine type.
If you are _building_ compiler tools for cross-compiling, you should
-use the option `--target=TYPE' to select the type of system they will
+use the `--target=TYPE' option to select the type of system they will
produce code for.
If you want to _use_ a cross compiler, that generates code for a
@@ -170,9 +407,9 @@ eventually be run) with `--host=TYPE'.
Sharing Defaults
================
-If you want to set default values for `configure' scripts to share, you
-can create a site shell script called `config.site' that gives default
-values for variables like `CC', `cache_file', and `prefix'.
+ If you want to set default values for `configure' scripts to share,
+you can create a site shell script called `config.site' that gives
+default values for variables like `CC', `cache_file', and `prefix'.
`configure' looks for `PREFIX/share/config.site' if it exists, then
`PREFIX/etc/config.site' if it exists. Or, you can set the
`CONFIG_SITE' environment variable to the location of the site script.
@@ -181,7 +418,7 @@ A warning: not all `configure' scripts look for a site script.
Defining Variables
==================
-Variables not defined in a site shell script can be set in the
+ Variables not defined in a site shell script can be set in the
environment passed to `configure'. However, some packages may run
configure again during the build, and the customized values of these
variables may be lost. In order to avoid this problem, you should set
@@ -189,18 +426,14 @@ them in the `configure' command line, using `VAR=value'. For example:
./configure CC=/usr/local2/bin/gcc
-causes the specified `gcc' to be used as the C compiler (unless it is
-overridden in the site shell script). Here is a another example:
-
- /bin/bash ./configure CONFIG_SHELL=/bin/bash
-
-Here the `CONFIG_SHELL=/bin/bash' operand causes subsequent
-configuration-related scripts to be executed by `/bin/bash'.
+will cause the specified gcc to be used as the C compiler (unless it is
+overridden in the site shell script).
`configure' Invocation
======================
-`configure' recognizes the following options to control how it operates.
+ `configure' recognizes the following options to control how it
+operates.
`--help'
`-h'
@@ -233,4 +466,3 @@ configuration-related scripts to be executed by `/bin/bash'.
`configure' also accepts some other, not widely useful, options. Run
`configure --help' for more details.
-
diff --git a/src/base/basictypes.h b/src/base/basictypes.h
index 9991413..ab9cdab 100644
--- a/src/base/basictypes.h
+++ b/src/base/basictypes.h
@@ -308,6 +308,14 @@ class AssignAttributeStartEnd {
#endif // HAVE___ATTRIBUTE__ and __ELF__ or __MACH__
+#if defined(HAVE___ATTRIBUTE__) && (defined(__i386__) || defined(__x86_64__))
+# define CACHELINE_SIZE 64
+# define CACHELINE_ALIGNED __attribute__((aligned(CACHELINE_SIZE)))
+#else
+# define CACHELINE_ALIGNED
+#endif // defined(HAVE___ATTRIBUTE__) && (__i386__ || __x86_64__)
+
+
// The following enum should be used only as a constructor argument to indicate
// that the variable has static storage class, and that the constructor should
// do nothing to its state. It indicates to the reader that it is legal to
diff --git a/src/google/heap-profiler.h b/src/google/heap-profiler.h
index 5efaf64..57cb97a 100644
--- a/src/google/heap-profiler.h
+++ b/src/google/heap-profiler.h
@@ -71,12 +71,13 @@ extern "C" {
*/
PERFTOOLS_DLL_DECL void HeapProfilerStart(const char* prefix);
-/* Returns true if we are currently profiling the heap. This is true
+/* Returns non-zero if we are currently profiling the heap. (Returns
+ * an int rather than a bool so it's usable from C.) This is true
* between calls to HeapProfilerStart() and HeapProfilerStop(), and
* also if the program has been run with HEAPPROFILER, or some other
* way to turn on whole-program profiling.
*/
-bool IsHeapProfilerRunning();
+int IsHeapProfilerRunning();
/* Stop heap profiling. Can be restarted again with HeapProfilerStart(),
* but the currently accumulated profiling information will be cleared.
diff --git a/src/heap-checker.cc b/src/heap-checker.cc
index b539ed8..84e6cf3 100644
--- a/src/heap-checker.cc
+++ b/src/heap-checker.cc
@@ -1623,7 +1623,7 @@ bool HeapLeakChecker::DoNoLeaks(ShouldSymbolize should_symbolize) {
{
// Heap activity in other threads is paused during this function
// (i.e. until we got all profile difference info).
- SpinLockHolder l(&heap_checker_lock);
+ SpinLockHolder hl(&heap_checker_lock);
if (heap_checker_on == false) {
if (name_ != NULL) { // leak checking enabled when created the checker
RAW_LOG(WARNING, "Heap leak checker got turned off after checker "
@@ -1759,7 +1759,7 @@ bool HeapLeakChecker::DoNoLeaks(ShouldSymbolize should_symbolize) {
SuggestPprofCommand(pprof_file);
{
- SpinLockHolder l(&heap_checker_lock);
+ SpinLockHolder hl(&heap_checker_lock);
heap_profile->ReleaseSnapshot(leaks);
Allocator::Free(pprof_file);
}
diff --git a/src/heap-profiler.cc b/src/heap-profiler.cc
index a1c643a..3055f4c 100644
--- a/src/heap-profiler.cc
+++ b/src/heap-profiler.cc
@@ -524,9 +524,9 @@ extern "C" void HeapProfilerStart(const char* prefix) {
filename_prefix[prefix_length] = '\0';
}
-extern "C" bool IsHeapProfilerRunning() {
+extern "C" int IsHeapProfilerRunning() {
SpinLockHolder l(&heap_lock);
- return is_on;
+ return is_on ? 1 : 0; // return an int, because C code doesn't have bool
}
extern "C" void HeapProfilerStop() {
diff --git a/src/page_heap_allocator.h b/src/page_heap_allocator.h
index 20e1ab1..3f75939 100644
--- a/src/page_heap_allocator.h
+++ b/src/page_heap_allocator.h
@@ -44,7 +44,7 @@ class PageHeapAllocator {
// allocated and their constructors might not have run by the time some
// other static variable tries to allocate memory.
void Init() {
- ASSERT(kAlignedSize <= kAllocIncrement);
+ ASSERT(sizeof(T) <= kAllocIncrement);
inuse_ = 0;
free_area_ = NULL;
free_avail_ = 0;
@@ -60,8 +60,9 @@ class PageHeapAllocator {
result = free_list_;
free_list_ = *(reinterpret_cast<void**>(result));
} else {
- if (free_avail_ < kAlignedSize) {
- // Need more room
+ if (free_avail_ < sizeof(T)) {
+ // Need more room. We assume that MetaDataAlloc returns
+ // suitably aligned memory.
free_area_ = reinterpret_cast<char*>(MetaDataAlloc(kAllocIncrement));
if (free_area_ == NULL) {
CRASH("FATAL ERROR: Out of memory trying to allocate internal "
@@ -71,8 +72,8 @@ class PageHeapAllocator {
free_avail_ = kAllocIncrement;
}
result = free_area_;
- free_area_ += kAlignedSize;
- free_avail_ -= kAlignedSize;
+ free_area_ += sizeof(T);
+ free_avail_ -= sizeof(T);
}
inuse_++;
return reinterpret_cast<T*>(result);
@@ -90,10 +91,6 @@ class PageHeapAllocator {
// How much to allocate from system at a time
static const int kAllocIncrement = 128 << 10;
- // Aligned size of T
- static const size_t kAlignedSize
- = (((sizeof(T) + kAlignment - 1) / kAlignment) * kAlignment);
-
// Free area from which to carve new objects
char* free_area_;
size_t free_avail_;
diff --git a/src/pprof b/src/pprof
index f8b20a4..d70ee30 100755
--- a/src/pprof
+++ b/src/pprof
@@ -92,9 +92,7 @@ my $GV = "gv";
my $KCACHEGRIND = "kcachegrind";
my $PS2PDF = "ps2pdf";
# These are used for dynamic profiles
-my $WGET = "wget";
-my $WGET_FLAGS = "--no-http-keep-alive"; # only supported by some wgets
-my $CURL = "curl";
+my $URL_FETCHER = "curl -s";
# These are the web pages that servers need to support for dynamic profiles
my $HEAP_PAGE = "/pprof/heap";
@@ -176,12 +174,14 @@ Output type:
--text Generate text report
--callgrind Generate callgrind format to stdout
--gv Generate Postscript and display
+ --web Generate SVG and display
--list=<regexp> Generate source listing of matching routines
--disasm=<regexp> Generate disassembly of matching routines
--symbols Print demangled symbol names found at given addresses
--dot Generate DOT file to stdout
--ps Generate Postcript to stdout
--pdf Generate PDF to stdout
+ --svg Generate SVG to stdout
--gif Generate GIF to stdout
--raw Generate symbolized pprof data (useful with remote fetch)
@@ -224,6 +224,8 @@ pprof /bin/ls ls.prof
Enters "interactive" mode
pprof --text /bin/ls ls.prof
Outputs one line per procedure
+pprof --web /bin/ls ls.prof
+ Displays annotated call-graph in web browser
pprof --gv /bin/ls ls.prof
Displays annotated call-graph via 'gv'
pprof --gv --focus=Mutex /bin/ls ls.prof
@@ -234,6 +236,9 @@ pprof --list=getdir /bin/ls ls.prof
(Per-line) annotated source listing for getdir()
pprof --disasm=getdir /bin/ls ls.prof
(Per-PC) annotated disassembly for getdir()
+
+pprof http://localhost:1234/
+ Enters "interactive" mode
pprof --text localhost:1234
Outputs one line per procedure for localhost:1234
pprof --raw localhost:1234 > ./local.raw
@@ -293,10 +298,12 @@ sub Init() {
$main::opt_disasm = "";
$main::opt_symbols = 0;
$main::opt_gv = 0;
+ $main::opt_web = 0;
$main::opt_dot = 0;
$main::opt_ps = 0;
$main::opt_pdf = 0;
$main::opt_gif = 0;
+ $main::opt_svg = 0;
$main::opt_raw = 0;
$main::opt_nodecount = 80;
@@ -331,6 +338,9 @@ sub Init() {
# Are we using $SYMBOL_PAGE?
$main::use_symbol_page = 0;
+ # Files returned by TempName.
+ %main::tempnames = ();
+
# Type of profile we are dealing with
# Supported types:
# cpu
@@ -356,9 +366,11 @@ sub Init() {
"disasm=s" => \$main::opt_disasm,
"symbols!" => \$main::opt_symbols,
"gv!" => \$main::opt_gv,
+ "web!" => \$main::opt_web,
"dot!" => \$main::opt_dot,
"ps!" => \$main::opt_ps,
"pdf!" => \$main::opt_pdf,
+ "svg!" => \$main::opt_svg,
"gif!" => \$main::opt_gif,
"raw!" => \$main::opt_raw,
"interactive!" => \$main::opt_interactive,
@@ -434,9 +446,11 @@ sub Init() {
($main::opt_disasm eq '' ? 0 : 1) +
($main::opt_symbols == 0 ? 0 : 1) +
$main::opt_gv +
+ $main::opt_web +
$main::opt_dot +
$main::opt_ps +
$main::opt_pdf +
+ $main::opt_svg +
$main::opt_gif +
$main::opt_raw +
$main::opt_interactive +
@@ -511,20 +525,6 @@ sub Init() {
ConfigureObjTools($main::prog)
}
- # Check what flags our commandline utilities support
- if (open(TFILE, "$WGET $WGET_FLAGS -V 2>&1 |")) {
- my @lines = <TFILE>;
- if (grep(/unrecognized/, @lines) > 0) {
- # grep found 'unrecognized' token from WGET, clear WGET flags
- $WGET_FLAGS = "";
- }
- close(TFILE);
- }
- # TODO(csilvers): check all the other binaries and objtools to see
- # if they are installed and what flags they support, and store that
- # in a data structure here, rather than scattering these tests about.
- # Then, ideally, rewrite code to use wget OR curl OR GET or ...
-
# Break the opt_list_prefix into the prefix_list array
@prefix_list = split (',', $main::opt_lib_prefix);
@@ -635,9 +635,24 @@ sub Main() {
} else {
if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) {
if ($main::opt_gv) {
- RunGV(PsTempName($main::next_tmpfile), "");
+ RunGV(TempName($main::next_tmpfile, "ps"), "");
+ } elsif ($main::opt_web) {
+ my $tmp = TempName($main::next_tmpfile, "svg");
+ RunWeb($tmp);
+ # The command we run might hand the file name off
+ # to an already running browser instance and then exit.
+ # Normally, we'd remove $tmp on exit (right now),
+ # but fork a child to remove $tmp a little later, so that the
+ # browser has time to load it first.
+ delete $main::tempnames{$tmp};
+ if (fork() == 0) {
+ sleep 5;
+ unlink($tmp);
+ exit(0);
+ }
}
} else {
+ cleanup();
exit(1);
}
}
@@ -683,6 +698,36 @@ sub RunGV {
}
}
+sub RunWeb {
+ my $fname = shift;
+ print STDERR "Loading web page file:///$fname\n";
+
+ if (`uname` =~ /Darwin/) {
+ # OS X: open will use standard preference for SVG files.
+ system("/usr/bin/open", $fname);
+ return;
+ }
+
+ # Some kind of Unix; try generic symlinks, then specific browsers.
+ # (Stop once we find one.)
+ # Works best if the browser is already running.
+ my @alt = (
+ "/etc/alternatives/gnome-www-browser",
+ "/etc/alternatives/x-www-browser",
+ "google-chrome",
+ "firefox",
+ );
+ foreach my $b (@alt) {
+ if (-f $b) {
+ if (system($b, $fname) == 0) {
+ return;
+ }
+ }
+ }
+
+ print STDERR "Could not load web browser.\n";
+}
+
sub RunKcachegrind {
my $fname = shift;
my $bg = shift; # "" or " &" if we should run in background
@@ -739,10 +784,10 @@ sub InteractiveCommand {
print STDERR "\n";
return 0;
}
- if (m/^ *quit/) {
+ if (m/^\s*quit/) {
return 0;
}
- if (m/^ *help/) {
+ if (m/^\s*help/) {
InteractiveHelpMessage();
return 1;
}
@@ -754,7 +799,7 @@ sub InteractiveCommand {
$main::opt_gv = 0;
$main::opt_cum = 0;
- if (m/^ *(text|top)(\d*) *(.*)/) {
+ if (m/^\s*(text|top)(\d*)\s*(.*)/) {
$main::opt_text = 1;
my $line_limit = ($2 ne "") ? int($2) : 10;
@@ -773,14 +818,14 @@ sub InteractiveCommand {
PrintText($symbols, $flat, $cumulative, $total, $line_limit);
return 1;
}
- if (m/^ *callgrind *([^ \n]*)/) {
+ if (m/^\s*callgrind\s*([^ \n]*)/) {
$main::opt_callgrind = 1;
# Get derived profiles
my $calls = ExtractCalls($symbols, $orig_profile);
my $filename = $1;
if ( $1 eq '' ) {
- $filename = CallgrindTempName($main::next_tmpfile);
+ $filename = TempName($main::next_tmpfile, "callgrind");
}
PrintCallgrind($calls, $filename);
if ( $1 eq '' ) {
@@ -790,7 +835,7 @@ sub InteractiveCommand {
return 1;
}
- if (m/^ *list *(.+)/) {
+ if (m/^\s*list\s*(.+)/) {
$main::opt_list = 1;
my $routine;
@@ -807,7 +852,7 @@ sub InteractiveCommand {
PrintListing($libs, $flat, $cumulative, $routine);
return 1;
}
- if (m/^ *disasm *(.+)/) {
+ if (m/^\s*disasm\s*(.+)/) {
$main::opt_disasm = 1;
my $routine;
@@ -825,12 +870,18 @@ sub InteractiveCommand {
PrintDisassembly($libs, $flat, $cumulative, $routine, $total);
return 1;
}
- if (m/^ *gv *(.*)/) {
- $main::opt_gv = 1;
+ if (m/^\s*(gv|web)\s*(.*)/) {
+ $main::opt_gv = 0;
+ $main::opt_web = 0;
+ if ($1 eq "gv") {
+ $main::opt_gv = 1;
+ } elsif ($1 eq "web") {
+ $main::opt_web = 1;
+ }
my $focus;
my $ignore;
- ($focus, $ignore) = ParseInteractiveArgs($1);
+ ($focus, $ignore) = ParseInteractiveArgs($2);
# Process current profile to account for various settings
my $profile = ProcessProfile($orig_profile, $symbols, $focus, $ignore);
@@ -841,11 +892,19 @@ sub InteractiveCommand {
my $cumulative = CumulativeProfile($reduced);
if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) {
- RunGV(PsTempName($main::next_tmpfile), " &");
+ if ($main::opt_gv) {
+ RunGV(TempName($main::next_tmpfile, "ps"), " &");
+ } elsif ($main::opt_web) {
+ RunWeb(TempName($main::next_tmpfile, "svg"));
+ }
$main::next_tmpfile++;
}
return 1;
}
+ if (m/^\s*$/) {
+ return 1;
+ }
+ print STDERR "Unknown command: try 'help'.\n";
return 1;
}
@@ -894,6 +953,14 @@ Commands:
the "focus" regular expression matches a routine name on the stack
trace.
+ web
+ web [focus] [-ignore1] [-ignore2]
+ Like GV, but displays profile in your web browser instead of using
+ Ghostview. Works best if your web browser is already running.
+ To change the browser that gets used:
+ On Linux, set the /etc/alternatives/gnome-www-browser symlink.
+ On OS X, change the Finder association for SVG files.
+
list [routine_regexp] [-ignore1] [-ignore2]
Show source listing of routines whose names match "routine_regexp"
@@ -950,14 +1017,12 @@ sub ParseInteractiveArgs {
##### Output code #####
-sub PsTempName {
+sub TempName {
my $fnum = shift;
- return "$main::tmpfile_ps" . "." . "$fnum" . ".ps";
-}
-
-sub CallgrindTempName {
- my $fnum = shift;
- return "$main::tmpfile_ps" . "." . "$fnum" . ".callgrind";
+ my $ext = shift;
+ my $file = "$main::tmpfile_ps.$fnum.$ext";
+ $main::tempnames{$file} = 1;
+ return $file;
}
# Print profile data in packed binary format (64-bit) to standard out
@@ -1599,7 +1664,6 @@ sub PrintDot {
}
if ($last < 0) {
print STDERR "No nodes to print\n";
- cleanup();
return 0;
}
@@ -1612,11 +1676,14 @@ sub PrintDot {
# Open DOT output file
my $output;
if ($main::opt_gv) {
- $output = "| $DOT -Tps2 >" . PsTempName($main::next_tmpfile);
+ $output = "| $DOT -Tps2 >" . TempName($main::next_tmpfile, "ps");
} elsif ($main::opt_ps) {
$output = "| $DOT -Tps2";
} elsif ($main::opt_pdf) {
$output = "| $DOT -Tps2 | $PS2PDF - -";
+ } elsif ($main::opt_web || $main::opt_svg) {
+ # We need to post-process the SVG, so write to a temporary file always.
+ $output = "| $DOT -Tsvg >" . TempName($main::next_tmpfile, "svg");
} elsif ($main::opt_gif) {
$output = "| $DOT -Tgif";
} else {
@@ -1727,7 +1794,10 @@ sub PrintDot {
my $fraction = abs($local_total ? (3 * ($n / $local_total)) : 0);
if ($fraction > 1) { $fraction = 1; }
my $w = $fraction * 2;
- #if ($w < 1) { $w = 1; }
+ if ($w < 1 && ($main::opt_web || $main::opt_svg)) {
+ # SVG output treats line widths < 1 poorly.
+ $w = 1;
+ }
# Dot sometimes segfaults if given edge weights that are too large, so
# we cap the weights at a large value
@@ -1751,11 +1821,312 @@ sub PrintDot {
}
print DOT ("}\n");
-
close(DOT);
+
+ if ($main::opt_web || $main::opt_svg) {
+ # Rewrite SVG to be more usable inside web browser.
+ RewriteSvg(TempName($main::next_tmpfile, "svg"));
+ }
+
return 1;
}
+sub RewriteSvg {
+ my $svgfile = shift;
+
+ open(SVG, $svgfile) || die "open temp svg: $!";
+ my @svg = <SVG>;
+ close(SVG);
+ unlink $svgfile;
+ my $svg = join('', @svg);
+
+ # Dot's SVG output is
+ #
+ # <svg width="___" height="___"
+ # viewBox="___" xmlns=...>
+ # <g id="graph0" transform="...">
+ # ...
+ # </g>
+ # </svg>
+ #
+ # Change it to
+ #
+ # <svg width="100%" height="100%"
+ # xmlns=...>
+ # $svg_javascript
+ # <g id="viewport" transform="translate(0,0)">
+ # <g id="graph0" transform="...">
+ # ...
+ # </g>
+ # </g>
+ # </svg>
+
+ # Fix width, height; drop viewBox.
+ $svg =~ s/(?s)<svg width="[^"]+" height="[^"]+"(.*?)viewBox="[^"]+"/<svg width="100%" height="100%"$1/;
+
+ # Insert script, viewport <g> above first <g>
+ my $svg_javascript = SvgJavascript();
+ my $viewport = "<g id=\"viewport\" transform=\"translate(0,0)\">\n";
+ $svg =~ s/<g id="graph\d"/$svg_javascript$viewport$&/;
+
+ # Insert final </g> above </svg>.
+ $svg =~ s/(.*)(<\/svg>)/$1<\/g>$2/;
+ $svg =~ s/<g id="graph\d"(.*?)/<g id="viewport"$1/;
+
+ if ($main::opt_svg) {
+ # --svg: write to standard output.
+ print $svg;
+ } else {
+ # Write back to temporary file.
+ open(SVG, ">$svgfile") || die "open $svgfile: $!";
+ print SVG $svg;
+ close(SVG);
+ }
+}
+
+sub SvgJavascript {
+ return <<'EOF';
+<script type="text/ecmascript"><![CDATA[
+// SVGPan
+// http://www.cyberz.org/blog/2009/12/08/svgpan-a-javascript-svg-panzoomdrag-library/
+// Local modification: if(true || ...) below to force panning, never moving.
+
+/**
+ * SVGPan library 1.2
+ * ====================
+ *
+ * Given an unique existing element with id "viewport", including the
+ * the library into any SVG adds the following capabilities:
+ *
+ * - Mouse panning
+ * - Mouse zooming (using the wheel)
+ * - Object dargging
+ *
+ * Known issues:
+ *
+ * - Zooming (while panning) on Safari has still some issues
+ *
+ * Releases:
+ *
+ * 1.2, Sat Mar 20 08:42:50 GMT 2010, Zeng Xiaohui
+ * Fixed a bug with browser mouse handler interaction
+ *
+ * 1.1, Wed Feb 3 17:39:33 GMT 2010, Zeng Xiaohui
+ * Updated the zoom code to support the mouse wheel on Safari/Chrome
+ *
+ * 1.0, Andrea Leofreddi
+ * First release
+ *
+ * This code is licensed under the following BSD license:
+ *
+ * Copyright 2009-2010 Andrea Leofreddi <a.leofreddi@itcharm.com>. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this list of
+ * conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice, this list
+ * of conditions and the following disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Andrea Leofreddi ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Andrea Leofreddi OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * The views and conclusions contained in the software and documentation are those of the
+ * authors and should not be interpreted as representing official policies, either expressed
+ * or implied, of Andrea Leofreddi.
+ */
+
+var root = document.documentElement;
+
+var state = 'none', stateTarget, stateOrigin, stateTf;
+
+setupHandlers(root);
+
+/**
+ * Register handlers on the root element: mouse button/move handlers are
+ * installed as event attributes, the wheel handler via addEventListener
+ * (the wheel event name differs between WebKit and other browsers).
+ */
+function setupHandlers(root){
+ setAttributes(root, {
+ "onmousedown" : "handleMouseDown(evt)",
+ "onmousemove" : "handleMouseMove(evt)",
+ "onmouseup" : "handleMouseUp(evt)",
+ //"onmouseout" : "handleMouseUp(evt)", // Decomment this to stop the pan functionality when dragging out of the SVG element
+ });
+
+ if(navigator.userAgent.toLowerCase().indexOf('webkit') >= 0)
+ window.addEventListener('mousewheel', handleMouseWheel, false); // Chrome/Safari
+ else
+ window.addEventListener('DOMMouseScroll', handleMouseWheel, false); // Others
+}
+
+/**
+ * Instance an SVGPoint object with given event coordinates.
+ */
+function getEventPoint(evt) {
+ var p = root.createSVGPoint();
+
+ p.x = evt.clientX;
+ p.y = evt.clientY;
+
+ return p;
+}
+
+/**
+ * Sets the current transform matrix of an element.
+ */
+function setCTM(element, matrix) {
+ var s = "matrix(" + matrix.a + "," + matrix.b + "," + matrix.c + "," + matrix.d + "," + matrix.e + "," + matrix.f + ")";
+
+ element.setAttribute("transform", s);
+}
+
+/**
+ * Dumps a matrix to a string (useful for debug).
+ */
+function dumpMatrix(matrix) {
+ var s = "[ " + matrix.a + ", " + matrix.c + ", " + matrix.e + "\n " + matrix.b + ", " + matrix.d + ", " + matrix.f + "\n 0, 0, 1 ]";
+
+ return s;
+}
+
+/**
+ * Sets attributes of an element.
+ */
+function setAttributes(element, attributes){
+ for (i in attributes)
+ element.setAttributeNS(null, i, attributes[i]);
+}
+
+/**
+ * Handle mouse wheel event: zoom the "viewport" group around the current
+ * mouse position.
+ */
+function handleMouseWheel(evt) {
+ if(evt.preventDefault)
+ evt.preventDefault();
+
+ evt.returnValue = false;
+
+ var svgDoc = evt.target.ownerDocument;
+
+ var delta;
+
+ if(evt.wheelDelta)
+ delta = evt.wheelDelta / 3600; // Chrome/Safari
+ else
+ delta = evt.detail / -90; // Mozilla
+
+ var z = 1 + delta; // Zoom factor: 0.9/1.1
+
+ var g = svgDoc.getElementById("viewport");
+
+ var p = getEventPoint(evt);
+
+ p = p.matrixTransform(g.getCTM().inverse());
+
+ // Compute new scale matrix in current mouse position
+ var k = root.createSVGMatrix().translate(p.x, p.y).scale(z).translate(-p.x, -p.y);
+
+ setCTM(g, g.getCTM().multiply(k));
+
+ // stateTf is otherwise only assigned by handleMouseDown; give it a value
+ // here so zooming before the first mouse press does not fail on undefined.
+ if(typeof(stateTf) == "undefined")
+ stateTf = g.getCTM().inverse();
+
+ stateTf = stateTf.multiply(k.inverse());
+}
+
+/**
+ * Handle mouse move event.
+ */
+function handleMouseMove(evt) {
+ if(evt.preventDefault)
+ evt.preventDefault();
+
+ evt.returnValue = false;
+
+ var svgDoc = evt.target.ownerDocument;
+
+ var g = svgDoc.getElementById("viewport");
+
+ if(state == 'pan') {
+ // Pan mode
+ var p = getEventPoint(evt).matrixTransform(stateTf);
+
+ setCTM(g, stateTf.inverse().translate(p.x - stateOrigin.x, p.y - stateOrigin.y));
+ } else if(state == 'move') {
+ // Move mode
+ var p = getEventPoint(evt).matrixTransform(g.getCTM().inverse());
+
+ setCTM(stateTarget, root.createSVGMatrix().translate(p.x - stateOrigin.x, p.y - stateOrigin.y).multiply(g.getCTM().inverse()).multiply(stateTarget.getCTM()));
+
+ stateOrigin = p;
+ }
+}
+
+/**
+ * Handle click event.
+ */
+function handleMouseDown(evt) {
+ if(evt.preventDefault)
+ evt.preventDefault();
+
+ evt.returnValue = false;
+
+ var svgDoc = evt.target.ownerDocument;
+
+ var g = svgDoc.getElementById("viewport");
+
+ if(true || evt.target.tagName == "svg") {
+ // Pan mode
+ state = 'pan';
+
+ stateTf = g.getCTM().inverse();
+
+ stateOrigin = getEventPoint(evt).matrixTransform(stateTf);
+ } else {
+ // Move mode
+ state = 'move';
+
+ stateTarget = evt.target;
+
+ stateTf = g.getCTM().inverse();
+
+ stateOrigin = getEventPoint(evt).matrixTransform(stateTf);
+ }
+}
+
+/**
+ * Handle mouse button release event.
+ */
+function handleMouseUp(evt) {
+ if(evt.preventDefault)
+ evt.preventDefault();
+
+ evt.returnValue = false;
+
+ var svgDoc = evt.target.ownerDocument;
+
+ if(state == 'pan' || state == 'move') {
+ // Quit pan mode
+ state = '';
+ }
+}
+
+]]></script>
+EOF
+}
+
# Translate a stack of addresses into a stack of symbols
sub TranslateStack {
my $symbols = shift;
@@ -2310,28 +2681,11 @@ sub AddEntries {
AddEntry($profile, (join "\n", @k), $count);
}
-sub IsSymbolizedProfileFile {
- my $file_name = shift;
-
- if (!(-e $file_name) || !(-r $file_name)) {
- return 0;
- }
-
- $SYMBOL_PAGE =~ m,[^/]+$,; # matches everything after the last slash
- my $symbol_marker = $&;
- # Check if the file contains a symbol-section marker.
- open(TFILE, "<$file_name");
- my @lines = <TFILE>;
- my $result = grep(/^--- *$symbol_marker/, @lines);
- close(TFILE);
- return $result > 0;
-}
-
##### Code to profile a server dynamically #####
sub CheckSymbolPage {
my $url = SymbolPageURL();
- open(SYMBOL, "$WGET $WGET_FLAGS -qO- '$url' |");
+ open(SYMBOL, "$URL_FETCHER '$url' |");
my $line = <SYMBOL>;
$line =~ s/\r//g; # turn windows-looking lines into unix-looking lines
close(SYMBOL);
@@ -2350,33 +2704,33 @@ sub CheckSymbolPage {
sub IsProfileURL {
my $profile_name = shift;
- my ($host, $port, $path) = ParseProfileURL($profile_name);
+ my ($host, $port, $prefix, $path) = ParseProfileURL($profile_name);
return defined($host) and defined($port) and defined($path);
}
+# Splits a profile URL of the form [http://]host:port[@N][prefix]handler,
+# where handler is one of the *_PAGE paths. Returns the list
+# (host, port, prefix, path), or the empty list if $profile_name is
+# undefined or does not match. prefix is "" when the URL has none.
sub ParseProfileURL {
my $profile_name = shift;
if (defined($profile_name) &&
- $profile_name =~ m,^(http://|)([^/:]+):(\d+)(|\@\d+)(|/|.*($PROFILE_PAGE|$PMUPROFILE_PAGE|$HEAP_PAGE|$GROWTH_PAGE|$CONTENTION_PAGE|$WALL_PAGE|$FILTEREDPROFILE_PAGE))$,o) {
- # $6 is $PROFILE_PAGE/$HEAP_PAGE/etc. $5 is *everything* after
+ $profile_name =~ m,^(http://|)([^/:]+):(\d+)(|\@\d+)(|/|(.*?)($PROFILE_PAGE|$PMUPROFILE_PAGE|$HEAP_PAGE|$GROWTH_PAGE|$CONTENTION_PAGE|$WALL_PAGE|$FILTEREDPROFILE_PAGE))$,o) {
+ # $7 is $PROFILE_PAGE/$HEAP_PAGE/etc. $5 is *everything* after
# the hostname, as long as that everything is the empty string,
# a slash, or something ending in $PROFILE_PAGE/$HEAP_PAGE/etc.
- # So "$6 || $5" is $PROFILE_PAGE/etc if there, or else it's "/" or "".
- return ($2, $3, $6 || $5);
+ # So "$7 || $5" is $PROFILE_PAGE/etc if there, or else it's "/" or "".
+ # $6 (the prefix before the handler) is undef when $5 matched "" or "/",
+ # so normalize it to "" -- callers interpolate it straight into URLs.
+ return ($2, $3, defined($6) ? $6 : "", $7 || $5);
}
return ();
}
# We fetch symbols from the first profile argument.
sub SymbolPageURL {
- my ($host, $port, $path) = ParseProfileURL($main::pfile_args[0]);
- return "http://$host:$port$SYMBOL_PAGE";
+ my ($host, $port, $prefix, $path) = ParseProfileURL($main::pfile_args[0]);
+ return "http://$host:$port$prefix$SYMBOL_PAGE";
}
sub FetchProgramName() {
- my ($host, $port, $path) = ParseProfileURL($main::pfile_args[0]);
- my $url = "http://$host:$port$PROGRAM_NAME_PAGE";
- my $command_line = "$WGET $WGET_FLAGS -qO- '$url'";
+ my ($host, $port, $prefix, $path) = ParseProfileURL($main::pfile_args[0]);
+ my $url = "http://$host:$port$prefix$PROGRAM_NAME_PAGE";
+ my $command_line = "$URL_FETCHER '$url'";
open(CMDLINE, "$command_line |") or error($command_line);
my $cmdline = <CMDLINE>;
$cmdline =~ s/\r//g; # turn windows-looking lines into unix-looking lines
@@ -2393,7 +2747,7 @@ sub FetchProgramName() {
# curl. Redirection happens on borg hosts.
sub ResolveRedirectionForCurl {
my $url = shift;
- my $command_line = "$CURL -s --head '$url'";
+ my $command_line = "$URL_FETCHER --head '$url'";
open(CMDLINE, "$command_line |") or error($command_line);
while (<CMDLINE>) {
s/\r//g; # turn windows-looking lines into unix-looking lines
@@ -2405,6 +2759,20 @@ sub ResolveRedirectionForCurl {
return $url;
}
+# Add a timeout flag to URL_FETCHER
+sub AddFetchTimeout {
+ my $fetcher = shift;
+ my $timeout = shift;
+ if (defined($timeout)) {
+ if ($fetcher =~ m/\bcurl -s/) {
+ $fetcher .= sprintf(" --max-time %d", $timeout);
+ } elsif ($fetcher =~ m/\brpcget\b/) {
+ $fetcher .= sprintf(" --deadline=%d", $timeout);
+ }
+ }
+ return $fetcher;
+}
+
# Reads a symbol map from the file handle name given as $1, returning
# the resulting symbol map. Also processes variables relating to symbols.
# Currently, the only variable processed is 'binary=<value>' which updates
@@ -2460,10 +2828,14 @@ sub FetchSymbols {
close(POSTFILE);
my $url = SymbolPageURL();
- # Here we use curl for sending data via POST since old
- # wget doesn't have --post-file option.
- $url = ResolveRedirectionForCurl($url);
- my $command_line = "$CURL -sd '\@$main::tmpfile_sym' '$url'";
+
+ my $command_line;
+ if ($URL_FETCHER =~ m/\bcurl -s/) {
+ $url = ResolveRedirectionForCurl($url);
+ $command_line = "$URL_FETCHER -d '\@$main::tmpfile_sym' '$url'";
+ } else {
+ $command_line = "$URL_FETCHER --post '$url' < '$main::tmpfile_sym'";
+ }
# We use c++filt in case $SYMBOL_PAGE gives us mangled symbols.
my $cppfilt = $obj_tool_map{"c++filt"};
open(SYMBOL, "$command_line | $cppfilt |") or error($command_line);
@@ -2508,7 +2880,7 @@ sub BaseName {
sub MakeProfileBaseName {
my ($binary_name, $profile_name) = @_;
- my ($host, $port, $path) = ParseProfileURL($profile_name);
+ my ($host, $port, $prefix, $path) = ParseProfileURL($profile_name);
my $binary_shortname = BaseName($binary_name);
return sprintf("%s.%s.%s-port%s",
$binary_shortname, $main::op_time, $host, $port);
@@ -2523,7 +2895,7 @@ sub FetchDynamicProfile {
if (!IsProfileURL($profile_name)) {
return $profile_name;
} else {
- my ($host, $port, $path) = ParseProfileURL($profile_name);
+ my ($host, $port, $prefix, $path) = ParseProfileURL($profile_name);
if ($path eq "" || $path eq "/") {
# Missing type specifier defaults to cpu-profile
$path = $PROFILE_PAGE;
@@ -2532,10 +2904,10 @@ sub FetchDynamicProfile {
my $profile_file = MakeProfileBaseName($binary_name, $profile_name);
my $url;
- my $wget_timeout;
+ my $fetch_timeout = undef;
if (($path =~ m/$PROFILE_PAGE/) || ($path =~ m/$PMUPROFILE_PAGE/)) {
if ($path =~ m/$PROFILE_PAGE/) {
- $url = sprintf("http://$host:$port$path?seconds=%d",
+ $url = sprintf("http://$host:$port$prefix$path?seconds=%d",
$main::opt_seconds);
} else {
if ($profile_name =~ m/[?]/) {
@@ -2546,16 +2918,14 @@ sub FetchDynamicProfile {
$url = sprintf("http://$profile_name" . "seconds=%d",
$main::opt_seconds);
}
- $wget_timeout = sprintf("--timeout=%d",
- int($main::opt_seconds * 1.01 + 60));
+ $fetch_timeout = $main::opt_seconds * 1.01 + 60;
} else {
# For non-CPU profiles, we add a type-extension to
# the target profile file name.
my $suffix = $path;
$suffix =~ s,/,.,g;
$profile_file .= "$suffix";
- $url = "http://$host:$port$path";
- $wget_timeout = "";
+ $url = "http://$host:$port$prefix$path";
}
my $profile_dir = $ENV{"PPROF_TMPDIR"} || ($ENV{HOME} . "/pprof");
@@ -2570,7 +2940,8 @@ sub FetchDynamicProfile {
return $real_profile;
}
- my $cmd = "$WGET $WGET_FLAGS $wget_timeout -q -O $tmp_profile '$url'";
+ my $fetcher = AddFetchTimeout($URL_FETCHER, $fetch_timeout);
+ my $cmd = "$fetcher '$url' > '$tmp_profile'";
if (($path =~ m/$PROFILE_PAGE/) || ($path =~ m/$PMUPROFILE_PAGE/)){
print STDERR "Gathering CPU profile from $url for $main::opt_seconds seconds to\n ${real_profile}\n";
if ($encourage_patience) {
@@ -2624,6 +2995,7 @@ sub FetchDynamicProfilesRecurse {
} else {
$position = 1 | ($position << 1);
TryCollectProfile($maxlevel, $level, $position);
+ cleanup();
exit(0);
}
}
@@ -2764,6 +3136,44 @@ BEGIN {
}
}
+# Return the next line from the profile file, assuming it's a text
+# line (which in this case means, doesn't start with a NUL byte). If
+# it's not a text line, return "". At EOF, return undef, like perl does.
+# Input file should be in binmode.
+sub ReadProfileLine {
+ local *PROFILE = shift;
+ my $firstchar = "";
+ my $line = "";
+ # read() returns 0 at EOF (and undef on error). Nothing was consumed in
+ # that case, so the "unread" seek below would instead step back onto the
+ # last byte of the file and that byte would be returned as a line.
+ if (!read(PROFILE, $firstchar, 1)) {
+ return undef;
+ }
+ seek(PROFILE, -1, 1); # unread the firstchar
+ if ($firstchar eq "\0") {
+ return "";
+ }
+ $line = <PROFILE>;
+ if (defined($line)) {
+ $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines
+ }
+ return $line;
+}
+
+sub IsSymbolizedProfileFile {
+ my $file_name = shift;
+ if (!(-e $file_name) || !(-r $file_name)) {
+ return 0;
+ }
+ # Check if the file contains a symbol-section marker.
+ open(TFILE, "<$file_name");
+ binmode TFILE;
+ my $firstline = ReadProfileLine(*TFILE);
+ close(TFILE);
+ if (!$firstline) {
+ return 0;
+ }
+ $SYMBOL_PAGE =~ m,[^/]+$,; # matches everything after the last slash
+ my $symbol_marker = $&;
+ return $firstline =~ /^--- *$symbol_marker/;
+}
+
# Parse profile generated by common/profiler.cc and return a reference
# to a map:
# $result->{version} Version number of profile file
@@ -2798,28 +3208,17 @@ sub ReadProfile {
# whole firstline, since it may be gigabytes(!) of data.
open(PROFILE, "<$fname") || error("$fname: $!\n");
binmode PROFILE; # New perls do UTF-8 processing
- my $firstchar = "";
- my $header = "";
- read(PROFILE, $firstchar, 1);
- seek(PROFILE, -1, 1); # unread the firstchar
- if ($firstchar ne "\0") {
- $header = <PROFILE>;
- $header =~ s/\r//g; # turn windows-looking lines into unix-looking lines
+ my $header = ReadProfileLine(*PROFILE);
+ if (!defined($header)) { # means "at EOF"
+ error("Profile is empty.\n");
}
my $symbols;
if ($header =~ m/^--- *$symbol_marker/o) {
- # read the symbol section of the symbolized profile file
+ # Read the symbol section of the symbolized profile file.
$symbols = ReadSymbols(*PROFILE{IO});
-
- # read the next line to get the header for the remaining profile
- $header = "";
- read(PROFILE, $firstchar, 1);
- seek(PROFILE, -1, 1); # unread the firstchar
- if ($firstchar ne "\0") {
- $header = <PROFILE>;
- $header =~ s/\r//g;
- }
+ # Read the next line to get the header for the remaining profile.
+ $header = ReadProfileLine(*PROFILE) || "";
}
my $result;
@@ -3905,6 +4304,8 @@ sub ConfigureObjTools {
if ($file_type =~ /Mach-O/) {
# OS X uses otool to examine Mach-O files, rather than objdump.
$obj_tool_map{"otool"} = "otool";
+ $obj_tool_map{"addr2line"} = "false"; # no addr2line
+ $obj_tool_map{"objdump"} = "false"; # no objdump
}
# Go fill in %obj_tool_map with the pathnames to use:
@@ -3951,9 +4352,8 @@ sub ConfigureTool {
sub cleanup {
unlink($main::tmpfile_sym);
- for (my $i = 0; $i < $main::next_tmpfile; $i++) {
- unlink(PsTempName($i));
- }
+ unlink(keys %main::tempnames);
+
# We leave any collected profiles in $HOME/pprof in case the user wants
# to look at them later. We print a message informing them of this.
if ((scalar(@main::profile_files) > 0) &&
@@ -3996,7 +4396,7 @@ sub GetProcedureBoundariesViaNm {
my $routine = "";
while (<NM>) {
s/\r//g; # turn windows-looking lines into unix-looking lines
- if (m/^([0-9a-f]+) (.) (..*)/) {
+ if (m/^\s*([0-9a-f]+) (.) (..*)/) {
my $start_val = $1;
my $type = $2;
my $this_routine = $3;
@@ -4058,7 +4458,6 @@ sub GetProcedureBoundariesViaNm {
$symbol_table->{$routine} = [HexExtend($last_start),
HexExtend($last_start)];
}
-
return $symbol_table;
}
@@ -4106,7 +4505,11 @@ sub GetProcedureBoundaries {
my @nm_commands = ("$nm -n $flatten_flag $demangle_flag" .
" $image 2>/dev/null $cppfilt_flag",
"$nm -D -n $flatten_flag $demangle_flag" .
- " $image 2>/dev/null $cppfilt_flag");
+ " $image 2>/dev/null $cppfilt_flag",
+ # 6nm is for Go binaries
+ "6nm $image 2>/dev/null | sort",
+ );
+
# If the executable is an MS Windows PDB-format executable, we'll
# have set up obj_tool_map("nm_pdb"). In this case, we actually
# want to use both unix nm and windows-specific nm_pdb, since
diff --git a/src/stacktrace_config.h b/src/stacktrace_config.h
index b58ab1d..18f16ab 100644
--- a/src/stacktrace_config.h
+++ b/src/stacktrace_config.h
@@ -53,6 +53,7 @@
# define STACKTRACE_SKIP_CONTEXT_ROUTINES 1
# elif defined(HAVE_LIBUNWIND_H) // a proxy for having libunwind installed
# define STACKTRACE_INL_HEADER "stacktrace_libunwind-inl.h"
+# define STACKTRACE_USES_LIBUNWIND 1
# elif defined(__linux)
# error Cannnot calculate stack trace: need either libunwind or frame-pointers (see INSTALL file)
# else
diff --git a/src/system-alloc.cc b/src/system-alloc.cc
index 6d2e1c6..2505959 100644
--- a/src/system-alloc.cc
+++ b/src/system-alloc.cc
@@ -78,7 +78,7 @@ union MemoryAligner {
void* p;
double d;
size_t s;
-};
+} CACHELINE_ALIGNED;
static SpinLock spinlock(SpinLock::LINKER_INITIALIZED);
diff --git a/src/system-alloc.h b/src/system-alloc.h
index 44b0333..832cbd0 100644
--- a/src/system-alloc.h
+++ b/src/system-alloc.h
@@ -48,7 +48,11 @@
// may optionally return more bytes than asked for (i.e. return an
// entire "huge" page if a huge page allocator is in use).
//
-// The returned pointer is a multiple of "alignment" if non-zero.
+// The returned pointer is a multiple of "alignment" if non-zero. The
+// returned pointer will always be aligned suitably for holding a
+// void*, double, or size_t. In addition, if this platform defines
+// CACHELINE_ALIGNED, the return pointer will always be cacheline
+// aligned.
//
// Returns NULL when out of memory.
extern void* TCMalloc_SystemAlloc(size_t bytes, size_t *actual_bytes,
diff --git a/src/tcmalloc.cc b/src/tcmalloc.cc
index 48cf328..c288139 100644
--- a/src/tcmalloc.cc
+++ b/src/tcmalloc.cc
@@ -798,7 +798,25 @@ static TCMallocGuard module_enter_exit_hook;
// Helpers for the exported routines below
//-------------------------------------------------------------------
-static Span* DoSampledAllocation(size_t size) {
+static inline bool CheckCachedSizeClass(void *ptr) {
+ PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
+ size_t cached_value = Static::pageheap()->GetSizeClassIfCached(p);
+ return cached_value == 0 ||
+ cached_value == Static::pageheap()->GetDescriptor(p)->sizeclass;
+}
+
+static inline void* CheckedMallocResult(void *result) {
+ ASSERT(result == NULL || CheckCachedSizeClass(result));
+ return result;
+}
+
+static inline void* SpanToMallocResult(Span *span) {
+ Static::pageheap()->CacheSizeClass(span->start, 0);
+ return
+ CheckedMallocResult(reinterpret_cast<void*>(span->start << kPageShift));
+}
+
+static void* DoSampledAllocation(size_t size) {
// Grab the stack trace outside the heap lock
StackTrace tmp;
tmp.depth = GetStackTrace(tmp.stack, tcmalloc::kMaxStackDepth, 1);
@@ -823,26 +841,7 @@ static Span* DoSampledAllocation(size_t size) {
span->objects = stack;
tcmalloc::DLL_Prepend(Static::sampled_objects(), span);
- return span;
-}
-
-static inline bool CheckCachedSizeClass(void *ptr) {
- PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
- size_t cached_value = Static::pageheap()->GetSizeClassIfCached(p);
- return cached_value == 0 ||
- cached_value == Static::pageheap()->GetDescriptor(p)->sizeclass;
-}
-
-static inline void* CheckedMallocResult(void *result)
-{
- ASSERT(result == 0 || CheckCachedSizeClass(result));
- return result;
-}
-
-static inline void* SpanToMallocResult(Span *span) {
- Static::pageheap()->CacheSizeClass(span->start, 0);
- return
- CheckedMallocResult(reinterpret_cast<void*>(span->start << kPageShift));
+ return SpanToMallocResult(span);
}
// Copy of FLAGS_tcmalloc_large_alloc_report_threshold with
@@ -888,24 +887,38 @@ inline void* do_memalign_or_cpp_memalign(size_t align, size_t size) {
return tc_new_mode ? cpp_memalign(align, size) : do_memalign(align, size);
}
+// Returns true if an allocation of num_pages pages reaches the current
+// large_alloc_threshold (a byte count; non-positive disables reporting).
+// Each time it returns true the threshold is raised by 1/8, capped at 8GB.
+// Must be called with the pageheap lock (Static::pageheap_lock()) held.
+inline bool should_report_large(Length num_pages) {
+ const int64 threshold = large_alloc_threshold;
+ if (threshold > 0 && num_pages >= (threshold >> kPageShift)) {
+ // Increase the threshold by 1/8 every time we generate a report.
+ // We cap the threshold at 8GB to avoid overflow problems.
+ large_alloc_threshold = (threshold + threshold/8 < 8ll<<30
+ ? threshold + threshold/8 : 8ll<<30);
+ return true;
+ }
+ return false;
+}
+
// Helper for do_malloc().
-inline void* do_malloc_pages(Length num_pages) {
- Span *span;
- bool report_large = false;
- {
+inline void* do_malloc_pages(ThreadCache* heap, size_t size) {
+ void* result;
+ bool report_large;
+
+ Length num_pages = tcmalloc::pages(size);
+ size = num_pages << kPageShift;
+
+ if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) {
+ result = DoSampledAllocation(size);
+
SpinLockHolder h(Static::pageheap_lock());
- span = Static::pageheap()->New(num_pages);
- const int64 threshold = large_alloc_threshold;
- if (threshold > 0 && num_pages >= (threshold >> kPageShift)) {
- // Increase the threshold by 1/8 every time we generate a report.
- // We cap the threshold at 8GB to avoid overflow problems.
- large_alloc_threshold = (threshold + threshold/8 < 8ll<<30
- ? threshold + threshold/8 : 8ll<<30);
- report_large = true;
- }
+ report_large = should_report_large(num_pages);
+ } else {
+ SpinLockHolder h(Static::pageheap_lock());
+ Span* span = Static::pageheap()->New(num_pages);
+ result = (span == NULL ? NULL : SpanToMallocResult(span));
+ report_large = should_report_large(num_pages);
}
- void* result = (span == NULL ? NULL : SpanToMallocResult(span));
if (report_large) {
ReportLargeAlloc(num_pages, result);
}
@@ -917,17 +930,19 @@ inline void* do_malloc(size_t size) {
// The following call forces module initialization
ThreadCache* heap = ThreadCache::GetCache();
- if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) {
- Span* span = DoSampledAllocation(size);
- if (span != NULL) {
- ret = SpanToMallocResult(span);
+ if (size <= kMaxSize) {
+ size_t cl = Static::sizemap()->SizeClass(size);
+ size = Static::sizemap()->class_to_size(cl);
+
+ if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) {
+ ret = DoSampledAllocation(size);
+ } else {
+ // The common case, and also the simplest. This just pops the
+ // size-appropriate freelist, after replenishing it if it's empty.
+ ret = CheckedMallocResult(heap->Allocate(size, cl));
}
- } else if (size <= kMaxSize) {
- // The common case, and also the simplest. This just pops the
- // size-appropriate freelist, after replenishing it if it's empty.
- ret = CheckedMallocResult(heap->Allocate(size));
} else {
- ret = do_malloc_pages(tcmalloc::pages(size));
+ ret = do_malloc_pages(heap, size);
}
if (ret == NULL) errno = ENOMEM;
return ret;
@@ -1108,8 +1123,8 @@ void* do_memalign(size_t align, size_t size) {
}
if (cl < kNumClasses) {
ThreadCache* heap = ThreadCache::GetCache();
- return CheckedMallocResult(heap->Allocate(
- Static::sizemap()->class_to_size(cl)));
+ size = Static::sizemap()->class_to_size(cl);
+ return CheckedMallocResult(heap->Allocate(size, cl));
}
}
diff --git a/src/thread_cache.h b/src/thread_cache.h
index 4c6a233..1165447 100644
--- a/src/thread_cache.h
+++ b/src/thread_cache.h
@@ -79,7 +79,9 @@ class ThreadCache {
// Total byte size in cache
size_t Size() const { return size_; }
- void* Allocate(size_t size);
+ // Allocate an object of the given size and class. The size given
+ // must be the same as the size of the class in the size map.
+ void* Allocate(size_t size, size_t cl);
void Deallocate(void* ptr, size_t size_class);
void Scavenge();
@@ -293,15 +295,18 @@ class ThreadCache {
// across all ThreadCaches. Protected by Static::pageheap_lock.
static ssize_t unclaimed_cache_space_;
- // Warning: the offset of list_ affects performance. On general
- // principles, we don't like list_[x] to span multiple L1 cache
- // lines. However, merely placing list_ at offset 0 here seems to
- // cause cache conflicts.
+ // This class is laid out with the most frequently used fields
+ // first so that hot elements are placed on the same cache line.
size_t size_; // Combined size of data
size_t max_size_; // size_ > max_size_ --> Scavenge()
- pthread_t tid_; // Which thread owns it
+
+ // We sample allocations, biased by the size of the allocation
+ Sampler sampler_; // A sampler
+
FreeList list_[kNumClasses]; // Array indexed by size-class
+
+ pthread_t tid_; // Which thread owns it
bool in_setspecific_; // In call to pthread_setspecific?
// Allocate a new heap. REQUIRES: Static::pageheap_lock is held.
@@ -313,9 +318,10 @@ class ThreadCache {
static void DeleteCache(ThreadCache* heap);
static void RecomputePerThreadCacheSize();
- // We sample allocations, biased by the size of the allocation
- Sampler sampler_; // A sampler
-};
+ // Ensure that this class is cacheline-aligned. This is critical for
+ // performance, as false sharing would negate many of the benefits
+ // of a per-thread cache.
+} CACHELINE_ALIGNED;
// Allocator for thread heaps
// This is logically part of the ThreadCache class, but MSVC, at
@@ -331,15 +337,15 @@ inline bool ThreadCache::SampleAllocation(size_t k) {
return sampler_.SampleAllocation(k);
}
-inline void* ThreadCache::Allocate(size_t size) {
+inline void* ThreadCache::Allocate(size_t size, size_t cl) {
ASSERT(size <= kMaxSize);
- const size_t cl = Static::sizemap()->SizeClass(size);
- const size_t alloc_size = Static::sizemap()->ByteSizeForClass(cl);
+ ASSERT(size == Static::sizemap()->ByteSizeForClass(cl));
+
FreeList* list = &list_[cl];
if (list->empty()) {
- return FetchFromCentralCache(cl, alloc_size);
+ return FetchFromCentralCache(cl, size);
}
- size_ -= alloc_size;
+ size_ -= size;
return list->Pop();
}