// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- // Copyright (c) 2009, Google Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // --- // Author: Craig Silverstein // // This forks out to pprof to do the actual symbolizing. We might // be better off writing our own in C++. 
#include "config.h"
#include "symbolize.h"
// NOTE(review): the system header names in this block were reconstructed
// from the adjacent "for X()" comments and from the functions this file
// calls -- confirm against the build before relying on them.
#include <stdlib.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>   // for write()
#endif
#ifdef HAVE_SYS_SOCKET_H
#include <sys/socket.h>   // for socketpair() -- needed by Symbolize
#endif
#ifdef HAVE_SYS_WAIT_H
#include <sys/wait.h>   // for wait() -- needed by Symbolize
#endif
#ifdef HAVE_POLL_H
#include <poll.h>
#endif
#ifdef __MACH__
#include <mach-o/dyld.h>   // for GetProgramInvocationName()
#include <limits.h>        // for PATH_MAX
#endif
#if defined(__CYGWIN__) || defined(__CYGWIN32__)
#include <io.h>            // for get_osfhandle()
#endif
#include <string>
#include "base/commandlineflags.h"
#include "base/logging.h"
#include "base/sysinfo.h"

using std::string;
using tcmalloc::DumpProcSelfMaps;   // from sysinfo.h


DEFINE_string(symbolize_pprof,
              EnvToString("PPROF_PATH", "pprof"),
              "Path to pprof to call for reporting function names.");

// heap_profile_table_pprof may be referenced after destructors are
// called (since that's when leak-checking is done), so we make
// a more-permanent copy that won't ever get destroyed.
static string* g_pprof_path = new string(FLAGS_symbolize_pprof);

// Returns the name this program was invoked with, or NULL if we're on
// an OS where we can't get the invocation name.
//   - With HAVE_PROGRAM_INVOCATION_NAME: returns the C library's
//     program_invocation_name global.
//   - On __MACH__: fills a static PATH_MAX buffer through
//     _NSGetExecutablePath() on the first call and returns that buffer;
//     returns NULL if _NSGetExecutablePath() reports failure.
//   - Otherwise: returns NULL.
// Using a static var is ok because we're not called from a thread.
static const char* GetProgramInvocationName() {
#if defined(HAVE_PROGRAM_INVOCATION_NAME)
#ifdef __UCLIBC__
  extern const char* program_invocation_name;  // uclibc provides this
#else
  extern char* program_invocation_name;  // gcc provides this
#endif
  return program_invocation_name;
#elif defined(__MACH__)
  // We don't want to allocate memory for this since we may be
  // calculating it when memory is corrupted.
  static char program_invocation_name[PATH_MAX];
  if (program_invocation_name[0] == '\0') {  // first time calculating
    uint32_t length = sizeof(program_invocation_name);
    // A non-zero result means the path could not be stored in the buffer.
    if (_NSGetExecutablePath(program_invocation_name, &length))
      return NULL;
  }
  return program_invocation_name;
#else
  return NULL;   // figure out a way to get argv[0]
#endif
}

// Prints an error message when you can't run Symbolize().
static void PrintError(const char* reason) { RAW_LOG(ERROR, "*** WARNING: Cannot convert addresses to symbols in output below.\n" "*** Reason: %s\n" "*** If you cannot fix this, try running pprof directly.\n", reason); } void SymbolTable::Add(const void* addr) { symbolization_table_[addr] = ""; } const char* SymbolTable::GetSymbol(const void* addr) { return symbolization_table_[addr]; } // Updates symbolization_table with the pointers to symbol names corresponding // to its keys. The symbol names are stored in out, which is allocated and // freed by the caller of this routine. // Note that the forking/etc is not thread-safe or re-entrant. That's // ok for the purpose we need -- reporting leaks detected by heap-checker // -- but be careful if you decide to use this routine for other purposes. // Returns number of symbols read on error. If can't symbolize, returns 0 // and emits an error message about why. int SymbolTable::Symbolize() { #if !defined(HAVE_UNISTD_H) || !defined(HAVE_SYS_SOCKET_H) || !defined(HAVE_SYS_WAIT_H) PrintError("Perftools does not know how to call a sub-process on this O/S"); return 0; #else const char* argv0 = GetProgramInvocationName(); if (argv0 == NULL) { // can't call symbolize if we can't figure out our name PrintError("Cannot figure out the name of this executable (argv0)"); return 0; } if (access(g_pprof_path->c_str(), R_OK) != 0) { PrintError("Cannot find 'pprof' (is PPROF_PATH set correctly?)"); return 0; } // All this work is to do two-way communication. ugh. int *child_in = NULL; // file descriptors int *child_out = NULL; // for now, we don't worry about child_err int child_fds[5][2]; // socketpair may be called up to five times below // The client program may close its stdin and/or stdout and/or stderr // thus allowing socketpair to reuse file descriptors 0, 1 or 2. // In this case the communication between the forked processes may be broken // if either the parent or the child tries to close or duplicate these // descriptors. 
The loop below produces two pairs of file descriptors, each // greater than 2 (stderr). for (int i = 0; i < 5; i++) { if (socketpair(AF_UNIX, SOCK_STREAM, 0, child_fds[i]) == -1) { for (int j = 0; j < i; j++) { close(child_fds[j][0]); close(child_fds[j][1]); PrintError("Cannot create a socket pair"); return 0; } } else { if ((child_fds[i][0] > 2) && (child_fds[i][1] > 2)) { if (child_in == NULL) { child_in = child_fds[i]; } else { child_out = child_fds[i]; for (int j = 0; j < i; j++) { if (child_fds[j] == child_in) continue; close(child_fds[j][0]); close(child_fds[j][1]); } break; } } } } switch (fork()) { case -1: { // error close(child_in[0]); close(child_in[1]); close(child_out[0]); close(child_out[1]); PrintError("Unknown error calling fork()"); return 0; } case 0: { // child close(child_in[1]); // child uses the 0's, parent uses the 1's close(child_out[1]); // child uses the 0's, parent uses the 1's close(0); close(1); if (dup2(child_in[0], 0) == -1) _exit(1); if (dup2(child_out[0], 1) == -1) _exit(2); // Unset vars that might cause trouble when we fork unsetenv("CPUPROFILE"); unsetenv("HEAPPROFILE"); unsetenv("HEAPCHECK"); unsetenv("PERFTOOLS_VERBOSE"); execlp(g_pprof_path->c_str(), g_pprof_path->c_str(), "--symbols", argv0, NULL); _exit(3); // if execvp fails, it's bad news for us } default: { // parent close(child_in[0]); // child uses the 0's, parent uses the 1's close(child_out[0]); // child uses the 0's, parent uses the 1's #ifdef HAVE_POLL_H // Waiting for 1ms seems to give the OS time to notice any errors. poll(0, 0, 1); // For maximum safety, we check to make sure the execlp // succeeded before trying to write. (Otherwise we'll get a // SIGPIPE.) For systems without poll.h, we'll just skip this // check, and trust that the user set PPROF_PATH correctly! 
struct pollfd pfd = { child_in[1], POLLOUT, 0 }; if (!poll(&pfd, 1, 0) || !(pfd.revents & POLLOUT) || (pfd.revents & (POLLHUP|POLLERR))) { PrintError("Cannot run 'pprof' (is PPROF_PATH set correctly?)"); return 0; } #endif #if defined(__CYGWIN__) || defined(__CYGWIN32__) // On cygwin, DumpProcSelfMaps() takes a HANDLE, not an fd. Convert. const HANDLE symbols_handle = (HANDLE) get_osfhandle(child_in[1]); DumpProcSelfMaps(symbols_handle); #else DumpProcSelfMaps(child_in[1]); // what pprof expects on stdin #endif // Allocate 24 bytes = ("0x" + 8 bytes + "\n" + overhead) for each // address to feed to pprof. const int kOutBufSize = 24 * symbolization_table_.size(); char *pprof_buffer = new char[kOutBufSize]; int written = 0; for (SymbolMap::const_iterator iter = symbolization_table_.begin(); iter != symbolization_table_.end(); ++iter) { written += snprintf(pprof_buffer + written, kOutBufSize - written, // pprof expects format to be 0xXXXXXX "0x%" PRIxPTR "\n", reinterpret_cast(iter->first)); } write(child_in[1], pprof_buffer, strlen(pprof_buffer)); close(child_in[1]); // that's all we need to write const int kSymbolBufferSize = kSymbolSize * symbolization_table_.size(); int total_bytes_read = 0; delete[] symbol_buffer_; symbol_buffer_ = new char[kSymbolBufferSize]; memset(symbol_buffer_, '\0', kSymbolBufferSize); while (1) { int bytes_read = read(child_out[1], symbol_buffer_ + total_bytes_read, kSymbolBufferSize - total_bytes_read); if (bytes_read < 0) { close(child_out[1]); PrintError("Cannot read data from pprof"); return 0; } else if (bytes_read == 0) { close(child_out[1]); wait(NULL); break; } else { total_bytes_read += bytes_read; } } // We have successfully read the output of pprof into out. Make sure // the last symbol is full (we can tell because it ends with a \n). 
if (total_bytes_read == 0 || symbol_buffer_[total_bytes_read - 1] != '\n') return 0; // make the symbolization_table_ values point to the output vector SymbolMap::iterator fill = symbolization_table_.begin(); int num_symbols = 0; const char *current_name = symbol_buffer_; for (int i = 0; i < total_bytes_read; i++) { if (symbol_buffer_[i] == '\n') { fill->second = current_name; symbol_buffer_[i] = '\0'; current_name = symbol_buffer_ + i + 1; fill++; num_symbols++; } } return num_symbols; } } PrintError("Unkown error (should never occur!)"); return 0; // shouldn't be reachable #endif }