"""Tools to dump debug info for each OS.""" import itertools import logging import os import sys import tempfile from datetime import datetime from abc import ABCMeta, abstractmethod from collections import namedtuple from distutils import spawn from buildscripts.resmokelib.hang_analyzer.process import call, callo, find_program from buildscripts.resmokelib.hang_analyzer.process_list import Pinfo from buildscripts.resmokelib import config as resmoke_config Dumpers = namedtuple('Dumpers', ['dbg', 'jstack']) def get_dumpers(root_logger: logging.Logger, dbg_output: str): """ Return OS-appropriate dumpers. :param root_logger: Top-level logger :param dbg_output: 'stdout' or 'file' """ dbg = None jstack = None if sys.platform.startswith("linux"): dbg = GDBDumper(root_logger, dbg_output) jstack = JstackDumper() elif sys.platform == "win32" or sys.platform == "cygwin": dbg = WindowsDumper(root_logger, dbg_output) jstack = JstackWindowsDumper() elif sys.platform == "darwin": dbg = LLDBDumper(root_logger, dbg_output) jstack = JstackDumper() return Dumpers(dbg=dbg, jstack=jstack) class Dumper(metaclass=ABCMeta): """ Abstract base class for OS-specific dumpers. :param dbg_output: 'stdout' or 'file' :param root_logger: Top-level logger """ def __init__(self, root_logger: logging.Logger, dbg_output: str): """Initialize dumper.""" self._root_logger = root_logger self._dbg_output = dbg_output @abstractmethod def dump_info( self, pinfo: Pinfo, take_dump: bool, ): """ Perform dump for a process. :param pinfo: A Pinfo describing the process :param take_dump: Whether to take a core dump """ raise NotImplementedError("dump_info must be implemented in OS-specific subclasses") @abstractmethod def get_dump_ext(self): """Return the dump file extension.""" raise NotImplementedError("get_dump_ext must be implemented in OS-specific subclasses") @abstractmethod def _find_debugger(self, debugger): """ Find the installed debugger. :param debugger: debugger executable. """ raise NotImplementedError("_find_debugger must be implemented in OS-specific subclasses") @abstractmethod def _prefix(self): """Return the commands to set up a debugger process.""" raise NotImplementedError("_prefix must be implemented in OS-specific subclasses") @abstractmethod def _process_specific(self, pinfo: Pinfo, take_dump: bool, logger: logging.Logger = None): """ Return the commands that attach to each process, dump info and detach. :param pinfo: A Pinfo describing the process :param take_dump: Whether to take a core dump :param logger: Logger to output dump info to """ raise NotImplementedError("_process_specific must be implemented in OS-specific subclasses") @abstractmethod def _postfix(self): """Return the commands to exit the debugger.""" raise NotImplementedError("_postfix must be implemented in OS-specific subclasses") class WindowsDumper(Dumper): """WindowsDumper class.""" def _find_debugger(self, debugger): """Find the installed debugger.""" # We are looking for c:\Program Files (x86)\Windows Kits\8.1\Debuggers\x64 cdb = spawn.find_executable(debugger) if cdb is not None: return cdb from win32com.shell import shell, shellcon # pylint: disable=import-outside-toplevel # Cygwin via sshd does not expose the normal environment variables # Use the shell api to get the variable instead root_dir = shell.SHGetFolderPath(0, shellcon.CSIDL_PROGRAM_FILESX86, None, 0) # Construct the debugger search paths in most-recent order debugger_paths = [os.path.join(root_dir, "Windows Kits", "10", "Debuggers", "x64")] for idx in reversed(range(0, 2)): debugger_paths.append( os.path.join(root_dir, "Windows Kits", "8." + str(idx), "Debuggers", "x64")) for dbg_path in debugger_paths: self._root_logger.info("Checking for debugger in %s", dbg_path) if os.path.exists(dbg_path): return os.path.join(dbg_path, debugger) return None def _prefix(self): """Return the commands to set up a debugger process.""" cmds = [ ".symfix", # Fixup symbol path "!sym noisy", # Enable noisy symbol loading ".symopt +0x10", # Enable line loading (off by default in CDB, on by default in WinDBG) ".reload", # Reload symbols ] return cmds def _process_specific(self, pinfo, take_dump, logger=None): """Return the commands that attach to each process, dump info and detach.""" assert isinstance(pinfo.pidv, int) if take_dump: # Dump to file, dump_..mdmp dump_file = "dump_%s.%d.%s" % (os.path.splitext(pinfo.name)[0], pinfo.pidv, self.get_dump_ext()) dump_command = ".dump /ma %s" % dump_file self._root_logger.info("Dumping core to %s", dump_file) cmds = [ dump_command, ".detach", # Detach ] else: cmds = [ "!peb", # Dump current exe, & environment variables "lm", # Dump loaded modules "!uniqstack -pn", # Dump All unique Threads with function arguments "!cs -l", # Dump all locked critical sections ".detach", # Detach ] return cmds def _postfix(self): """Return the commands to exit the debugger.""" cmds = [ "q" # Quit ] return cmds def dump_info(self, pinfo, take_dump): """Dump useful information to the console.""" debugger = "cdb.exe" dbg = self._find_debugger(debugger) if dbg is None: self._root_logger.warning("Debugger %s not found, skipping dumping of %s", debugger, str(pinfo.pidv)) return self._root_logger.info("Debugger %s, analyzing %s processes with PIDs %s", dbg, pinfo.name, str(pinfo.pidv)) for pid in pinfo.pidv: logger = _get_process_logger(self._dbg_output, pinfo.name, pid=pid) process = Pinfo(name=pinfo.name, pidv=pid) cmds = self._prefix() + self._process_specific(process, take_dump) + self._postfix() call([dbg, '-c', ";".join(cmds), '-p', str(pid)], logger) self._root_logger.info("Done analyzing %s process with PID %d", pinfo.name, pid) def get_dump_ext(self): """Return the dump file extension.""" return "mdmp" # LLDB dumper is for MacOS X class LLDBDumper(Dumper): """LLDBDumper class.""" @staticmethod def _find_debugger(debugger): """Find the installed debugger.""" return find_program(debugger, ['/usr/bin']) def _prefix(self): pass def _process_specific(self, pinfo, take_dump, logger=None): """Return the commands that attach to each process, dump info and detach.""" cmds = [] if take_dump: dump_files = self._dump_files(pinfo) for pid in pinfo.pidv: # Dump to file, dump_..core dump_file = dump_files[pid] dump_command = "process save-core %s" % dump_file self._root_logger.info("Dumping core to %s", dump_file) cmds += [ "platform shell kill -CONT %d" % pid, "attach -p %d" % pid, dump_command, "process detach", "platform shell kill -STOP %d" % pid, ] else: for pid in pinfo.pidv: cmds += [ "platform shell kill -CONT %d" % pid, "attach -p %d" % pid, "target modules list", "thread backtrace all", "process detach", "platform shell kill -STOP %d" % pid, ] return cmds def _postfix(self): """Return the commands to exit the debugger.""" cmds = [ "settings set interpreter.prompt-on-quit false", "quit", ] return cmds def dump_info(self, pinfo, take_dump): """Dump info.""" debugger = "lldb" dbg = self._find_debugger(debugger) logger = _get_process_logger(self._dbg_output, pinfo.name) if dbg is None: self._root_logger.warning("Debugger %s not found, skipping dumping of %s", debugger, str(pinfo.pidv)) return self._root_logger.info("Debugger %s, analyzing %s processes with PIDs %s", dbg, pinfo.name, str(pinfo.pidv)) lldb_version = callo([dbg, "--version"], logger) logger.info(lldb_version) # Do we have the XCode or LLVM version of lldb? # Old versions of lldb do not work well when taking commands via a file # XCode (7.2): lldb-340.4.119 # LLVM - lldb version 3.7.0 ( revision ) if 'version' not in lldb_version: # We have XCode's lldb lldb_version = lldb_version[lldb_version.index("lldb-"):] lldb_version = lldb_version.replace('lldb-', '') lldb_major_version = int(lldb_version[:lldb_version.index('.')]) if lldb_major_version < 340: logger.warning("Debugger lldb is too old, please upgrade to XCode 7.2") return cmds = self._process_specific(pinfo, take_dump) + self._postfix() tf = tempfile.NamedTemporaryFile(mode='w', encoding='utf-8') for cmd in cmds: tf.write(cmd + "\n") tf.flush() # Works on in MacOS 10.9 & later #call([dbg] + list( itertools.chain.from_iterable([['-o', b] for b in cmds])), logger) call(['cat', tf.name], logger) call([dbg, '--source', tf.name], logger) self._root_logger.info("Done analyzing %s processes with PIDs %s", pinfo.name, str(pinfo.pidv)) if take_dump: need_sigabrt = {} files = self._dump_files(pinfo) for pid in files: if not os.path.exists(files[pid]): need_sigabrt[pid] = files[pid] if need_sigabrt: raise DumpError(need_sigabrt) def get_dump_ext(self): """Return the dump file extension.""" return "core" def _dump_files(self, pinfo): """Return a dict mapping pids to core dump filenames that this dumper can create.""" files = {} for pid in pinfo.pidv: files[pid] = "dump_%s.%d.%s" % (pinfo.name, pid, self.get_dump_ext()) return files # GDB dumper is for Linux class GDBDumper(Dumper): """GDBDumper class.""" def __init__(self, root_logger: logging.Logger, dbg_output: str, timeout_seconds_for_gdb_process=720): """Initialize GDBDumper.""" if resmoke_config.EVERGREEN_TASK_ID is None: # Set 24 hours time out for hang analyzer being run in locally timeout_seconds_for_gdb_process = 86400 #Timeout for hang analyzer, default timeout is 12mins(out of total 15mins) in Evergreen self._timeout_seconds_for_gdb_process = timeout_seconds_for_gdb_process super().__init__(root_logger, dbg_output) def _reduce_timeout_for_gdb_process(self, timeout_period: int): """Reduce timeout for remaining gdb processes.""" self._timeout_seconds_for_gdb_process -= timeout_period def _find_debugger(self, debugger): """Find the installed debugger.""" return find_program(debugger, ['/opt/mongodbtoolchain/v4/bin', '/usr/bin']) def _prefix(self): """Return the commands to set up a debugger process.""" script_dir = "buildscripts" gdb_dir = os.path.join(script_dir, "gdb") mongo_script = os.path.join(gdb_dir, "mongo.py") mongo_printers_script = os.path.join(gdb_dir, "mongo_printers.py") mongo_lock_script = os.path.join(gdb_dir, "mongo_lock.py") add_venv_sys_path = f"py sys.path.extend({sys.path})" # Makes venv packages available in GDB source_mongo = "source %s" % mongo_script source_mongo_printers = "source %s" % mongo_printers_script source_mongo_lock = "source %s" % mongo_lock_script cmds = [ "set interactive-mode off", "set print thread-events off", # Suppress GDB messages of threads starting/finishing. "set python print-stack full", add_venv_sys_path, source_mongo, source_mongo_printers, source_mongo_lock, ] return cmds def _process_specific(self, pinfo, take_dump, logger=None): """Return the commands that attach to each process, dump info and detach.""" cmds = [] if take_dump: for pid in pinfo.pidv: # Dump to file, dump_..core dump_file = "dump_%s.%d.%s" % (pinfo.name, pid, self.get_dump_ext()) dump_command = "gcore %s" % dump_file self._root_logger.info("Dumping core to %s", dump_file) cmds += [ "attach %d" % pid, "handle SIGSTOP ignore noprint", # Lock the scheduler, before running commands, which execute code in the attached process. "set scheduler-locking on", dump_command, "detach", ] else: mongodb_dump_locks = "mongodb-dump-locks" mongodb_show_locks = "mongodb-show-locks" mongodb_uniqstack = "mongodb-uniqstack mongodb-bt-if-active" mongodb_javascript_stack = "mongodb-javascript-stack" mongod_dump_sessions = "mongod-dump-sessions" mongodb_dump_mutexes = "mongodb-dump-mutexes" mongodb_dump_recovery_units = "mongodb-dump-recovery-units" mongodb_dump_storage_engine_info = "mongodb-dump-storage-engine-info" for pid in pinfo.pidv: if not logger.mongo_process_filename: set_logging_on_commands = [] set_logging_off_commands = [] raw_stacks_commands = [] else: base, ext = os.path.splitext(logger.mongo_process_filename) set_logging_on_commands = [ 'set logging file %s_%d%s' % (base, pid, ext), 'set logging on' ] set_logging_off_commands = ['set logging off'] raw_stacks_filename = "%s_%d_raw_stacks%s" % (base, pid, ext) raw_stacks_commands = [ 'echo \\nWriting raw stacks to %s.\\n' % raw_stacks_filename, # This sends output to log file rather than stdout until we turn logging off. 'set logging redirect on', 'set logging file ' + raw_stacks_filename, 'set logging on', 'thread apply all bt', 'set logging off', 'set logging redirect off', ] mongodb_waitsfor_graph = "mongodb-waitsfor-graph debugger_waitsfor_%s_%d.gv" % \ (pinfo.name, pid) cmds += set_logging_on_commands + [ "attach %d" % pid, "handle SIGSTOP ignore noprint", "info sharedlibrary", "info threads", # Dump a simple list of commands to get the thread name ] + set_logging_off_commands + raw_stacks_commands + set_logging_on_commands + [ mongodb_uniqstack, # Lock the scheduler, before running commands, which execute code in the attached process. "set scheduler-locking on", mongodb_dump_locks, mongodb_show_locks, mongodb_waitsfor_graph, mongodb_javascript_stack, mongod_dump_sessions, mongodb_dump_mutexes, mongodb_dump_recovery_units, mongodb_dump_storage_engine_info, "detach", ] + set_logging_off_commands return cmds def _postfix(self): """Return the commands to exit the debugger.""" cmds = ["set confirm off", "quit"] return cmds def dump_info(self, pinfo, take_dump): """Dump info.""" debugger = "gdb" dbg = self._find_debugger(debugger) logger = _get_process_logger(self._dbg_output, pinfo.name) _start_time = datetime.now() if dbg is None: self._root_logger.warning("Debugger %s not found, skipping dumping of %s", debugger, str(pinfo.pidv)) return if self._timeout_seconds_for_gdb_process <= 0: self._root_logger.warning( "Skipping dumping of %s processes with PIDs %s because the time limit expired", pinfo.name, str(pinfo.pidv)) return self._root_logger.info("Debugger %s, analyzing %s processes with PIDs %s", dbg, pinfo.name, str(pinfo.pidv)) call([dbg, "--version"], logger) cmds = self._prefix() + self._process_specific(pinfo, take_dump, logger) + self._postfix() # gcore is both a command within GDB and a script packaged alongside gdb. The gcore script # invokes the gdb binary with --readnever to avoid spending time loading the debug symbols # prior to taking the core dump. The debug symbols are unneeded to generate the core dump. # # For reference: # https://sourceware.org/git/?p=binutils-gdb.git;a=blob;f=gdb/gcore.in;h=34860de630cf0ee766e102eb82f7a3fddba6b368#l101 skip_reading_symbols_on_take_dump = ["--readnever"] if take_dump else [] # TODO: SERVER-75862 # Live process dumping is causing system unresponsive, which is resulting in loss of core dumps # and other shutdown/clean up tasks failing to be run. Disabling the live process dump is a # temporary workaround while the root cause of the system unresponsive is fully understood. if take_dump: call([dbg, "--quiet", "--nx"] + skip_reading_symbols_on_take_dump + list( itertools.chain.from_iterable([['-ex', b] for b in cmds])), logger, self._timeout_seconds_for_gdb_process, pinfo) time_period = (datetime.now() - _start_time).total_seconds() self._reduce_timeout_for_gdb_process(time_period) self._root_logger.info("Done analyzing %s processes with PIDs %s", pinfo.name, str(pinfo.pidv)) def get_dump_ext(self): """Return the dump file extension.""" return "core" @staticmethod def _find_gcore(): """Find the installed gcore.""" dbg = "/usr/bin/gcore" if os.path.exists(dbg): return dbg return None # jstack is a JDK utility class JstackDumper(object): """JstackDumper class.""" @staticmethod def _find_debugger(debugger): """Find the installed jstack debugger.""" return find_program(debugger, ['/usr/bin']) def dump_info(self, root_logger, dbg_output, pid, process_name): """Dump java thread stack traces to the console.""" debugger = "jstack" jstack = self._find_debugger(debugger) logger = _get_process_logger(dbg_output, process_name, pid=pid) if jstack is None: logger.warning("Debugger %s not found, skipping dumping of %d", debugger, pid) return root_logger.info("Debugger %s, analyzing %s process with PID %d", jstack, process_name, pid) call([jstack, "-l", str(pid)], logger) root_logger.info("Done analyzing %s process with PID %d", process_name, pid) # jstack is a JDK utility class JstackWindowsDumper(object): """JstackWindowsDumper class.""" @staticmethod def dump_info(root_logger, pid): """Dump java thread stack traces to the logger.""" root_logger.warning("Debugger jstack not supported, skipping dumping of %d", pid) def _get_process_logger(dbg_output, pname: str, pid: int = None): """Return the process logger from options specified.""" process_logger = logging.Logger("process", level=logging.DEBUG) process_logger.mongo_process_filename = None if 'stdout' in dbg_output: s_handler = logging.StreamHandler(sys.stdout) s_handler.setFormatter(logging.Formatter(fmt="%(message)s")) process_logger.addHandler(s_handler) if 'file' in dbg_output: if pid: filename = "debugger_%s_%d.log" % (os.path.splitext(pname)[0], pid) else: filename = "debugger_%s.log" % (os.path.splitext(pname)[0]) process_logger.mongo_process_filename = filename f_handler = logging.FileHandler(filename=filename, mode="w") f_handler.setFormatter(logging.Formatter(fmt="%(message)s")) process_logger.addHandler(f_handler) return process_logger class DumpError(Exception): """ Exception raised for errors while dumping processes. Tracks what cores still need to be generated. """ def __init__(self, dump_pids, message=("Failed to create core dumps for some processes," " SIGABRT will be sent as a fallback if -k is set.")): """Initialize error.""" self.dump_pids = dump_pids self.message = message super().__init__(self.message)