diff options
author | Jordan Rupprecht <rupprecht@google.com> | 2019-05-14 21:58:59 +0000 |
---|---|---|
committer | Jordan Rupprecht <rupprecht@google.com> | 2019-05-14 21:58:59 +0000 |
commit | b6bc976d7be8ee56d3be4b6dbd2f3ab0a4021c86 (patch) | |
tree | f5ed5db8cb5d237a073ea00c4d4cd63153a16a6c /lib/asan/scripts/asan_symbolize.py | |
parent | 05342ccc9cff16425c0a831fddd510879544a0bf (diff) | |
parent | 098ca93185735ec3687106d0967a70fc99a85059 (diff) | |
download | compiler-rt-b6bc976d7be8ee56d3be4b6dbd2f3ab0a4021c86.tar.gz |
Creating branches/google/stable and tags/google/stable/2019-05-14 from r360103 (branch: google/stable)
git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/branches/google/stable@360714 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/asan/scripts/asan_symbolize.py')
-rwxr-xr-x | lib/asan/scripts/asan_symbolize.py | 599 |
1 files changed, 535 insertions, 64 deletions
diff --git a/lib/asan/scripts/asan_symbolize.py b/lib/asan/scripts/asan_symbolize.py index 2dbb05283..4fb3355d7 100755 --- a/lib/asan/scripts/asan_symbolize.py +++ b/lib/asan/scripts/asan_symbolize.py @@ -1,26 +1,36 @@ #!/usr/bin/env python #===- lib/asan/scripts/asan_symbolize.py -----------------------------------===# # -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # #===------------------------------------------------------------------------===# +""" +Example of use: + asan_symbolize.py -c "$HOME/opt/cross/bin/arm-linux-gnueabi-" -s "$HOME/SymbolFiles" < asan.log + +PLUGINS + +This script provides a way for external plug-ins to hook into the behaviour of +various parts of this script (see `--plugins`). This is useful for situations +where it is necessary to handle site-specific quirks (e.g. binaries with debug +symbols only accessible via a remote service) without having to modify the +script itself. 
+ +""" import argparse import bisect import getopt +import logging import os import re import subprocess import sys symbolizers = {} -DEBUG = False demangle = False binutils_prefix = None -sysroot_path = None -binary_name_filter = None fix_filename_patterns = None logfile = sys.stdin allow_system_symbolizer = True @@ -35,9 +45,6 @@ def fix_filename(file_name): file_name = re.sub('.*crtstuff.c:0', '???:0', file_name) return file_name -def sysroot_path_filter(binary_name): - return sysroot_path + binary_name - def is_valid_arch(s): return s in ["i386", "x86_64", "x86_64h", "arm", "armv6", "armv7", "armv7s", "armv7k", "arm64", "powerpc64", "powerpc64le", "s390x", "s390"] @@ -88,8 +95,7 @@ class LLVMSymbolizer(Symbolizer): if self.system == 'Darwin': for hint in self.dsym_hints: cmd.append('--dsym-hint=%s' % hint) - if DEBUG: - print(' '.join(cmd)) + logging.debug(' '.join(cmd)) try: result = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, @@ -106,8 +112,7 @@ class LLVMSymbolizer(Symbolizer): result = [] try: symbolizer_input = '"%s" %s' % (binary, offset) - if DEBUG: - print(symbolizer_input) + logging.debug(symbolizer_input) self.pipe.stdin.write("%s\n" % symbolizer_input) while True: function_name = self.pipe.stdout.readline().rstrip() @@ -152,8 +157,7 @@ class Addr2LineSymbolizer(Symbolizer): if demangle: cmd += ['--demangle'] cmd += ['-e', self.binary] - if DEBUG: - print(' '.join(cmd)) + logging.debug(' '.join(cmd)) return subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, bufsize=0, @@ -222,8 +226,7 @@ class DarwinSymbolizer(Symbolizer): self.open_atos() def open_atos(self): - if DEBUG: - print('atos -o %s -arch %s' % (self.binary, self.arch)) + logging.debug('atos -o %s -arch %s', self.binary, self.arch) cmdline = ['atos', '-o', self.binary, '-arch', self.arch] self.atos = UnbufferedLineConverter(cmdline, close_stderr=True) @@ -241,8 +244,7 @@ class DarwinSymbolizer(Symbolizer): # A well-formed atos response looks like 
this: # foo(type1, type2) (in object.name) (filename.cc:80) match = re.match('^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line) - if DEBUG: - print('atos_line: ', atos_line) + logging.debug('atos_line: %s', atos_line) if match: function_name = match.group(1) function_name = re.sub('\(.*?\)', '', function_name) @@ -363,7 +365,8 @@ class BreakpadSymbolizer(Symbolizer): class SymbolizationLoop(object): - def __init__(self, binary_name_filter=None, dsym_hint_producer=None): + def __init__(self, plugin_proxy=None, dsym_hint_producer=None): + self.plugin_proxy = plugin_proxy if sys.platform == 'win32': # ASan on Windows uses dbghelp.dll to symbolize in-process, which works # even in sandboxed processes. Nothing needs to be done here. @@ -371,7 +374,6 @@ class SymbolizationLoop(object): else: # Used by clients who may want to supply a different binary name. # E.g. in Chrome several binaries may share a single .dSYM. - self.binary_name_filter = binary_name_filter self.dsym_hint_producer = dsym_hint_producer self.system = os.uname()[0] if self.system not in ['Linux', 'Darwin', 'FreeBSD', 'NetBSD','SunOS']: @@ -455,8 +457,7 @@ class SymbolizationLoop(object): match = re.match(stack_trace_line_format, line) if not match: return [self.current_line] - if DEBUG: - print(line) + logging.debug(line) _, frameno_str, addr, binary, offset = match.groups() arch = "" # Arch can be embedded in the filename, e.g.: "libabc.dylib:x86_64h" @@ -472,52 +473,522 @@ class SymbolizationLoop(object): # Assume that frame #0 is the first frame of new stack trace. self.frame_no = 0 original_binary = binary - if self.binary_name_filter: - binary = self.binary_name_filter(binary) + binary = self.plugin_proxy.filter_binary_path(binary) + if binary is None: + # The binary filter has told us this binary can't be symbolized. 
+ logging.debug('Skipping symbolication of binary "%s"', original_binary) + return [self.current_line] symbolized_line = self.symbolize_address(addr, binary, offset, arch) if not symbolized_line: if original_binary != binary: symbolized_line = self.symbolize_address(addr, original_binary, offset, arch) return self.get_symbolized_lines(symbolized_line) +class AsanSymbolizerPlugInProxy(object): + """ + Serves several purposes: + - Manages the lifetime of plugins (must be used a `with` statement). + - Provides interface for calling into plugins from within this script. + """ + def __init__(self): + self._plugins = [ ] + self._plugin_names = set() + + def _load_plugin_from_file_impl_py_gt_2(self, file_path, globals_space): + with open(file_path, 'r') as f: + exec(f.read(), globals_space, None) + + def load_plugin_from_file(self, file_path): + logging.info('Loading plugins from "{}"'.format(file_path)) + globals_space = dict(globals()) + # Provide function to register plugins + def register_plugin(plugin): + logging.info('Registering plugin %s', plugin.get_name()) + self.add_plugin(plugin) + globals_space['register_plugin'] = register_plugin + if sys.version_info.major < 3: + execfile(file_path, globals_space, None) + else: + # Indirection here is to avoid a bug in older Python 2 versions: + # `SyntaxError: unqualified exec is not allowed in function ...` + self._load_plugin_from_file_impl_py_gt_2(file_path, globals_space) + + def add_plugin(self, plugin): + assert isinstance(plugin, AsanSymbolizerPlugIn) + self._plugins.append(plugin) + self._plugin_names.add(plugin.get_name()) + plugin._receive_proxy(self) + + def remove_plugin(self, plugin): + assert isinstance(plugin, AsanSymbolizerPlugIn) + self._plugins.remove(plugin) + self._plugin_names.remove(plugin.get_name()) + logging.debug('Removing plugin %s', plugin.get_name()) + plugin.destroy() + + def has_plugin(self, name): + """ + Returns true iff the plugin name is currently + being managed by 
AsanSymbolizerPlugInProxy. + """ + return name in self._plugin_names + + def register_cmdline_args(self, parser): + plugins = list(self._plugins) + for plugin in plugins: + plugin.register_cmdline_args(parser) + + def process_cmdline_args(self, pargs): + # Use copy so we can remove items as we iterate. + plugins = list(self._plugins) + for plugin in plugins: + keep = plugin.process_cmdline_args(pargs) + assert isinstance(keep, bool) + if not keep: + self.remove_plugin(plugin) + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + for plugin in self._plugins: + plugin.destroy() + # Don't suppress raised exceptions + return False + + def _filter_single_value(self, function_name, input_value): + """ + Helper for filter style plugin functions. + """ + new_value = input_value + for plugin in self._plugins: + result = getattr(plugin, function_name)(new_value) + if result is None: + return None + new_value = result + return new_value + + def filter_binary_path(self, binary_path): + """ + Consult available plugins to filter the path to a binary + to make it suitable for symbolication. -if __name__ == '__main__': - parser = argparse.ArgumentParser( - formatter_class=argparse.RawDescriptionHelpFormatter, - description='ASan symbolization script', - epilog='Example of use:\n' - 'asan_symbolize.py -c "$HOME/opt/cross/bin/arm-linux-gnueabi-" ' - '-s "$HOME/SymbolFiles" < asan.log') - parser.add_argument('path_to_cut', nargs='*', - help='pattern to be cut from the result file path ') - parser.add_argument('-d','--demangle', action='store_true', - help='demangle function names') - parser.add_argument('-s', metavar='SYSROOT', + Returns `None` if symbolication should not be attempted for this + binary. + """ + return self._filter_single_value('filter_binary_path', binary_path) + + def filter_module_desc(self, module_desc): + """ + Consult available plugins to determine the module + description suitable for symbolication. 
+ + Returns `None` if symbolication should not be attempted for this module. + """ + assert isinstance(module_desc, ModuleDesc) + return self._filter_single_value('filter_module_desc', module_desc) + +class AsanSymbolizerPlugIn(object): + """ + This is the interface the `asan_symbolize.py` code uses to talk + to plugins. + """ + @classmethod + def get_name(cls): + """ + Returns the name of the plugin. + """ + return cls.__name__ + + def _receive_proxy(self, proxy): + assert isinstance(proxy, AsanSymbolizerPlugInProxy) + self.proxy = proxy + + def register_cmdline_args(self, parser): + """ + Hook for registering command line arguments to be + consumed in `process_cmdline_args()`. + + `parser` - Instance of `argparse.ArgumentParser`. + """ + pass + + def process_cmdline_args(self, pargs): + """ + Hook for handling parsed arguments. Implementations + should not modify `pargs`. + + `pargs` - Instance of `argparse.Namespace` containing + parsed command line arguments. + + Return `True` if plug-in should be used, otherwise + return `False`. + """ + return True + + def destroy(self): + """ + Hook called when a plugin is about to be destroyed. + Implementations should free any allocated resources here. + """ + pass + + # Symbolization hooks + def filter_binary_path(self, binary_path): + """ + Given a binary path return a binary path suitable for symbolication. + + Implementations should return `None` if symbolication of this binary + should be skipped. + """ + return binary_path + + def filter_module_desc(self, module_desc): + """ + Given a ModuleDesc object (`module_desc`) return + a ModuleDesc suitable for symbolication. + + Implementations should return `None` if symbolication of this binary + should be skipped. 
+ """ + return module_desc + +class ModuleDesc(object): + def __init__(self, name, arch, start_addr, end_addr, module_path, uuid): + self.name = name + self.arch = arch + self.start_addr = start_addr + self.end_addr = end_addr + # Module path from an ASan report. + self.module_path = module_path + # Module for performing symbolization, by default same as above. + self.module_path_for_symbolization = module_path + self.uuid = uuid + assert self.is_valid() + + def __str__(self): + assert self.is_valid() + return "{name} {arch} {start_addr:#016x}-{end_addr:#016x} {module_path} {uuid}".format( + name=self.name, + arch=self.arch, + start_addr=self.start_addr, + end_addr=self.end_addr, + module_path=self.module_path if self.module_path == self.module_path_for_symbolization else '{} ({})'.format(self.module_path_for_symbolization, self.module_path), + uuid=self.uuid + ) + + def is_valid(self): + if not isinstance(self.name, str): + return False + if not isinstance(self.arch, str): + return False + if not isinstance(self.start_addr, int): + return False + if self.start_addr < 0: + return False + if not isinstance(self.end_addr, int): + return False + if self.end_addr <= self.start_addr: + return False + if not isinstance(self.module_path, str): + return False + if not os.path.isabs(self.module_path): + return False + if not isinstance(self.module_path_for_symbolization, str): + return False + if not os.path.isabs(self.module_path_for_symbolization): + return False + if not isinstance(self.uuid, str): + return False + return True + +class GetUUIDFromBinaryException(Exception): + def __init__(self, msg): + super(GetUUIDFromBinaryException, self).__init__(msg) + +_get_uuid_from_binary_cache = dict() + +def get_uuid_from_binary(path_to_binary, arch=None): + cache_key = (path_to_binary, arch) + cached_value = _get_uuid_from_binary_cache.get(cache_key) + if cached_value: + return cached_value + if not os.path.exists(path_to_binary): + raise GetUUIDFromBinaryException('Binary 
"{}" does not exist'.format(path_to_binary)) + cmd = [ '/usr/bin/otool', '-l'] + if arch: + cmd.extend(['-arch', arch]) + cmd.append(path_to_binary) + output = subprocess.check_output(cmd, stderr=subprocess.STDOUT) + # Look for this output: + # cmd LC_UUID + # cmdsize 24 + # uuid 4CA778FE-5BF9-3C45-AE59-7DF01B2BE83F + if isinstance(output, str): + output_str = output + else: + assert isinstance(output, bytes) + output_str = output.decode() + assert isinstance(output_str, str) + lines = output_str.split('\n') + uuid = None + for index, line in enumerate(lines): + stripped_line = line.strip() + if not stripped_line.startswith('cmd LC_UUID'): + continue + uuid_line = lines[index+2].strip() + if not uuid_line.startswith('uuid'): + raise GetUUIDFromBinaryException('Malformed output: "{}"'.format(uuid_line)) + split_uuid_line = uuid_line.split() + uuid = split_uuid_line[1] + break + if uuid is None: + raise GetUUIDFromBinaryException('Failed to retrieve UUID') + else: + # Update cache + _get_uuid_from_binary_cache[cache_key] = uuid + return uuid + +class ModuleMap(object): + def __init__(self): + self._module_name_to_description_map = dict() + + def add_module(self, desc): + assert isinstance(desc, ModuleDesc) + assert desc.name not in self._module_name_to_description_map + self._module_name_to_description_map[desc.name] = desc + + def find_module_by_name(self, name): + return self._module_name_to_description_map.get(name, None) + + def __str__(self): + s = '{} modules:\n'.format(self.num_modules) + for module_desc in sorted(self._module_name_to_description_map.values(), key=lambda v: v.start_addr): + s += str(module_desc) + '\n' + return s + + @property + def num_modules(self): + return len(self._module_name_to_description_map) + + @property + def modules(self): + return set(self._module_name_to_description_map.values()) + + def get_module_path_for_symbolication(self, module_name, proxy): + module_desc = self.find_module_by_name(module_name) + if module_desc is None: + 
return None + # Allow a plug-in to change the module description to make it + # suitable for symbolication or avoid symbolication altogether. + module_desc = proxy.filter_module_desc(module_desc) + if module_desc is None: + return None + try: + uuid = get_uuid_from_binary(module_desc.module_path_for_symbolization, arch = module_desc.arch) + if uuid != module_desc.uuid: + logging.warning("Detected UUID mismatch {} != {}".format(uuid, module_desc.uuid)) + # UUIDs don't match. Tell client to not symbolize this. + return None + except GetUUIDFromBinaryException as e: + logging.error('Failed to binary from UUID: %s', str(e)) + return None + return module_desc.module_path_for_symbolization + + @staticmethod + def parse_from_file(module_map_path): + if not os.path.exists(module_map_path): + raise Exception('module map "{}" does not exist'.format(module_map_path)) + with open(module_map_path, 'r') as f: + mm = None + # E.g. + # 0x2db4000-0x102ddc000 /path/to (arm64) <0D6BBDE0-FF90-3680-899D-8E6F9528E04C> + hex_regex = lambda name: r'0x(?P<' + name + r'>[0-9a-f]+)' + module_path_regex = r'(?P<path>.+)' + arch_regex = r'\((?P<arch>.+)\)' + uuid_regex = r'<(?P<uuid>[0-9A-Z-]+)>' + line_regex = r'^{}-{}\s+{}\s+{}\s+{}'.format( + hex_regex('start_addr'), + hex_regex('end_addr'), + module_path_regex, + arch_regex, + uuid_regex + ) + matcher = re.compile(line_regex) + line_num = 0 + line = 'dummy' + while line != '': + line = f.readline() + line_num += 1 + if mm is None: + if line.startswith('Process module map:'): + mm = ModuleMap() + continue + if line.startswith('End of module map'): + break + m_obj = matcher.match(line) + if not m_obj: + raise Exception('Failed to parse line {} "{}"'.format(line_num, line)) + arch = m_obj.group('arch') + start_addr = int(m_obj.group('start_addr'), base=16) + end_addr = int(m_obj.group('end_addr'), base=16) + module_path = m_obj.group('path') + uuid = m_obj.group('uuid') + module_desc = ModuleDesc( + name=os.path.basename(module_path), + 
arch=arch, + start_addr=start_addr, + end_addr=end_addr, + module_path=module_path, + uuid=uuid + ) + mm.add_module(module_desc) + if mm is not None: + logging.debug('Loaded Module map from "{}":\n{}'.format( + f.name, + str(mm)) + ) + return mm + +class SysRootFilterPlugIn(AsanSymbolizerPlugIn): + """ + Simple plug-in to add sys root prefix to all binary paths + used for symbolication. + """ + def __init__(self): + self.sysroot_path = "" + + def register_cmdline_args(self, parser): + parser.add_argument('-s', dest='sys_root', metavar='SYSROOT', help='set path to sysroot for sanitized binaries') - parser.add_argument('-c', metavar='CROSS_COMPILE', - help='set prefix for binutils') - parser.add_argument('-l','--logfile', default=sys.stdin, - type=argparse.FileType('r'), - help='set log file name to parse, default is stdin') - parser.add_argument('--force-system-symbolizer', action='store_true', - help='don\'t use llvm-symbolizer') - args = parser.parse_args() - if args.path_to_cut: - fix_filename_patterns = args.path_to_cut - if args.demangle: - demangle = True - if args.s: - binary_name_filter = sysroot_path_filter - sysroot_path = args.s - if args.c: - binutils_prefix = args.c - if args.logfile: - logfile = args.logfile + + def process_cmdline_args(self, pargs): + if pargs.sys_root is None: + # Not being used so remove ourselves. + return False + self.sysroot_path = pargs.sys_root + return True + + def filter_binary_path(self, path): + return self.sysroot_path + path + +class ModuleMapPlugIn(AsanSymbolizerPlugIn): + def __init__(self): + self._module_map = None + def register_cmdline_args(self, parser): + parser.add_argument('--module-map', + help='Path to text file containing module map' + 'output. 
See print_module_map ASan option.') + def process_cmdline_args(self, pargs): + if not pargs.module_map: + return False + self._module_map = ModuleMap.parse_from_file(args.module_map) + if self._module_map is None: + msg = 'Failed to find module map' + logging.error(msg) + raise Exception(msg) + return True + def filter_binary_path(self, binary_path): + if os.path.isabs(binary_path): + # This is a binary path so transform into + # a module name + module_name = os.path.basename(binary_path) + else: + module_name = binary_path + return self._module_map.get_module_path_for_symbolication(module_name, self.proxy) + +def add_logging_args(parser): + parser.add_argument('--log-dest', + default=None, + help='Destination path for script logging (default stderr).', + ) + parser.add_argument('--log-level', + choices=['debug', 'info', 'warning', 'error', 'critical'], + default='info', + help='Log level for script (default: %(default)s).' + ) + +def setup_logging(): + # Set up a parser just for parsing the logging arguments. + # This is necessary because logging should be configured before we + # perform the main argument parsing. 
+ parser = argparse.ArgumentParser(add_help=False) + add_logging_args(parser) + pargs, unparsed_args = parser.parse_known_args() + + log_level = getattr(logging, pargs.log_level.upper()) + if log_level == logging.DEBUG: + log_format = '%(levelname)s: [%(funcName)s() %(filename)s:%(lineno)d] %(message)s' else: - logfile = sys.stdin - if args.force_system_symbolizer: - force_system_symbolizer = True - if force_system_symbolizer: - assert(allow_system_symbolizer) - loop = SymbolizationLoop(binary_name_filter) - loop.process_logfile() + log_format = '%(levelname)s: %(message)s' + basic_config = { + 'level': log_level, + 'format': log_format + } + log_dest = pargs.log_dest + if log_dest: + basic_config['filename'] = log_dest + logging.basicConfig(**basic_config) + logging.debug('Logging level set to "{}" and directing output to "{}"'.format( + pargs.log_level, + 'stderr' if log_dest is None else log_dest) + ) + return unparsed_args + +def add_load_plugin_args(parser): + parser.add_argument('-p', '--plugins', + help='Load plug-in', nargs='+', default=[]) + +def setup_plugins(plugin_proxy, args): + parser = argparse.ArgumentParser(add_help=False) + add_load_plugin_args(parser) + pargs , unparsed_args = parser.parse_known_args() + for plugin_path in pargs.plugins: + plugin_proxy.load_plugin_from_file(plugin_path) + # Add built-in plugins. 
+ plugin_proxy.add_plugin(ModuleMapPlugIn()) + plugin_proxy.add_plugin(SysRootFilterPlugIn()) + return unparsed_args + +if __name__ == '__main__': + remaining_args = setup_logging() + with AsanSymbolizerPlugInProxy() as plugin_proxy: + remaining_args = setup_plugins(plugin_proxy, remaining_args) + parser = argparse.ArgumentParser( + formatter_class=argparse.RawDescriptionHelpFormatter, + description='ASan symbolization script', + epilog=__doc__) + parser.add_argument('path_to_cut', nargs='*', + help='pattern to be cut from the result file path ') + parser.add_argument('-d','--demangle', action='store_true', + help='demangle function names') + parser.add_argument('-c', metavar='CROSS_COMPILE', + help='set prefix for binutils') + parser.add_argument('-l','--logfile', default=sys.stdin, + type=argparse.FileType('r'), + help='set log file name to parse, default is stdin') + parser.add_argument('--force-system-symbolizer', action='store_true', + help='don\'t use llvm-symbolizer') + # Add logging arguments so that `--help` shows them. + add_logging_args(parser) + # Add load plugin arguments so that `--help` shows them. + add_load_plugin_args(parser) + plugin_proxy.register_cmdline_args(parser) + args = parser.parse_args(remaining_args) + plugin_proxy.process_cmdline_args(args) + if args.path_to_cut: + fix_filename_patterns = args.path_to_cut + if args.demangle: + demangle = True + if args.c: + binutils_prefix = args.c + if args.logfile: + logfile = args.logfile + else: + logfile = sys.stdin + if args.force_system_symbolizer: + force_system_symbolizer = True + if force_system_symbolizer: + assert(allow_system_symbolizer) + loop = SymbolizationLoop(plugin_proxy) + loop.process_logfile() |