Diffstat (limited to 'src/3rdparty/v8/tools/ll_prof.py')
-rwxr-xr-x  src/3rdparty/v8/tools/ll_prof.py  1011
1 file changed, 0 insertions, 1011 deletions
diff --git a/src/3rdparty/v8/tools/ll_prof.py b/src/3rdparty/v8/tools/ll_prof.py
deleted file mode 100755
index 216929d..0000000
--- a/src/3rdparty/v8/tools/ll_prof.py
+++ /dev/null
@@ -1,1011 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright 2012 the V8 project authors. All rights reserved.
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-#
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above
-# copyright notice, this list of conditions and the following
-# disclaimer in the documentation and/or other materials provided
-# with the distribution.
-# * Neither the name of Google Inc. nor the names of its
-# contributors may be used to endorse or promote products derived
-# from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import bisect
-import collections
-import ctypes
-import disasm
-import mmap
-import optparse
-import os
-import re
-import subprocess
-import sys
-import time
-
-
-USAGE="""usage: %prog [OPTION]...
-
-Analyses V8 and perf logs to produce profiles.
-
-Perf logs can be collected using a command like:
- $ perf record -R -e cycles -c 10000 -f -i ./d8 bench.js --ll-prof
- # -R: collect all data
- # -e cycles: use cpu-cycles event (run "perf list" for details)
- # -c 10000: write a sample after each 10000 events
- # -f: force output file overwrite
- # -i: limit profiling to our process and the kernel
- # --ll-prof shell flag enables the right V8 logs
-This will produce a binary trace file (perf.data) that %prog can analyse.
-
-IMPORTANT:
-  The kernel has an internal maximum for events per second; it is 100K by
- default. That's not enough for "-c 10000". Set it to some higher value:
- $ echo 10000000 | sudo tee /proc/sys/kernel/perf_event_max_sample_rate
- You can also make the warning about kernel address maps go away:
- $ echo 0 | sudo tee /proc/sys/kernel/kptr_restrict
-
-We have a convenience script that handles all of the above for you:
- $ tools/run-llprof.sh ./d8 bench.js
-
-Examples:
- # Print flat profile with annotated disassembly for the 10 top
- # symbols. Use default log names and include the snapshot log.
- $ %prog --snapshot --disasm-top=10
-
- # Print flat profile with annotated disassembly for all used symbols.
- # Use default log names and include kernel symbols into analysis.
- $ %prog --disasm-all --kernel
-
- # Print flat profile. Use custom log names.
- $ %prog --log=foo.log --snapshot-log=snap-foo.log --trace=foo.data --snapshot
-"""
-
-
-JS_ORIGIN = "js"
-JS_SNAPSHOT_ORIGIN = "js-snapshot"
-
-class Code(object):
- """Code object."""
-
- _id = 0
- UNKNOWN = 0
- V8INTERNAL = 1
- FULL_CODEGEN = 2
- OPTIMIZED = 3
-
- def __init__(self, name, start_address, end_address, origin, origin_offset):
- self.id = Code._id
- Code._id += 1
- self.name = name
- self.other_names = None
- self.start_address = start_address
- self.end_address = end_address
- self.origin = origin
- self.origin_offset = origin_offset
- self.self_ticks = 0
- self.self_ticks_map = None
- self.callee_ticks = None
- if name.startswith("LazyCompile:*"):
- self.codetype = Code.OPTIMIZED
- elif name.startswith("LazyCompile:"):
- self.codetype = Code.FULL_CODEGEN
- elif name.startswith("v8::internal::"):
- self.codetype = Code.V8INTERNAL
- else:
- self.codetype = Code.UNKNOWN
-
- def AddName(self, name):
- assert self.name != name
- if self.other_names is None:
- self.other_names = [name]
- return
- if not name in self.other_names:
- self.other_names.append(name)
-
- def FullName(self):
- if self.other_names is None:
- return self.name
- self.other_names.sort()
- return "%s (aka %s)" % (self.name, ", ".join(self.other_names))
-
- def IsUsed(self):
- return self.self_ticks > 0 or self.callee_ticks is not None
-
- def Tick(self, pc):
- self.self_ticks += 1
- if self.self_ticks_map is None:
- self.self_ticks_map = collections.defaultdict(lambda: 0)
- offset = pc - self.start_address
- self.self_ticks_map[offset] += 1
-
- def CalleeTick(self, callee):
- if self.callee_ticks is None:
- self.callee_ticks = collections.defaultdict(lambda: 0)
- self.callee_ticks[callee] += 1
-
- def PrintAnnotated(self, arch, options):
- if self.self_ticks_map is None:
- ticks_map = []
- else:
- ticks_map = self.self_ticks_map.items()
- # Convert the ticks map to offsets and counts arrays so that later
- # we can do binary search in the offsets array.
- ticks_map.sort(key=lambda t: t[0])
- ticks_offsets = [t[0] for t in ticks_map]
- ticks_counts = [t[1] for t in ticks_map]
- # Get a list of disassembled lines and their addresses.
- lines = self._GetDisasmLines(arch, options)
- if len(lines) == 0:
- return
- # Print annotated lines.
- address = lines[0][0]
- total_count = 0
- for i in xrange(len(lines)):
- start_offset = lines[i][0] - address
- if i == len(lines) - 1:
- end_offset = self.end_address - self.start_address
- else:
- end_offset = lines[i + 1][0] - address
-      # Ticks (reported pc values) are not always precise, i.e. they do not
- # necessarily point at instruction starts. So we have to search
- # for ticks that touch the current instruction line.
- j = bisect.bisect_left(ticks_offsets, end_offset)
- count = 0
- for offset, cnt in reversed(zip(ticks_offsets[:j], ticks_counts[:j])):
- if offset < start_offset:
- break
- count += cnt
- total_count += count
- count = 100.0 * count / self.self_ticks
- if count >= 0.01:
- print "%15.2f %x: %s" % (count, lines[i][0], lines[i][1])
- else:
- print "%s %x: %s" % (" " * 15, lines[i][0], lines[i][1])
- print
- assert total_count == self.self_ticks, \
- "Lost ticks (%d != %d) in %s" % (total_count, self.self_ticks, self)
-
- def __str__(self):
- return "%s [0x%x, 0x%x) size: %d origin: %s" % (
- self.name,
- self.start_address,
- self.end_address,
- self.end_address - self.start_address,
- self.origin)
-
- def _GetDisasmLines(self, arch, options):
- if self.origin == JS_ORIGIN or self.origin == JS_SNAPSHOT_ORIGIN:
- inplace = False
- filename = options.log + ".ll"
- else:
- inplace = True
- filename = self.origin
- return disasm.GetDisasmLines(filename,
- self.origin_offset,
- self.end_address - self.start_address,
- arch,
- inplace)
-
-
-class CodePage(object):
- """Group of adjacent code objects."""
-
- SHIFT = 20 # 1M pages
- SIZE = (1 << SHIFT)
- MASK = ~(SIZE - 1)
-
- @staticmethod
- def PageAddress(address):
- return address & CodePage.MASK
-
- @staticmethod
- def PageId(address):
- return address >> CodePage.SHIFT
-
- @staticmethod
- def PageAddressFromId(id):
- return id << CodePage.SHIFT
-
- def __init__(self, address):
- self.address = address
- self.code_objects = []
-
- def Add(self, code):
- self.code_objects.append(code)
-
- def Remove(self, code):
- self.code_objects.remove(code)
-
- def Find(self, pc):
- code_objects = self.code_objects
- for i, code in enumerate(code_objects):
- if code.start_address <= pc < code.end_address:
- code_objects[0], code_objects[i] = code, code_objects[0]
- return code
- return None
-
- def __iter__(self):
- return self.code_objects.__iter__()
-
-
-class CodeMap(object):
- """Code object map."""
-
- def __init__(self):
- self.pages = {}
- self.min_address = 1 << 64
- self.max_address = -1
-
- def Add(self, code, max_pages=-1):
- page_id = CodePage.PageId(code.start_address)
- limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1)
- pages = 0
- while page_id < limit_id:
- if max_pages >= 0 and pages > max_pages:
- print >>sys.stderr, \
- "Warning: page limit (%d) reached for %s [%s]" % (
- max_pages, code.name, code.origin)
- break
- if page_id in self.pages:
- page = self.pages[page_id]
- else:
- page = CodePage(CodePage.PageAddressFromId(page_id))
- self.pages[page_id] = page
- page.Add(code)
- page_id += 1
- pages += 1
- self.min_address = min(self.min_address, code.start_address)
- self.max_address = max(self.max_address, code.end_address)
-
- def Remove(self, code):
- page_id = CodePage.PageId(code.start_address)
- limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1)
- removed = False
- while page_id < limit_id:
- if page_id not in self.pages:
- page_id += 1
- continue
- page = self.pages[page_id]
- page.Remove(code)
- removed = True
- page_id += 1
- return removed
-
- def AllCode(self):
- for page in self.pages.itervalues():
- for code in page:
- if CodePage.PageAddress(code.start_address) == page.address:
- yield code
-
- def UsedCode(self):
- for code in self.AllCode():
- if code.IsUsed():
- yield code
-
- def Print(self):
- for code in self.AllCode():
- print code
-
- def Find(self, pc):
- if pc < self.min_address or pc >= self.max_address:
- return None
- page_id = CodePage.PageId(pc)
- if page_id not in self.pages:
- return None
- return self.pages[page_id].Find(pc)
-
-
-class CodeInfo(object):
- """Generic info about generated code objects."""
-
- def __init__(self, arch, header_size):
- self.arch = arch
- self.header_size = header_size
-
-
-class SnapshotLogReader(object):
- """V8 snapshot log reader."""
-
- _SNAPSHOT_CODE_NAME_RE = re.compile(
- r"snapshot-code-name,(\d+),\"(.*)\"")
-
- def __init__(self, log_name):
- self.log_name = log_name
-
- def ReadNameMap(self):
- log = open(self.log_name, "r")
- try:
- snapshot_pos_to_name = {}
- for line in log:
- match = SnapshotLogReader._SNAPSHOT_CODE_NAME_RE.match(line)
- if match:
- pos = int(match.group(1))
- name = match.group(2)
- snapshot_pos_to_name[pos] = name
- finally:
- log.close()
- return snapshot_pos_to_name
-
-
-class LogReader(object):
- """V8 low-level (binary) log reader."""
-
- _ARCH_TO_POINTER_TYPE_MAP = {
- "ia32": ctypes.c_uint32,
- "arm": ctypes.c_uint32,
- "mips": ctypes.c_uint32,
- "x64": ctypes.c_uint64
- }
-
- _CODE_CREATE_TAG = "C"
- _CODE_MOVE_TAG = "M"
- _CODE_DELETE_TAG = "D"
- _SNAPSHOT_POSITION_TAG = "P"
- _CODE_MOVING_GC_TAG = "G"
-
- def __init__(self, log_name, code_map, snapshot_pos_to_name):
- self.log_file = open(log_name, "r")
- self.log = mmap.mmap(self.log_file.fileno(), 0, mmap.MAP_PRIVATE)
- self.log_pos = 0
- self.code_map = code_map
- self.snapshot_pos_to_name = snapshot_pos_to_name
- self.address_to_snapshot_name = {}
-
- self.arch = self.log[:self.log.find("\0")]
- self.log_pos += len(self.arch) + 1
- assert self.arch in LogReader._ARCH_TO_POINTER_TYPE_MAP, \
- "Unsupported architecture %s" % self.arch
- pointer_type = LogReader._ARCH_TO_POINTER_TYPE_MAP[self.arch]
-
- self.code_create_struct = LogReader._DefineStruct([
- ("name_size", ctypes.c_int32),
- ("code_address", pointer_type),
- ("code_size", ctypes.c_int32)])
-
- self.code_move_struct = LogReader._DefineStruct([
- ("from_address", pointer_type),
- ("to_address", pointer_type)])
-
- self.code_delete_struct = LogReader._DefineStruct([
- ("address", pointer_type)])
-
- self.snapshot_position_struct = LogReader._DefineStruct([
- ("address", pointer_type),
- ("position", ctypes.c_int32)])
-
- def ReadUpToGC(self):
- while self.log_pos < self.log.size():
- tag = self.log[self.log_pos]
- self.log_pos += 1
-
- if tag == LogReader._CODE_MOVING_GC_TAG:
- self.address_to_snapshot_name.clear()
- return
-
- if tag == LogReader._CODE_CREATE_TAG:
- event = self.code_create_struct.from_buffer(self.log, self.log_pos)
- self.log_pos += ctypes.sizeof(event)
- start_address = event.code_address
- end_address = start_address + event.code_size
- if start_address in self.address_to_snapshot_name:
- name = self.address_to_snapshot_name[start_address]
- origin = JS_SNAPSHOT_ORIGIN
- else:
- name = self.log[self.log_pos:self.log_pos + event.name_size]
- origin = JS_ORIGIN
- self.log_pos += event.name_size
- origin_offset = self.log_pos
- self.log_pos += event.code_size
- code = Code(name, start_address, end_address, origin, origin_offset)
-        conflicting_code = self.code_map.Find(start_address)
-        if conflicting_code:
-          if not (conflicting_code.start_address == code.start_address and
-                  conflicting_code.end_address == code.end_address):
-            self.code_map.Remove(conflicting_code)
-          else:
-            LogReader._HandleCodeConflict(conflicting_code, code)
- # TODO(vitalyr): this warning is too noisy because of our
- # attempts to reconstruct code log from the snapshot.
- # print >>sys.stderr, \
- # "Warning: Skipping duplicate code log entry %s" % code
- continue
- self.code_map.Add(code)
- continue
-
- if tag == LogReader._CODE_MOVE_TAG:
- event = self.code_move_struct.from_buffer(self.log, self.log_pos)
- self.log_pos += ctypes.sizeof(event)
- old_start_address = event.from_address
- new_start_address = event.to_address
- if old_start_address == new_start_address:
- # Skip useless code move entries.
- continue
- code = self.code_map.Find(old_start_address)
- if not code:
- print >>sys.stderr, "Warning: Not found %x" % old_start_address
- continue
- assert code.start_address == old_start_address, \
- "Inexact move address %x for %s" % (old_start_address, code)
- self.code_map.Remove(code)
- size = code.end_address - code.start_address
- code.start_address = new_start_address
- code.end_address = new_start_address + size
- self.code_map.Add(code)
- continue
-
- if tag == LogReader._CODE_DELETE_TAG:
- event = self.code_delete_struct.from_buffer(self.log, self.log_pos)
- self.log_pos += ctypes.sizeof(event)
- old_start_address = event.address
- code = self.code_map.Find(old_start_address)
- if not code:
- print >>sys.stderr, "Warning: Not found %x" % old_start_address
- continue
- assert code.start_address == old_start_address, \
- "Inexact delete address %x for %s" % (old_start_address, code)
- self.code_map.Remove(code)
- continue
-
- if tag == LogReader._SNAPSHOT_POSITION_TAG:
- event = self.snapshot_position_struct.from_buffer(self.log,
- self.log_pos)
- self.log_pos += ctypes.sizeof(event)
- start_address = event.address
- snapshot_pos = event.position
- if snapshot_pos in self.snapshot_pos_to_name:
- self.address_to_snapshot_name[start_address] = \
- self.snapshot_pos_to_name[snapshot_pos]
- continue
-
- assert False, "Unknown tag %s" % tag
-
- def Dispose(self):
- self.log.close()
- self.log_file.close()
-
- @staticmethod
- def _DefineStruct(fields):
- class Struct(ctypes.Structure):
- _fields_ = fields
- return Struct
-
- @staticmethod
- def _HandleCodeConflict(old_code, new_code):
- assert (old_code.start_address == new_code.start_address and
- old_code.end_address == new_code.end_address), \
- "Conficting code log entries %s and %s" % (old_code, new_code)
- if old_code.name == new_code.name:
- return
-    # A code object may be shared by a few functions. Collect the full
- # set of names.
- old_code.AddName(new_code.name)
-
-
-class Descriptor(object):
- """Descriptor of a structure in the binary trace log."""
-
- CTYPE_MAP = {
- "u16": ctypes.c_uint16,
- "u32": ctypes.c_uint32,
- "u64": ctypes.c_uint64
- }
-
- def __init__(self, fields):
- class TraceItem(ctypes.Structure):
- _fields_ = Descriptor.CtypesFields(fields)
-
- def __str__(self):
- return ", ".join("%s: %s" % (field, self.__getattribute__(field))
- for field, _ in TraceItem._fields_)
-
- self.ctype = TraceItem
-
- def Read(self, trace, offset):
- return self.ctype.from_buffer(trace, offset)
-
- @staticmethod
- def CtypesFields(fields):
- return [(field, Descriptor.CTYPE_MAP[format]) for (field, format) in fields]
-
-
-# Please see http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=tree;f=tools/perf
-# for the gory details.
-
-
-# Reference: struct perf_file_header in kernel/tools/perf/util/header.h
-TRACE_HEADER_DESC = Descriptor([
- ("magic", "u64"),
- ("size", "u64"),
- ("attr_size", "u64"),
- ("attrs_offset", "u64"),
- ("attrs_size", "u64"),
- ("data_offset", "u64"),
- ("data_size", "u64"),
- ("event_types_offset", "u64"),
- ("event_types_size", "u64")
-])
-
-
-# Reference: /usr/include/linux/perf_event.h
-PERF_EVENT_ATTR_DESC = Descriptor([
- ("type", "u32"),
- ("size", "u32"),
- ("config", "u64"),
- ("sample_period_or_freq", "u64"),
- ("sample_type", "u64"),
- ("read_format", "u64"),
- ("flags", "u64"),
- ("wakeup_events_or_watermark", "u32"),
- ("bp_type", "u32"),
- ("bp_addr", "u64"),
- ("bp_len", "u64")
-])
-
-
-# Reference: /usr/include/linux/perf_event.h
-PERF_EVENT_HEADER_DESC = Descriptor([
- ("type", "u32"),
- ("misc", "u16"),
- ("size", "u16")
-])
-
-
-# Reference: kernel/events/core.c
-PERF_MMAP_EVENT_BODY_DESC = Descriptor([
- ("pid", "u32"),
- ("tid", "u32"),
- ("addr", "u64"),
- ("len", "u64"),
- ("pgoff", "u64")
-])
-
-
-# perf_event_attr.sample_type bits control the set of
-# perf_sample_event fields.
-PERF_SAMPLE_IP = 1 << 0
-PERF_SAMPLE_TID = 1 << 1
-PERF_SAMPLE_TIME = 1 << 2
-PERF_SAMPLE_ADDR = 1 << 3
-PERF_SAMPLE_READ = 1 << 4
-PERF_SAMPLE_CALLCHAIN = 1 << 5
-PERF_SAMPLE_ID = 1 << 6
-PERF_SAMPLE_CPU = 1 << 7
-PERF_SAMPLE_PERIOD = 1 << 8
-PERF_SAMPLE_STREAM_ID = 1 << 9
-PERF_SAMPLE_RAW = 1 << 10
-
-
-# Reference: /usr/include/linux/perf_event.h, the comment for PERF_RECORD_SAMPLE.
-PERF_SAMPLE_EVENT_BODY_FIELDS = [
- ("ip", "u64", PERF_SAMPLE_IP),
- ("pid", "u32", PERF_SAMPLE_TID),
- ("tid", "u32", PERF_SAMPLE_TID),
- ("time", "u64", PERF_SAMPLE_TIME),
- ("addr", "u64", PERF_SAMPLE_ADDR),
- ("id", "u64", PERF_SAMPLE_ID),
- ("stream_id", "u64", PERF_SAMPLE_STREAM_ID),
- ("cpu", "u32", PERF_SAMPLE_CPU),
- ("res", "u32", PERF_SAMPLE_CPU),
- ("period", "u64", PERF_SAMPLE_PERIOD),
-  # We don't want to handle the read format, which comes after the period and
-  # before the callchain and has a variable size.
- ("nr", "u64", PERF_SAMPLE_CALLCHAIN)
- # Raw data follows the callchain and is ignored.
-]
-
-
-PERF_SAMPLE_EVENT_IP_FORMAT = "u64"
-
-
-PERF_RECORD_MMAP = 1
-PERF_RECORD_SAMPLE = 9
-
-
-class TraceReader(object):
- """Perf (linux-2.6/tools/perf) trace file reader."""
-
- _TRACE_HEADER_MAGIC = 4993446653023372624
-
- def __init__(self, trace_name):
- self.trace_file = open(trace_name, "r")
- self.trace = mmap.mmap(self.trace_file.fileno(), 0, mmap.MAP_PRIVATE)
- self.trace_header = TRACE_HEADER_DESC.Read(self.trace, 0)
- if self.trace_header.magic != TraceReader._TRACE_HEADER_MAGIC:
- print >>sys.stderr, "Warning: unsupported trace header magic"
- self.offset = self.trace_header.data_offset
- self.limit = self.trace_header.data_offset + self.trace_header.data_size
- assert self.limit <= self.trace.size(), \
- "Trace data limit exceeds trace file size"
- self.header_size = ctypes.sizeof(PERF_EVENT_HEADER_DESC.ctype)
- assert self.trace_header.attrs_size != 0, \
- "No perf event attributes found in the trace"
- perf_event_attr = PERF_EVENT_ATTR_DESC.Read(self.trace,
- self.trace_header.attrs_offset)
- self.sample_event_body_desc = self._SampleEventBodyDesc(
- perf_event_attr.sample_type)
- self.callchain_supported = \
- (perf_event_attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0
- if self.callchain_supported:
- self.ip_struct = Descriptor.CTYPE_MAP[PERF_SAMPLE_EVENT_IP_FORMAT]
- self.ip_size = ctypes.sizeof(self.ip_struct)
-
- def ReadEventHeader(self):
- if self.offset >= self.limit:
- return None, 0
- offset = self.offset
- header = PERF_EVENT_HEADER_DESC.Read(self.trace, self.offset)
- self.offset += header.size
- return header, offset
-
- def ReadMmap(self, header, offset):
- mmap_info = PERF_MMAP_EVENT_BODY_DESC.Read(self.trace,
- offset + self.header_size)
- # Read null-terminated filename.
- filename = self.trace[offset + self.header_size + ctypes.sizeof(mmap_info):
- offset + header.size]
- mmap_info.filename = HOST_ROOT + filename[:filename.find(chr(0))]
- return mmap_info
-
- def ReadSample(self, header, offset):
- sample = self.sample_event_body_desc.Read(self.trace,
- offset + self.header_size)
- if not self.callchain_supported:
- return sample
- sample.ips = []
- offset += self.header_size + ctypes.sizeof(sample)
- for _ in xrange(sample.nr):
- sample.ips.append(
- self.ip_struct.from_buffer(self.trace, offset).value)
- offset += self.ip_size
- return sample
-
- def Dispose(self):
- self.trace.close()
- self.trace_file.close()
-
- def _SampleEventBodyDesc(self, sample_type):
- assert (sample_type & PERF_SAMPLE_READ) == 0, \
- "Can't hande read format in samples"
- fields = [(field, format)
- for (field, format, bit) in PERF_SAMPLE_EVENT_BODY_FIELDS
- if (bit & sample_type) != 0]
- return Descriptor(fields)
-
-
-OBJDUMP_SECTION_HEADER_RE = re.compile(
- r"^\s*\d+\s(\.\S+)\s+[a-f0-9]")
-OBJDUMP_SYMBOL_LINE_RE = re.compile(
- r"^([a-f0-9]+)\s(.{7})\s(\S+)\s+([a-f0-9]+)\s+(?:\.hidden\s+)?(.*)$")
-OBJDUMP_DYNAMIC_SYMBOLS_START_RE = re.compile(
- r"^DYNAMIC SYMBOL TABLE")
-OBJDUMP_SKIP_RE = re.compile(
- r"^.*ld\.so\.cache$")
-KERNEL_ALLSYMS_FILE = "/proc/kallsyms"
-PERF_KERNEL_ALLSYMS_RE = re.compile(
- r".*kallsyms.*")
-KERNEL_ALLSYMS_LINE_RE = re.compile(
- r"^([a-f0-9]+)\s(?:t|T)\s(\S+)$")
-
-
-class LibraryRepo(object):
- def __init__(self):
- self.infos = []
- self.names = set()
- self.ticks = {}
-
- def Load(self, mmap_info, code_map, options):
-    # Kernel mmaps have tid 0; skip them unless --kernel processing was
-    # requested.
- if mmap_info.tid == 0 and not options.kernel:
- return True
- if OBJDUMP_SKIP_RE.match(mmap_info.filename):
- return True
- if PERF_KERNEL_ALLSYMS_RE.match(mmap_info.filename):
- return self._LoadKernelSymbols(code_map)
- self.infos.append(mmap_info)
- mmap_info.ticks = 0
- mmap_info.unique_name = self._UniqueMmapName(mmap_info)
- if not os.path.exists(mmap_info.filename):
- return True
- # Request section headers (-h), symbols (-t), and dynamic symbols
- # (-T) from objdump.
- # Unfortunately, section headers span two lines, so we have to
- # keep the just seen section name (from the first line in each
- # section header) in the after_section variable.
- if mmap_info.filename.endswith(".ko"):
- dynamic_symbols = ""
- else:
- dynamic_symbols = "-T"
- process = subprocess.Popen(
- "%s -h -t %s -C %s" % (OBJDUMP_BIN, dynamic_symbols, mmap_info.filename),
- shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
- pipe = process.stdout
- after_section = None
- code_sections = set()
- reloc_sections = set()
- dynamic = False
- try:
- for line in pipe:
- if after_section:
- if line.find("CODE") != -1:
- code_sections.add(after_section)
- if line.find("RELOC") != -1:
- reloc_sections.add(after_section)
- after_section = None
- continue
-
- match = OBJDUMP_SECTION_HEADER_RE.match(line)
- if match:
- after_section = match.group(1)
- continue
-
- if OBJDUMP_DYNAMIC_SYMBOLS_START_RE.match(line):
- dynamic = True
- continue
-
- match = OBJDUMP_SYMBOL_LINE_RE.match(line)
- if match:
- start_address = int(match.group(1), 16)
- origin_offset = start_address
- flags = match.group(2)
- section = match.group(3)
- if section in code_sections:
- if dynamic or section in reloc_sections:
- start_address += mmap_info.addr
- size = int(match.group(4), 16)
- name = match.group(5)
- origin = mmap_info.filename
- code_map.Add(Code(name, start_address, start_address + size,
- origin, origin_offset))
- finally:
- pipe.close()
- assert process.wait() == 0, "Failed to objdump %s" % mmap_info.filename
-
- def Tick(self, pc):
- for i, mmap_info in enumerate(self.infos):
- if mmap_info.addr <= pc < (mmap_info.addr + mmap_info.len):
- mmap_info.ticks += 1
- self.infos[0], self.infos[i] = mmap_info, self.infos[0]
- return True
- return False
-
- def _UniqueMmapName(self, mmap_info):
- name = mmap_info.filename
- index = 1
- while name in self.names:
- name = "%s-%d" % (mmap_info.filename, index)
- index += 1
- self.names.add(name)
- return name
-
- def _LoadKernelSymbols(self, code_map):
- if not os.path.exists(KERNEL_ALLSYMS_FILE):
- print >>sys.stderr, "Warning: %s not found" % KERNEL_ALLSYMS_FILE
- return False
- kallsyms = open(KERNEL_ALLSYMS_FILE, "r")
- code = None
- for line in kallsyms:
- match = KERNEL_ALLSYMS_LINE_RE.match(line)
- if match:
- start_address = int(match.group(1), 16)
- end_address = start_address
- name = match.group(2)
- if code:
- code.end_address = start_address
- code_map.Add(code, 16)
- code = Code(name, start_address, end_address, "kernel", 0)
- return True
-
-
-def PrintReport(code_map, library_repo, arch, ticks, options):
- print "Ticks per symbol:"
- used_code = [code for code in code_map.UsedCode()]
- used_code.sort(key=lambda x: x.self_ticks, reverse=True)
- for i, code in enumerate(used_code):
- code_ticks = code.self_ticks
- print "%10d %5.1f%% %s [%s]" % (code_ticks, 100. * code_ticks / ticks,
- code.FullName(), code.origin)
- if options.disasm_all or i < options.disasm_top:
- code.PrintAnnotated(arch, options)
- print
- print "Ticks per library:"
- mmap_infos = [m for m in library_repo.infos if m.ticks > 0]
- mmap_infos.sort(key=lambda m: m.ticks, reverse=True)
- for mmap_info in mmap_infos:
- mmap_ticks = mmap_info.ticks
- print "%10d %5.1f%% %s" % (mmap_ticks, 100. * mmap_ticks / ticks,
- mmap_info.unique_name)
-
-
-def PrintDot(code_map, options):
- print "digraph G {"
- for code in code_map.UsedCode():
- if code.self_ticks < 10:
- continue
- print "n%d [shape=box,label=\"%s\"];" % (code.id, code.name)
- if code.callee_ticks:
- for callee, ticks in code.callee_ticks.iteritems():
- print "n%d -> n%d [label=\"%d\"];" % (code.id, callee.id, ticks)
- print "}"
-
-
-if __name__ == "__main__":
- parser = optparse.OptionParser(USAGE)
- parser.add_option("--snapshot-log",
- default="obj/release/snapshot.log",
- help="V8 snapshot log file name [default: %default]")
- parser.add_option("--log",
- default="v8.log",
- help="V8 log file name [default: %default]")
- parser.add_option("--snapshot",
- default=False,
- action="store_true",
- help="process V8 snapshot log [default: %default]")
- parser.add_option("--trace",
- default="perf.data",
- help="perf trace file name [default: %default]")
- parser.add_option("--kernel",
- default=False,
- action="store_true",
- help="process kernel entries [default: %default]")
- parser.add_option("--disasm-top",
- default=0,
- type="int",
- help=("number of top symbols to disassemble and annotate "
- "[default: %default]"))
- parser.add_option("--disasm-all",
- default=False,
- action="store_true",
- help=("disassemble and annotate all used symbols "
- "[default: %default]"))
- parser.add_option("--dot",
- default=False,
- action="store_true",
- help="produce dot output (WIP) [default: %default]")
- parser.add_option("--quiet", "-q",
- default=False,
- action="store_true",
- help="no auxiliary messages [default: %default]")
- parser.add_option("--gc-fake-mmap",
- default="/tmp/__v8_gc__",
- help="gc fake mmap file [default: %default]")
- parser.add_option("--objdump",
- default="/usr/bin/objdump",
- help="objdump tool to use [default: %default]")
- parser.add_option("--host-root",
- default="",
- help="Path to the host root [default: %default]")
- options, args = parser.parse_args()
-
- if not options.quiet:
- if options.snapshot:
- print "V8 logs: %s, %s, %s.ll" % (options.snapshot_log,
- options.log,
- options.log)
- else:
- print "V8 log: %s, %s.ll (no snapshot)" % (options.log, options.log)
- print "Perf trace file: %s" % options.trace
-
- V8_GC_FAKE_MMAP = options.gc_fake_mmap
- HOST_ROOT = options.host_root
- if os.path.exists(options.objdump):
- disasm.OBJDUMP_BIN = options.objdump
- OBJDUMP_BIN = options.objdump
- else:
- print "Cannot find %s, falling back to default objdump" % options.objdump
-
- # Stats.
- events = 0
- ticks = 0
- missed_ticks = 0
- really_missed_ticks = 0
- optimized_ticks = 0
- generated_ticks = 0
- v8_internal_ticks = 0
- mmap_time = 0
- sample_time = 0
-
- # Process the snapshot log to fill the snapshot name map.
- snapshot_name_map = {}
- if options.snapshot:
- snapshot_log_reader = SnapshotLogReader(log_name=options.snapshot_log)
- snapshot_name_map = snapshot_log_reader.ReadNameMap()
-
- # Initialize the log reader.
- code_map = CodeMap()
- log_reader = LogReader(log_name=options.log + ".ll",
- code_map=code_map,
- snapshot_pos_to_name=snapshot_name_map)
- if not options.quiet:
- print "Generated code architecture: %s" % log_reader.arch
- print
- sys.stdout.flush()
-
- # Process the code and trace logs.
- library_repo = LibraryRepo()
- log_reader.ReadUpToGC()
- trace_reader = TraceReader(options.trace)
- while True:
- header, offset = trace_reader.ReadEventHeader()
- if not header:
- break
- events += 1
- if header.type == PERF_RECORD_MMAP:
- start = time.time()
- mmap_info = trace_reader.ReadMmap(header, offset)
- if mmap_info.filename == HOST_ROOT + V8_GC_FAKE_MMAP:
- log_reader.ReadUpToGC()
- else:
- library_repo.Load(mmap_info, code_map, options)
- mmap_time += time.time() - start
- elif header.type == PERF_RECORD_SAMPLE:
- ticks += 1
- start = time.time()
- sample = trace_reader.ReadSample(header, offset)
- code = code_map.Find(sample.ip)
- if code:
- code.Tick(sample.ip)
- if code.codetype == Code.OPTIMIZED:
- optimized_ticks += 1
- elif code.codetype == Code.FULL_CODEGEN:
- generated_ticks += 1
- elif code.codetype == Code.V8INTERNAL:
- v8_internal_ticks += 1
- else:
- missed_ticks += 1
- if not library_repo.Tick(sample.ip) and not code:
- really_missed_ticks += 1
- if trace_reader.callchain_supported:
- for ip in sample.ips:
- caller_code = code_map.Find(ip)
- if caller_code:
- if code:
- caller_code.CalleeTick(code)
- code = caller_code
- sample_time += time.time() - start
-
- if options.dot:
- PrintDot(code_map, options)
- else:
- PrintReport(code_map, library_repo, log_reader.arch, ticks, options)
-
- if not options.quiet:
- def PrintTicks(number, total, description):
- print("%10d %5.1f%% ticks in %s" %
- (number, 100.0*number/total, description))
- print
- print "Stats:"
- print "%10d total trace events" % events
- print "%10d total ticks" % ticks
- print "%10d ticks not in symbols" % missed_ticks
- unaccounted = "unaccounted ticks"
- if really_missed_ticks > 0:
- unaccounted += " (probably in the kernel, try --kernel)"
- PrintTicks(really_missed_ticks, ticks, unaccounted)
- PrintTicks(optimized_ticks, ticks, "ticks in optimized code")
- PrintTicks(generated_ticks, ticks, "ticks in other lazily compiled code")
- PrintTicks(v8_internal_ticks, ticks, "ticks in v8::internal::*")
- print "%10d total symbols" % len([c for c in code_map.AllCode()])
- print "%10d used symbols" % len([c for c in code_map.UsedCode()])
- print "%9.2fs library processing time" % mmap_time
- print "%9.2fs tick processing time" % sample_time
-
- log_reader.Dispose()
- trace_reader.Dispose()
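
The CodePage/CodeMap classes above bucket every code object into each 1 MB
page it overlaps, so looking up the code object for a sampled pc only scans
the objects sharing that page. A minimal, self-contained sketch of that
technique (illustrative only; TinyCodeMap and its names are not part of the
deleted file):

PAGE_SHIFT = 20              # 1 MB pages, as in CodePage.SHIFT
PAGE_SIZE = 1 << PAGE_SHIFT

def PageId(address):
  return address >> PAGE_SHIFT

class TinyCodeMap(object):
  """Illustrative stand-in for CodeMap, not the deleted file's class."""

  def __init__(self):
    self.pages = {}          # page id -> list of (start, end, name) tuples

  def Add(self, start, end, name):
    page = PageId(start)
    limit = PageId(end + PAGE_SIZE - 1)
    while page < limit:
      self.pages.setdefault(page, []).append((start, end, name))
      page += 1

  def Find(self, pc):
    for start, end, name in self.pages.get(PageId(pc), []):
      if start <= pc < end:
        return name
    return None

code_map = TinyCodeMap()
code_map.Add(0x100000, 0x100040, "LazyCompile:*foo")
assert code_map.Find(0x100010) == "LazyCompile:*foo"
assert code_map.Find(0x200000) is None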