diff options
Diffstat (limited to 'deps/v8/tools/ll_prof.py')
-rwxr-xr-x | deps/v8/tools/ll_prof.py | 63 |
1 files changed, 7 insertions, 56 deletions
diff --git a/deps/v8/tools/ll_prof.py b/deps/v8/tools/ll_prof.py index 216929d1e2..3afe179d2f 100755 --- a/deps/v8/tools/ll_prof.py +++ b/deps/v8/tools/ll_prof.py @@ -45,7 +45,7 @@ USAGE="""usage: %prog [OPTION]... Analyses V8 and perf logs to produce profiles. Perf logs can be collected using a command like: - $ perf record -R -e cycles -c 10000 -f -i ./d8 bench.js --ll-prof + $ perf record -R -e cycles -c 10000 -f -i ./shell bench.js --ll-prof # -R: collect all data # -e cycles: use cpu-cycles event (run "perf list" for details) # -c 10000: write a sample after each 10000 events @@ -54,16 +54,6 @@ Perf logs can be collected using a command like: # --ll-prof shell flag enables the right V8 logs This will produce a binary trace file (perf.data) that %prog can analyse. -IMPORTANT: - The kernel has an internal maximum for events per second, it is 100K by - default. That's not enough for "-c 10000". Set it to some higher value: - $ echo 10000000 | sudo tee /proc/sys/kernel/perf_event_max_sample_rate - You can also make the warning about kernel address maps go away: - $ echo 0 | sudo tee /proc/sys/kernel/kptr_restrict - -We have a convenience script that handles all of the above for you: - $ tools/run-llprof.sh ./d8 bench.js - Examples: # Print flat profile with annotated disassembly for the 10 top # symbols. Use default log names and include the snapshot log. @@ -85,10 +75,6 @@ class Code(object): """Code object.""" _id = 0 - UNKNOWN = 0 - V8INTERNAL = 1 - FULL_CODEGEN = 2 - OPTIMIZED = 3 def __init__(self, name, start_address, end_address, origin, origin_offset): self.id = Code._id @@ -102,14 +88,6 @@ class Code(object): self.self_ticks = 0 self.self_ticks_map = None self.callee_ticks = None - if name.startswith("LazyCompile:*"): - self.codetype = Code.OPTIMIZED - elif name.startswith("LazyCompile:"): - self.codetype = Code.FULL_CODEGEN - elif name.startswith("v8::internal::"): - self.codetype = Code.V8INTERNAL - else: - self.codetype = Code.UNKNOWN def AddName(self, name): assert self.name != name @@ -207,7 +185,7 @@ class Code(object): class CodePage(object): """Group of adjacent code objects.""" - SHIFT = 20 # 1M pages + SHIFT = 12 # 4K pages SIZE = (1 << SHIFT) MASK = ~(SIZE - 1) @@ -529,7 +507,6 @@ class Descriptor(object): # for the gory details. -# Reference: struct perf_file_header in kernel/tools/perf/util/header.h TRACE_HEADER_DESC = Descriptor([ ("magic", "u64"), ("size", "u64"), @@ -543,7 +520,6 @@ TRACE_HEADER_DESC = Descriptor([ ]) -# Reference: /usr/include/linux/perf_event.h PERF_EVENT_ATTR_DESC = Descriptor([ ("type", "u32"), ("size", "u32"), @@ -553,13 +529,12 @@ PERF_EVENT_ATTR_DESC = Descriptor([ ("read_format", "u64"), ("flags", "u64"), ("wakeup_events_or_watermark", "u32"), - ("bp_type", "u32"), + ("bt_type", "u32"), ("bp_addr", "u64"), - ("bp_len", "u64") + ("bp_len", "u64"), ]) -# Reference: /usr/include/linux/perf_event.h PERF_EVENT_HEADER_DESC = Descriptor([ ("type", "u32"), ("misc", "u16"), @@ -567,7 +542,6 @@ PERF_EVENT_HEADER_DESC = Descriptor([ ]) -# Reference: kernel/events/core.c PERF_MMAP_EVENT_BODY_DESC = Descriptor([ ("pid", "u32"), ("tid", "u32"), @@ -592,7 +566,6 @@ PERF_SAMPLE_STREAM_ID = 1 << 9 PERF_SAMPLE_RAW = 1 << 10 -# Reference: /usr/include/perf_event.h, the comment for PERF_RECORD_SAMPLE. PERF_SAMPLE_EVENT_BODY_FIELDS = [ ("ip", "u64", PERF_SAMPLE_IP), ("pid", "u32", PERF_SAMPLE_TID), @@ -729,12 +702,8 @@ class LibraryRepo(object): # Unfortunately, section headers span two lines, so we have to # keep the just seen section name (from the first line in each # section header) in the after_section variable. - if mmap_info.filename.endswith(".ko"): - dynamic_symbols = "" - else: - dynamic_symbols = "-T" process = subprocess.Popen( - "%s -h -t %s -C %s" % (OBJDUMP_BIN, dynamic_symbols, mmap_info.filename), + "%s -h -t -T -C %s" % (OBJDUMP_BIN, mmap_info.filename), shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) pipe = process.stdout after_section = None @@ -826,7 +795,7 @@ def PrintReport(code_map, library_repo, arch, ticks, options): code.PrintAnnotated(arch, options) print print "Ticks per library:" - mmap_infos = [m for m in library_repo.infos if m.ticks > 0] + mmap_infos = [m for m in library_repo.infos] mmap_infos.sort(key=lambda m: m.ticks, reverse=True) for mmap_info in mmap_infos: mmap_ticks = mmap_info.ticks @@ -916,9 +885,6 @@ if __name__ == "__main__": ticks = 0 missed_ticks = 0 really_missed_ticks = 0 - optimized_ticks = 0 - generated_ticks = 0 - v8_internal_ticks = 0 mmap_time = 0 sample_time = 0 @@ -962,12 +928,6 @@ if __name__ == "__main__": code = code_map.Find(sample.ip) if code: code.Tick(sample.ip) - if code.codetype == Code.OPTIMIZED: - optimized_ticks += 1 - elif code.codetype == Code.FULL_CODEGEN: - generated_ticks += 1 - elif code.codetype == Code.V8INTERNAL: - v8_internal_ticks += 1 else: missed_ticks += 1 if not library_repo.Tick(sample.ip) and not code: @@ -987,21 +947,12 @@ if __name__ == "__main__": PrintReport(code_map, library_repo, log_reader.arch, ticks, options) if not options.quiet: - def PrintTicks(number, total, description): - print("%10d %5.1f%% ticks in %s" % - (number, 100.0*number/total, description)) print print "Stats:" print "%10d total trace events" % events print "%10d total ticks" % ticks print "%10d ticks not in symbols" % missed_ticks - unaccounted = "unaccounted ticks" - if really_missed_ticks > 0: - unaccounted += " (probably in the kernel, try --kernel)" - PrintTicks(really_missed_ticks, ticks, unaccounted) - PrintTicks(optimized_ticks, ticks, "ticks in optimized code") - PrintTicks(generated_ticks, ticks, "ticks in other lazily compiled code") - PrintTicks(v8_internal_ticks, ticks, "ticks in v8::internal::*") + print "%10d unaccounted ticks" % really_missed_ticks print "%10d total symbols" % len([c for c in code_map.AllCode()]) print "%10d used symbols" % len([c for c in code_map.UsedCode()]) print "%9.2fs library processing time" % mmap_time |