6 files changed, 288 insertions, 24 deletions
diff --git a/chromium/third_party/catapult/tracing/tracing_build/check_common.py b/chromium/third_party/catapult/tracing/tracing_build/check_common.py
index b49513f6d61..41fd301f4a3 100644
--- a/chromium/third_party/catapult/tracing/tracing_build/check_common.py
+++ b/chromium/third_party/catapult/tracing/tracing_build/check_common.py
@@ -84,7 +84,7 @@ def CheckCommon(file_name, listed_files):
         '\n\n'
         '  Note: only files actually used in about:tracing should\n'
         '  be listed in the build files. Try running \n'
-        '       tracing/bin/update_gyp_and_gn\n'
+        '       tracing/bin/update_gypi\n'
         '  to update the files automatically.')
 
   return error
diff --git a/chromium/third_party/catapult/tracing/tracing_build/html2trace.py b/chromium/third_party/catapult/tracing/tracing_build/html2trace.py
index 1acab58d5d6..85a451888af 100644
--- a/chromium/third_party/catapult/tracing/tracing_build/html2trace.py
+++ b/chromium/third_party/catapult/tracing/tracing_build/html2trace.py
@@ -3,7 +3,6 @@
 # found in the LICENSE file.
 
 import base64
-import codecs
 import gzip
 import json
 import re
@@ -20,13 +19,23 @@ TRACE_DATA_START_LINE_RE = re.compile(
 TRACE_DATA_END_LINE_RE = re.compile(r'^<\/\s*script>$')
 
 
-def CopyTraceDataFromHTMLFilePath(html_path, trace_path, gzipped_output=False):
+def IsHTMLTrace(trace_file_handle):
+  """Return True iff the first non-blank line is exactly '<!DOCTYPE html>'."""
+  trace_file_handle.seek(0)
+  for line in trace_file_handle:
+    if line.strip():
+      return line.strip() == '<!DOCTYPE html>'
+  return False  # Empty or all-blank input is not an HTML trace.
+
+
+def CopyTraceDataFromHTMLFilePath(html_file_handle, trace_path,
+                                  gzipped_output=False):
   """Copies trace data from an existing HTML file into new trace file(s).
 
-  If |html_path| doesn't contain any trace data blocks, this function throws an
-  exception. If |html_path| contains more than one trace data block, the first
-  block will be extracted into |trace_path| and the rest will be extracted
-  into separate files |trace_path|.1, |trace_path|.2, etc.
+  If |html_file_handle| doesn't contain any trace data blocks, this function
+  throws an exception. If |html_file_handle| contains more than one trace data
+  block, the first block will be extracted into |trace_path| and the rest will
+  be extracted into separate files |trace_path|.1, |trace_path|.2, etc.
 
   The contents of each trace data block is decoded and, if |gzipped_output| is
   false, inflated before it's stored in a trace file.
@@ -34,7 +43,7 @@ def CopyTraceDataFromHTMLFilePath(html_path, trace_path, gzipped_output=False):
   This function returns a list of paths of the saved trace files ([|trace_path|,
   |trace_path|.1, |trace_path|.2, ...]).
   """
-  trace_data_list = _ExtractTraceDataFromHTMLFile(html_path,
+  trace_data_list = _ExtractTraceDataFromHTMLFile(html_file_handle,
                                                   unzip_data=not gzipped_output)
   saved_paths = []
   for i, trace_data in enumerate(trace_data_list):
@@ -45,14 +54,15 @@ def CopyTraceDataFromHTMLFilePath(html_path, trace_path, gzipped_output=False):
   return saved_paths
 
 
-def ReadTracesFromHTMLFilePath(html_path):
+def ReadTracesFromHTMLFilePath(html_file_handle):
   """Returns a list of inflated JSON traces extracted from an HTML file."""
-  return map(json.load, _ExtractTraceDataFromHTMLFile(html_path))
+  return map(json.load, _ExtractTraceDataFromHTMLFile(html_file_handle))
 
 
-def _ExtractTraceDataFromHTMLFile(html_path, unzip_data=True):
-  with codecs.open(html_path, mode='r', encoding='utf-8') as html_file:
-    lines = html_file.readlines()
+def _ExtractTraceDataFromHTMLFile(html_file_handle, unzip_data=True):
+  assert IsHTMLTrace(html_file_handle)
+  html_file_handle.seek(0)
+  lines = html_file_handle.readlines()
 
   start_indices = [i for i in xrange(len(lines))
                    if TRACE_DATA_START_LINE_RE.match(lines[i])]
diff --git a/chromium/third_party/catapult/tracing/tracing_build/merge_traces.py b/chromium/third_party/catapult/tracing/tracing_build/merge_traces.py
index b107253dcf8..5280f620b1c 100644
--- a/chromium/third_party/catapult/tracing/tracing_build/merge_traces.py
+++ b/chromium/third_party/catapult/tracing/tracing_build/merge_traces.py
@@ -27,6 +27,7 @@ METADATA_PHASE = 'M'
 MEMORY_DUMP_PHASE = 'v'
 BEGIN_PHASE = 'B'
 END_PHASE = 'E'
+CLOCK_SYNC_EVENT_PHASE = 'c'
 
 
 # Minimum gap between two consecutive merged traces in microseconds.
@@ -295,11 +296,7 @@ def LoadTrace(filename):
   """Load a trace from a (possibly gzipped) file and return its parsed JSON."""
   logging.info('Loading trace %r...', filename)
   if filename.endswith(HTML_FILENAME_SUFFIX):
-    traces = html2trace.ReadTracesFromHTMLFilePath(filename)
-    if len(traces) > 1:
-      logging.warning('HTML trace contains multiple trace data blocks. Only '
-                      'the first block will be merged.')
-    return traces[0]
+    return LoadHTMLTrace(filename)
   elif filename.endswith(GZIP_FILENAME_SUFFIX):
     with gzip.open(filename, 'rb') as f:
       return json.load(f)
@@ -308,6 +305,30 @@ def LoadTrace(filename):
       return json.load(f)
 
 
+def LoadHTMLTrace(filename):
+  """Load a trace from a vulcanized HTML trace file."""
+  trace_components = collections.defaultdict(list)
+  # html2trace reads from an open file handle, not from a path.
+  with open(filename, 'r') as html_file:
+    for sub_trace in html2trace.ReadTracesFromHTMLFilePath(html_file):
+      for name, component in TraceAsDict(sub_trace).iteritems():
+        trace_components[name].append(component)
+
+  trace = {}
+  for name, components in trace_components.iteritems():
+    if len(components) == 1:
+      trace[name] = components[0]
+    elif all(isinstance(component, list) for component in components):
+      trace[name] = [e for component in components for e in component]
+    else:
+      trace[name] = components[0]
+      logging.warning(
+          'Values of repeated trace component %r in HTML trace %r are not '
+          'lists. The first defined value of the component will be used.',
+          name, filename)
+  return trace
+
+
 def SaveTrace(trace, filename):
   """Save a JSON trace to a (possibly gzipped) file."""
   if filename is None:
@@ -326,6 +347,13 @@ def SaveTrace(trace, filename):
         json.dump(trace, f)
 
 
+def TraceAsDict(trace):
+  """Return |trace| as a dict, wrapping a bare event list if necessary."""
+  if not isinstance(trace, list):
+    return trace
+  return {'traceEvents': trace}
+
+
 def MergeTraceFiles(input_trace_filenames, output_trace_filename):
   """Merge a collection of input trace files into an output trace file."""
   logging.info('Loading %d input traces...', len(input_trace_filenames))
@@ -347,9 +375,7 @@ def MergeTraces(traces):
   trace_components = collections.defaultdict(collections.OrderedDict)
 
   for filename, trace in traces.iteritems():
-    if isinstance(trace, list):
-      trace = {'traceEvents': trace}
-    for name, component in trace.iteritems():
+    for name, component in TraceAsDict(trace).iteritems():
       trace_components[name][filename] = component
 
   merged_trace = {}
@@ -372,14 +398,32 @@ def MergeComponents(component_name, components_by_filename):
 
 def MergeTraceEvents(events_by_filename):
   """Merge trace events from multiple traces into a single list of events."""
+  # Remove strings from the list of trace events
+  # (https://github.com/catapult-project/catapult/issues/2497).
+  events_by_filename = collections.OrderedDict(
+      (filename, [e for e in events if not isinstance(e, basestring)])
+      for filename, events in events_by_filename.iteritems())
+
   timestamp_range_by_filename = _AdjustTimestampRanges(events_by_filename)
   process_map = _CreateProcessMapFromTraceEvents(events_by_filename)
   merged_events = _CombineTraceEvents(events_by_filename, process_map)
+  _RemoveSurplusClockSyncEvents(merged_events)
   merged_events.extend(
       _BuildInjectedTraceMarkerEvents(timestamp_range_by_filename, process_map))
   return merged_events
 
 
+def _RemoveSurplusClockSyncEvents(events):
+  """Delete, in place, every clock sync event after the first one."""
+  # TODO(petrcermak): Figure out how to handle merging multiple clock sync
+  # events.
+  surplus_indices = [index for index, event in enumerate(events)
+                     if event['ph'] == CLOCK_SYNC_EVENT_PHASE][1:]
+  # Delete from the back so that the remaining indices stay valid.
+  for index in reversed(surplus_indices):
+    del events[index]
+
+
 def _AdjustTimestampRanges(events_by_filename):
   logging.info('Adjusting timestamp ranges of traces...')
 
diff --git a/chromium/third_party/catapult/tracing/tracing_build/slim_trace.py b/chromium/third_party/catapult/tracing/tracing_build/slim_trace.py
new file mode 100644
index 00000000000..ac47c79d9b5
--- /dev/null
+++ b/chromium/third_party/catapult/tracing/tracing_build/slim_trace.py
@@ -0,0 +1,112 @@
+# Copyright 2016 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import argparse
+import codecs
+import json
+import os
+import logging
+
+
+from tracing_build import html2trace
+from tracing_build import trace2html
+
+
+def GetFileSizeInMb(path):  # Whole MiB, rounded down.
+  return os.path.getsize(path) // (1 << 20)
+
+
+def Main(argv):
+  """Slim the trace named in |argv| into slimmed_<name>.html next to it."""
+  parser = argparse.ArgumentParser(
+      description='Slim a trace to a more manageable size')
+  parser.add_argument('trace_path', metavar='TRACE_PATH', type=str,
+                      help='trace file path (input).')
+  options = parser.parse_args(argv[1:])
+
+  trace_path = os.path.abspath(options.trace_path)
+  original_trace_name = os.path.splitext(os.path.basename(trace_path))[0]
+  slimmed_trace_path = os.path.join(
+      os.path.dirname(trace_path), 'slimmed_%s.html' % original_trace_name)
+
+  with codecs.open(trace_path, mode='r', encoding='utf-8') as f:
+    SlimTrace(f, slimmed_trace_path)
+
+  print 'Original trace %s (%s Mb)' % (
+      trace_path, GetFileSizeInMb(trace_path))
+  print 'Slimmed trace file://%s (%s Mb)' % (
+      slimmed_trace_path, GetFileSizeInMb(slimmed_trace_path))
+
+def SlimTraceEventsList(events_list):
+  """Return the events of |events_list| minus short complete events.
+
+  A complete event (phase 'X') is dropped when it lasts under 20 microseconds.
+  """
+  def ShouldKeep(event):
+    duration = event.get('dur', 0)
+    return event['ph'] != 'X' or duration >= 20
+  return [event for event in events_list if ShouldKeep(event)]
+
+
+def SlimSingleTrace(trace_data):  # Dict: slimmed in place. List: new list.
+  if isinstance(trace_data, dict):
+    trace_data['traceEvents'] = SlimTraceEventsList(trace_data['traceEvents'])
+    return trace_data
+  if isinstance(trace_data, list) and isinstance(trace_data[0], dict):
+    return SlimTraceEventsList(trace_data)
+  logging.warning('Cannot slim trace %s...', trace_data[:10])
+  return trace_data
+
+
+class TraceExtractor(object):  # Interface implemented once per trace format.
+  def CanExtractFile(self, trace_file_handle):
+    raise NotImplementedError
+
+  def ExtractTracesFromFile(self, trace_file_handle):
+    raise NotImplementedError
+
+
+class HTMLTraceExtractor(TraceExtractor):  # Delegates to html2trace.
+  def CanExtractFile(self, trace_file_handle):
+    return html2trace.IsHTMLTrace(trace_file_handle)
+
+  def ExtractTracesFromFile(self, trace_file_handle):
+    return html2trace.ReadTracesFromHTMLFilePath(trace_file_handle)
+
+
+class JsonTraceExtractor(TraceExtractor):
+  def CanExtractFile(self, trace_file_handle):
+    # Strip whitespace so a trailing newline doesn't hide the closing bracket.
+    trace_file_handle.seek(0)
+    content = trace_file_handle.read().strip()
+    return ((content.startswith('{') and content.endswith('}')) or
+            (content.startswith('[') and content.endswith(']')))
+
+  def ExtractTracesFromFile(self, trace_file_handle):
+    """Parse the whole file as one JSON trace; returns a one-element list."""
+    trace_file_handle.seek(0)
+    return [json.load(trace_file_handle)]
+
+
+ALL_TRACE_EXTRACTORS = [  # Tried in order by SlimTrace.
+    HTMLTraceExtractor(),
+    JsonTraceExtractor()
+]
+
+
+def SlimTrace(trace_file_handle, slimmed_trace_path):
+  """Write a slimmed HTML copy of the trace to |slimmed_trace_path|."""
+  traces = None
+  for extractor in ALL_TRACE_EXTRACTORS:
+    if extractor.CanExtractFile(trace_file_handle):
+      traces = extractor.ExtractTracesFromFile(trace_file_handle)
+      break
+
+  if traces is None:
+    raise Exception('Cannot extract trace from %s' % trace_file_handle.name)
+
+  slimmed_traces = map(SlimSingleTrace, traces)
+  with codecs.open(slimmed_trace_path, mode='w', encoding='utf-8') as f:
+    trace2html.WriteHTMLForTraceDataToFile(
+        slimmed_traces, title='', output_file=f)
diff --git a/chromium/third_party/catapult/tracing/tracing_build/strip_memory_infra_trace.py b/chromium/third_party/catapult/tracing/tracing_build/strip_memory_infra_trace.py
new file mode 100755
index 00000000000..5db429f8c6b
--- /dev/null
+++ b/chromium/third_party/catapult/tracing/tracing_build/strip_memory_infra_trace.py
@@ -0,0 +1,100 @@
+# Copyright 2016 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Filters a big trace keeping only the last memory-infra dumps."""
+
+import collections
+import gzip
+import json
+
+
+def FormatBytes(value):
+  """Format a byte count using the largest fitting unit (B up to GiB)."""
+  units = ['B', 'KiB', 'MiB', 'GiB']
+  while abs(value) >= 1024 and len(units) > 1:
+    value, units = value / 1024.0, units[1:]
+  return '%3.1f %s' % (value, units[0])
+
+
+def Main(argv):  # Returns 1 on a usage or trace-format error, else None.
+  if len(argv) < 2:
+    print 'Usage: %s trace.json[.gz]' % argv[0]
+    return 1
+
+  in_path = argv[1]
+  if in_path.lower().endswith('.gz'):
+    fin = gzip.open(in_path, 'rb')
+  else:
+    fin = open(in_path, 'r')
+  with fin:
+    print 'Loading trace (can take 1 min on a z620 for a 1GB trace)...'
+    trace = json.load(fin)
+    print 'Done. Read ' + FormatBytes(fin.tell())
+
+  print 'Filtering events'
+  phase_count = collections.defaultdict(int)
+  out_events = []
+  global_dumps = collections.OrderedDict()
+  if isinstance(trace, dict):
+    in_events = trace.get('traceEvents', [])
+  elif isinstance(trace, list) and (not trace or isinstance(trace[0], dict)):
+    in_events = trace
+  else:
+    print 'Unsupported trace format: expected a dict or a list of events'
+    return 1
+
+  for evt in in_events:
+    phase = evt.get('ph', '?')
+    phase_count[phase] += 1
+
+    # Drop all diagnostic events for memory-infra debugging.
+    if phase not in ('v', 'V') and evt.get('cat', '').endswith('memory-infra'):
+      continue
+
+    # Pass through all the other non-memory-infra events.
+    if phase != 'v':
+      out_events.append(evt)
+      continue
+
+    # Recreate the global dump groups: 'v' events sharing an id form one dump.
+    global_dumps.setdefault(evt['id'], []).append(evt)
+
+  print 'Detected %d memory-infra global dumps' % len(global_dumps)
+  if global_dumps:
+    max_procs = max(len(x) for x in global_dumps.itervalues())
+    print 'Max number of processes seen: %d' % max_procs
+
+  ndumps = 2
+  print 'Preserving the last %d memory-infra dumps' % ndumps
+  detailed_dumps = []
+  non_detailed_dumps = []
+  for global_dump in global_dumps.itervalues():
+    try:
+      level_of_detail = global_dump[0]['args']['dumps']['level_of_detail']
+    except KeyError:
+      level_of_detail = None
+    if level_of_detail == 'detailed':
+      detailed_dumps.append(global_dump)
+    else:
+      non_detailed_dumps.append(global_dump)
+
+  dumps_to_preserve = detailed_dumps[-ndumps:]
+  ndumps -= len(dumps_to_preserve)
+  if ndumps:
+    dumps_to_preserve += non_detailed_dumps[-ndumps:]
+
+  for global_dump in dumps_to_preserve:
+    out_events += global_dump
+
+  print '\nEvents histogram for the original trace (count by phase)'
+  print '--------------------------------------------------------'
+  for phase, count in sorted(phase_count.items(), key=lambda x: x[1]):
+    print '%s %d' % (phase, count)
+
+  out_path = in_path.split('.json')[0] + '-filtered.json'
+  print '\nWriting filtered trace to ' + out_path,
+  with open(out_path, 'w') as fout:
+    json.dump({'traceEvents': out_events}, fout)
+    num_bytes_written = fout.tell()
+  print ' (%s written)' % FormatBytes(num_bytes_written)
diff --git a/chromium/third_party/catapult/tracing/tracing_build/trace2html.py b/chromium/third_party/catapult/tracing/tracing_build/trace2html.py
index 82682d065a8..96006b247cf 100644
--- a/chromium/third_party/catapult/tracing/tracing_build/trace2html.py
+++ b/chromium/third_party/catapult/tracing/tracing_build/trace2html.py
@@ -82,10 +82,8 @@ def WriteHTMLForTraceDataToFile(trace_data_list,
 
   modules = [
       'tracing.trace2html',
-      'tracing.extras.importer.gzip_importer',  # Must have for all configs.
-      project.GetModuleNameForConfigName(config_name)
+      project.GetModuleNameForConfigName(config_name),
   ]
-
   vulcanizer = project.CreateVulcanizer()
   load_sequence = vulcanizer.CalcLoadSequenceForModuleNames(modules)