chromium/tools/clang/blink_gc_plugin/process-graph.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479

#!/usr/bin/env python
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

from __future__ import print_function
from StringIO import StringIO
import argparse, os, sys, json, subprocess, pickle

parser = argparse.ArgumentParser(
  description =
    "Process the Blink points-to graph generated by the Blink GC plugin.")

parser.add_argument(
  '-', dest='use_stdin', action='store_true',
  help='Read JSON graph files from stdin')

parser.add_argument(
  '-c', '--detect-cycles', action='store_true',
  help='Detect cycles containing GC roots')

parser.add_argument(
  '-s', '--print-stats', action='store_true',
  help='Statistics about ref-counted and traced objects')

parser.add_argument(
  '-v', '--verbose', action='store_true',
  help='Verbose output')

parser.add_argument(
  '--ignore-cycles', default=None, metavar='FILE',
  help='File with cycles to ignore')

parser.add_argument(
  '--ignore-classes', nargs='*', default=[], metavar='CLASS',
  help='Classes to ignore when detecting cycles')

parser.add_argument(
  '--pickle-graph', default=None, metavar='FILE',
  help='File to read/save the graph from/to')

parser.add_argument(
  'files', metavar='FILE_OR_DIR', nargs='*', default=[],
  help='JSON graph files or directories containing them')

# Command line args after parsing.
args = None

# Map from node labels to nodes.
graph = {}

# Set of root nodes.
roots = []

# List of cycles to ignore.
ignored_cycles = []

# Global flag to determine exit code.
global_reported_error = False

try:
  # Python3 remove sys.maxint.
  maxint = sys.maxint
except AttributeError:
  # Also see https://stackoverflow.com/a/13795777/4052492.
  maxint = sys.maxsize


def set_reported_error(value):
  global global_reported_error
  global_reported_error = value

def reported_error():
  return global_reported_error

def log(msg):
  if args.verbose:
    print(msg)


global_inc_copy = 0
def inc_copy():
  global global_inc_copy
  global_inc_copy += 1

def get_node(name):
  return graph.setdefault(name, Node(name))

ptr_types = ('raw', 'ref', 'mem')

def inc_ptr(dst, ptr):
  if ptr in ptr_types:
    node = graph.get(dst)
    if not node: return
    node.counts[ptr] += 1

def add_counts(s1, s2):
  for (k, v) in s2.iteritems():
    s1[k] += s2[k]

# Representation of graph nodes. Basically a map of directed edges.
class Node:
  def __init__(self, name):
    self.name = name
    self.edges = {}
    self.reset()
  def __repr__(self):
    return "%s(%s) %s" % (self.name, self.visited, self.edges)
  def update_node(self, decl):
    # Currently we don't track any node info besides its edges.
    pass
  def update_edge(self, e):
    new_edge = Edge(**e)
    edge = self.edges.get(new_edge.key)
    if edge:
      # If an edge exist, its kind is the strongest of the two.
      edge.kind = max(edge.kind, new_edge.kind)
    else:
      self.edges[new_edge.key] = new_edge
  def super_edges(self):
    return [e for e in self.edges.values() if e.is_super()]

  def subclass_edges(self):
    return [e for e in self.edges.values() if e.is_subclass()]

  def reset(self):
    self.cost = maxint
    self.visited = False
    self.path = None
    self.counts = {}
    for ptr in ptr_types:
      self.counts[ptr] = 0
  def update_counts(self):
    for e in self.edges.values():
      inc_ptr(e.dst, e.ptr)

# Representation of directed graph edges.
class Edge:
  def __init__(self, **decl):
    self.src = decl['src']
    self.dst = decl['dst']
    self.lbl = decl['lbl']
    self.ptr = decl['ptr']
    self.kind = decl['kind'] # 0 = weak, 1 = strong, 2 = root
    self.loc = decl['loc']
    # The label does not uniquely determine an edge from a node. We
    # define the semi-unique key to be the concatenation of the
    # label and dst name. This is sufficient to track the strongest
    # edge to a particular type. For example, if the field A::m_f
    # has type HashMap<WeakMember<B>, Member<B>> we will have a
    # strong edge with key m_f#B from A to B.
    self.key = '%s#%s' % (self.lbl, self.dst)
  def __repr__(self):
    return '%s (%s) => %s' % (self.src, self.lbl, self.dst)
  def is_root(self):
    return self.kind == 2
  def is_weak(self):
    return self.kind == 0
  def keeps_alive(self):
    return self.kind > 0
  def is_subclass(self):
    return self.lbl.startswith('<subclass>')
  def is_super(self):
    return self.lbl.startswith('<super>')

def parse_file(filename):
  obj = json.load(open(filename))
  return obj

def build_graphs_in_dir(dirname):
  # TODO: Use plateform independent code, eg, os.walk
  files = subprocess.check_output(
    ['find', dirname, '-name', '*.graph.json']).split('\n')
  log("Found %d files" % len(files))
  for f in files:
    f.strip()
    if len(f) < 1:
      continue
    build_graph(f)

def build_graph(filename):
  for decl in parse_file(filename):
    if 'name' in decl:
      # Add/update a node entry
      name = decl['name']
      node = get_node(name)
      node.update_node(decl)
    else:
      # Add/update an edge entry
      name = decl['src']
      node = get_node(name)
      node.update_edge(decl)

# Copy all non-weak edges from super classes to their subclasses.
# This causes all fields of a super to be considered fields of a
# derived class without tranitively relating derived classes with
# each other. For example, if B <: A, C <: A, and for some D, D => B,
# we don't want that to entail that D => C.
def copy_super_edges(edge):
  if edge.is_weak() or not edge.is_super():
    return
  inc_copy()
  # Make the super-class edge weak (prohibits processing twice).
  edge.kind = 0
  # If the super class is not in our graph exit early.
  super_node = graph.get(edge.dst)
  if super_node is None: return
  # Recursively copy all super-class edges.
  for e in super_node.super_edges():
    copy_super_edges(e)
  # Copy strong super-class edges (ignoring sub-class edges) to the sub class.
  sub_node = graph[edge.src]
  for e in super_node.edges.values():
    if e.keeps_alive() and not e.is_subclass():
      new_edge = Edge(
        src = sub_node.name,
        dst = e.dst,
        lbl = '%s <: %s' % (super_node.name, e.lbl),
        ptr = e.ptr,
        kind = e.kind,
        loc = e.loc,
      )
      sub_node.edges[new_edge.key] = new_edge
  # Add a strong sub-class edge.
  sub_edge = Edge(
    src = super_node.name,
    dst = sub_node.name,
    lbl = '<subclass>',
    ptr = edge.ptr,
    kind = 1,
    loc = edge.loc,
  )
  super_node.edges[sub_edge.key] = sub_edge

def complete_graph():
  for node in graph.values():
    for edge in node.super_edges():
      copy_super_edges(edge)
    for edge in node.edges.values():
      if edge.is_root():
        roots.append(edge)
  log("Copied edges down <super> edges for %d graph nodes" % global_inc_copy)

def reset_graph():
  for n in graph.values():
    n.reset()

def shortest_path(start, end):
  start.cost = 0
  minlist = [start]
  while len(minlist) > 0:
    minlist.sort(key=lambda n: -n.cost)
    current = minlist.pop()
    current.visited = True
    if current == end or current.cost >= end.cost + 1:
      return
    for e in current.edges.values():
      if not e.keeps_alive():
        continue
      dst = graph.get(e.dst)
      if dst is None or dst.visited:
        continue
      if current.cost < dst.cost:
        dst.cost = current.cost + 1
        dst.path = e
      minlist.append(dst)

def detect_cycles():
  for root_edge in roots:
    reset_graph()
    # Mark ignored classes as already visited
    for ignore in args.ignore_classes:
      name = ignore.find("::") > 0 and ignore or ("blink::" + ignore)
      node = graph.get(name)
      if node:
        node.visited = True
    src = graph[root_edge.src]
    dst = graph.get(root_edge.dst)
    if src.visited:
      continue
    if root_edge.dst == "WTF::String":
      continue
    if dst is None:
      print("\nPersistent root to incomplete destination object:")
      print(root_edge)
      set_reported_error(True)
      continue
    # Find the shortest path from the root target (dst) to its host (src)
    shortest_path(dst, src)
    if src.cost < maxint:
      report_cycle(root_edge)

def is_ignored_cycle(cycle):
  for block in ignored_cycles:
    if block_match(cycle, block):
      return True

def block_match(b1, b2):
  if len(b1) != len(b2):
    return False
  for (l1, l2) in zip(b1, b2):
    if l1 != l2:
      return False
  return True

def report_cycle(root_edge):
  dst = graph[root_edge.dst]
  path = []
  edge = root_edge
  dst.path = None
  while edge:
    path.append(edge)
    edge = graph[edge.src].path
  path.append(root_edge)
  path.reverse()
  # Find the max loc length for pretty printing.
  max_loc = 0
  for p in path:
    if len(p.loc) > max_loc:
      max_loc = len(p.loc)
  out = StringIO()
  for p in path[:-1]:
    print((p.loc + ':').ljust(max_loc + 1), p, file=out)
  sout = out.getvalue()
  if not is_ignored_cycle(sout):
    print("\nFound a potentially leaking cycle starting from a GC root:\n",
          sout, sep="")
    set_reported_error(True)

def load_graph():
  global graph
  global roots
  log("Reading graph from pickled file: " + args.pickle_graph)
  dump = pickle.load(open(args.pickle_graph, 'rb'))
  graph = dump[0]
  roots = dump[1]

def save_graph():
  log("Saving graph to pickle file: " + args.pickle_graph)
  dump = (graph, roots)
  pickle.dump(dump, open(args.pickle_graph, 'wb'))

def read_ignored_cycles():
  global ignored_cycles
  if not args.ignore_cycles:
    return
  log("Reading ignored cycles from file: " + args.ignore_cycles)
  block = []
  for l in open(args.ignore_cycles):
    line = l.strip()
    if not line or line.startswith('Found'):
      if len(block) > 0:
        ignored_cycles.append(block)
      block = []
    else:
      block += l
  if len(block) > 0:
    ignored_cycles.append(block)

gc_bases = (
  'blink::GarbageCollected',
  'blink::GarbageCollectedMixin',
)
ref_bases = (
  'WTF::RefCounted',
  'WTF::ThreadSafeRefCounted',
)
gcref_bases = (
  'blink::RefCountedGarbageCollected',
  'blink::ThreadSafeRefCountedGarbageCollected',
)
ref_mixins = (
  'blink::EventTarget',
  'blink::EventTargetWithInlineData',
  'blink::ActiveDOMObject',
)

def print_stats():
  gcref_managed = []
  ref_managed = []
  gc_managed = []
  hierarchies = []

  for node in graph.values():
    node.update_counts()
    for sup in node.super_edges():
      if sup.dst in gcref_bases:
        gcref_managed.append(node)
      elif sup.dst in ref_bases:
        ref_managed.append(node)
      elif sup.dst in gc_bases:
        gc_managed.append(node)

  groups = [("GC manged   ", gc_managed),
            ("ref counted ", ref_managed),
            ("in transition", gcref_managed)]
  total = sum([len(g) for (s,g) in groups])
  for (s, g) in groups:
    percent = len(g) * 100 / total
    print("%2d%% is %s (%d hierarchies)" % (percent, s, len(g)))

  for base in gcref_managed:
    stats = dict({ 'classes': 0, 'ref-mixins': 0 })
    for ptr in ptr_types: stats[ptr] = 0
    hierarchy_stats(base, stats)
    hierarchies.append((base, stats))

  print("\nHierarchies in transition (RefCountedGarbageCollected):")
  hierarchies.sort(key=lambda n: -n[1]['classes'])
  for (node, stats) in hierarchies:
    total = stats['mem'] + stats['ref'] + stats['raw']
    print(
        ("%s %3d%% of %-30s: %3d cls, %3d mem, %3d ref, %3d raw, %3d ref-mixins"
         % (
             stats['ref'] == 0 and stats['ref-mixins'] == 0 and "*" or " ",
             total == 0 and 100 or stats['mem'] * 100 / total,
             node.name.replace('blink::', ''),
             stats['classes'],
             stats['mem'],
             stats['ref'],
             stats['raw'],
             stats['ref-mixins'],
         )))


def hierarchy_stats(node, stats):
  if not node: return
  stats['classes'] += 1
  add_counts(stats, node.counts)
  for edge in node.super_edges():
    if edge.dst in ref_mixins:
      stats['ref-mixins'] += 1
  for edge in node.subclass_edges():
    hierarchy_stats(graph.get(edge.dst), stats)

def main():
  global args
  args = parser.parse_args()
  if not (args.detect_cycles or args.print_stats):
    print("Please select an operation to perform (eg, -c to detect cycles)")
    parser.print_help()
    return 1
  if args.pickle_graph and os.path.isfile(args.pickle_graph):
    load_graph()
  else:
    if args.use_stdin:
      log("Reading files from stdin")
      for f in sys.stdin:
        build_graph(f.strip())
    else:
      log("Reading files and directories from command line")
      if len(args.files) == 0:
        print("Please provide files or directores for building the graph")
        parser.print_help()
        return 1
      for f in args.files:
        if os.path.isdir(f):
          log("Building graph from files in directory: " + f)
          build_graphs_in_dir(f)
        else:
          log("Building graph from file: " + f)
          build_graph(f)
    log("Completing graph construction (%d graph nodes)" % len(graph))
    complete_graph()
    if args.pickle_graph:
      save_graph()
  if args.detect_cycles:
    read_ignored_cycles()
    log("Detecting cycles containg GC roots")
    detect_cycles()
  if args.print_stats:
    log("Printing statistics")
    print_stats()
  if reported_error():
    return 1
  return 0

if __name__ == '__main__':
  sys.exit(main())