summaryrefslogtreecommitdiff
path: root/utils
diff options
context:
space:
mode:
authorArtem Dergachev <artem.dergachev@gmail.com>2019-06-19 23:33:59 +0000
committerArtem Dergachev <artem.dergachev@gmail.com>2019-06-19 23:33:59 +0000
commitcd58cd0fced56e242f84e26dc6d011a13823bb32 (patch)
treef62c373e89fe33460b1cfad25a8d10a359f4f699 /utils
parent28823bb64502587c484df3ffa707f7f562ef367f (diff)
downloadclang-cd58cd0fced56e242f84e26dc6d011a13823bb32.tar.gz
[analyzer] exploded-graph-rewriter: Implement a --diff mode.
In this mode the tool would avoid duplicating the contents of the program state on every node, replacing them with a diff-like dump of changes that happened on that node. This is useful because most of the time we only interested in whether the effect of the statement was modeled correctly. A diffed graph would also be much faster to load and navigate, being much smaller than the original graph. The diffs are computed "semantically" as opposed to plain text diffs. I.e., the diff algorithm is hand-crafted separately for every state trait, taking the underlying data structures into account. This is especially nice for Environment because textual diffs would have been terrible. On the other hand, it requires some boilerplate to implement. Differential Revision: https://reviews.llvm.org/D62761 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@363898 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'utils')
-rwxr-xr-xutils/analyzer/exploded-graph-rewriter.py279
1 files changed, 218 insertions, 61 deletions
diff --git a/utils/analyzer/exploded-graph-rewriter.py b/utils/analyzer/exploded-graph-rewriter.py
index 7f83f801ae..1ee1b34afa 100755
--- a/utils/analyzer/exploded-graph-rewriter.py
+++ b/utils/analyzer/exploded-graph-rewriter.py
@@ -18,6 +18,13 @@ import logging
import re
+# A helper function for finding the difference between two dictionaries.
+def diff_dicts(curr, prev):
+ removed = [k for k in prev if k not in curr or curr[k] != prev[k]]
+ added = [k for k in curr if k not in prev or curr[k] != prev[k]]
+ return (removed, added)
+
+
# A deserialized source location.
class SourceLocation(object):
def __init__(self, json_loc):
@@ -47,13 +54,21 @@ class ProgramPoint(object):
self.block_id = json_pp['block_id']
-# A value of a single expression in a deserialized Environment.
-class EnvironmentBinding(object):
- def __init__(self, json_eb):
- super(EnvironmentBinding, self).__init__()
- self.stmt_id = json_eb['stmt_id']
- self.pretty = json_eb['pretty']
- self.value = json_eb['value']
+# A single expression acting as a key in a deserialized Environment.
+class EnvironmentBindingKey(object):
+ def __init__(self, json_ek):
+ super(EnvironmentBindingKey, self).__init__()
+ self.stmt_id = json_ek['stmt_id']
+ self.pretty = json_ek['pretty']
+
+ def _key(self):
+ return self.stmt_id
+
+ def __eq__(self, other):
+ return self._key() == other._key()
+
+ def __hash__(self):
+ return hash(self._key())
# Deserialized description of a location context.
@@ -65,6 +80,15 @@ class LocationContext(object):
self.decl = json_frame['calling']
self.line = json_frame['call_line']
+ def _key(self):
+ return self.lctx_id
+
+ def __eq__(self, other):
+ return self._key() == other._key()
+
+ def __hash__(self):
+ return hash(self._key())
+
# A group of deserialized Environment bindings that correspond to a specific
# location context.
@@ -72,8 +96,17 @@ class EnvironmentFrame(object):
def __init__(self, json_frame):
super(EnvironmentFrame, self).__init__()
self.location_context = LocationContext(json_frame)
- self.bindings = [EnvironmentBinding(b) for b in json_frame['items']] \
- if json_frame['items'] is not None else []
+ self.bindings = collections.OrderedDict(
+ [(EnvironmentBindingKey(b),
+ b['value']) for b in json_frame['items']]
+ if json_frame['items'] is not None else [])
+
+ def diff_bindings(self, prev):
+ return diff_dicts(self.bindings, prev.bindings)
+
+ def is_different(self, prev):
+ removed, added = self.diff_bindings(prev)
+ return len(removed) != 0 or len(added) != 0
# A deserialized Environment.
@@ -82,14 +115,46 @@ class Environment(object):
super(Environment, self).__init__()
self.frames = [EnvironmentFrame(f) for f in json_e]
+ def diff_frames(self, prev):
+ # TODO: It's difficult to display a good diff when frame numbers shift.
+ if len(self.frames) != len(prev.frames):
+ return None
+
+ updated = []
+ for i in range(len(self.frames)):
+ f = self.frames[i]
+ prev_f = prev.frames[i]
+ if f.location_context == prev_f.location_context:
+ if f.is_different(prev_f):
+ updated.append(i)
+ else:
+ # We have the whole frame replaced with another frame.
+ # TODO: Produce a nice diff.
+ return None
+
+ # TODO: Add support for added/removed.
+ return updated
+
+ def is_different(self, prev):
+ updated = self.diff_frames(prev)
+ return updated is None or len(updated) > 0
+
+
+# A single binding key in a deserialized RegionStore cluster.
+class StoreBindingKey(object):
+ def __init__(self, json_sk):
+ super(StoreBindingKey, self).__init__()
+ self.kind = json_sk['kind']
+ self.offset = json_sk['offset']
-# A single binding in a deserialized RegionStore cluster.
-class StoreBinding(object):
- def __init__(self, json_sb):
- super(StoreBinding, self).__init__()
- self.kind = json_sb['kind']
- self.offset = json_sb['offset']
- self.value = json_sb['value']
+ def _key(self):
+ return (self.kind, self.offset)
+
+ def __eq__(self, other):
+ return self._key() == other._key()
+
+ def __hash__(self):
+ return hash(self._key())
# A single cluster of the deserialized RegionStore.
@@ -97,14 +162,34 @@ class StoreCluster(object):
def __init__(self, json_sc):
super(StoreCluster, self).__init__()
self.base_region = json_sc['cluster']
- self.bindings = [StoreBinding(b) for b in json_sc['items']]
+ self.bindings = collections.OrderedDict(
+ [(StoreBindingKey(b), b['value']) for b in json_sc['items']])
+
+ def diff_bindings(self, prev):
+ return diff_dicts(self.bindings, prev.bindings)
+
+ def is_different(self, prev):
+ removed, added = self.diff_bindings(prev)
+ return len(removed) != 0 or len(added) != 0
# A deserialized RegionStore.
class Store(object):
def __init__(self, json_s):
super(Store, self).__init__()
- self.clusters = [StoreCluster(c) for c in json_s]
+ self.clusters = collections.OrderedDict(
+ [(c['pointer'], StoreCluster(c)) for c in json_s])
+
+ def diff_clusters(self, prev):
+ removed = [k for k in prev.clusters if k not in self.clusters]
+ added = [k for k in self.clusters if k not in prev.clusters]
+ updated = [k for k in prev.clusters if k in self.clusters
+ and prev.clusters[k].is_different(self.clusters[k])]
+ return (removed, added, updated)
+
+ def is_different(self, prev):
+ removed, added, updated = self.diff_clusters(prev)
+ return len(removed) != 0 or len(added) != 0 or len(updated) != 0
# A deserialized program state.
@@ -213,8 +298,9 @@ class ExplodedGraph(object):
# A visitor that dumps the ExplodedGraph into a DOT file with fancy HTML-based
# syntax highlighing.
class DotDumpVisitor(object):
- def __init__(self):
+ def __init__(self, do_diffs):
super(DotDumpVisitor, self).__init__()
+ self._do_diffs = do_diffs
@staticmethod
def _dump_raw(s):
@@ -230,6 +316,14 @@ class DotDumpVisitor(object):
.replace('\\l', '<br />')
.replace('|', ''), end='')
+ @staticmethod
+ def _diff_plus_minus(is_added):
+ if is_added is None:
+ return ''
+ if is_added:
+ return '<font color="forestgreen">+</font>'
+ return '<font color="red">-</font>'
+
def visit_begin_graph(self, graph):
self._graph = graph
self._dump_raw('digraph "ExplodedGraph" {\n')
@@ -263,72 +357,128 @@ class DotDumpVisitor(object):
'<font color="%s">%s</font></td><td>%s</td></tr>'
% (color, p.stmt_kind, p.pretty))
elif p.kind == 'Edge':
- self._dump('<tr><td width="0">-</td>'
+ self._dump('<tr><td width="0"></td>'
'<td align="left" width="0">'
'<font color="%s">%s</font></td><td align="left">'
'[B%d] -\\> [B%d]</td></tr>'
% (color, p.kind, p.src_id, p.dst_id))
else:
# TODO: Print more stuff for other kinds of points.
- self._dump('<tr><td width="0">-</td>'
+ self._dump('<tr><td width="0"></td>'
'<td align="left" width="0" colspan="2">'
'<font color="%s">%s</font></td></tr>'
% (color, p.kind))
- def visit_environment(self, e):
+ def visit_environment(self, e, prev_e=None):
self._dump('<table border="0">')
- for f in e.frames:
- self._dump('<tr><td align="left"><b>%s</b></td>'
+ def dump_location_context(lc, is_added=None):
+ self._dump('<tr><td>%s</td>'
+ '<td align="left"><b>%s</b></td>'
'<td align="left"><font color="grey60">%s </font>'
'%s</td></tr>'
- % (f.location_context.caption,
- f.location_context.decl,
- ('(line %s)' % f.location_context.line)
- if f.location_context.line is not None else ''))
- for b in f.bindings:
- self._dump('<tr><td align="left"><i>S%s</i></td>'
- '<td align="left">%s</td>'
- '<td align="left">%s</td></tr>'
- % (b.stmt_id, b.pretty, b.value))
+ % (self._diff_plus_minus(is_added),
+ lc.caption, lc.decl,
+ ('(line %s)' % lc.line) if lc.line is not None
+ else ''))
+
+ def dump_binding(f, b, is_added=None):
+ self._dump('<tr><td>%s</td>'
+ '<td align="left"><i>S%s</i></td>'
+ '<td align="left">%s</td>'
+ '<td align="left">%s</td></tr>'
+ % (self._diff_plus_minus(is_added),
+ b.stmt_id, b.pretty, f.bindings[b]))
+
+ frames_updated = e.diff_frames(prev_e) if prev_e is not None else None
+ if frames_updated:
+ for i in frames_updated:
+ f = e.frames[i]
+ prev_f = prev_e.frames[i]
+ dump_location_context(f.location_context)
+ bindings_removed, bindings_added = f.diff_bindings(prev_f)
+ for b in bindings_removed:
+ dump_binding(prev_f, b, False)
+ for b in bindings_added:
+ dump_binding(f, b, True)
+ else:
+ for f in e.frames:
+ dump_location_context(f.location_context)
+ for b in f.bindings:
+ dump_binding(f, b)
self._dump('</table>')
- def visit_store(self, s):
+ def visit_store(self, s, prev_s=None):
self._dump('<table border="0">')
- for c in s.clusters:
- for b in c.bindings:
- self._dump('<tr><td align="left">%s</td>'
- '<td align="left">%s</td>'
- '<td align="left">%s</td>'
- '<td align="left">%s</td></tr>'
- % (c.base_region, b.offset,
- '(<i>Default</i>)' if b.kind == 'Default'
- else '',
- b.value))
+ def dump_binding(s, c, b, is_added=None):
+ self._dump('<tr><td>%s</td>'
+ '<td align="left">%s</td>'
+ '<td align="left">%s</td>'
+ '<td align="left">%s</td>'
+ '<td align="left">%s</td></tr>'
+ % (self._diff_plus_minus(is_added),
+ s.clusters[c].base_region, b.offset,
+ '(<i>Default</i>)' if b.kind == 'Default'
+ else '',
+ s.clusters[c].bindings[b]))
+
+ if prev_s is not None:
+ clusters_removed, clusters_added, clusters_updated = \
+ s.diff_clusters(prev_s)
+ for c in clusters_removed:
+ for b in prev_s.clusters[c].bindings:
+ dump_binding(prev_s, c, b, False)
+ for c in clusters_updated:
+ bindings_removed, bindings_added = \
+ s.clusters[c].diff_bindings(prev_s.clusters[c])
+ for b in bindings_removed:
+ dump_binding(prev_s, c, b, False)
+ for b in bindings_added:
+ dump_binding(s, c, b, True)
+ for c in clusters_added:
+ for b in s.clusters[c].bindings:
+ dump_binding(s, c, b, True)
+ else:
+ for c in s.clusters:
+ for b in s.clusters[c].bindings:
+ dump_binding(s, c, b)
self._dump('</table>')
- def visit_state(self, s):
- self._dump('<tr><td align="left">'
- '<b>Store: </b>')
+ def visit_state(self, s, prev_s):
+ # == Store ==
+ self._dump('<tr><td align="left"><b>Store: </b>')
if s.store is None:
self._dump('<i> Nothing!</i>')
else:
- self._dump('</td></tr>'
- '<tr><td align="left">')
- self.visit_store(s.store)
+ if prev_s is not None and prev_s.store is not None:
+ if s.store.is_different(prev_s.store):
+ self._dump('</td></tr><tr><td align="left">')
+ self.visit_store(s.store, prev_s.store)
+ else:
+ self._dump('<i> No changes!</i>')
+ else:
+ self._dump('</td></tr><tr><td align="left">')
+ self.visit_store(s.store)
+ self._dump('</td></tr><hr />')
- self._dump('</td></tr><hr />'
- '<tr><td align="left">'
+ # == Environment ==
+ self._dump('<tr><td align="left">'
'<b>Environment: </b>')
if s.environment is None:
self._dump('<i> Nothing!</i>')
else:
- self._dump('</td></tr>'
- '<tr><td align="left">')
- self.visit_environment(s.environment)
+ if prev_s is not None and prev_s.environment is not None:
+ if s.environment.is_different(prev_s.environment):
+ self._dump('</td></tr><tr><td align="left">')
+ self.visit_environment(s.environment, prev_s.environment)
+ else:
+ self._dump('<i> No changes!</i>')
+ else:
+ self._dump('</td></tr><tr><td align="left">')
+ self.visit_environment(s.environment)
self._dump('</td></tr>')
@@ -353,7 +503,14 @@ class DotDumpVisitor(object):
if node.state is not None:
self._dump('<hr />')
- self.visit_state(node.state)
+ prev_s = None
+ # Do diffs only when we have a unique predecessor.
+ # Don't do diffs on the leaf nodes because they're
+ # the important ones.
+ if self._do_diffs and len(node.predecessors) == 1 \
+ and len(node.successors) > 0:
+ prev_s = self._graph.nodes[node.predecessors[0]].state
+ self.visit_state(node.state, prev_s)
self._dump_raw('</table>>];\n')
def visit_edge(self, pred, succ):
@@ -383,13 +540,13 @@ class Explorer(object):
def main():
parser = argparse.ArgumentParser()
parser.add_argument('filename', type=str)
- parser.add_argument('-d', '--debug', action='store_const', dest='loglevel',
- const=logging.DEBUG, default=logging.WARNING,
- help='enable debug prints')
parser.add_argument('-v', '--verbose', action='store_const',
- dest='loglevel', const=logging.INFO,
+ dest='loglevel', const=logging.DEBUG,
default=logging.WARNING,
help='enable info prints')
+ parser.add_argument('-d', '--diff', action='store_const', dest='diff',
+ const=True, default=False,
+ help='display differences between states')
args = parser.parse_args()
logging.basicConfig(level=args.loglevel)
@@ -400,7 +557,7 @@ def main():
graph.add_raw_line(raw_line)
explorer = Explorer()
- visitor = DotDumpVisitor()
+ visitor = DotDumpVisitor(args.diff)
explorer.explore(graph, visitor)