contrib/analyze_brprob.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182

#!/usr/bin/env python3
#
# Script to analyze results of our branch prediction heuristics
#
# This file is part of GCC.
#
# GCC is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 3, or (at your option) any later
# version.
#
# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCC; see the file COPYING3.  If not see
# <http://www.gnu.org/licenses/>.
#
#
#
# This script is used to calculate two basic properties of the branch prediction
# heuristics - coverage and hitrate.  Coverage is number of executions
# of a given branch matched by the heuristics and hitrate is probability
# that once branch is predicted as taken it is really taken.
#
# These values are useful to determine the quality of given heuristics.
# Hitrate may be directly used in predict.def.
#
# Usage:
#  Step 1: Compile and profile your program.  You need to use -fprofile-generate
#    flag to get the profiles.
#  Step 2: Make a reference run of the instrumented application.
#  Step 3: Compile the program with collected profile and dump IPA profiles
#          (-fprofile-use -fdump-ipa-profile-details)
#  Step 4: Collect all generated dump files:
#          find . -name '*.profile' | xargs cat > dump_file
#  Step 5: Run the script:
#          ./analyze_brprob.py dump_file
#          and read results.  Basically the following table is printed:
#
# HEURISTICS                           BRANCHES  (REL)  HITRATE                COVERAGE  (REL)
# early return (on trees)                     3   0.2%  35.83% /  93.64%          66360   0.0%
# guess loop iv compare                       8   0.6%  53.35% /  53.73%       11183344   0.0%
# call                                       18   1.4%  31.95% /  69.95%       51880179   0.2%
# loop guard                                 23   1.8%  84.13% /  84.85%    13749065956  42.2%
# opcode values positive (on trees)          42   3.3%  15.71% /  84.81%     6771097902  20.8%
# opcode values nonequal (on trees)         226  17.6%  72.48% /  72.84%      844753864   2.6%
# loop exit                                 231  18.0%  86.97% /  86.98%     8952666897  27.5%
# loop iterations                           239  18.6%  91.10% /  91.10%     3062707264   9.4%
# DS theory                                 281  21.9%  82.08% /  83.39%     7787264075  23.9%
# no prediction                             293  22.9%  46.92% /  70.70%     2293267840   7.0%
# guessed loop iterations                   313  24.4%  76.41% /  76.41%    10782750177  33.1%
# first match                               708  55.2%  82.30% /  82.31%    22489588691  69.0%
# combined                                 1282 100.0%  79.76% /  81.75%    32570120606 100.0%
#
#
#  The heuristics called "first match" is a heuristics used by GCC branch
#  prediction pass and it predicts 55.2% of branches correctly. As you can see,
#  the heuristics has very good coverage (69.05%).  On the other hand,
#  "opcode values nonequal (on trees)" heuristics has good hitrate, but poor
#  coverage.

import sys
import os
import re
import argparse

from math import *

def percentage(a, b):
    """Return a expressed as a percentage of b.

    A zero denominator yields 0.0 instead of raising ZeroDivisionError, so
    callers may pass totals that are zero (for example the execution count
    of a heuristic whose branches were never executed).
    """
    if b == 0:
        return 0.0
    return 100.0 * a / b

def average(values):
    """Return the arithmetic mean of values as a float.

    values must be non-empty; an empty sequence raises ZeroDivisionError.
    """
    total = sum(values)
    n = len(values)
    # Multiply by 1.0 first so the division is done in floating point.
    return (1.0 * total) / n

def average_cutoff(values, cut):
    """Return the mean of values after trimming both ends.

    cut is the total fraction of elements to discard: half of it is dropped
    from the smallest values and half from the largest.  When that fraction
    rounds down to zero elements per side, the plain average of all values
    is returned.

    The caller's list is left untouched: trimming is done on a sorted copy
    rather than by sorting the argument in place.
    """
    skip = floor(len(values) * cut / 2)
    if skip > 0:
        # Drop the `skip` smallest and the `skip` largest elements.
        values = sorted(values)[skip:-skip]
    return average(values)

def median(values):
    """Return the median element of values.

    For an even number of elements the upper of the two middle values is
    returned (the two are not averaged).  An empty sequence raises
    IndexError.

    The lookup is done on a sorted copy, so the caller's list keeps its
    original order.
    """
    ordered = sorted(values)
    return ordered[len(ordered) // 2]

class Summary:
    """Accumulated statistics for a single branch prediction heuristic.

    All counters start at zero and are updated by Profile.add().
    """

    def __init__(self, name):
        self.name = name
        # Number of branch records accounted to this heuristic.
        self.branches = 0
        # Sum of the execution counts of those branches.
        self.count = 0
        # Sum of the (direction-adjusted) hit counts.
        self.hits = 0
        # Sum of max(hits, count - hits) over the branches.
        self.fits = 0

    def get_hitrate(self):
        """Return hits / count as a fraction (not a percentage).

        Returns 0.0 when nothing was executed, so the value can be used as
        a sort key even for heuristics with a zero execution count.
        """
        if self.count == 0:
            return 0.0
        return self.hits / self.count

    def count_formatted(self):
        """Return the execution count scaled by powers of 1000 with a unit suffix."""
        v = self.count
        for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']:
            if v < 1000:
                return "%3.2f%s" % (v, unit)
            v /= 1000.0
        # Anything still >= 1000 after 'Z' is reported in 'Y'.
        return "%.1f%s" % (v, 'Y')

class Profile:
    """Per-heuristic branch statistics and loop iteration counts of one dump."""

    def __init__(self, filename):
        self.filename = filename
        # Maps heuristic name -> Summary, filled in by add().
        self.heuristics = {}
        # Positive loop iteration counts, filled in by add_loop_niter().
        self.niter_vector = []

    def add(self, name, prediction, count, hits):
        """Account one branch record to the heuristic called name.

        prediction is compared against 50: below that threshold the hit
        count is inverted (count - hits) before it is accumulated.  count
        is added to the heuristic's execution count, and the larger of the
        adjusted hits and its complement is added to `fits`.
        """
        if name not in self.heuristics:
            self.heuristics[name] = Summary(name)

        s = self.heuristics[name]
        s.branches += 1
        s.count += count
        if prediction < 50:
            hits = count - hits
        s.hits += hits
        s.fits += max(hits, count - hits)

    def add_loop_niter(self, niter):
        """Record a loop iteration count; non-positive values are ignored."""
        if niter > 0:
            self.niter_vector.append(niter)

    def branches_max(self):
        """Return the largest branch count of any heuristic (0 if none)."""
        return max((s.branches for s in self.heuristics.values()), default=0)

    def count_max(self):
        """Return the largest execution count of any heuristic (0 if none)."""
        return max((s.count for s in self.heuristics.values()), default=0)

    def dump(self, sorting):
        """Print the heuristics table and, if available, loop statistics.

        sorting selects the ascending sort key: 'hitrate' or 'coverage';
        any other value sorts by the number of branches.
        """
        if sorting == 'hitrate':
            sorter = lambda x: x[1].get_hitrate()
        elif sorting == 'coverage':
            sorter = lambda x: x[1].count
        else:
            sorter = lambda x: x[1].branches

        print('%-40s %8s %6s  %-16s %14s %8s %6s' % ('HEURISTICS', 'BRANCHES', '(REL)',
              'HITRATE', 'COVERAGE', 'COVERAGE', '(REL)'))

        # The maxima do not change while printing, so compute them once
        # instead of rescanning every heuristic for every row.
        max_branches = self.branches_max()
        max_count = self.count_max()
        for (k, v) in sorted(self.heuristics.items(), key = sorter):
            print('%-40s %8i %5.1f%% %6.2f%% / %6.2f%% %14i %8s %5.1f%%' %
            (k, v.branches, percentage(v.branches, max_branches),
             percentage(v.hits, v.count), percentage(v.fits, v.count),
             v.count, v.count_formatted(), percentage(v.count, max_count)))

        if self.niter_vector:
            print('\nLoop count: %d' % len(self.niter_vector))
            print('  avg. # of iter: %.2f' % average(self.niter_vector))
            print('  median # of iter: %.2f' % median(self.niter_vector))
            for v in [1, 5, 10, 20, 30]:
                cut = 0.01 * v
                print('  avg. (%d%% cutoff) # of iter: %.2f' % (v, average_cutoff(self.niter_vector, cut)))

# Command line: the dump file to analyze plus an optional sort key.
parser = argparse.ArgumentParser()
parser.add_argument('dump_file', metavar = 'dump_file', help = 'IPA profile dump file')
parser.add_argument('-s', '--sorting', dest = 'sorting', choices = ['branches', 'hitrate', 'coverage'], default = 'branches')

args = parser.parse_args()

# Use the parsed positional rather than sys.argv[1], which is an option
# string when the options come first (e.g. "-s hitrate dump_file").
profile = Profile(args.dump_file)

# Groups: 1 = heuristic name, 2 = optional " of edge A->B",
# 3 = optional parenthesized annotation, 4 = prediction in percent,
# 5 = execution count, 6 = hit count.
r = re.compile(r'  (.*) heuristics( of edge [0-9]*->[0-9]*)?( \(.*\))?: (.*)%.*exec ([0-9]*) hit ([0-9]*)')
loop_niter_str = ';;  profile-based iteration count: '

# Stream the dump line by line; the with-block closes the file afterwards.
with open(args.dump_file) as dump_file:
    for line in dump_file:
        m = r.match(line)
        # Records carrying a parenthesized annotation (group 3) are skipped.
        if m is not None and m.group(3) is None:
            name = m.group(1)
            prediction = float(m.group(4))
            count = int(m.group(5))
            hits = int(m.group(6))

            profile.add(name, prediction, count, hits)
        elif line.startswith(loop_niter_str):
            profile.add_loop_niter(int(line[len(loop_niter_str):]))

profile.dump(args.sorting)