1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
|
import os
import os.path
import shutil
import sys
from c_analyzer_common import util, info
from . import source
from .info import Symbol
#PYTHON = os.path.join(REPO_ROOT, 'python')
# Default value of iter_symbols()'s "binary" parameter: the executable
# of the interpreter that is running this module.
PYTHON = sys.executable
def iter_symbols(binary=PYTHON, dirnames=None, *,
                 # Alternately, use look_up_known_symbol()
                 # from c_globals.supported.
                 find_local_symbol=source.find_symbol,
                 _file_exists=os.path.exists,
                 _iter_symbols_nm=(lambda b, *a: _iter_symbols_nm(b, *a)),
                 ):
    """Yield a Symbol for each symbol found in the binary.

    binary is the file handed to the "nm"-based iterator.  dirnames is
    forwarded, untouched, as the second positional argument of
    find_local_symbol.  A falsy find_local_symbol disables the local
    symbol lookup entirely.

    This is a generator, so the checks below only run once iteration
    starts.  Exception is raised if the binary does not exist, and
    NotImplementedError if os.name is 'nt'.
    """
    if not _file_exists(binary):
        raise Exception('executable missing (need to build it first?)')

    if find_local_symbol:
        # One cache dict is shared by every lookup made during this call.
        perfile_cache = {}
        finder = find_local_symbol

        def lookup(name):
            return finder(name, dirnames, _perfilecache=perfile_cache)
    else:
        lookup = None

    if os.name == 'nt':
        # XXX Support this.
        raise NotImplementedError
    yield from _iter_symbols_nm(binary, lookup)
#############################
# binary format (e.g. ELF)
# Symbol names that _is_special_symbol() matches exactly.
# _iter_symbols_nm() skips every symbol for which that check is true.
# NOTE(review): '__environ@@GLIBC_2.2.5' is also covered by the
# '@@GLIBC' substring test in _is_special_symbol().
SPECIAL_SYMBOLS = {
    '__bss_start',
    '__data_start',
    '__dso_handle',
    '_DYNAMIC',
    '_edata',
    '_end',
    '__environ@@GLIBC_2.2.5',
    '_GLOBAL_OFFSET_TABLE_',
    '__JCR_END__',
    '__JCR_LIST__',
    '__TMC_END__',
}
def _is_special_symbol(name):
    """Return True if name is a special symbol, else False.

    A name is special when it is listed in SPECIAL_SYMBOLS or when it
    contains the substring '@@GLIBC'.
    """
    return name in SPECIAL_SYMBOLS or '@@GLIBC' in name
#############################
# "nm"
# Maps the lowercased "nm" symbol-type letter to a Symbol.KIND.
# _parse_nm_line() lowercases the letter before this lookup (the
# letter's case is only used to set the "external" flag) and falls back
# to Symbol.KIND.OTHER for any letter that is not listed here.
NM_KINDS = {
    'b': Symbol.KIND.VARIABLE,  # uninitialized
    'd': Symbol.KIND.VARIABLE,  # initialized
    #'g': Symbol.KIND.VARIABLE,  # uninitialized
    #'s': Symbol.KIND.VARIABLE,  # initialized
    't': Symbol.KIND.FUNCTION,
}
def _iter_symbols_nm(binary, find_local_symbol=None,
                     *,
                     _which=shutil.which,
                     _run=util.run_cmd,
                     ):
    """Yield a Symbol for each non-special variable "nm" reports.

    "nm --line-numbers <binary>" is run and every output line is parsed
    with _parse_nm_line().  Only entries whose kind is
    Symbol.KIND.VARIABLE and whose name is not special (see
    _is_special_symbol()) are yielded.

    find_local_symbol, if given, is passed through to _parse_nm_line()
    as its _find_local_symbol argument.

    Raises NotImplementedError if _which('nm') returns a falsy value.
    Anything raised by _run() propagates unchanged.
    """
    nm = _which('nm')
    if not nm:
        # XXX Use dumpbin.exe /SYMBOLS on Windows.
        raise NotImplementedError('the "nm" tool could not be located')

    # The original wrapped this call in a try/except that checked
    # "nm is None"; that can never be true past the guard above, so the
    # handler only re-raised.  Errors from _run() simply propagate.
    output = _run([nm, '--line-numbers', binary])

    for line in output.splitlines():
        (name, kind, external, filename, funcname, vartype,
         ) = _parse_nm_line(line, _find_local_symbol=find_local_symbol)
        if kind != Symbol.KIND.VARIABLE:
            continue
        if _is_special_symbol(name):
            continue
        # NOTE(review): _parse_nm_line() returns whatever vartype the
        # find_local_symbol callback supplies, so this assertion fails
        # if the callback ever reports a type -- confirm the intent.
        assert vartype is None
        yield Symbol(
            id=(filename, funcname, name),
            kind=kind,
            external=external,
        )
def _parse_nm_line(line, *, _find_local_symbol=None):
    """Parse one line of "nm" output into its parts.

    The line is read as "<address> <kind> <name>", optionally followed
    by a TAB and "<filename>:<rest>"; everything from the first ':' on
    is discarded.

    Returns the tuple (name, kind, external, filename, funcname, vartype):

    * kind is looked up in NM_KINDS by the lowercased kind letter and
      defaults to Symbol.KIND.OTHER.
    * external is True when the kind letter is uppercase.
    * filename is made relative with os.path.relpath(), or is
      info.UNKNOWN when the line carries none.
    * funcname is None unless _parse_nm_name() reports the name as
      local; then it is info.UNKNOWN, or whatever _find_local_symbol
      returns (with falsy values replaced by info.UNKNOWN).
    * vartype is None unless _find_local_symbol supplies one.

    _find_local_symbol, if given, is called with the parsed name and
    must return (filename, funcname, vartype).
    """
    _, _, line = line.partition(' ')  # strip off the address
    line = line.strip()
    kind, _, line = line.partition(' ')
    line = line.strip()
    external = kind.isupper()
    kind = NM_KINDS.get(kind.lower(), Symbol.KIND.OTHER)

    name, _, filename = line.partition('\t')
    name = name.strip()
    if filename:
        filename = os.path.relpath(filename.partition(':')[0])
    else:
        filename = info.UNKNOWN

    vartype = None
    name, islocal = _parse_nm_name(name, kind)
    if islocal:
        funcname = info.UNKNOWN
        if _find_local_symbol is not None:
            # The lookup result replaces the filename parsed from the line.
            filename, funcname, vartype = _find_local_symbol(name)
            filename = filename or info.UNKNOWN
            funcname = funcname or info.UNKNOWN
    else:
        funcname = None
        # XXX find filename and vartype?
    return name, kind, external, filename, funcname, vartype
def _parse_nm_name(name, kind):
    """Return (name, islocal) for a symbol name reported by "nm".

    For anything that is not a Symbol.KIND.VARIABLE, and for special
    symbols, the name is returned unchanged with islocal set to None.

    Otherwise the name is split at its first '.'.  Without a '.', the
    result is (name, False).  When the text after the '.' consists only
    of digits, the result is (<text before the '.'>, True).  Any other
    suffix raises Exception.
    """
    if kind != Symbol.KIND.VARIABLE or _is_special_symbol(name):
        return name, None

    base, dot, suffix = name.partition('.')
    if dot and suffix.isdigit():
        return base, True
    if dot:
        raise Exception(f'got bogus name {name}')
    return name, False
|