summaryrefslogtreecommitdiff
path: root/Tools/c-analyzer/c_symbols/binary.py
blob: e125dbd5b5edc53703976cf734843124c43022ad (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import os
import os.path
import shutil
import sys

from c_analyzer_common import util, info
from . import source
from .info import Symbol


# The binary that iter_symbols() inspects by default: the interpreter
# that is currently running this code.
#PYTHON = os.path.join(REPO_ROOT, 'python')
PYTHON = sys.executable


def iter_symbols(binary=PYTHON, dirnames=None, *,
                 # Alternately, use look_up_known_symbol()
                 # from c_globals.supported.
                 find_local_symbol=source.find_symbol,
                 _file_exists=os.path.exists,
                 _iter_symbols_nm=(lambda b, *a: _iter_symbols_nm(b, *a)),
                 ):
    """Generate a Symbol for every symbol discovered in the given binary.

    "binary" is the path of the executable to inspect.  "dirnames" is
    handed through to "find_local_symbol" whenever a local symbol has to
    be looked up.  If "find_local_symbol" is false-ish then no lookup
    function is passed along at all.

    This is a generator, so the errors below surface when iteration
    starts rather than when the function is called:

    * Exception if the binary does not exist
    * NotImplementedError on Windows (os.name == 'nt')
    """
    if not _file_exists(binary):
        raise Exception('executable missing (need to build it first?)')

    # Wrap the finder so that every lookup shares "dirnames" and a single
    # per-file cache for the lifetime of this call.
    local_finder = None
    if find_local_symbol:
        perfile_cache = {}

        def local_finder(name, *, _find=find_local_symbol):
            return _find(name, dirnames, _perfilecache=perfile_cache)

    if os.name == 'nt':
        # XXX Support this.
        raise NotImplementedError

    yield from _iter_symbols_nm(binary, local_finder)


#############################
# binary format (e.g. ELF)

# Exact symbol names that _is_special_symbol() reports as "special".
# (Presumably these are generated by the toolchain rather than declared
# in source files -- confirm before relying on that.)
SPECIAL_SYMBOLS = {
    '_DYNAMIC',
    '_GLOBAL_OFFSET_TABLE_',
    '__JCR_END__',
    '__JCR_LIST__',
    '__TMC_END__',
    '__bss_start',
    '__data_start',
    '__dso_handle',
    '__environ@@GLIBC_2.2.5',
    '_edata',
    '_end',
}


def _is_special_symbol(name):
    """Return True if the symbol name should be treated as "special".

    A name qualifies when it is listed in SPECIAL_SYMBOLS or when it
    contains the "@@GLIBC" marker.
    """
    return name in SPECIAL_SYMBOLS or '@@GLIBC' in name


#############################
# "nm"

# Maps the lower-cased "nm" symbol type letter to a Symbol kind.  Letters
# that are not listed here end up as Symbol.KIND.OTHER when an nm line
# is parsed.
NM_KINDS = {
        'b': Symbol.KIND.VARIABLE,  # BSS section (uninitialized data)
        'd': Symbol.KIND.VARIABLE,  # initialized data section
        # The "small object" data sections are not mapped (disabled):
        #'g': Symbol.KIND.VARIABLE,  # initialized (small objects)
        #'s': Symbol.KIND.VARIABLE,  # uninitialized (small objects)
        't': Symbol.KIND.FUNCTION,  # text (code) section
        }


def _iter_symbols_nm(binary, find_local_symbol=None,
                     *,
                     _which=shutil.which,
                     _run=util.run_cmd,
                     ):
    """Yield a Symbol for each variable that "nm" reports for the binary.

    "nm --line-numbers <binary>" is run and every line of its output is
    parsed.  Only symbols whose kind is Symbol.KIND.VARIABLE are yielded,
    and the ones recognized by _is_special_symbol() are skipped.

    "find_local_symbol", if given, is passed on to the line parser, which
    calls it with the name of each local symbol.

    Raises NotImplementedError if no "nm" executable can be found.  Any
    error raised while running the command propagates unchanged.
    """
    nm = _which('nm')
    if not nm:
        # XXX Use dumpbin.exe /SYMBOLS on Windows.
        raise NotImplementedError
    argv = [nm,
            '--line-numbers',
            binary,
            ]
    # "nm" is known to be set at this point, so a failure here is a real
    # failure of the command and is left to propagate as-is.
    output = _run(argv)
    for line in output.splitlines():
        (name, kind, external, filename, funcname, vartype,
         ) = _parse_nm_line(line,
                            _find_local_symbol=find_local_symbol,
                            )
        if kind != Symbol.KIND.VARIABLE:
            continue
        elif _is_special_symbol(name):
            continue
        # NOTE(review): the Symbol below is built without "vartype", so
        # this guards against silently dropping one; a finder that
        # returns a vartype would trip it -- confirm that is intended.
        assert vartype is None
        yield Symbol(
                id=(filename, funcname, name),
                kind=kind,
                external=external,
                )


def _parse_nm_line(line, *, _find_local_symbol=None):
    """Parse a single line of "nm --line-numbers" output.

    The line is split as "<address> <type> <name>", optionally followed
    by a tab and "<filename>:<lineno>".

    Returns the tuple (name, kind, external, filename, funcname, vartype):

    * kind is looked up in NM_KINDS using the lower-cased type letter
      (Symbol.KIND.OTHER if the letter is not listed)
    * external is True if the type letter is upper case
    * filename is made relative to the current directory, or is
      info.UNKNOWN if the line does not include one
    * funcname is None unless the name is recognized as a local symbol

    For a local symbol "_find_local_symbol" (if provided) is called with
    the name and must return (filename, funcname, vartype); an empty
    filename or funcname in the result is replaced with info.UNKNOWN.
    """
    _, _, line = line.partition(' ')  # strip off the address
    line = line.strip()

    kind, _, line = line.partition(' ')
    line = line.strip()
    # Capture the case of the type letter before it is mapped to a kind.
    external = kind.isupper()
    kind = NM_KINDS.get(kind.lower(), Symbol.KIND.OTHER)

    name, _, filename = line.partition('\t')
    name = name.strip()
    if filename:
        # Keep only the part before the first ":" (drops the line number).
        filename = os.path.relpath(filename.partition(':')[0])
    else:
        filename = info.UNKNOWN

    vartype = None
    name, islocal = _parse_nm_name(name, kind)
    if islocal:
        funcname = info.UNKNOWN
        if _find_local_symbol is not None:
            # The finder's answer replaces the filename reported by nm.
            filename, funcname, vartype = _find_local_symbol(name)
            filename = filename or info.UNKNOWN
            funcname = funcname or info.UNKNOWN
    else:
        funcname = None
        # XXX find filename and vartype?
    return name, kind, external, filename, funcname, vartype


def _parse_nm_name(name, kind):
    """Split a symbol name from nm into (actual name, islocal).

    Names that are not variables, or that are "special" symbols, are
    returned untouched with islocal set to None.  Otherwise a name of the
    form "<actual>.<digits>" is reported as local (with the suffix
    removed) and a name without any "." is reported as not local.

    Raises Exception if the text after the first "." is not all digits.
    """
    if kind != Symbol.KIND.VARIABLE or _is_special_symbol(name):
        return name, None

    base, dot, suffix = name.partition('.')
    if not dot:
        return name, False
    if suffix.isdigit():
        return base, True
    raise Exception(f'got bogus name {name}')