summaryrefslogtreecommitdiff
path: root/Tools/c-analyzer/cpython/_generate.py
diff options
context:
space:
mode:
authorEric Snow <ericsnowcurrently@gmail.com>2020-10-22 18:42:51 -0600
committerGitHub <noreply@github.com>2020-10-22 18:42:51 -0600
commit345cd37abe324ad4f60f80e2c3133b8849e54e9b (patch)
tree5d965e662dca9dcac19e7eddd63a3d9d0b816fed /Tools/c-analyzer/cpython/_generate.py
parentec388cfb4ede56dace2bb78851ff6f38fa2a6abe (diff)
downloadcpython-git-345cd37abe324ad4f60f80e2c3133b8849e54e9b.tar.gz
bpo-36876: Fix the C analyzer tool. (GH-22841)
The original tool wasn't working right and it was simpler to create a new one, partially re-using some of the old code. At this point the tool runs properly on the master. (Try: ./python Tools/c-analyzer/c-analyzer.py analyze.) It take ~40 seconds on my machine to analyze the full CPython code base. Note that we'll need to iron out some OS-specific stuff (e.g. preprocessor). We're okay though since this tool isn't used yet in our workflow. We will also need to verify the analysis results in detail before activating the check in CI, though I'm pretty sure it's close. https://bugs.python.org/issue36876
Diffstat (limited to 'Tools/c-analyzer/cpython/_generate.py')
-rw-r--r--Tools/c-analyzer/cpython/_generate.py326
1 files changed, 0 insertions, 326 deletions
diff --git a/Tools/c-analyzer/cpython/_generate.py b/Tools/c-analyzer/cpython/_generate.py
deleted file mode 100644
index 3456604b81..0000000000
--- a/Tools/c-analyzer/cpython/_generate.py
+++ /dev/null
@@ -1,326 +0,0 @@
-# The code here consists of hacks for pre-populating the known.tsv file.
-
-from c_analyzer.parser.preprocessor import _iter_clean_lines
-from c_analyzer.parser.naive import (
- iter_variables, parse_variable_declaration, find_variables,
- )
-from c_analyzer.common.known import HEADER as KNOWN_HEADER
-from c_analyzer.common.info import UNKNOWN, ID
-from c_analyzer.variables import Variable
-from c_analyzer.util import write_tsv
-
-from . import SOURCE_DIRS, REPO_ROOT
-from .known import DATA_FILE as KNOWN_FILE
-from .files import iter_cpython_files
-
-
-POTS = ('char ', 'wchar_t ', 'int ', 'Py_ssize_t ')
-POTS += tuple('const ' + v for v in POTS)
-STRUCTS = ('PyTypeObject', 'PyObject', 'PyMethodDef', 'PyModuleDef', 'grammar')
-
-
-def _parse_global(line, funcname=None):
- line = line.strip()
- if line.startswith('static '):
- if '(' in line and '[' not in line and ' = ' not in line:
- return None, None
- name, decl = parse_variable_declaration(line)
- elif line.startswith(('Py_LOCAL(', 'Py_LOCAL_INLINE(')):
- name, decl = parse_variable_declaration(line)
- elif line.startswith('_Py_static_string('):
- decl = line.strip(';').strip()
- name = line.split('(')[1].split(',')[0].strip()
- elif line.startswith('_Py_IDENTIFIER('):
- decl = line.strip(';').strip()
- name = 'PyId_' + line.split('(')[1].split(')')[0].strip()
- elif funcname:
- return None, None
-
- # global-only
- elif line.startswith('PyAPI_DATA('): # only in .h files
- name, decl = parse_variable_declaration(line)
- elif line.startswith('extern '): # only in .h files
- name, decl = parse_variable_declaration(line)
- elif line.startswith('PyDoc_VAR('):
- decl = line.strip(';').strip()
- name = line.split('(')[1].split(')')[0].strip()
- elif line.startswith(POTS): # implied static
- if '(' in line and '[' not in line and ' = ' not in line:
- return None, None
- name, decl = parse_variable_declaration(line)
- elif line.startswith(STRUCTS) and line.endswith(' = {'): # implied static
- name, decl = parse_variable_declaration(line)
- elif line.startswith(STRUCTS) and line.endswith(' = NULL;'): # implied static
- name, decl = parse_variable_declaration(line)
- elif line.startswith('struct '):
- if not line.endswith(' = {'):
- return None, None
- if not line.partition(' ')[2].startswith(STRUCTS):
- return None, None
- # implied static
- name, decl = parse_variable_declaration(line)
-
- # file-specific
- elif line.startswith(('SLOT1BINFULL(', 'SLOT1BIN(')):
- # Objects/typeobject.c
- funcname = line.split('(')[1].split(',')[0]
- return [
- ('op_id', funcname, '_Py_static_string(op_id, OPSTR)'),
- ('rop_id', funcname, '_Py_static_string(op_id, OPSTR)'),
- ]
- elif line.startswith('WRAP_METHOD('):
- # Objects/weakrefobject.c
- funcname, name = (v.strip() for v in line.split('(')[1].split(')')[0].split(','))
- return [
- ('PyId_' + name, funcname, f'_Py_IDENTIFIER({name})'),
- ]
-
- else:
- return None, None
- return name, decl
-
-
-def _pop_cached(varcache, filename, funcname, name, *,
- _iter_variables=iter_variables,
- ):
- # Look for the file.
- try:
- cached = varcache[filename]
- except KeyError:
- cached = varcache[filename] = {}
- for variable in _iter_variables(filename,
- parse_variable=_parse_global,
- ):
- variable._isglobal = True
- cached[variable.id] = variable
- for var in cached:
- print(' ', var)
-
- # Look for the variable.
- if funcname == UNKNOWN:
- for varid in cached:
- if varid.name == name:
- break
- else:
- return None
- return cached.pop(varid)
- else:
- return cached.pop((filename, funcname, name), None)
-
-
-def find_matching_variable(varid, varcache, allfilenames, *,
- _pop_cached=_pop_cached,
- ):
- if varid.filename and varid.filename != UNKNOWN:
- filenames = [varid.filename]
- else:
- filenames = allfilenames
- for filename in filenames:
- variable = _pop_cached(varcache, filename, varid.funcname, varid.name)
- if variable is not None:
- return variable
- else:
- if varid.filename and varid.filename != UNKNOWN and varid.funcname is None:
- for filename in allfilenames:
- if not filename.endswith('.h'):
- continue
- variable = _pop_cached(varcache, filename, None, varid.name)
- if variable is not None:
- return variable
- return None
-
-
-MULTILINE = {
- # Python/Python-ast.c
- 'Load_singleton': 'PyObject *',
- 'Store_singleton': 'PyObject *',
- 'Del_singleton': 'PyObject *',
- 'AugLoad_singleton': 'PyObject *',
- 'AugStore_singleton': 'PyObject *',
- 'Param_singleton': 'PyObject *',
- 'And_singleton': 'PyObject *',
- 'Or_singleton': 'PyObject *',
- 'Add_singleton': 'static PyObject *',
- 'Sub_singleton': 'static PyObject *',
- 'Mult_singleton': 'static PyObject *',
- 'MatMult_singleton': 'static PyObject *',
- 'Div_singleton': 'static PyObject *',
- 'Mod_singleton': 'static PyObject *',
- 'Pow_singleton': 'static PyObject *',
- 'LShift_singleton': 'static PyObject *',
- 'RShift_singleton': 'static PyObject *',
- 'BitOr_singleton': 'static PyObject *',
- 'BitXor_singleton': 'static PyObject *',
- 'BitAnd_singleton': 'static PyObject *',
- 'FloorDiv_singleton': 'static PyObject *',
- 'Invert_singleton': 'static PyObject *',
- 'Not_singleton': 'static PyObject *',
- 'UAdd_singleton': 'static PyObject *',
- 'USub_singleton': 'static PyObject *',
- 'Eq_singleton': 'static PyObject *',
- 'NotEq_singleton': 'static PyObject *',
- 'Lt_singleton': 'static PyObject *',
- 'LtE_singleton': 'static PyObject *',
- 'Gt_singleton': 'static PyObject *',
- 'GtE_singleton': 'static PyObject *',
- 'Is_singleton': 'static PyObject *',
- 'IsNot_singleton': 'static PyObject *',
- 'In_singleton': 'static PyObject *',
- 'NotIn_singleton': 'static PyObject *',
- # Python/symtable.c
- 'top': 'static identifier ',
- 'lambda': 'static identifier ',
- 'genexpr': 'static identifier ',
- 'listcomp': 'static identifier ',
- 'setcomp': 'static identifier ',
- 'dictcomp': 'static identifier ',
- '__class__': 'static identifier ',
- # Python/compile.c
- '__doc__': 'static PyObject *',
- '__annotations__': 'static PyObject *',
- # Objects/floatobject.c
- 'double_format': 'static float_format_type ',
- 'float_format': 'static float_format_type ',
- 'detected_double_format': 'static float_format_type ',
- 'detected_float_format': 'static float_format_type ',
- # Python/dtoa.c
- 'private_mem': 'static double private_mem[PRIVATE_mem]',
- 'pmem_next': 'static double *',
- # Modules/_weakref.c
- 'weakref_functions': 'static PyMethodDef ',
-}
-INLINE = {
- # Modules/_tracemalloc.c
- 'allocators': 'static struct { PyMemAllocatorEx mem; PyMemAllocatorEx raw; PyMemAllocatorEx obj; } ',
- # Modules/faulthandler.c
- 'fatal_error': 'static struct { int enabled; PyObject *file; int fd; int all_threads; PyInterpreterState *interp; void *exc_handler; } ',
- 'thread': 'static struct { PyObject *file; int fd; PY_TIMEOUT_T timeout_us; int repeat; PyInterpreterState *interp; int exit; char *header; size_t header_len; PyThread_type_lock cancel_event; PyThread_type_lock running; } ',
- # Modules/signalmodule.c
- 'Handlers': 'static volatile struct { _Py_atomic_int tripped; PyObject *func; } Handlers[NSIG]',
- 'wakeup': 'static volatile struct { SOCKET_T fd; int warn_on_full_buffer; int use_send; } ',
- # Python/dynload_shlib.c
- 'handles': 'static struct { dev_t dev; ino_t ino; void *handle; } handles[128]',
- # Objects/obmalloc.c
- '_PyMem_Debug': 'static struct { debug_alloc_api_t raw; debug_alloc_api_t mem; debug_alloc_api_t obj; } ',
- # Python/bootstrap_hash.c
- 'urandom_cache': 'static struct { int fd; dev_t st_dev; ino_t st_ino; } ',
- }
-FUNC = {
- # Objects/object.c
- '_Py_abstract_hack': 'Py_ssize_t (*_Py_abstract_hack)(PyObject *)',
- # Parser/myreadline.c
- 'PyOS_InputHook': 'int (*PyOS_InputHook)(void)',
- # Python/pylifecycle.c
- '_PyOS_mystrnicmp_hack': 'int (*_PyOS_mystrnicmp_hack)(const char *, const char *, Py_ssize_t)',
- # Parser/myreadline.c
- 'PyOS_ReadlineFunctionPointer': 'char *(*PyOS_ReadlineFunctionPointer)(FILE *, FILE *, const char *)',
- }
-IMPLIED = {
- # Objects/boolobject.c
- '_Py_FalseStruct': 'static struct _longobject ',
- '_Py_TrueStruct': 'static struct _longobject ',
- # Modules/config.c
- '_PyImport_Inittab': 'struct _inittab _PyImport_Inittab[]',
- }
-GLOBALS = {}
-GLOBALS.update(MULTILINE)
-GLOBALS.update(INLINE)
-GLOBALS.update(FUNC)
-GLOBALS.update(IMPLIED)
-
-LOCALS = {
- 'buildinfo': ('Modules/getbuildinfo.c',
- 'Py_GetBuildInfo',
- 'static char buildinfo[50 + sizeof(GITVERSION) + ((sizeof(GITTAG) > sizeof(GITBRANCH)) ? sizeof(GITTAG) : sizeof(GITBRANCH))]'),
- 'methods': ('Python/codecs.c',
- '_PyCodecRegistry_Init',
- 'static struct { char *name; PyMethodDef def; } methods[]'),
- }
-
-
-def _known(symbol):
- if symbol.funcname:
- if symbol.funcname != UNKNOWN or symbol.filename != UNKNOWN:
- raise KeyError(symbol.name)
- filename, funcname, decl = LOCALS[symbol.name]
- varid = ID(filename, funcname, symbol.name)
- elif not symbol.filename or symbol.filename == UNKNOWN:
- raise KeyError(symbol.name)
- else:
- varid = symbol.id
- try:
- decl = GLOBALS[symbol.name]
- except KeyError:
-
- if symbol.name.endswith('_methods'):
- decl = 'static PyMethodDef '
- elif symbol.filename == 'Objects/exceptions.c' and symbol.name.startswith(('PyExc_', '_PyExc_')):
- decl = 'static PyTypeObject '
- else:
- raise
- if symbol.name not in decl:
- decl = decl + symbol.name
- return Variable(varid, 'static', decl)
-
-
-def known_row(varid, decl):
- return (
- varid.filename,
- varid.funcname or '-',
- varid.name,
- 'variable',
- decl,
- )
-
-
-def known_rows(symbols, *,
- cached=True,
- _get_filenames=iter_cpython_files,
- _find_match=find_matching_variable,
- _find_symbols=find_variables,
- _as_known=known_row,
- ):
- filenames = list(_get_filenames())
- cache = {}
- if cached:
- for symbol in symbols:
- try:
- found = _known(symbol)
- except KeyError:
- found = _find_match(symbol, cache, filenames)
- if found is None:
- found = Variable(symbol.id, UNKNOWN, UNKNOWN)
- yield _as_known(found.id, found.vartype)
- else:
- raise NotImplementedError # XXX incorporate KNOWN
- for variable in _find_symbols(symbols, filenames,
- srccache=cache,
- parse_variable=_parse_global,
- ):
- #variable = variable._replace(
- # filename=os.path.relpath(variable.filename, REPO_ROOT))
- if variable.funcname == UNKNOWN:
- print(variable)
- if variable.vartype== UNKNOWN:
- print(variable)
- yield _as_known(variable.id, variable.vartype)
-
-
-def generate(symbols, filename=None, *,
- _generate_rows=known_rows,
- _write_tsv=write_tsv,
- ):
- if not filename:
- filename = KNOWN_FILE + '.new'
-
- rows = _generate_rows(symbols)
- _write_tsv(filename, KNOWN_HEADER, rows)
-
-
-if __name__ == '__main__':
- from c_symbols import binary
- symbols = binary.iter_symbols(
- binary.PYTHON,
- find_local_symbol=None,
- )
- generate(symbols)