diff options
Diffstat (limited to 'Tools/c-analyzer/cpython/_generate.py')
-rw-r--r-- | Tools/c-analyzer/cpython/_generate.py | 329 |
1 files changed, 329 insertions, 0 deletions
diff --git a/Tools/c-analyzer/cpython/_generate.py b/Tools/c-analyzer/cpython/_generate.py new file mode 100644 index 0000000000..4c340acf99 --- /dev/null +++ b/Tools/c-analyzer/cpython/_generate.py @@ -0,0 +1,329 @@ +# The code here consists of hacks for pre-populating the known.tsv file. + +from c_analyzer.parser.preprocessor import _iter_clean_lines +from c_analyzer.parser.naive import ( + iter_variables, parse_variable_declaration, find_variables, + ) +from c_analyzer.common.known import HEADER as KNOWN_HEADER +from c_analyzer.common.info import UNKNOWN, ID +from c_analyzer.variables import Variable +from c_analyzer.util import write_tsv + +from . import SOURCE_DIRS, REPO_ROOT +from .known import DATA_FILE as KNOWN_FILE +from .files import iter_cpython_files + + +POTS = ('char ', 'wchar_t ', 'int ', 'Py_ssize_t ') +POTS += tuple('const ' + v for v in POTS) +STRUCTS = ('PyTypeObject', 'PyObject', 'PyMethodDef', 'PyModuleDef', 'grammar') + + +def _parse_global(line, funcname=None): + line = line.strip() + if line.startswith('static '): + if '(' in line and '[' not in line and ' = ' not in line: + return None, None + name, decl = parse_variable_declaration(line) + elif line.startswith(('Py_LOCAL(', 'Py_LOCAL_INLINE(')): + name, decl = parse_variable_declaration(line) + elif line.startswith('_Py_static_string('): + decl = line.strip(';').strip() + name = line.split('(')[1].split(',')[0].strip() + elif line.startswith('_Py_IDENTIFIER('): + decl = line.strip(';').strip() + name = 'PyId_' + line.split('(')[1].split(')')[0].strip() + elif funcname: + return None, None + + # global-only + elif line.startswith('PyAPI_DATA('): # only in .h files + name, decl = parse_variable_declaration(line) + elif line.startswith('extern '): # only in .h files + name, decl = parse_variable_declaration(line) + elif line.startswith('PyDoc_VAR('): + decl = line.strip(';').strip() + name = line.split('(')[1].split(')')[0].strip() + elif line.startswith(POTS): # implied static + if '(' in line and '[' not in line and ' = ' not in line: + return None, None + name, decl = parse_variable_declaration(line) + elif line.startswith(STRUCTS) and line.endswith(' = {'): # implied static + name, decl = parse_variable_declaration(line) + elif line.startswith(STRUCTS) and line.endswith(' = NULL;'): # implied static + name, decl = parse_variable_declaration(line) + elif line.startswith('struct '): + if not line.endswith(' = {'): + return None, None + if not line.partition(' ')[2].startswith(STRUCTS): + return None, None + # implied static + name, decl = parse_variable_declaration(line) + + # file-specific + elif line.startswith(('SLOT1BINFULL(', 'SLOT1BIN(')): + # Objects/typeobject.c + funcname = line.split('(')[1].split(',')[0] + return [ + ('op_id', funcname, '_Py_static_string(op_id, OPSTR)'), + ('rop_id', funcname, '_Py_static_string(op_id, OPSTR)'), + ] + elif line.startswith('WRAP_METHOD('): + # Objects/weakrefobject.c + funcname, name = (v.strip() for v in line.split('(')[1].split(')')[0].split(',')) + return [ + ('PyId_' + name, funcname, f'_Py_IDENTIFIER({name})'), + ] + + else: + return None, None + return name, decl + + +def _pop_cached(varcache, filename, funcname, name, *, + _iter_variables=iter_variables, + ): + # Look for the file. + try: + cached = varcache[filename] + except KeyError: + cached = varcache[filename] = {} + for variable in _iter_variables(filename, + parse_variable=_parse_global, + ): + variable._isglobal = True + cached[variable.id] = variable + for var in cached: + print(' ', var) + + # Look for the variable. + if funcname == UNKNOWN: + for varid in cached: + if varid.name == name: + break + else: + return None + return cached.pop(varid) + else: + return cached.pop((filename, funcname, name), None) + + +def find_matching_variable(varid, varcache, allfilenames, *, + _pop_cached=_pop_cached, + ): + if varid.filename and varid.filename != UNKNOWN: + filenames = [varid.filename] + else: + filenames = allfilenames + for filename in filenames: + variable = _pop_cached(varcache, filename, varid.funcname, varid.name) + if variable is not None: + return variable + else: + if varid.filename and varid.filename != UNKNOWN and varid.funcname is None: + for filename in allfilenames: + if not filename.endswith('.h'): + continue + variable = _pop_cached(varcache, filename, None, varid.name) + if variable is not None: + return variable + return None + + +MULTILINE = { + # Python/Python-ast.c + 'Load_singleton': 'PyObject *', + 'Store_singleton': 'PyObject *', + 'Del_singleton': 'PyObject *', + 'AugLoad_singleton': 'PyObject *', + 'AugStore_singleton': 'PyObject *', + 'Param_singleton': 'PyObject *', + 'And_singleton': 'PyObject *', + 'Or_singleton': 'PyObject *', + 'Add_singleton': 'static PyObject *', + 'Sub_singleton': 'static PyObject *', + 'Mult_singleton': 'static PyObject *', + 'MatMult_singleton': 'static PyObject *', + 'Div_singleton': 'static PyObject *', + 'Mod_singleton': 'static PyObject *', + 'Pow_singleton': 'static PyObject *', + 'LShift_singleton': 'static PyObject *', + 'RShift_singleton': 'static PyObject *', + 'BitOr_singleton': 'static PyObject *', + 'BitXor_singleton': 'static PyObject *', + 'BitAnd_singleton': 'static PyObject *', + 'FloorDiv_singleton': 'static PyObject *', + 'Invert_singleton': 'static PyObject *', + 'Not_singleton': 'static PyObject *', + 'UAdd_singleton': 'static PyObject *', + 'USub_singleton': 'static PyObject *', + 'Eq_singleton': 'static PyObject *', + 'NotEq_singleton': 'static PyObject *', + 'Lt_singleton': 'static PyObject *', + 'LtE_singleton': 'static PyObject *', + 'Gt_singleton': 'static PyObject *', + 'GtE_singleton': 'static PyObject *', + 'Is_singleton': 'static PyObject *', + 'IsNot_singleton': 'static PyObject *', + 'In_singleton': 'static PyObject *', + 'NotIn_singleton': 'static PyObject *', + # Python/symtable.c + 'top': 'static identifier ', + 'lambda': 'static identifier ', + 'genexpr': 'static identifier ', + 'listcomp': 'static identifier ', + 'setcomp': 'static identifier ', + 'dictcomp': 'static identifier ', + '__class__': 'static identifier ', + # Python/compile.c + '__doc__': 'static PyObject *', + '__annotations__': 'static PyObject *', + # Objects/floatobject.c + 'double_format': 'static float_format_type ', + 'float_format': 'static float_format_type ', + 'detected_double_format': 'static float_format_type ', + 'detected_float_format': 'static float_format_type ', + # Parser/listnode.c + 'level': 'static int ', + 'atbol': 'static int ', + # Python/dtoa.c + 'private_mem': 'static double private_mem[PRIVATE_mem]', + 'pmem_next': 'static double *', + # Modules/_weakref.c + 'weakref_functions': 'static PyMethodDef ', +} +INLINE = { + # Modules/_tracemalloc.c + 'allocators': 'static struct { PyMemAllocatorEx mem; PyMemAllocatorEx raw; PyMemAllocatorEx obj; } ', + # Modules/faulthandler.c + 'fatal_error': 'static struct { int enabled; PyObject *file; int fd; int all_threads; PyInterpreterState *interp; void *exc_handler; } ', + 'thread': 'static struct { PyObject *file; int fd; PY_TIMEOUT_T timeout_us; int repeat; PyInterpreterState *interp; int exit; char *header; size_t header_len; PyThread_type_lock cancel_event; PyThread_type_lock running; } ', + # Modules/signalmodule.c + 'Handlers': 'static volatile struct { _Py_atomic_int tripped; PyObject *func; } Handlers[NSIG]', + 'wakeup': 'static volatile struct { SOCKET_T fd; int warn_on_full_buffer; int use_send; } ', + # Python/dynload_shlib.c + 'handles': 'static struct { dev_t dev; ino_t ino; void *handle; } handles[128]', + # Objects/obmalloc.c + '_PyMem_Debug': 'static struct { debug_alloc_api_t raw; debug_alloc_api_t mem; debug_alloc_api_t obj; } ', + # Python/bootstrap_hash.c + 'urandom_cache': 'static struct { int fd; dev_t st_dev; ino_t st_ino; } ', + } +FUNC = { + # Objects/object.c + '_Py_abstract_hack': 'Py_ssize_t (*_Py_abstract_hack)(PyObject *)', + # Parser/myreadline.c + 'PyOS_InputHook': 'int (*PyOS_InputHook)(void)', + # Python/pylifecycle.c + '_PyOS_mystrnicmp_hack': 'int (*_PyOS_mystrnicmp_hack)(const char *, const char *, Py_ssize_t)', + # Parser/myreadline.c + 'PyOS_ReadlineFunctionPointer': 'char *(*PyOS_ReadlineFunctionPointer)(FILE *, FILE *, const char *)', + } +IMPLIED = { + # Objects/boolobject.c + '_Py_FalseStruct': 'static struct _longobject ', + '_Py_TrueStruct': 'static struct _longobject ', + # Modules/config.c + '_PyImport_Inittab': 'struct _inittab _PyImport_Inittab[]', + } +GLOBALS = {} +GLOBALS.update(MULTILINE) +GLOBALS.update(INLINE) +GLOBALS.update(FUNC) +GLOBALS.update(IMPLIED) + +LOCALS = { + 'buildinfo': ('Modules/getbuildinfo.c', + 'Py_GetBuildInfo', + 'static char buildinfo[50 + sizeof(GITVERSION) + ((sizeof(GITTAG) > sizeof(GITBRANCH)) ? sizeof(GITTAG) : sizeof(GITBRANCH))]'), + 'methods': ('Python/codecs.c', + '_PyCodecRegistry_Init', + 'static struct { char *name; PyMethodDef def; } methods[]'), + } + + +def _known(symbol): + if symbol.funcname: + if symbol.funcname != UNKNOWN or symbol.filename != UNKNOWN: + raise KeyError(symbol.name) + filename, funcname, decl = LOCALS[symbol.name] + varid = ID(filename, funcname, symbol.name) + elif not symbol.filename or symbol.filename == UNKNOWN: + raise KeyError(symbol.name) + else: + varid = symbol.id + try: + decl = GLOBALS[symbol.name] + except KeyError: + + if symbol.name.endswith('_methods'): + decl = 'static PyMethodDef ' + elif symbol.filename == 'Objects/exceptions.c' and symbol.name.startswith(('PyExc_', '_PyExc_')): + decl = 'static PyTypeObject ' + else: + raise + if symbol.name not in decl: + decl = decl + symbol.name + return Variable(varid, 'static', decl) + + +def known_row(varid, decl): + return ( + varid.filename, + varid.funcname or '-', + varid.name, + 'variable', + decl, + ) + + +def known_rows(symbols, *, + cached=True, + _get_filenames=iter_cpython_files, + _find_match=find_matching_variable, + _find_symbols=find_variables, + _as_known=known_row, + ): + filenames = list(_get_filenames()) + cache = {} + if cached: + for symbol in symbols: + try: + found = _known(symbol) + except KeyError: + found = _find_match(symbol, cache, filenames) + if found is None: + found = Variable(symbol.id, UNKNOWN, UNKNOWN) + yield _as_known(found.id, found.vartype) + else: + raise NotImplementedError # XXX incorporate KNOWN + for variable in _find_symbols(symbols, filenames, + srccache=cache, + parse_variable=_parse_global, + ): + #variable = variable._replace( + # filename=os.path.relpath(variable.filename, REPO_ROOT)) + if variable.funcname == UNKNOWN: + print(variable) + if variable.vartype== UNKNOWN: + print(variable) + yield _as_known(variable.id, variable.vartype) + + +def generate(symbols, filename=None, *, + _generate_rows=known_rows, + _write_tsv=write_tsv, + ): + if not filename: + filename = KNOWN_FILE + '.new' + + rows = _generate_rows(symbols) + _write_tsv(filename, KNOWN_HEADER, rows) + + +if __name__ == '__main__': + from c_symbols import binary + symbols = binary.iter_symbols( + binary.PYTHON, + find_local_symbol=None, + ) + generate(symbols) |