summaryrefslogtreecommitdiff
path: root/Tools/c-analyzer/cpython/_generate.py
diff options
context:
space:
mode:
Diffstat (limited to 'Tools/c-analyzer/cpython/_generate.py')
-rw-r--r--Tools/c-analyzer/cpython/_generate.py329
1 files changed, 329 insertions, 0 deletions
diff --git a/Tools/c-analyzer/cpython/_generate.py b/Tools/c-analyzer/cpython/_generate.py
new file mode 100644
index 0000000000..4c340acf99
--- /dev/null
+++ b/Tools/c-analyzer/cpython/_generate.py
@@ -0,0 +1,329 @@
+# The code here consists of hacks for pre-populating the known.tsv file.
+
+from c_analyzer.parser.preprocessor import _iter_clean_lines
+from c_analyzer.parser.naive import (
+ iter_variables, parse_variable_declaration, find_variables,
+ )
+from c_analyzer.common.known import HEADER as KNOWN_HEADER
+from c_analyzer.common.info import UNKNOWN, ID
+from c_analyzer.variables import Variable
+from c_analyzer.util import write_tsv
+
+from . import SOURCE_DIRS, REPO_ROOT
+from .known import DATA_FILE as KNOWN_FILE
+from .files import iter_cpython_files
+
+
+POTS = ('char ', 'wchar_t ', 'int ', 'Py_ssize_t ')
+POTS += tuple('const ' + v for v in POTS)
+STRUCTS = ('PyTypeObject', 'PyObject', 'PyMethodDef', 'PyModuleDef', 'grammar')
+
+
+def _parse_global(line, funcname=None):
+ line = line.strip()
+ if line.startswith('static '):
+ if '(' in line and '[' not in line and ' = ' not in line:
+ return None, None
+ name, decl = parse_variable_declaration(line)
+ elif line.startswith(('Py_LOCAL(', 'Py_LOCAL_INLINE(')):
+ name, decl = parse_variable_declaration(line)
+ elif line.startswith('_Py_static_string('):
+ decl = line.strip(';').strip()
+ name = line.split('(')[1].split(',')[0].strip()
+ elif line.startswith('_Py_IDENTIFIER('):
+ decl = line.strip(';').strip()
+ name = 'PyId_' + line.split('(')[1].split(')')[0].strip()
+ elif funcname:
+ return None, None
+
+ # global-only
+ elif line.startswith('PyAPI_DATA('): # only in .h files
+ name, decl = parse_variable_declaration(line)
+ elif line.startswith('extern '): # only in .h files
+ name, decl = parse_variable_declaration(line)
+ elif line.startswith('PyDoc_VAR('):
+ decl = line.strip(';').strip()
+ name = line.split('(')[1].split(')')[0].strip()
+ elif line.startswith(POTS): # implied static
+ if '(' in line and '[' not in line and ' = ' not in line:
+ return None, None
+ name, decl = parse_variable_declaration(line)
+ elif line.startswith(STRUCTS) and line.endswith(' = {'): # implied static
+ name, decl = parse_variable_declaration(line)
+ elif line.startswith(STRUCTS) and line.endswith(' = NULL;'): # implied static
+ name, decl = parse_variable_declaration(line)
+ elif line.startswith('struct '):
+ if not line.endswith(' = {'):
+ return None, None
+ if not line.partition(' ')[2].startswith(STRUCTS):
+ return None, None
+ # implied static
+ name, decl = parse_variable_declaration(line)
+
+ # file-specific
+ elif line.startswith(('SLOT1BINFULL(', 'SLOT1BIN(')):
+ # Objects/typeobject.c
+ funcname = line.split('(')[1].split(',')[0]
+ return [
+ ('op_id', funcname, '_Py_static_string(op_id, OPSTR)'),
+ ('rop_id', funcname, '_Py_static_string(op_id, OPSTR)'),
+ ]
+ elif line.startswith('WRAP_METHOD('):
+ # Objects/weakrefobject.c
+ funcname, name = (v.strip() for v in line.split('(')[1].split(')')[0].split(','))
+ return [
+ ('PyId_' + name, funcname, f'_Py_IDENTIFIER({name})'),
+ ]
+
+ else:
+ return None, None
+ return name, decl
+
+
def _pop_cached(varcache, filename, funcname, name, *,
                _iter_variables=iter_variables,
                ):
    """Remove and return the cached variable matching the given ID parts.

    "varcache" maps each filename to a dict of that file's variables,
    keyed by variable ID.  A file that is not cached yet is parsed with
    _iter_variables() and its variable IDs are printed.  When "funcname"
    is UNKNOWN the first cached variable with a matching name is used;
    otherwise the exact (filename, funcname, name) key is looked up.

    Returns None if there is no match.
    """
    # Look for the file, parsing it on first use.
    if filename in varcache:
        per_file = varcache[filename]
    else:
        per_file = varcache[filename] = {}
        parsed = _iter_variables(filename, parse_variable=_parse_global)
        for variable in parsed:
            variable._isglobal = True
            per_file[variable.id] = variable
        for key in per_file:
            print(' ', key)

    # Look for the variable.
    if funcname != UNKNOWN:
        return per_file.pop((filename, funcname, name), None)

    # The function is not known, so match on the name alone.
    match = next((key for key in per_file if key.name == name), None)
    if match is None:
        return None
    return per_file.pop(match)
+
+
def find_matching_variable(varid, varcache, allfilenames, *,
                           _pop_cached=_pop_cached,
                           ):
    """Return the parsed variable that corresponds to "varid", if any.

    Only the ID's own file is searched when it is known; otherwise every
    file in "allfilenames" is tried.  If the file is known, the ID has no
    function, and nothing was found there, the ".h" files in
    "allfilenames" are searched as well.  A matched variable is removed
    from "varcache".  Returns None when nothing matches.
    """
    file_known = bool(varid.filename) and varid.filename != UNKNOWN
    candidates = [varid.filename] if file_known else allfilenames

    for candidate in candidates:
        found = _pop_cached(varcache, candidate, varid.funcname, varid.name)
        if found is not None:
            return found

    # Fall back to the header files for globals.
    if file_known and varid.funcname is None:
        for header in allfilenames:
            if header.endswith('.h'):
                found = _pop_cached(varcache, header, None, varid.name)
                if found is not None:
                    return found
    return None
+
+
# Hard-coded declarations, keyed by variable name.  A value that does not
# already contain the name is a prefix to which the name gets appended
# (see _known()).

MULTILINE = {
    # Python/Python-ast.c
    **dict.fromkeys(
        (
            'Load_singleton',
            'Store_singleton',
            'Del_singleton',
            'AugLoad_singleton',
            'AugStore_singleton',
            'Param_singleton',
            'And_singleton',
            'Or_singleton',
        ),
        'PyObject *',
    ),
    **dict.fromkeys(
        (
            'Add_singleton',
            'Sub_singleton',
            'Mult_singleton',
            'MatMult_singleton',
            'Div_singleton',
            'Mod_singleton',
            'Pow_singleton',
            'LShift_singleton',
            'RShift_singleton',
            'BitOr_singleton',
            'BitXor_singleton',
            'BitAnd_singleton',
            'FloorDiv_singleton',
            'Invert_singleton',
            'Not_singleton',
            'UAdd_singleton',
            'USub_singleton',
            'Eq_singleton',
            'NotEq_singleton',
            'Lt_singleton',
            'LtE_singleton',
            'Gt_singleton',
            'GtE_singleton',
            'Is_singleton',
            'IsNot_singleton',
            'In_singleton',
            'NotIn_singleton',
        ),
        'static PyObject *',
    ),
    # Python/symtable.c
    **dict.fromkeys(
        (
            'top',
            'lambda',
            'genexpr',
            'listcomp',
            'setcomp',
            'dictcomp',
            '__class__',
        ),
        'static identifier ',
    ),
    # Python/compile.c
    **dict.fromkeys(
        (
            '__doc__',
            '__annotations__',
        ),
        'static PyObject *',
    ),
    # Objects/floatobject.c
    **dict.fromkeys(
        (
            'double_format',
            'float_format',
            'detected_double_format',
            'detected_float_format',
        ),
        'static float_format_type ',
    ),
    # Parser/listnode.c
    **dict.fromkeys(
        (
            'level',
            'atbol',
        ),
        'static int ',
    ),
    # Python/dtoa.c
    'private_mem': 'static double private_mem[PRIVATE_mem]',
    'pmem_next': 'static double *',
    # Modules/_weakref.c
    'weakref_functions': 'static PyMethodDef ',
}

INLINE = {
    # Modules/_tracemalloc.c
    'allocators': (
        'static struct { PyMemAllocatorEx mem; PyMemAllocatorEx raw; PyMemAllocatorEx obj; } '
    ),
    # Modules/faulthandler.c
    'fatal_error': (
        'static struct { int enabled; PyObject *file; int fd; int all_threads; PyInterpreterState *interp; void *exc_handler; } '
    ),
    'thread': (
        'static struct { PyObject *file; int fd; PY_TIMEOUT_T timeout_us; int repeat; PyInterpreterState *interp; int exit; char *header; size_t header_len; PyThread_type_lock cancel_event; PyThread_type_lock running; } '
    ),
    # Modules/signalmodule.c
    'Handlers': (
        'static volatile struct { _Py_atomic_int tripped; PyObject *func; } Handlers[NSIG]'
    ),
    'wakeup': (
        'static volatile struct { SOCKET_T fd; int warn_on_full_buffer; int use_send; } '
    ),
    # Python/dynload_shlib.c
    'handles': (
        'static struct { dev_t dev; ino_t ino; void *handle; } handles[128]'
    ),
    # Objects/obmalloc.c
    '_PyMem_Debug': (
        'static struct { debug_alloc_api_t raw; debug_alloc_api_t mem; debug_alloc_api_t obj; } '
    ),
    # Python/bootstrap_hash.c
    'urandom_cache': (
        'static struct { int fd; dev_t st_dev; ino_t st_ino; } '
    ),
}

FUNC = {
    # Objects/object.c
    '_Py_abstract_hack': (
        'Py_ssize_t (*_Py_abstract_hack)(PyObject *)'
    ),
    # Parser/myreadline.c
    'PyOS_InputHook': (
        'int (*PyOS_InputHook)(void)'
    ),
    # Python/pylifecycle.c
    '_PyOS_mystrnicmp_hack': (
        'int (*_PyOS_mystrnicmp_hack)(const char *, const char *, Py_ssize_t)'
    ),
    # Parser/myreadline.c
    'PyOS_ReadlineFunctionPointer': (
        'char *(*PyOS_ReadlineFunctionPointer)(FILE *, FILE *, const char *)'
    ),
}

IMPLIED = {
    # Objects/boolobject.c
    **dict.fromkeys(
        (
            '_Py_FalseStruct',
            '_Py_TrueStruct',
        ),
        'static struct _longobject ',
    ),
    # Modules/config.c
    '_PyImport_Inittab': 'struct _inittab _PyImport_Inittab[]',
}

# All of the tables above, merged into one lookup table.
GLOBALS = {**MULTILINE, **INLINE, **FUNC, **IMPLIED}
+
# Hard-coded function-local statics: name -> (filename, funcname, decl).
LOCALS = dict(
    buildinfo=(
        'Modules/getbuildinfo.c',
        'Py_GetBuildInfo',
        'static char buildinfo[50 + sizeof(GITVERSION) + '
        '((sizeof(GITTAG) > sizeof(GITBRANCH)) ? '
        'sizeof(GITTAG) : sizeof(GITBRANCH))]',
    ),
    methods=(
        'Python/codecs.c',
        '_PyCodecRegistry_Init',
        'static struct { char *name; PyMethodDef def; } methods[]',
    ),
)
+
+
+def _known(symbol):
+ if symbol.funcname:
+ if symbol.funcname != UNKNOWN or symbol.filename != UNKNOWN:
+ raise KeyError(symbol.name)
+ filename, funcname, decl = LOCALS[symbol.name]
+ varid = ID(filename, funcname, symbol.name)
+ elif not symbol.filename or symbol.filename == UNKNOWN:
+ raise KeyError(symbol.name)
+ else:
+ varid = symbol.id
+ try:
+ decl = GLOBALS[symbol.name]
+ except KeyError:
+
+ if symbol.name.endswith('_methods'):
+ decl = 'static PyMethodDef '
+ elif symbol.filename == 'Objects/exceptions.c' and symbol.name.startswith(('PyExc_', '_PyExc_')):
+ decl = 'static PyTypeObject '
+ else:
+ raise
+ if symbol.name not in decl:
+ decl = decl + symbol.name
+ return Variable(varid, 'static', decl)
+
+
def known_row(varid, decl):
    """Return the row (a 5-tuple) to write out for the given variable.

    The columns are the ID's filename, its funcname ('-' if it has
    none), its name, the literal kind 'variable', and "decl".
    """
    filename = varid.filename
    funcname = varid.funcname if varid.funcname else '-'
    return (filename, funcname, varid.name, 'variable', decl)
+
+
def known_rows(symbols, *,
               cached=True,
               _get_filenames=iter_cpython_files,
               _find_match=find_matching_variable,
               _find_symbols=find_variables,
               _as_known=known_row,
               ):
    """Yield one row per symbol, as produced by _as_known().

    Each symbol is first looked up in the hard-coded tables (_known()).
    If it is not there, it is matched against the variables parsed out
    of the source files (_find_match()).  A symbol that cannot be
    resolved either way still gets a row, with UNKNOWN for its
    declaration.

    This is a generator, so nothing happens (including the error below)
    until it is iterated.

    Raises NotImplementedError if "cached" is false.
    """
    filenames = list(_get_filenames())
    cache = {}
    if not cached:
        # XXX incorporate KNOWN
        # The non-cached path was sketched as:
        #   _find_symbols(symbols, filenames, srccache=cache,
        #                 parse_variable=_parse_global)
        # with one row per resulting variable.  _find_symbols stays in
        # the signature for that.
        raise NotImplementedError('cached=False is not supported yet')

    for symbol in symbols:
        try:
            found = _known(symbol)
        except KeyError:
            found = _find_match(symbol, cache, filenames)
            if found is None:
                found = Variable(symbol.id, UNKNOWN, UNKNOWN)
        yield _as_known(found.id, found.vartype)
+
+
def generate(symbols, filename=None, *,
             _generate_rows=known_rows,
             _write_tsv=write_tsv,
             ):
    """Write the rows generated for "symbols" to a TSV file.

    If "filename" is not given, KNOWN_FILE + '.new' is used.
    """
    target = filename or KNOWN_FILE + '.new'
    _write_tsv(target, KNOWN_HEADER, _generate_rows(symbols))
+
+
if __name__ == '__main__':
    # Generate the file from the symbols reported for binary.PYTHON.
    from c_symbols import binary

    symbols = binary.iter_symbols(binary.PYTHON, find_local_symbol=None)
    generate(symbols)