diff options
Diffstat (limited to 'Tools/c-analyzer/cpython')
-rw-r--r-- | Tools/c-analyzer/cpython/__main__.py | 48 | ||||
-rw-r--r-- | Tools/c-analyzer/cpython/_builtin_types.py | 365 | ||||
-rw-r--r-- | Tools/c-analyzer/cpython/globals-to-fix.tsv | 54 |
3 files changed, 448 insertions, 19 deletions
# Tools/c-analyzer/cpython/__main__.py -- code added for the "builtin-types"
# command.  VERBOSITY and logger are defined elsewhere in that module.
from . import _analyzer, _builtin_types, _capi, _files, _parser, REPO_ROOT


def _cli_builtin_types(parser):
    """Add the "builtin-types" command-line options to *parser*.

    Registers "--format" (stored as ``fmt``, default ``'table'``) and
    "--include-modules" (stored as ``showmodules``).

    Returns the list of post-processing callbacks, each of which is
    called as ``process(args, *, argv=None)``.
    """
    parser.add_argument('--format', dest='fmt', default='table')
#    parser.add_argument('--summary', dest='format',
#                        action='store_const', const='summary')
    def process_format(args, *, argv=None):
        orig = args.fmt
        try:
            args.fmt = _builtin_types.resolve_format(orig)
        except NotImplementedError:
            # resolve_format() rejects unknown formats by raising, so
            # report that as a normal usage error instead of a traceback.
            parser.error(f'unsupported format {orig!r}')
        if isinstance(args.fmt, str):
            if args.fmt not in _builtin_types._FORMATS:
                parser.error(f'unsupported format {orig!r}')

    parser.add_argument('--include-modules', dest='showmodules',
                        action='store_true')
    def process_modules(args, *, argv=None):
        # Nothing to normalize; the flag is passed through as-is.
        pass

    return [
        process_format,
        process_modules,
    ]


def cmd_builtin_types(fmt, *,
                      showmodules=False,
                      verbosity=VERBOSITY,
                      ):
    """Print the builtin types, rendered using the format *fmt*.

    The types come from _builtin_types.iter_builtin_types() and are
    filtered with the matcher from _builtin_types.resolve_matcher().
    *verbosity* is accepted but not currently used.
    """
    render = _builtin_types.get_renderer(fmt)
    types = _builtin_types.iter_builtin_types()
    match = _builtin_types.resolve_matcher(showmodules)
    if match:
        # Ignored types are reported at the lowest possible log level.
        def log(msg):
            logger.log(1, msg)
        types = (t for t in types if match(t, log=log))

    lines = render(
        types,
#        verbose=verbosity > VERBOSITY,
    )
    print()
    for line in lines:
        print(line)


# We do not define any other cmd_*() handlers here,
# favoring those defined elsewhere.
# NOTE: the same change also registers the command in cpython/__main__.py:
#
#     COMMANDS = {
#         ...
#         'builtin-types': (
#             'show the builtin types',
#             [_cli_builtin_types],
#             cmd_builtin_types,
#         ),
#     }
#
# Tools/c-analyzer/cpython/_builtin_types.py (new file)
"""Find and report the PyTypeObject variables in the C source files.

Lines are matched one at a time against REGEX, so only declarations
and definitions that start at the beginning of a line are recognized.
"""

from collections import namedtuple
import os.path
import re
import textwrap

from c_common import tables
from . import REPO_ROOT
from ._files import iter_header_files, iter_filenames


CAPI_PREFIX = os.path.join('Include', '')
INTERNAL_PREFIX = os.path.join('Include', 'internal', '')

# Groups, in order: static, extern, capi, name, def, decl, excname.
REGEX = re.compile(textwrap.dedent(rf'''
    (?:
        ^
        (?:
            (?:
                (?:
                    (?:
                        (?:
                            ( static )  # <static>
                            \s+
                            |
                            ( extern )  # <extern>
                            \s+
                         )?
                        PyTypeObject \s+
                     )
                    |
                    (?:
                        ( PyAPI_DATA )  # <capi>
                        \s* [(] \s* PyTypeObject \s* [)] \s*
                     )
                 )
                (\w+)  # <name>
                \s*
                (?:
                    (?:
                        ( = \s* {{ )  # <def>
                        $
                     )
                    |
                    ( ; )  # <decl>
                 )
             )
            |
            (?:
                # These are specific to Objects/exceptions.c:
                (?:
                    SimpleExtendsException
                    |
                    MiddlingExtendsException
                    |
                    ComplexExtendsException
                 )
                \( \w+ \s* , \s*
                ( \w+ )  # <excname>
                \s* ,
             )
         )
    )
'''), re.VERBOSE)


def _parse_line(line):
    """Return (name, isdecl, kind) for a matching line, else None.

    *isdecl* is True for a declaration (ends with ";") and False for a
    definition (ends with "= {") or an exception-defining macro.
    *kind* is 'static', 'extern', 'capi', or None.

    Raises NotImplementedError for a definition that is marked
    "extern" or wrapped in PyAPI_DATA().
    """
    m = REGEX.match(line)
    if not m:
        return None
    (static, extern, capi,
     name,
     def_, decl,
     excname,
     ) = m.groups()
    if def_:
        isdecl = False
        if extern or capi:
            raise NotImplementedError(line)
        kind = 'static' if static else None
    elif excname:
        # The exception macros only give the bare exception name.
        name = f'_PyExc_{excname}'
        isdecl = False
        kind = 'static'
    else:
        isdecl = True
        if static:
            kind = 'static'
        elif extern:
            kind = 'extern'
        elif capi:
            kind = 'capi'
        else:
            kind = None
    return name, isdecl, kind


class BuiltinTypeDecl(namedtuple('BuiltinTypeDecl', 'file lno name kind')):
    """A declaration of a PyTypeObject variable, and where it was found."""

    KINDS = {
        'static',
        'extern',
        'capi',
        'forward',
    }

    @classmethod
    def from_line(cls, line, filename, lno):
        """Return the declaration found in *line*, or None if there isn't one."""
        # This is similar to ._capi.CAPIItem.from_line().
        parsed = _parse_line(line)
        if not parsed:
            return None
        name, isdecl, kind = parsed
        if not isdecl:
            return None
        return cls.from_parsed(name, kind, filename, lno)

    @classmethod
    def from_parsed(cls, name, kind, filename, lno):
        """Return a declaration for values returned by _parse_line().

        A missing *kind* is treated as 'forward'.
        """
        if not kind:
            kind = 'forward'
        return cls.from_values(filename, lno, name, kind)

    @classmethod
    def from_values(cls, filename, lno, name, kind):
        """Return a validated declaration.

        Raises ValueError if *kind* is not in KINDS and
        NotImplementedError if *kind* is inconsistent with the file's
        location (see the "api" property).
        """
        if kind not in cls.KINDS:
            raise ValueError(f'unsupported kind {kind!r}')
        self = cls(filename, lno, name, kind)
        if self.kind not in ('extern', 'capi') and self.api:
            raise NotImplementedError(self)
        elif self.kind == 'capi' and not self.api:
            raise NotImplementedError(self)
        return self

    @property
    def relfile(self):
        """The filename with the leading REPO_ROOT (and separator) removed."""
        return self.file[len(REPO_ROOT) + 1:]

    @property
    def api(self):
        """True if the file is under CAPI_PREFIX."""
        return self.relfile.startswith(CAPI_PREFIX)

    @property
    def internal(self):
        """True if the file is under INTERNAL_PREFIX."""
        return self.relfile.startswith(INTERNAL_PREFIX)

    @property
    def private(self):
        """True for an underscore-prefixed name in a non-internal API file."""
        if not self.name.startswith('_'):
            return False
        return self.api and not self.internal

    @property
    def public(self):
        """True for a 'capi' declaration that is neither internal nor private."""
        if self.kind != 'capi':
            return False
        return not self.internal and not self.private


class BuiltinTypeInfo(namedtuple('BuiltinTypeInfo', 'file lno name static decl')):
    """A definition of a PyTypeObject variable, plus its declaration (if known)."""

    @classmethod
    def from_line(cls, line, filename, lno, *, decls=None):
        """Return the definition found in *line*, or None if there isn't one."""
        parsed = _parse_line(line)
        if not parsed:
            return None
        name, isdecl, kind = parsed
        if isdecl:
            return None
        return cls.from_parsed(name, kind, filename, lno, decls=decls)

    @classmethod
    def from_parsed(cls, name, kind, filename, lno, *, decls=None):
        """Return a definition for values returned by _parse_line().

        *decls* optionally maps names to BuiltinTypeDecl objects.
        Raises NotImplementedError for any kind other than 'static'
        or a false value.
        """
        if not kind:
            static = False
        elif kind == 'static':
            static = True
        else:
            raise NotImplementedError((filename, lno, kind))
        decl = decls.get(name) if decls else None
        return cls(filename, lno, name, static, decl)

    @property
    def relfile(self):
        """The filename with the leading REPO_ROOT (and separator) removed."""
        return self.file[len(REPO_ROOT) + 1:]

    @property
    def exported(self):
        """True if the definition is not static."""
        return not self.static

    @property
    def api(self):
        """The declaration's "api" value (False if there is no declaration)."""
        if not self.decl:
            return False
        return self.decl.api

    @property
    def internal(self):
        """The declaration's "internal" value (False if there is no declaration)."""
        if not self.decl:
            return False
        return self.decl.internal

    @property
    def private(self):
        """The declaration's "private" value (False if there is no declaration)."""
        if not self.decl:
            return False
        return self.decl.private

    @property
    def public(self):
        """The declaration's "public" value (False if there is no declaration)."""
        if not self.decl:
            return False
        return self.decl.public

    @property
    def inmodule(self):
        """True if the file is under the "Modules" directory."""
        return self.relfile.startswith('Modules' + os.path.sep)

    def render_rowvalues(self, kinds):
        """Return the table row for this type, as a dict.

        The row has a (blank) entry for each of *kinds*, with the entry
        for this type's kind set to the kind's name.  The kind is also
        stored under the "kind" key.
        """
        row = {
            'name': self.name,
            **{k: '' for k in kinds},
            'filename': f'{self.relfile}:{self.lno}',
        }
        if self.static:
            kind = 'static'
        else:
            if self.internal:
                kind = 'internal'
            elif self.private:
                kind = 'private'
            elif self.public:
                kind = 'public'
            else:
                kind = 'global'
        row['kind'] = kind
        row[kind] = kind
        return row


def _ensure_decl(decl, decls):
    """Record *decl* in *decls*, unless a better one is already there.

    A forward declaration never replaces an existing entry, and a
    repeated declaration (same kind and file) is ignored.  Any other
    duplicate of a non-forward declaration raises NotImplementedError.
    """
    prev = decls.get(decl.name)
    if prev:
        if decl.kind == 'forward':
            return None
        if prev.kind != 'forward':
            if decl.kind == prev.kind and decl.file == prev.file:
                assert decl.lno != prev.lno, (decl, prev)
                return None
            raise NotImplementedError(f'duplicate {decl} (was {prev})')
    decls[decl.name] = decl


def _scan_decls(filename, decls):
    """Add every declaration found in the given file to *decls*."""
    with open(filename) as infile:
        for lno, line in enumerate(infile, 1):
            decl = BuiltinTypeDecl.from_line(line, filename, lno)
            if not decl:
                continue
            _ensure_decl(decl, decls)


def iter_builtin_types(filenames=None):
    """Yield a BuiltinTypeInfo for each type definition in the .c files.

    Declarations are first collected from the header files and from
    every other non-.c file.  Each definition is then paired with its
    declaration, if any.  *filenames* is accepted but not currently
    used.
    """
    decls = {}
    seen = set()
    for filename in iter_header_files():
        seen.add(filename)
        _scan_decls(filename, decls)
    srcfiles = []
    for filename in iter_filenames():
        if filename.endswith('.c'):
            # The .c files are handled below, after all the decls are known.
            srcfiles.append(filename)
            continue
        if filename in seen:
            continue
        _scan_decls(filename, decls)

    for filename in srcfiles:
        with open(filename) as infile:
            # Declarations in a .c file only apply to that file.
            localdecls = {}
            for lno, line in enumerate(infile, 1):
                parsed = _parse_line(line)
                if not parsed:
                    continue
                name, isdecl, kind = parsed
                if isdecl:
                    decl = BuiltinTypeDecl.from_parsed(name, kind, filename, lno)
                    if not decl:
                        raise NotImplementedError((filename, line))
                    _ensure_decl(decl, localdecls)
                else:
                    builtin = BuiltinTypeInfo.from_parsed(
                            name, kind, filename, lno,
                            decls=decls if name in decls else localdecls)
                    if not builtin:
                        raise NotImplementedError((filename, line))
                    yield builtin


def resolve_matcher(showmodules=False):
    """Return a predicate that selects which types to show.

    The returned function is called as ``match(info, *, log=None)``.
    Types defined under "Modules" are rejected (and reported through
    *log*, if given) unless *showmodules* is true.
    """
    def match(info, *, log=None):
        if showmodules or not info.inmodule:
            return True
        if log is not None:
            log(f'ignored {info.name!r}')
        return False
    return match


##################################
# CLI rendering

def resolve_format(fmt):
    """Return the normalized format name.

    A false value means 'table'.  Anything that is not a key of
    _FORMATS raises NotImplementedError.
    """
    if not fmt:
        return 'table'
    elif isinstance(fmt, str) and fmt in _FORMATS:
        return fmt
    else:
        raise NotImplementedError(fmt)


def get_renderer(fmt):
    """Return the render function for the given format."""
    fmt = resolve_format(fmt)
    if isinstance(fmt, str):
        try:
            return _FORMATS[fmt]
        except KeyError:
            raise ValueError(f'unsupported format {fmt!r}') from None
    else:
        raise NotImplementedError(fmt)


def render_table(types):
    """Yield the lines of a table of the given types, sorted by name.

    The table is followed by the total and the per-kind counts.
    """
    types = sorted(types, key=(lambda t: t.name))
    colspecs = tables.resolve_columns(
        'name:<33 static:^ global:^ internal:^ private:^ public:^ filename:<30')
    header, div, rowfmt = tables.build_table(colspecs)
    # The "API" super-header starts after the first three columns.
    leader = ' ' * sum(c.width+2 for c in colspecs[:3]) + '   '
    yield leader + f'{"API":^29}'
    yield leader + '-' * 29
    yield header
    yield div
    # Every column between "name" and "filename" is a kind.
    kinds = [c[0] for c in colspecs[1:-1]]
    counts = {k: 0 for k in kinds}
    for t in types:
        row = t.render_rowvalues(kinds)
        kind = row['kind']
        yield rowfmt.format(**row)
        counts[kind] += 1
    yield ''
    yield f'total: {sum(counts.values()):>3}'
    for kind in kinds:
        yield f'  {kind:>10}: {counts[kind]:>3}'


def render_repr(types):
    """Yield the repr() of each of the given types."""
    for t in types:
        yield repr(t)


_FORMATS = {
    'table': render_table,
    'repr': render_repr,
}
+} diff --git a/Tools/c-analyzer/cpython/globals-to-fix.tsv b/Tools/c-analyzer/cpython/globals-to-fix.tsv index c92f64df1c..bc9d6aabb0 100644 --- a/Tools/c-analyzer/cpython/globals-to-fix.tsv +++ b/Tools/c-analyzer/cpython/globals-to-fix.tsv @@ -7,7 +7,7 @@ filename funcname name reason # global objects to fix in core code #----------------------- -# static types +# exported builtin types (C-API) Objects/boolobject.c - PyBool_Type - Objects/bytearrayobject.c - PyByteArrayIter_Type - @@ -18,8 +18,6 @@ Objects/capsule.c - PyCapsule_Type - Objects/cellobject.c - PyCell_Type - Objects/classobject.c - PyInstanceMethod_Type - Objects/classobject.c - PyMethod_Type - -Objects/codeobject.c - _PyLineIterator - -Objects/codeobject.c - _PyPositionsIterator - Objects/codeobject.c - PyCode_Type - Objects/complexobject.c - PyComplex_Type - Objects/descrobject.c - PyClassMethodDescr_Type - @@ -42,16 +40,12 @@ Objects/dictobject.c - PyDictValues_Type - Objects/dictobject.c - PyDict_Type - Objects/enumobject.c - PyEnum_Type - Objects/enumobject.c - PyReversed_Type - -Objects/exceptions.c - _PyExc_BaseExceptionGroup - -Objects/exceptions.c - _PyExc_EncodingWarning - Objects/fileobject.c - PyStdPrinter_Type - -Objects/floatobject.c - FloatInfoType - Objects/floatobject.c - PyFloat_Type - Objects/frameobject.c - PyFrame_Type - Objects/funcobject.c - PyClassMethod_Type - Objects/funcobject.c - PyFunction_Type - Objects/funcobject.c - PyStaticMethod_Type - -Objects/genericaliasobject.c - _Py_GenericAliasIterType - Objects/genericaliasobject.c - Py_GenericAliasType - Objects/genobject.c - PyAsyncGen_Type - Objects/genobject.c - PyCoro_Type - @@ -63,13 +57,10 @@ Objects/genobject.c - _PyCoroWrapper_Type - Objects/interpreteridobject.c - _PyInterpreterID_Type - Objects/iterobject.c - PyCallIter_Type - Objects/iterobject.c - PySeqIter_Type - -Objects/iterobject.c - _PyAnextAwaitable_Type - Objects/listobject.c - PyListIter_Type - Objects/listobject.c - PyListRevIter_Type - 
Objects/listobject.c - PyList_Type - -Objects/longobject.c - Int_InfoType - Objects/longobject.c - PyLong_Type - -Objects/memoryobject.c - _PyMemoryIter_Type - Objects/memoryobject.c - PyMemoryView_Type - Objects/memoryobject.c - _PyManagedBuffer_Type - Objects/methodobject.c - PyCFunction_Type - @@ -91,7 +82,6 @@ Objects/rangeobject.c - PyRange_Type - Objects/setobject.c - PyFrozenSet_Type - Objects/setobject.c - PySetIter_Type - Objects/setobject.c - PySet_Type - -Objects/setobject.c - _PySetDummy_Type - Objects/sliceobject.c - PyEllipsis_Type - Objects/sliceobject.c - PySlice_Type - Objects/tupleobject.c - PyTupleIter_Type - @@ -99,11 +89,8 @@ Objects/tupleobject.c - PyTuple_Type - Objects/typeobject.c - PyBaseObject_Type - Objects/typeobject.c - PySuper_Type - Objects/typeobject.c - PyType_Type - -Objects/unicodeobject.c - EncodingMapType - Objects/unicodeobject.c - PyUnicodeIter_Type - Objects/unicodeobject.c - PyUnicode_Type - -Objects/unionobject.c - _PyUnion_Type - -Objects/unionobject.c - _Py_UnionType - Objects/weakrefobject.c - _PyWeakref_CallableProxyType - Objects/weakrefobject.c - _PyWeakref_ProxyType - Objects/weakrefobject.c - _PyWeakref_RefType - @@ -113,8 +100,23 @@ Python/bltinmodule.c - PyZip_Type - Python/context.c - PyContextToken_Type - Python/context.c - PyContextVar_Type - Python/context.c - PyContext_Type - +Python/traceback.c - PyTraceBack_Type - + +#----------------------- +# other exported builtin types + +# Not in a .h file: +Objects/codeobject.c - _PyLineIterator - +# Not in a .h file: +Objects/codeobject.c - _PyPositionsIterator - +Objects/genericaliasobject.c - _Py_GenericAliasIterType - +# Not in a .h file: +Objects/iterobject.c - _PyAnextAwaitable_Type - +# Not in a .h file: +Objects/memoryobject.c - _PyMemoryIter_Type - +#Objects/unicodeobject.c - _PyUnicodeASCIIIter_Type - +Objects/unionobject.c - _PyUnion_Type - Python/context.c - _PyContextTokenMissing_Type - -Python/errors.c - UnraisableHookArgsType - Python/hamt.c - 
_PyHamtItems_Type - Python/hamt.c - _PyHamtKeys_Type - Python/hamt.c - _PyHamtValues_Type - @@ -123,17 +125,32 @@ Python/hamt.c - _PyHamt_BitmapNode_Type - Python/hamt.c - _PyHamt_CollisionNode_Type - Python/hamt.c - _PyHamt_Type - Python/symtable.c - PySTEntry_Type - + +#----------------------- +# private static builtin types + +Objects/setobject.c - _PySetDummy_Type - +Objects/unicodeobject.c - EncodingMapType - +#Objects/unicodeobject.c - PyFieldNameIter_Type - +#Objects/unicodeobject.c - PyFormatterIter_Type - + +#----------------------- +# static builtin structseq + +Objects/floatobject.c - FloatInfoType - +Objects/longobject.c - Int_InfoType - +Python/errors.c - UnraisableHookArgsType - Python/sysmodule.c - AsyncGenHooksType - Python/sysmodule.c - FlagsType - Python/sysmodule.c - Hash_InfoType - Python/sysmodule.c - VersionInfoType - Python/thread.c - ThreadInfoType - -Python/traceback.c - PyTraceBack_Type - #----------------------- # builtin exception types Objects/exceptions.c - _PyExc_BaseException - +Objects/exceptions.c - _PyExc_BaseExceptionGroup - Objects/exceptions.c - _PyExc_UnicodeEncodeError - Objects/exceptions.c - _PyExc_UnicodeDecodeError - Objects/exceptions.c - _PyExc_UnicodeTranslateError - @@ -197,9 +214,11 @@ Objects/exceptions.c - _PyExc_ImportWarning - Objects/exceptions.c - _PyExc_UnicodeWarning - Objects/exceptions.c - _PyExc_BytesWarning - Objects/exceptions.c - _PyExc_ResourceWarning - +Objects/exceptions.c - _PyExc_EncodingWarning - Objects/exceptions.c - PyExc_EnvironmentError - Objects/exceptions.c - PyExc_IOError - Objects/exceptions.c - PyExc_BaseException - +Objects/exceptions.c - PyExc_BaseExceptionGroup - Objects/exceptions.c - PyExc_Exception - Objects/exceptions.c - PyExc_TypeError - Objects/exceptions.c - PyExc_StopAsyncIteration - @@ -263,6 +282,7 @@ Objects/exceptions.c - PyExc_ImportWarning - Objects/exceptions.c - PyExc_UnicodeWarning - Objects/exceptions.c - PyExc_BytesWarning - Objects/exceptions.c - 
PyExc_ResourceWarning - +Objects/exceptions.c - PyExc_EncodingWarning - #----------------------- # singletons @@ -354,8 +374,6 @@ Objects/unicodeobject.c - static_strings - # other # initialized once -Objects/exceptions.c - PyExc_BaseExceptionGroup - -Objects/exceptions.c - PyExc_EncodingWarning - # XXX This should have been found by the analyzer but wasn't: Python/context.c - _token_missing - # XXX This should have been found by the analyzer but wasn't: |