summaryrefslogtreecommitdiff
path: root/Tools/c-analyzer/cpython
diff options
context:
space:
mode:
Diffstat (limited to 'Tools/c-analyzer/cpython')
-rw-r--r--Tools/c-analyzer/cpython/__main__.py48
-rw-r--r--Tools/c-analyzer/cpython/_builtin_types.py365
-rw-r--r--Tools/c-analyzer/cpython/globals-to-fix.tsv54
3 files changed, 448 insertions, 19 deletions
diff --git a/Tools/c-analyzer/cpython/__main__.py b/Tools/c-analyzer/cpython/__main__.py
index be331d5042..2b9e4233b9 100644
--- a/Tools/c-analyzer/cpython/__main__.py
+++ b/Tools/c-analyzer/cpython/__main__.py
@@ -20,7 +20,7 @@ import c_parser.__main__ as c_parser
import c_analyzer.__main__ as c_analyzer
import c_analyzer as _c_analyzer
from c_analyzer.info import UNKNOWN
-from . import _analyzer, _capi, _files, _parser, REPO_ROOT
+from . import _analyzer, _builtin_types, _capi, _files, _parser, REPO_ROOT
logger = logging.getLogger(__name__)
@@ -325,6 +325,47 @@ def cmd_capi(filenames=None, *,
print(line)
def _cli_builtin_types(parser):
    """Register the "builtin-types" command-line options on *parser*.

    Adds ``--format`` (stored as ``args.fmt``, default ``'table'``) and
    ``--include-modules`` (stored as ``args.showmodules``).

    Returns a list of callables with the signature
    ``process(args, *, argv=None)`` that validate/normalize the parsed
    arguments.  (Presumably they are run by the shared CLI machinery after
    parsing -- that code is not part of this function.)
    """
    parser.add_argument('--format', dest='fmt', default='table')
#    parser.add_argument('--summary', dest='format',
#                        action='store_const', const='summary')

    def process_format(args, *, argv=None):
        requested = args.fmt
        try:
            args.fmt = _builtin_types.resolve_format(requested)
        except NotImplementedError:
            # Report a bad --format value as a usage error rather than
            # letting the exception escape as a traceback.
            parser.error(f'unsupported format {requested!r}')
        if isinstance(args.fmt, str):
            if args.fmt not in _builtin_types._FORMATS:
                parser.error(f'unsupported format {requested!r}')

    parser.add_argument('--include-modules', dest='showmodules',
                        action='store_true')

    def process_modules(args, *, argv=None):
        # Nothing to validate for --include-modules.
        pass

    return [
        process_format,
        process_modules,
    ]
+
+
def cmd_builtin_types(fmt, *,
                      showmodules=False,
                      verbosity=VERBOSITY,
                      ):
    """Print the builtin types, rendered with the format named by *fmt*.

    Types rejected by the matcher from
    ``_builtin_types.resolve_matcher(showmodules)`` are skipped; each
    message the matcher reports is logged at level 1.
    *verbosity* is accepted but currently unused.
    """
    def log_ignored(msg):
        logger.log(1, msg)

    render = _builtin_types.get_renderer(fmt)
    types = _builtin_types.iter_builtin_types()
    match = _builtin_types.resolve_matcher(showmodules)
    if match:
        types = filter(lambda t: match(t, log=log_ignored), types)

    # (A "verbose=verbosity > VERBOSITY" argument is not passed yet.)
    lines = render(types)
    print()
    for text in lines:
        print(text)
+
+
# We do not define any other cmd_*() handlers here,
# favoring those defined elsewhere.
@@ -354,6 +395,11 @@ COMMANDS = {
[_cli_capi],
cmd_capi,
),
+ 'builtin-types': (
+ 'show the builtin types',
+ [_cli_builtin_types],
+ cmd_builtin_types,
+ ),
}
diff --git a/Tools/c-analyzer/cpython/_builtin_types.py b/Tools/c-analyzer/cpython/_builtin_types.py
new file mode 100644
index 0000000000..faa0b7a82a
--- /dev/null
+++ b/Tools/c-analyzer/cpython/_builtin_types.py
@@ -0,0 +1,365 @@
+from collections import namedtuple
+import os.path
+import re
+import textwrap
+
+from c_common import tables
+from . import REPO_ROOT
+from ._files import iter_header_files, iter_filenames
+
+
# Repo-relative path prefixes (each ends with the path separator) used to
# classify the file in which a declaration was found.
CAPI_PREFIX = os.path.join('Include', '')
INTERNAL_PREFIX = os.path.join('Include', 'internal', '')

# Matches, at the start of a line, one of:
#   * a PyTypeObject declaration ("[static|extern] PyTypeObject NAME;" or
#     "PyAPI_DATA(PyTypeObject) NAME;"),
#   * a PyTypeObject definition ("[static] PyTypeObject NAME = {" ending
#     the line),
#   * one of the *ExtendsException(...) macro invocations.
# The capture groups are, in order:
#   <static>, <extern>, <capi>, <name>, <def>, <decl>, <excname>
# The pattern is an f-string, so "{{" below stands for a single literal "{".
REGEX = re.compile(textwrap.dedent(rf'''
    (?:
        ^
        (?:
            (?:
                (?:
                    (?:
                        (?:
                            ( static ) # <static>
                            \s+
                            |
                            ( extern ) # <extern>
                            \s+
                         )?
                        PyTypeObject \s+
                     )
                    |
                    (?:
                        ( PyAPI_DATA ) # <capi>
                        \s* [(] \s* PyTypeObject \s* [)] \s*
                     )
                 )
                (\w+) # <name>
                \s*
                (?:
                    (?:
                        ( = \s* {{ ) # <def>
                        $
                     )
                    |
                    ( ; ) # <decl>
                 )
             )
            |
            (?:
                # These are specific to Objects/exceptions.c:
                (?:
                    SimpleExtendsException
                    |
                    MiddlingExtendsException
                    |
                    ComplexExtendsException
                 )
                \( \w+ \s* , \s*
                ( \w+ ) # <excname>
                \s* ,
             )
         )
    )
'''), re.VERBOSE)
+
+
def _parse_line(line):
    """Parse one line of C source for a PyTypeObject declaration/definition.

    Returns None if the line does not match REGEX.  Otherwise returns
    ``(name, isdecl, kind)`` where *isdecl* is True for a declaration and
    False for a definition, and *kind* is 'static', 'extern', 'capi' or None.

    Raises NotImplementedError for a definition marked extern/PyAPI_DATA.
    """
    found = REGEX.match(line)
    if found is None:
        return None
    static, extern, capi, name, def_, decl, excname = found.groups()

    if def_:
        # "PyTypeObject NAME = {"
        if extern or capi:
            raise NotImplementedError(line)
        return name, False, ('static' if static else None)

    if excname:
        # One of the *ExtendsException() macros.
        return f'_PyExc_{excname}', False, 'static'

    # Otherwise it is a plain declaration ("... NAME;").
    qualifiers = (
        ('static', static),
        ('extern', extern),
        ('capi', capi),
    )
    kind = next((label for label, hit in qualifiers if hit), None)
    return name, True, kind
+
+
class BuiltinTypeDecl(namedtuple('BuiltinTypeDecl', 'file lno name kind')):
    """A declaration of a builtin type: where it was found and its kind."""

    KINDS = {'static', 'extern', 'capi', 'forward'}

    @classmethod
    def from_line(cls, line, filename, lno):
        """Return the declaration found on *line*, or None if there is none."""
        # This is similar to ._capi.CAPIItem.from_line().
        parsed = _parse_line(line)
        if not parsed:
            return None
        name, isdecl, kind = parsed
        # Definitions are not declarations.
        return cls.from_parsed(name, kind, filename, lno) if isdecl else None

    @classmethod
    def from_parsed(cls, name, kind, filename, lno):
        """Build from already-parsed values; a missing kind means 'forward'."""
        return cls.from_values(filename, lno, name, kind or 'forward')

    @classmethod
    def from_values(cls, filename, lno, name, kind):
        """Build and validate a declaration.

        Raises ValueError for an unknown kind and NotImplementedError when
        the kind does not fit the file's location (a non-extern/capi
        declaration under Include/, or a capi one outside of it).
        """
        if kind not in cls.KINDS:
            raise ValueError(f'unsupported kind {kind!r}')
        self = cls(filename, lno, name, kind)
        exportable = self.kind in ('extern', 'capi')
        if not exportable and self.api:
            raise NotImplementedError(self)
        if self.kind == 'capi' and not self.api:
            raise NotImplementedError(self)
        return self

    @property
    def relfile(self):
        """The filename with the leading REPO_ROOT (and separator) removed."""
        prefixlen = len(REPO_ROOT) + 1
        return self.file[prefixlen:]

    @property
    def api(self):
        """True if the file is under Include/."""
        return self.relfile.startswith(CAPI_PREFIX)

    @property
    def internal(self):
        """True if the file is under Include/internal/."""
        return self.relfile.startswith(INTERNAL_PREFIX)

    @property
    def private(self):
        """True for an underscore-prefixed name in a non-internal API header."""
        return (self.name.startswith('_')
                and self.api
                and not self.internal)

    @property
    def public(self):
        """True for a 'capi' declaration that is neither internal nor private."""
        return (self.kind == 'capi'
                and not self.internal
                and not self.private)
+
+
class BuiltinTypeInfo(namedtuple('BuiltinTypeInfo', 'file lno name static decl')):
    """A builtin type definition found in a source file.

    *decl* is the matching declaration (an object with ``api``, ``internal``,
    ``private`` and ``public`` attributes), or None if none is known.
    """

    @classmethod
    def from_line(cls, line, filename, lno, *, decls=None):
        """Return the definition found on *line*, or None if there is none."""
        parsed = _parse_line(line)
        if not parsed:
            return None
        name, isdecl, kind = parsed
        if isdecl:
            # Declarations are not definitions.
            return None
        return cls.from_parsed(name, kind, filename, lno, decls=decls)

    @classmethod
    def from_parsed(cls, name, kind, filename, lno, *, decls=None):
        """Build from already-parsed values.

        *kind* must be 'static' or empty/None; anything else raises
        NotImplementedError.  The declaration is looked up by name in
        *decls* (a mapping), if given.
        """
        if not kind:
            static = False
        elif kind == 'static':
            static = True
        else:
            # Report the location by line number; there is no source
            # line text available here.
            raise NotImplementedError((filename, lno, kind))
        decl = decls.get(name) if decls else None
        return cls(filename, lno, name, static, decl)

    @property
    def relfile(self):
        """The filename with the leading REPO_ROOT (and separator) removed."""
        return self.file[len(REPO_ROOT) + 1:]

    @property
    def exported(self):
        """True if the type is not defined as static."""
        return not self.static

    # The following properties defer to the declaration, if there is one.

    @property
    def api(self):
        if not self.decl:
            return False
        return self.decl.api

    @property
    def internal(self):
        if not self.decl:
            return False
        return self.decl.internal

    @property
    def private(self):
        if not self.decl:
            return False
        return self.decl.private

    @property
    def public(self):
        if not self.decl:
            return False
        return self.decl.public

    @property
    def inmodule(self):
        """True if the file is under Modules/."""
        return self.relfile.startswith('Modules' + os.path.sep)

    def render_rowvalues(self, kinds):
        """Return the table-row values for this type.

        The row has 'name' and 'filename' entries, an empty entry for each
        of *kinds*, a 'kind' entry naming the category that applies, and
        that category's own column filled in with its name.
        """
        row = {
            'name': self.name,
            **{k: '' for k in kinds},
            'filename': f'{self.relfile}:{self.lno}',
        }
        if self.static:
            kind = 'static'
        else:
            if self.internal:
                kind = 'internal'
            elif self.private:
                kind = 'private'
            elif self.public:
                kind = 'public'
            else:
                kind = 'global'
        row['kind'] = kind
        row[kind] = kind
        return row
+
+
+def _ensure_decl(decl, decls):
+ prev = decls.get(decl.name)
+ if prev:
+ if decl.kind == 'forward':
+ return None
+ if prev.kind != 'forward':
+ if decl.kind == prev.kind and decl.file == prev.file:
+ assert decl.lno != prev.lno, (decl, prev)
+ return None
+ raise NotImplementedError(f'duplicate {decl} (was {prev}')
+ decls[decl.name] = decl
+
+
def _collect_decls(filename, decls):
    """Record in *decls* every type declaration found in *filename*."""
    with open(filename) as infile:
        for lno, line in enumerate(infile, 1):
            decl = BuiltinTypeDecl.from_line(line, filename, lno)
            if not decl:
                continue
            _ensure_decl(decl, decls)


def iter_builtin_types(filenames=None):
    """Yield a BuiltinTypeInfo for each builtin type definition found.

    Declarations are first collected from the files yielded by
    iter_header_files() and from the non-".c" files yielded by
    iter_filenames() that were not already visited.  Then each ".c" file
    is scanned: declarations are tracked per file and each definition is
    yielded, paired with the global declaration when there is one for
    that name and otherwise looked up among the file's own declarations.

    *filenames* is accepted but currently unused.
    """
    decls = {}
    seen = set()
    for filename in iter_header_files():
        seen.add(filename)
        _collect_decls(filename, decls)
    srcfiles = []
    for filename in iter_filenames():
        if filename.endswith('.c'):
            # Source files are handled below, once all decls are known.
            srcfiles.append(filename)
            continue
        if filename in seen:
            continue
        _collect_decls(filename, decls)

    for filename in srcfiles:
        with open(filename) as infile:
            localdecls = {}
            for lno, line in enumerate(infile, 1):
                parsed = _parse_line(line)
                if not parsed:
                    continue
                name, isdecl, kind = parsed
                if isdecl:
                    decl = BuiltinTypeDecl.from_parsed(name, kind, filename, lno)
                    if not decl:
                        raise NotImplementedError((filename, line))
                    _ensure_decl(decl, localdecls)
                else:
                    builtin = BuiltinTypeInfo.from_parsed(
                            name, kind, filename, lno,
                            decls=decls if name in decls else localdecls)
                    if not builtin:
                        raise NotImplementedError((filename, line))
                    yield builtin
+
+
def resolve_matcher(showmodules=False):
    """Return a predicate that decides which builtin types to show.

    The returned ``match(info, *, log=None)`` accepts every type when
    *showmodules* is true.  Otherwise it rejects types whose ``inmodule``
    attribute is true, reporting each rejection through *log* (if given).
    """
    def match(info, *, log=None):
        if showmodules or not info.inmodule:
            return True
        if log is not None:
            log(f'ignored {info.name!r}')
        return False
    return match
+
+
+##################################
+# CLI rendering
+
def resolve_format(fmt):
    """Normalize a requested output format.

    An empty/missing format becomes 'table'.  A string is returned as-is;
    checking it against the supported formats is left to the caller, so
    that an unknown name can be reported there as an unsupported format.

    Raises NotImplementedError for a non-string format.
    """
    if not fmt:
        return 'table'
    if isinstance(fmt, str):
        return fmt
    raise NotImplementedError(fmt)
+
+
def get_renderer(fmt):
    """Return the render function registered in _FORMATS for *fmt*.

    Raises ValueError for a format name that is not in _FORMATS and
    NotImplementedError for a resolved format that is not a string.
    """
    resolved = resolve_format(fmt)
    if not isinstance(resolved, str):
        raise NotImplementedError(resolved)
    try:
        renderer = _FORMATS[resolved]
    except KeyError:
        raise ValueError(f'unsupported format {resolved!r}')
    return renderer
+
+
def render_table(types):
    """Yield the lines of a table of *types*, sorted by name.

    The table has one column per kind (static, global, internal, private,
    public), preceded by a two-line "API" banner and followed by a blank
    line, the total count and the count for each kind.
    """
    types = sorted(types, key=(lambda t: t.name))
    colspecs = tables.resolve_columns(
        'name:<33 static:^ global:^ internal:^ private:^ public:^ filename:<30')
    header, div, rowfmt = tables.build_table(colspecs)
    # Pad past the first three columns so the banner lines up with
    # the columns that follow them.
    leader = ' ' * sum(c.width+2 for c in colspecs[:3]) + ' '
    yield leader + f'{"API":^29}'
    yield leader + '-' * 29
    yield header
    yield div
    # The kinds are all the columns between "name" and "filename".
    kinds = [c[0] for c in colspecs[1:-1]]
    counts = {k: 0 for k in kinds}
    for t in types:
        row = t.render_rowvalues(kinds)
        kind = row['kind']
        yield rowfmt.format(**row)
        counts[kind] += 1
    yield ''
    yield f'total: {sum(counts.values()):>3}'
    for kind in kinds:
        yield f' {kind:>10}: {counts[kind]:>3}'
+
+
def render_repr(types):
    """Yield the repr() of each of *types*, in order."""
    yield from map(repr, types)
+
+
# The supported output formats, mapped to their render functions.
_FORMATS = dict(
    table=render_table,
    repr=render_repr,
)
diff --git a/Tools/c-analyzer/cpython/globals-to-fix.tsv b/Tools/c-analyzer/cpython/globals-to-fix.tsv
index c92f64df1c..bc9d6aabb0 100644
--- a/Tools/c-analyzer/cpython/globals-to-fix.tsv
+++ b/Tools/c-analyzer/cpython/globals-to-fix.tsv
@@ -7,7 +7,7 @@ filename funcname name reason
# global objects to fix in core code
#-----------------------
-# static types
+# exported builtin types (C-API)
Objects/boolobject.c - PyBool_Type -
Objects/bytearrayobject.c - PyByteArrayIter_Type -
@@ -18,8 +18,6 @@ Objects/capsule.c - PyCapsule_Type -
Objects/cellobject.c - PyCell_Type -
Objects/classobject.c - PyInstanceMethod_Type -
Objects/classobject.c - PyMethod_Type -
-Objects/codeobject.c - _PyLineIterator -
-Objects/codeobject.c - _PyPositionsIterator -
Objects/codeobject.c - PyCode_Type -
Objects/complexobject.c - PyComplex_Type -
Objects/descrobject.c - PyClassMethodDescr_Type -
@@ -42,16 +40,12 @@ Objects/dictobject.c - PyDictValues_Type -
Objects/dictobject.c - PyDict_Type -
Objects/enumobject.c - PyEnum_Type -
Objects/enumobject.c - PyReversed_Type -
-Objects/exceptions.c - _PyExc_BaseExceptionGroup -
-Objects/exceptions.c - _PyExc_EncodingWarning -
Objects/fileobject.c - PyStdPrinter_Type -
-Objects/floatobject.c - FloatInfoType -
Objects/floatobject.c - PyFloat_Type -
Objects/frameobject.c - PyFrame_Type -
Objects/funcobject.c - PyClassMethod_Type -
Objects/funcobject.c - PyFunction_Type -
Objects/funcobject.c - PyStaticMethod_Type -
-Objects/genericaliasobject.c - _Py_GenericAliasIterType -
Objects/genericaliasobject.c - Py_GenericAliasType -
Objects/genobject.c - PyAsyncGen_Type -
Objects/genobject.c - PyCoro_Type -
@@ -63,13 +57,10 @@ Objects/genobject.c - _PyCoroWrapper_Type -
Objects/interpreteridobject.c - _PyInterpreterID_Type -
Objects/iterobject.c - PyCallIter_Type -
Objects/iterobject.c - PySeqIter_Type -
-Objects/iterobject.c - _PyAnextAwaitable_Type -
Objects/listobject.c - PyListIter_Type -
Objects/listobject.c - PyListRevIter_Type -
Objects/listobject.c - PyList_Type -
-Objects/longobject.c - Int_InfoType -
Objects/longobject.c - PyLong_Type -
-Objects/memoryobject.c - _PyMemoryIter_Type -
Objects/memoryobject.c - PyMemoryView_Type -
Objects/memoryobject.c - _PyManagedBuffer_Type -
Objects/methodobject.c - PyCFunction_Type -
@@ -91,7 +82,6 @@ Objects/rangeobject.c - PyRange_Type -
Objects/setobject.c - PyFrozenSet_Type -
Objects/setobject.c - PySetIter_Type -
Objects/setobject.c - PySet_Type -
-Objects/setobject.c - _PySetDummy_Type -
Objects/sliceobject.c - PyEllipsis_Type -
Objects/sliceobject.c - PySlice_Type -
Objects/tupleobject.c - PyTupleIter_Type -
@@ -99,11 +89,8 @@ Objects/tupleobject.c - PyTuple_Type -
Objects/typeobject.c - PyBaseObject_Type -
Objects/typeobject.c - PySuper_Type -
Objects/typeobject.c - PyType_Type -
-Objects/unicodeobject.c - EncodingMapType -
Objects/unicodeobject.c - PyUnicodeIter_Type -
Objects/unicodeobject.c - PyUnicode_Type -
-Objects/unionobject.c - _PyUnion_Type -
-Objects/unionobject.c - _Py_UnionType -
Objects/weakrefobject.c - _PyWeakref_CallableProxyType -
Objects/weakrefobject.c - _PyWeakref_ProxyType -
Objects/weakrefobject.c - _PyWeakref_RefType -
@@ -113,8 +100,23 @@ Python/bltinmodule.c - PyZip_Type -
Python/context.c - PyContextToken_Type -
Python/context.c - PyContextVar_Type -
Python/context.c - PyContext_Type -
+Python/traceback.c - PyTraceBack_Type -
+
+#-----------------------
+# other exported builtin types
+
+# Not in a .h file:
+Objects/codeobject.c - _PyLineIterator -
+# Not in a .h file:
+Objects/codeobject.c - _PyPositionsIterator -
+Objects/genericaliasobject.c - _Py_GenericAliasIterType -
+# Not in a .h file:
+Objects/iterobject.c - _PyAnextAwaitable_Type -
+# Not in a .h file:
+Objects/memoryobject.c - _PyMemoryIter_Type -
+#Objects/unicodeobject.c - _PyUnicodeASCIIIter_Type -
+Objects/unionobject.c - _PyUnion_Type -
Python/context.c - _PyContextTokenMissing_Type -
-Python/errors.c - UnraisableHookArgsType -
Python/hamt.c - _PyHamtItems_Type -
Python/hamt.c - _PyHamtKeys_Type -
Python/hamt.c - _PyHamtValues_Type -
@@ -123,17 +125,32 @@ Python/hamt.c - _PyHamt_BitmapNode_Type -
Python/hamt.c - _PyHamt_CollisionNode_Type -
Python/hamt.c - _PyHamt_Type -
Python/symtable.c - PySTEntry_Type -
+
+#-----------------------
+# private static builtin types
+
+Objects/setobject.c - _PySetDummy_Type -
+Objects/unicodeobject.c - EncodingMapType -
+#Objects/unicodeobject.c - PyFieldNameIter_Type -
+#Objects/unicodeobject.c - PyFormatterIter_Type -
+
+#-----------------------
+# static builtin structseq
+
+Objects/floatobject.c - FloatInfoType -
+Objects/longobject.c - Int_InfoType -
+Python/errors.c - UnraisableHookArgsType -
Python/sysmodule.c - AsyncGenHooksType -
Python/sysmodule.c - FlagsType -
Python/sysmodule.c - Hash_InfoType -
Python/sysmodule.c - VersionInfoType -
Python/thread.c - ThreadInfoType -
-Python/traceback.c - PyTraceBack_Type -
#-----------------------
# builtin exception types
Objects/exceptions.c - _PyExc_BaseException -
+Objects/exceptions.c - _PyExc_BaseExceptionGroup -
Objects/exceptions.c - _PyExc_UnicodeEncodeError -
Objects/exceptions.c - _PyExc_UnicodeDecodeError -
Objects/exceptions.c - _PyExc_UnicodeTranslateError -
@@ -197,9 +214,11 @@ Objects/exceptions.c - _PyExc_ImportWarning -
Objects/exceptions.c - _PyExc_UnicodeWarning -
Objects/exceptions.c - _PyExc_BytesWarning -
Objects/exceptions.c - _PyExc_ResourceWarning -
+Objects/exceptions.c - _PyExc_EncodingWarning -
Objects/exceptions.c - PyExc_EnvironmentError -
Objects/exceptions.c - PyExc_IOError -
Objects/exceptions.c - PyExc_BaseException -
+Objects/exceptions.c - PyExc_BaseExceptionGroup -
Objects/exceptions.c - PyExc_Exception -
Objects/exceptions.c - PyExc_TypeError -
Objects/exceptions.c - PyExc_StopAsyncIteration -
@@ -263,6 +282,7 @@ Objects/exceptions.c - PyExc_ImportWarning -
Objects/exceptions.c - PyExc_UnicodeWarning -
Objects/exceptions.c - PyExc_BytesWarning -
Objects/exceptions.c - PyExc_ResourceWarning -
+Objects/exceptions.c - PyExc_EncodingWarning -
#-----------------------
# singletons
@@ -354,8 +374,6 @@ Objects/unicodeobject.c - static_strings -
# other
# initialized once
-Objects/exceptions.c - PyExc_BaseExceptionGroup -
-Objects/exceptions.c - PyExc_EncodingWarning -
# XXX This should have been found by the analyzer but wasn't:
Python/context.c - _token_missing -
# XXX This should have been found by the analyzer but wasn't: