diff options
author | Victor Stinner <vstinner@python.org> | 2022-10-17 12:01:00 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-10-17 12:01:00 +0200 |
commit | 1863302d61a7a5dd8b8d345a00f0ee242c7c10bf (patch) | |
tree | a1e41af02147e2a14155d5b19d7b68bbb31c3f6f /Tools/build/generate_global_objects.py | |
parent | eae7dad40255bad42e4abce53ff8143dcbc66af5 (diff) | |
download | cpython-git-1863302d61a7a5dd8b8d345a00f0ee242c7c10bf.tar.gz |
gh-97669: Create Tools/build/ directory (#97963)
Create Tools/build/ directory. Move the following scripts from
Tools/scripts/ to Tools/build/:
* check_extension_modules.py
* deepfreeze.py
* freeze_modules.py
* generate_global_objects.py
* generate_levenshtein_examples.py
* generate_opcode_h.py
* generate_re_casefix.py
* generate_sre_constants.py
* generate_stdlib_module_names.py
* generate_token.py
* parse_html5_entities.py
* smelly.py
* stable_abi.py
* umarshal.py
* update_file.py
* verify_ensurepip_wheels.py
Update references to these scripts.
Diffstat (limited to 'Tools/build/generate_global_objects.py')
-rw-r--r-- | Tools/build/generate_global_objects.py | 382 |
1 files changed, 382 insertions, 0 deletions
# Tools/build/generate_global_objects.py
# (new file in this commit: mode 100644, index 0000000000..dd67cfedad)
"""Regenerate the auto-generated parts of the global-objects headers.

The script scans the C sources for ``_Py_ID(...)`` and
``_Py_DECLARE_STR(...)`` uses and rewrites the section between the START
and END markers of ``pycore_global_strings.h`` and
``pycore_runtime_init_generated.h`` (both under ``Include/internal``).
"""

import contextlib
import io
import os.path
import re

SCRIPT_NAME = 'Tools/build/generate_global_objects.py'
__file__ = os.path.abspath(__file__)
ROOT = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
INTERNAL = os.path.join(ROOT, 'Include', 'internal')


# Names that show up inside _Py_ID(...) only because they are macro
# parameters in the listed files; they are not real identifiers.
IGNORED = {
    'ACTION',  # Python/_warnings.c
    'ATTR',  # Python/_warnings.c and Objects/funcobject.c
    'DUNDER',  # Objects/typeobject.c
    'RDUNDER',  # Objects/typeobject.c
    'SPECIAL',  # Objects/weakrefobject.c
    'NAME',  # Objects/typeobject.c
}
# Identifiers that are always generated, in addition to the ones found
# by scanning the sources.
IDENTIFIERS = [
    # from ADD() Python/_warnings.c
    'default',
    'ignore',

    # from GET_WARNINGS_ATTR() in Python/_warnings.c
    'WarningMessage',
    '_showwarnmsg',
    '_warn_unawaited_coroutine',
    'defaultaction',
    'filters',
    'onceregistry',

    # from WRAP_METHOD() in Objects/weakrefobject.c
    '__bytes__',
    '__reversed__',

    # from COPY_ATTR() in Objects/funcobject.c
    '__module__',
    '__name__',
    '__qualname__',
    '__doc__',
    '__annotations__',

    # from SLOT* in Objects/typeobject.c
    '__abs__',
    '__add__',
    '__aiter__',
    '__and__',
    '__anext__',
    '__await__',
    '__bool__',
    '__call__',
    '__contains__',
    '__del__',
    '__delattr__',
    '__delete__',
    '__delitem__',
    '__eq__',
    '__float__',
    '__floordiv__',
    '__ge__',
    '__get__',
    '__getattr__',
    '__getattribute__',
    '__getitem__',
    '__gt__',
    '__hash__',
    '__iadd__',
    '__iand__',
    '__ifloordiv__',
    '__ilshift__',
    '__imatmul__',
    '__imod__',
    '__imul__',
    '__index__',
    '__init__',
    '__int__',
    '__invert__',
    '__ior__',
    '__ipow__',
    '__irshift__',
    '__isub__',
    '__iter__',
    '__itruediv__',
    '__ixor__',
    '__le__',
    '__len__',
    '__lshift__',
    '__lt__',
    '__matmul__',
    '__mod__',
    '__mul__',
    '__ne__',
    '__neg__',
    '__new__',
    '__next__',
    '__or__',
    '__pos__',
    '__pow__',
    '__radd__',
    '__rand__',
    '__repr__',
    '__rfloordiv__',
    '__rlshift__',
    '__rmatmul__',
    '__rmod__',
    '__rmul__',
    '__ror__',
    '__rpow__',
    '__rrshift__',
    '__rshift__',
    '__rsub__',
    '__rtruediv__',
    '__rxor__',
    '__set__',
    '__setattr__',
    '__setitem__',
    '__str__',
    '__sub__',
    '__truediv__',
    '__xor__',
    '__divmod__',
    '__rdivmod__',
]


#######################################
# helpers

def iter_files():
    """Yield the path of every ``.c``/``.h`` file under the scanned dirs.

    Only the directories listed below (relative to ROOT) are walked.
    """
    for topdir in ('Modules', 'Objects', 'Parser', 'PC', 'Programs', 'Python'):
        root = os.path.join(ROOT, topdir)
        for dirname, _, files in os.walk(root):
            for basename in files:
                if not basename.endswith(('.c', '.h')):
                    continue
                yield os.path.join(dirname, basename)


def iter_global_strings():
    """Yield one tuple per ``_Py_ID()`` / ``_Py_DECLARE_STR()`` use.

    Each item is ``(name, string, filename, lno, line)``.  For a
    ``_Py_ID(name)`` match ``string`` is None; for a
    ``_Py_DECLARE_STR(name, "literal")`` match it is the literal text.
    """
    id_regex = re.compile(r'\b_Py_ID\((\w+)\)')
    str_regex = re.compile(r'\b_Py_DECLARE_STR\((\w+), "(.*?)"\)')
    for filename in iter_files():
        try:
            infile = open(filename, encoding='utf-8')
        except FileNotFoundError:
            # The file must have been a temporary file.
            continue
        with infile:
            for lno, line in enumerate(infile, 1):
                for m in id_regex.finditer(line):
                    identifier, = m.groups()
                    yield identifier, None, filename, lno, line
                for m in str_regex.finditer(line):
                    varname, string = m.groups()
                    yield varname, string, filename, lno, line


def iter_to_marker(lines, marker):
    """Yield items from *lines* until one equals *marker*.

    Trailing whitespace is ignored for the comparison.  The marker line
    itself is consumed from *lines* but not yielded, so the caller can
    keep iterating *lines* to get what follows it.
    """
    for line in lines:
        if line.rstrip() == marker:
            break
        yield line


class Printer:
    """Write indented lines to a file, optionally as a C macro body.

    ``level`` is the current indent depth.  ``continuation`` is a stack
    of flags; while its top is true every written line gets a trailing
    backslash (line continuation).
    """

    def __init__(self, file):
        self.level = 0
        self.file = file
        self.continuation = [False]

    @contextlib.contextmanager
    def indent(self):
        """Indent one extra level for the duration of the ``with`` body."""
        save_level = self.level
        try:
            self.level += 1
            yield
        finally:
            self.level = save_level

    def write(self, arg):
        """Write *arg* as one line at the current indent level."""
        eol = '\n'
        if self.continuation[-1]:
            # No separating space before the backslash on an empty line.
            eol = f' \\{eol}' if arg else f'\\{eol}'
        # NOTE(review): the indent unit is reproduced exactly as it appears
        # in the reviewed paste, whose whitespace looks collapsed -- confirm
        # the intended width against the upstream file.
        self.file.writelines((" "*self.level, arg, eol))

    @contextlib.contextmanager
    def block(self, prefix, suffix="", *, continuation=None):
        """Write ``prefix {``, the indented ``with`` body, then ``}suffix``.

        *continuation* sets the line-continuation flag for the opening
        line and the body; None inherits the enclosing block's flag.
        """
        if continuation is None:
            continuation = self.continuation[-1]
        self.continuation.append(continuation)

        self.write(prefix + " {")
        try:
            with self.indent():
                yield
        finally:
            # Keep the stack balanced even if the body raises.
            self.continuation.pop()
        self.write("}" + suffix)


@contextlib.contextmanager
def open_for_changes(filename, orig):
    """Like open() but only write to the file if it changed."""
    outfile = io.StringIO()
    yield outfile
    text = outfile.getvalue()
    if text != orig:
        with open(filename, 'w', encoding='utf-8') as outfile:
            outfile.write(text)
    else:
        print(f'# not changed: {filename}')


#######################################
# the global objects

START = f'/* The following is auto-generated by {SCRIPT_NAME}. */'
END = '/* End auto-generated code */'


def _read_non_generated(filename):
    """Return ``(orig, before, after)`` for the file at *filename*.

    ``orig`` is the full text.  ``before`` is everything ahead of the
    START marker line and ``after`` is everything following the END
    marker line (trailing whitespace of the file is dropped first).
    """
    # Read as UTF-8 to match how open_for_changes() writes the file.
    with open(filename, encoding='utf-8') as infile:
        orig = infile.read()
    lines = iter(orig.rstrip().splitlines())
    before = '\n'.join(iter_to_marker(lines, START))
    # Skip the previously generated section.
    for _ in iter_to_marker(lines, END):
        pass
    after = '\n'.join(lines)
    return orig, before, after


def generate_global_strings(identifiers, strings):
    """Regenerate the marked section of ``pycore_global_strings.h``.

    *identifiers* is an iterable of identifier names; *strings* maps
    each string literal to its variable name.
    """
    filename = os.path.join(INTERNAL, 'pycore_global_strings.h')

    # Read the non-generated part of the file.
    orig, before, after = _read_non_generated(filename)

    # Generate the file.
    with open_for_changes(filename, orig) as outfile:
        printer = Printer(outfile)
        printer.write(before)
        printer.write(START)
        with printer.block('struct _Py_global_strings', ';'):
            with printer.block('struct', ' literals;'):
                for literal, name in sorted(strings.items(), key=lambda x: x[1]):
                    printer.write(f'STRUCT_FOR_STR({name}, "{literal}")')
            outfile.write('\n')
            with printer.block('struct', ' identifiers;'):
                for name in sorted(identifiers):
                    assert name.isidentifier(), name
                    printer.write(f'STRUCT_FOR_ID({name})')
            with printer.block('struct', ' ascii[128];'):
                printer.write("PyASCIIObject _ascii;")
                printer.write("uint8_t _data[2];")
            with printer.block('struct', ' latin1[128];'):
                printer.write("PyCompactUnicodeObject _latin1;")
                printer.write("uint8_t _data[2];")
        printer.write(END)
        printer.write(after)


def generate_runtime_init(identifiers, strings):
    """Regenerate the marked section of ``pycore_runtime_init_generated.h``.

    *identifiers* is an iterable of identifier names; *strings* maps
    each string literal to its variable name.

    Raises NotImplementedError when ``pycore_global_objects.h`` has no
    ``#define _PY_NSMALLNEGINTS`` line.
    """
    # First get some info from the declarations.
    nsmallposints = None
    nsmallnegints = None
    declarations = os.path.join(INTERNAL, 'pycore_global_objects.h')
    with open(declarations, encoding='utf-8') as infile:
        for line in infile:
            if line.startswith('#define _PY_NSMALLPOSINTS'):
                nsmallposints = int(line.split()[-1])
            elif line.startswith('#define _PY_NSMALLNEGINTS'):
                nsmallnegints = int(line.split()[-1])
                break
        else:
            raise NotImplementedError
    assert nsmallposints and nsmallnegints

    # Then target the runtime initializer.
    filename = os.path.join(INTERNAL, 'pycore_runtime_init_generated.h')

    # Read the non-generated part of the file.
    orig, before, after = _read_non_generated(filename)

    # Generate the file.
    with open_for_changes(filename, orig) as outfile:
        # C expressions for every object emitted below; they feed the
        # generated refcount check at the end.
        immortal_objects = []
        printer = Printer(outfile)
        printer.write(before)
        printer.write(START)
        with printer.block('#define _Py_global_objects_INIT', continuation=True):
            with printer.block('.singletons =', ','):
                # Global int objects.
                with printer.block('.small_ints =', ','):
                    for i in range(-nsmallnegints, nsmallposints):
                        printer.write(f'_PyLong_DIGIT_INIT({i}),')
                        immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + {i}]')
                printer.write('')
                # Global bytes objects.
                printer.write('.bytes_empty = _PyBytes_SIMPLE_INIT(0, 0),')
                immortal_objects.append('(PyObject *)&_Py_SINGLETON(bytes_empty)')
                with printer.block('.bytes_characters =', ','):
                    for i in range(256):
                        printer.write(f'_PyBytes_CHAR_INIT({i}),')
                        immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(bytes_characters)[{i}]')
                printer.write('')
                # Global strings.
                with printer.block('.strings =', ','):
                    with printer.block('.literals =', ','):
                        for literal, name in sorted(strings.items(), key=lambda x: x[1]):
                            printer.write(f'INIT_STR({name}, "{literal}"),')
                            immortal_objects.append(f'(PyObject *)&_Py_STR({name})')
                    with printer.block('.identifiers =', ','):
                        for name in sorted(identifiers):
                            assert name.isidentifier(), name
                            printer.write(f'INIT_ID({name}),')
                            immortal_objects.append(f'(PyObject *)&_Py_ID({name})')
                    with printer.block('.ascii =', ','):
                        for i in range(128):
                            printer.write(f'_PyASCIIObject_INIT("\\x{i:02x}"),')
                            immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(strings).ascii[{i}]')
                    with printer.block('.latin1 =', ','):
                        for i in range(128, 256):
                            # C string literal holding the UTF-8 bytes of chr(i).
                            utf8 = ['"']
                            for c in chr(i).encode('utf-8'):
                                utf8.append(f"\\x{c:02x}")
                            utf8.append('"')
                            printer.write(f'_PyUnicode_LATIN1_INIT("\\x{i:02x}", {"".join(utf8)}),')
                            immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(strings).latin1[{i} - 128]')
                printer.write('')
                with printer.block('.tuple_empty =', ','):
                    printer.write('.ob_base = _PyVarObject_IMMORTAL_INIT(&PyTuple_Type, 0)')
                    immortal_objects.append('(PyObject *)&_Py_SINGLETON(tuple_empty)')
        printer.write('')
        printer.write("static inline void")
        with printer.block("_PyUnicode_InitStaticStrings(void)"):
            printer.write('PyObject *string;')
            for i in sorted(identifiers):
                # The _Py_ID() uses written here land in a header under
                # Include/internal, which iter_files() never walks, so
                # iter_global_strings() does not pick them up again.
                printer.write(f'string = &_Py_ID({i});')
                printer.write('PyUnicode_InternInPlace(&string);')
        printer.write('')
        printer.write('#ifdef Py_DEBUG')
        printer.write("static inline void")
        with printer.block("_PyStaticObjects_CheckRefcnt(void)"):
            for i in immortal_objects:
                with printer.block(f'if (Py_REFCNT({i}) < _PyObject_IMMORTAL_REFCNT)', ';'):
                    printer.write(f'_PyObject_Dump({i});')
                    printer.write('Py_FatalError("immortal object has less refcnt than '
                                  'expected _PyObject_IMMORTAL_REFCNT");')
        printer.write('#endif')
        printer.write(END)
        printer.write(after)


def get_identifiers_and_strings(entries=None) -> 'tuple[set[str], dict[str, str]]':
    """Collect the identifiers and string literals to generate.

    *entries* is an iterable of ``(name, string, ...)`` tuples shaped
    like the items of iter_global_strings(); when None (the default)
    the source tree is scanned with iter_global_strings().

    Returns ``(identifiers, strings)``: the set of identifier names
    (IDENTIFIERS plus every scanned name not in IGNORED) and a dict
    mapping each string literal to its variable name.

    Raises ValueError if one literal is declared under two different
    variable names.
    """
    if entries is None:
        entries = iter_global_strings()
    identifiers = set(IDENTIFIERS)
    strings = {}
    for name, string, *_ in entries:
        if string is None:
            if name not in IGNORED:
                identifiers.add(name)
        elif string not in strings:
            strings[string] = name
        elif name != strings[string]:
            # `strings` is keyed by the literal, so look up by `string`.
            raise ValueError(
                f'string mismatch for {name!r} '
                f'({string!r} is already declared as {strings[string]!r})')
    return identifiers, strings


#######################################
# the script

def main() -> None:
    """Scan the sources and regenerate both headers."""
    identifiers, strings = get_identifiers_and_strings()

    generate_global_strings(identifiers, strings)
    generate_runtime_init(identifiers, strings)


if __name__ == '__main__':
    main()