diff options
Diffstat (limited to 'scripts/generate_identifier_pattern.py')
-rwxr-xr-x | scripts/generate_identifier_pattern.py | 77 |
1 files changed, 77 insertions, 0 deletions
diff --git a/scripts/generate_identifier_pattern.py b/scripts/generate_identifier_pattern.py new file mode 100755 index 0000000..7db5f4a --- /dev/null +++ b/scripts/generate_identifier_pattern.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python3 +import itertools +import os +import re +import sys + +if sys.version_info[0] < 3: + raise RuntimeError('This needs to run on Python 3.') + + +def get_characters(): + """Find every Unicode character that is valid in a Python `identifier`_ but + is not matched by the regex ``\w`` group. + + ``\w`` matches some characters that aren't valid in identifiers, but + :meth:`str.isidentifier` will catch that later in lexing. + + All start characters are valid continue characters, so we only test for + continue characters. + + _identifier: https://docs.python.org/3/reference/lexical_analysis.html#identifiers + """ + for cp in range(sys.maxunicode + 1): + s = chr(cp) + + if ('a' + s).isidentifier() and not re.match(r'\w', s): + yield s + + +def collapse_ranges(data): + """Given a sorted list of unique characters, generate ranges representing + sequential code points. + + Source: https://stackoverflow.com/a/4629241/400617 + """ + for a, b in itertools.groupby( + enumerate(data), + lambda x: ord(x[1]) - x[0] + ): + b = list(b) + yield b[0][1], b[-1][1] + + +def build_pattern(ranges): + """Output the regex pattern for ranges of characters. + + One and two character ranges output the individual characters. + """ + out = [] + + for a, b in ranges: + if a == b: # single char + out.append(a) + elif ord(b) - ord(a) == 1: # two chars, range is redundant + out.append(a) + out.append(b) + else: + out.append(f'{a}-{b}') + + return ''.join(out) + + +def main(): + """Build the regex pattern and write it to the file + :file:`jinja2/_identifier.py`.""" + pattern = build_pattern(collapse_ranges(get_characters())) + filename = os.path.abspath(os.path.join( + os.path.dirname(__file__), '..', 'jinja2', '_identifier.py' + )) + + with open(filename, 'w', encoding='utf8') as f: + f.write('# generated by scripts/generate_identifier_pattern.py\n') + f.write(f'pattern = \'{pattern}\'\n') + + +if __name__ == '__main__': + main() |