diff options
Diffstat (limited to 'scripts')
-rwxr-xr-x | scripts/check_sources.py | 59 | ||||
-rwxr-xr-x | scripts/debug_lexer.py | 233 | ||||
-rwxr-xr-x | scripts/find_codetags.py | 14 | ||||
l---------[-rwxr-xr-x] | scripts/find_error.py | 174 | ||||
-rw-r--r-- | scripts/get_vimkw.py | 39 |
5 files changed, 297 insertions, 222 deletions
diff --git a/scripts/check_sources.py b/scripts/check_sources.py index 94c2c15d..5f233887 100755 --- a/scripts/check_sources.py +++ b/scripts/check_sources.py @@ -23,8 +23,10 @@ from os.path import join, splitext, abspath checkers = {} + def checker(*suffixes, **kwds): only_pkg = kwds.pop('only_pkg', False) + def deco(func): for suffix in suffixes: checkers.setdefault(suffix, []).append(func) @@ -38,8 +40,6 @@ copyright_re = re.compile(r'^ :copyright: Copyright 2006-2014 by ' r'the Pygments team, see AUTHORS\.$', re.UNICODE) copyright_2_re = re.compile(r'^ %s(, %s)*[,.]$' % (name_mail_re, name_mail_re), re.UNICODE) -coding_re = re.compile(r'coding[:=]\s*([-\w.]+)') -not_ix_re = re.compile(r'\bnot\s+\S+?\s+i[sn]\s\S+') is_const_re = re.compile(r'if.*?==\s+(None|False|True)\b') misspellings = ["developement", "adress", "verificate", # ALLOW-MISSPELLING @@ -48,44 +48,30 @@ misspellings = ["developement", "adress", "verificate", # ALLOW-MISSPELLING @checker('.py') def check_syntax(fn, lines): + if '#!/' in lines[0]: + lines = lines[1:] + if 'coding:' in lines[0]: + lines = lines[1:] try: - compile(''.join(lines), fn, "exec") + compile('\n'.join(lines), fn, "exec") except SyntaxError as err: yield 0, "not compilable: %s" % err @checker('.py') def check_style_and_encoding(fn, lines): - encoding = 'ascii' for lno, line in enumerate(lines): - if len(line) > 90: + if len(line) > 110: yield lno+1, "line too long" - m = not_ix_re.search(line) - if m: - yield lno+1, '"' + m.group() + '"' if is_const_re.search(line): yield lno+1, 'using == None/True/False' - if lno < 2: - co = coding_re.search(line) - if co: - encoding = co.group(1) - try: - line.decode(encoding) - except AttributeError: - # Python 3 - encoding was already checked - pass - except UnicodeDecodeError as err: - yield lno+1, "not decodable: %s\n Line: %r" % (err, line) - except LookupError as err: - yield 0, "unknown encoding: %s" % encoding - encoding = 'latin1' @checker('.py', only_pkg=True) def check_fileheader(fn, lines): # line number correction c = 1 - if lines[0:1] == ['#!/usr/bin/env python\n']: + if lines[0:1] == ['#!/usr/bin/env python']: lines = lines[1:] c = 2 @@ -94,31 +80,28 @@ def check_fileheader(fn, lines): for lno, l in enumerate(lines): llist.append(l) if lno == 0: - if l == '# -*- coding: rot13 -*-\n': - # special-case pony package - return - elif l != '# -*- coding: utf-8 -*-\n': + if l != '# -*- coding: utf-8 -*-': yield 1, "missing coding declaration" elif lno == 1: - if l != '"""\n' and l != 'r"""\n': + if l != '"""' and l != 'r"""': yield 2, 'missing docstring begin (""")' else: docopen = True elif docopen: - if l == '"""\n': + if l == '"""': # end of docstring if lno <= 4: yield lno+c, "missing module name in docstring" break - if l != "\n" and l[:4] != ' ' and docopen: + if l != "" and l[:4] != ' ' and docopen: yield lno+c, "missing correct docstring indentation" if lno == 2: # if not in package, don't check the module name modname = fn[:-3].replace('/', '.').replace('.__init__', '') while modname: - if l.lower()[4:-1] == modname: + if l.lower()[4:] == modname: break modname = '.'.join(modname.split('.')[1:]) else: @@ -133,7 +116,7 @@ def check_fileheader(fn, lines): # check for copyright and license fields license = llist[-2:-1] - if license != [" :license: BSD, see LICENSE for details.\n"]: + if license != [" :license: BSD, see LICENSE for details."]: yield 0, "no correct license info" ci = -3 @@ -176,16 +159,19 @@ def main(argv): for root, dirs, files in os.walk(path): if '.hg' in dirs: dirs.remove('.hg') + if 'examplefiles' in dirs: + dirs.remove('examplefiles') if '-i' in opts and abspath(root) in opts['-i']: del dirs[:] continue # XXX: awkward: for the Makefile call: don't check non-package # files for file headers - in_pocoo_pkg = root.startswith('./pygments') + in_pygments_pkg = root.startswith('./pygments') for fn in files: fn = join(root, fn) - if fn[:2] == './': fn = fn[2:] + if fn[:2] == './': + fn = fn[2:] if '-i' in opts and abspath(fn) in opts['-i']: continue @@ -199,15 +185,14 @@ def main(argv): print("Checking %s..." % fn) try: - f = open(fn, 'r') - lines = list(f) + lines = open(fn, 'rb').read().decode('utf-8').splitlines() except (IOError, OSError) as err: print("%s: cannot open: %s" % (fn, err)) num += 1 continue for checker in checkerlist: - if not in_pocoo_pkg and checker.only_pkg: + if not in_pygments_pkg and checker.only_pkg: continue for lno, msg in checker(fn, lines): print(u"%s:%d: %s" % (fn, lno, msg), file=out) diff --git a/scripts/debug_lexer.py b/scripts/debug_lexer.py new file mode 100755 index 00000000..dfc28ce2 --- /dev/null +++ b/scripts/debug_lexer.py @@ -0,0 +1,233 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +""" + Lexing error finder + ~~~~~~~~~~~~~~~~~~~ + + For the source files given on the command line, display + the text where Error tokens are being generated, along + with some context. + + :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +from __future__ import print_function + +import os +import sys + +# always prefer Pygments from source if exists +srcpath = os.path.join(os.path.dirname(__file__), '..') +if os.path.isdir(os.path.join(srcpath, 'pygments')): + sys.path.insert(0, srcpath) + + +from pygments.lexer import RegexLexer, ProfilingRegexLexer, ProfilingRegexLexerMeta +from pygments.lexers import get_lexer_by_name, find_lexer_class, \ + find_lexer_class_for_filename +from pygments.token import Error, Text, _TokenType +from pygments.cmdline import _parse_options + + +class DebuggingRegexLexer(RegexLexer): + """Make the state stack, position and current match instance attributes.""" + + def get_tokens_unprocessed(self, text, stack=('root',)): + """ + Split ``text`` into (tokentype, text) pairs. + + ``stack`` is the inital stack (default: ``['root']``) + """ + self.pos = 0 + tokendefs = self._tokens + self.statestack = list(stack) + statetokens = tokendefs[self.statestack[-1]] + while 1: + for rexmatch, action, new_state in statetokens: + self.m = m = rexmatch(text, self.pos) + if m: + if action is not None: + if type(action) is _TokenType: + yield self.pos, action, m.group() + else: + for item in action(self, m): + yield item + self.pos = m.end() + if new_state is not None: + # state transition + if isinstance(new_state, tuple): + for state in new_state: + if state == '#pop': + self.statestack.pop() + elif state == '#push': + self.statestack.append(self.statestack[-1]) + else: + self.statestack.append(state) + elif isinstance(new_state, int): + # pop + del self.statestack[new_state:] + elif new_state == '#push': + self.statestack.append(self.statestack[-1]) + else: + assert False, 'wrong state def: %r' % new_state + statetokens = tokendefs[self.statestack[-1]] + break + else: + try: + if text[self.pos] == '\n': + # at EOL, reset state to 'root' + self.pos += 1 + self.statestack = ['root'] + statetokens = tokendefs['root'] + yield self.pos, Text, u'\n' + continue + yield self.pos, Error, text[self.pos] + self.pos += 1 + except IndexError: + break + + +def main(fn, lexer=None, options={}): + if lexer is not None: + lxcls = get_lexer_by_name(lexer).__class__ + else: + lxcls = find_lexer_class_for_filename(os.path.basename(fn)) + if lxcls is None: + name, rest = fn.split('_', 1) + lxcls = find_lexer_class(name) + if lxcls is None: + raise AssertionError('no lexer found for file %r' % fn) + debug_lexer = False + if profile: + # does not work for e.g. ExtendedRegexLexers + if lxcls.__bases__ == (RegexLexer,): + # yes we can! (change the metaclass) + lxcls.__class__ = ProfilingRegexLexerMeta + lxcls.__bases__ = (ProfilingRegexLexer,) + lxcls._prof_sort_index = profsort + else: + if lxcls.__bases__ == (RegexLexer,): + lxcls.__bases__ = (DebuggingRegexLexer,) + debug_lexer = True + elif lxcls.__bases__ == (DebuggingRegexLexer,): + # already debugged before + debug_lexer = True + else: + # HACK: ExtendedRegexLexer subclasses will only partially work here. + lxcls.__bases__ = (DebuggingRegexLexer,) + debug_lexer = True + + lx = lxcls(**options) + lno = 1 + if fn == '-': + text = sys.stdin.read() + else: + with open(fn, 'rb') as fp: + text = fp.read().decode('utf-8') + text = text.strip('\n') + '\n' + tokens = [] + states = [] + + def show_token(tok, state): + reprs = list(map(repr, tok)) + print(' ' + reprs[1] + ' ' + ' ' * (29-len(reprs[1])) + reprs[0], end=' ') + if debug_lexer: + print(' ' + ' ' * (29-len(reprs[0])) + repr(state), end=' ') + print() + + for type, val in lx.get_tokens(text): + lno += val.count('\n') + if type == Error: + print('Error parsing', fn, 'on line', lno) + print('Previous tokens' + (debug_lexer and ' and states' or '') + ':') + if showall: + for tok, state in map(None, tokens, states): + show_token(tok, state) + else: + for i in range(max(len(tokens) - num, 0), len(tokens)): + if debug_lexer: + show_token(tokens[i], states[i]) + else: + show_token(tokens[i], None) + print('Error token:') + l = len(repr(val)) + print(' ' + repr(val), end=' ') + if debug_lexer and hasattr(lx, 'statestack'): + print(' ' * (60-l) + repr(lx.statestack), end=' ') + print() + print() + return 1 + tokens.append((type, val)) + if debug_lexer: + if hasattr(lx, 'statestack'): + states.append(lx.statestack[:]) + else: + states.append(None) + if showall: + for tok, state in zip(tokens, states): + show_token(tok, state) + return 0 + + +def print_help(): + print('''\ +Pygments development helper to quickly debug lexers. + + scripts/debug_lexer.py [options] file ... + +Give one or more filenames to lex them and display possible error tokens +and/or profiling info. Files are assumed to be encoded in UTF-8. + +Selecting lexer and options: + + -l NAME use lexer named NAME (default is to guess from + the given filenames) + -O OPTIONSTR use lexer options parsed from OPTIONSTR + +Debugging lexing errors: + + -n N show the last N tokens on error + -a always show all lexed tokens (default is only + to show them when an error occurs) + +Profiling: + + -p use the ProfilingRegexLexer to profile regexes + instead of the debugging lexer + -s N sort profiling output by column N (default is + column 4, the time per call) +''') + +num = 10 +showall = False +lexer = None +options = {} +profile = False +profsort = 4 + +if __name__ == '__main__': + import getopt + opts, args = getopt.getopt(sys.argv[1:], 'n:l:apO:s:h') + for opt, val in opts: + if opt == '-n': + num = int(val) + elif opt == '-a': + showall = True + elif opt == '-l': + lexer = val + elif opt == '-p': + profile = True + elif opt == '-s': + profsort = int(val) + elif opt == '-O': + options = _parse_options([val]) + elif opt == '-h': + print_help() + sys.exit(0) + ret = 0 + if not args: + print_help() + for f in args: + ret += main(f, lexer, options) + sys.exit(bool(ret)) diff --git a/scripts/find_codetags.py b/scripts/find_codetags.py index f8204e6e..5063f61f 100755 --- a/scripts/find_codetags.py +++ b/scripts/find_codetags.py @@ -39,12 +39,12 @@ def escape_html(text): def process_file(store, filename): try: - f = open(filename, 'r') + fp = open(filename, 'r') except (IOError, OSError): return False llmatch = 0 - try: - for lno, line in enumerate(f): + with fp: + for lno, line in enumerate(fp): # just some random heuristics to filter out binary files if lno < 100 and binary_re.search(line): return False @@ -69,8 +69,6 @@ def process_file(store, filename): continue llmatch = 0 return True - finally: - f.close() def main(): @@ -198,13 +196,13 @@ td { padding: 2px 5px 2px 5px; '<td class="tag %%(tag)s">%%(tag)s</td>' '<td class="who">%%(who)s</td><td class="what">%%(what)s</td></tr>') - f = open(output, 'w') table = '\n'.join(TABLE % fname + '\n'.join(TR % (no % 2,) % entry for no, entry in enumerate(store[fname])) for fname in sorted(store)) - f.write(HTML % (', '.join(map(abspath, args)), table)) - f.close() + + with open(output, 'w') as fp: + fp.write(HTML % (', '.join(map(abspath, args)), table)) print("Report written to %s." % output) return 0 diff --git a/scripts/find_error.py b/scripts/find_error.py index 7aaa9bee..ba0b76f1 100755..120000 --- a/scripts/find_error.py +++ b/scripts/find_error.py @@ -1,173 +1 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- -""" - Lexing error finder - ~~~~~~~~~~~~~~~~~~~ - - For the source files given on the command line, display - the text where Error tokens are being generated, along - with some context. - - :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. - :license: BSD, see LICENSE for details. -""" - -from __future__ import print_function - -import os -import sys - -# always prefer Pygments from source if exists -srcpath = os.path.join(os.path.dirname(__file__), '..') -if os.path.isdir(os.path.join(srcpath, 'pygments')): - sys.path.insert(0, srcpath) - - -from pygments.lexer import RegexLexer -from pygments.lexers import get_lexer_for_filename, get_lexer_by_name -from pygments.token import Error, Text, _TokenType -from pygments.cmdline import _parse_options - - -class DebuggingRegexLexer(RegexLexer): - """Make the state stack, position and current match instance attributes.""" - - def get_tokens_unprocessed(self, text, stack=('root',)): - """ - Split ``text`` into (tokentype, text) pairs. - - ``stack`` is the inital stack (default: ``['root']``) - """ - self.pos = 0 - tokendefs = self._tokens - self.statestack = list(stack) - statetokens = tokendefs[self.statestack[-1]] - while 1: - for rexmatch, action, new_state in statetokens: - self.m = m = rexmatch(text, self.pos) - if m: - if type(action) is _TokenType: - yield self.pos, action, m.group() - else: - for item in action(self, m): - yield item - self.pos = m.end() - if new_state is not None: - # state transition - if isinstance(new_state, tuple): - for state in new_state: - if state == '#pop': - self.statestack.pop() - elif state == '#push': - self.statestack.append(self.statestack[-1]) - else: - self.statestack.append(state) - elif isinstance(new_state, int): - # pop - del self.statestack[new_state:] - elif new_state == '#push': - self.statestack.append(self.statestack[-1]) - else: - assert False, 'wrong state def: %r' % new_state - statetokens = tokendefs[self.statestack[-1]] - break - else: - try: - if text[self.pos] == '\n': - # at EOL, reset state to 'root' - self.pos += 1 - self.statestack = ['root'] - statetokens = tokendefs['root'] - yield self.pos, Text, u'\n' - continue - yield self.pos, Error, text[self.pos] - self.pos += 1 - except IndexError: - break - - -def main(fn, lexer=None, options={}): - if lexer is not None: - lx = get_lexer_by_name(lexer) - else: - try: - lx = get_lexer_for_filename(os.path.basename(fn), **options) - except ValueError: - try: - name, rest = fn.split('_', 1) - lx = get_lexer_by_name(name, **options) - except ValueError: - raise AssertionError('no lexer found for file %r' % fn) - debug_lexer = False - # does not work for e.g. ExtendedRegexLexers - if lx.__class__.__bases__ == (RegexLexer,): - lx.__class__.__bases__ = (DebuggingRegexLexer,) - debug_lexer = True - elif lx.__class__.__bases__ == (DebuggingRegexLexer,): - # already debugged before - debug_lexer = True - lno = 1 - text = open(fn, 'U').read() - text = text.strip('\n') + '\n' - tokens = [] - states = [] - - def show_token(tok, state): - reprs = map(repr, tok) - print(' ' + reprs[1] + ' ' + ' ' * (29-len(reprs[1])) + reprs[0], end=' ') - if debug_lexer: - print(' ' + ' ' * (29-len(reprs[0])) + repr(state), end=' ') - print() - - for type, val in lx.get_tokens(text): - lno += val.count('\n') - if type == Error: - print('Error parsing', fn, 'on line', lno) - print('Previous tokens' + (debug_lexer and ' and states' or '') + ':') - if showall: - for tok, state in map(None, tokens, states): - show_token(tok, state) - else: - for i in range(max(len(tokens) - num, 0), len(tokens)): - show_token(tokens[i], states[i]) - print('Error token:') - l = len(repr(val)) - print(' ' + repr(val), end=' ') - if debug_lexer and hasattr(lx, 'statestack'): - print(' ' * (60-l) + repr(lx.statestack), end=' ') - print() - print() - return 1 - tokens.append((type, val)) - if debug_lexer: - if hasattr(lx, 'statestack'): - states.append(lx.statestack[:]) - else: - states.append(None) - if showall: - for tok, state in map(None, tokens, states): - show_token(tok, state) - return 0 - - -num = 10 -showall = False -lexer = None -options = {} - -if __name__ == '__main__': - import getopt - opts, args = getopt.getopt(sys.argv[1:], 'n:l:aO:') - for opt, val in opts: - if opt == '-n': - num = int(val) - elif opt == '-a': - showall = True - elif opt == '-l': - lexer = val - elif opt == '-O': - options = _parse_options([val]) - ret = 0 - for f in args: - ret += main(f, lexer, options) - sys.exit(bool(ret)) +debug_lexer.py
\ No newline at end of file diff --git a/scripts/get_vimkw.py b/scripts/get_vimkw.py index 4ea302f4..fc4d5ec6 100644 --- a/scripts/get_vimkw.py +++ b/scripts/get_vimkw.py @@ -1,13 +1,42 @@ from __future__ import print_function + import re +from pygments.util import format_lines + r_line = re.compile(r"^(syn keyword vimCommand contained|syn keyword vimOption " r"contained|syn keyword vimAutoEvent contained)\s+(.*)") r_item = re.compile(r"(\w+)(?:\[(\w+)\])?") +HEADER = '''\ +# -*- coding: utf-8 -*- +""" + pygments.lexers._vim_builtins + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + This file is autogenerated by scripts/get_vimkw.py + + :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +# Split up in multiple functions so it's importable by jython, which has a +# per-method size limit. +''' + +METHOD = '''\ +def _get%(key)s(): +%(body)s + return var +%(key)s = _get%(key)s() +''' + def getkw(input, output): out = file(output, 'w') + # Copy template from an existing file. + print(HEADER, file=out) + output_info = {'command': [], 'option': [], 'auto': []} for line in file(input): m = r_line.match(line) @@ -29,9 +58,10 @@ def getkw(input, output): output_info['option'].append("('inoremap','inoremap')") output_info['option'].append("('vnoremap','vnoremap')") - for a, b in output_info.items(): - b.sort() - print('%s=[%s]' % (a, ','.join(b)), file=out) + for key, keywordlist in output_info.items(): + keywordlist.sort() + body = format_lines('var', keywordlist, raw=True, indent_level=1) + print(METHOD % locals(), file=out) def is_keyword(w, keywords): for i in range(len(w), 0, -1): @@ -40,4 +70,5 @@ def is_keyword(w, keywords): return False if __name__ == "__main__": - getkw("/usr/share/vim/vim73/syntax/vim.vim", "temp.py") + getkw("/usr/share/vim/vim74/syntax/vim.vim", + "pygments/lexers/_vim_builtins.py") |