Diffstat (limited to 'scripts/find_error.py')
scripts/find_error.py (mode -rwxr-xr-x -> l---------, now a symlink) | 174
1 file changed, 1 insertion(+), 173 deletions(-)
diff --git a/scripts/find_error.py b/scripts/find_error.py
index 7aaa9bee..ba0b76f1 100755..120000
--- a/scripts/find_error.py
+++ b/scripts/find_error.py
@@ -1,173 +1 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-"""
-    Lexing error finder
-    ~~~~~~~~~~~~~~~~~~~
-
-    For the source files given on the command line, display
-    the text where Error tokens are being generated, along
-    with some context.
-
-    :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
-    :license: BSD, see LICENSE for details.
-"""
-
-from __future__ import print_function
-
-import os
-import sys
-
-# always prefer Pygments from source if exists
-srcpath = os.path.join(os.path.dirname(__file__), '..')
-if os.path.isdir(os.path.join(srcpath, 'pygments')):
-    sys.path.insert(0, srcpath)
-
-
-from pygments.lexer import RegexLexer
-from pygments.lexers import get_lexer_for_filename, get_lexer_by_name
-from pygments.token import Error, Text, _TokenType
-from pygments.cmdline import _parse_options
-
-
-class DebuggingRegexLexer(RegexLexer):
-    """Make the state stack, position and current match instance attributes."""
-
-    def get_tokens_unprocessed(self, text, stack=('root',)):
-        """
-        Split ``text`` into (tokentype, text) pairs.
-
-        ``stack`` is the inital stack (default: ``['root']``)
-        """
-        self.pos = 0
-        tokendefs = self._tokens
-        self.statestack = list(stack)
-        statetokens = tokendefs[self.statestack[-1]]
-        while 1:
-            for rexmatch, action, new_state in statetokens:
-                self.m = m = rexmatch(text, self.pos)
-                if m:
-                    if type(action) is _TokenType:
-                        yield self.pos, action, m.group()
-                    else:
-                        for item in action(self, m):
-                            yield item
-                    self.pos = m.end()
-                    if new_state is not None:
-                        # state transition
-                        if isinstance(new_state, tuple):
-                            for state in new_state:
-                                if state == '#pop':
-                                    self.statestack.pop()
-                                elif state == '#push':
-                                    self.statestack.append(self.statestack[-1])
-                                else:
-                                    self.statestack.append(state)
-                        elif isinstance(new_state, int):
-                            # pop
-                            del self.statestack[new_state:]
-                        elif new_state == '#push':
-                            self.statestack.append(self.statestack[-1])
-                        else:
-                            assert False, 'wrong state def: %r' % new_state
-                        statetokens = tokendefs[self.statestack[-1]]
-                    break
-            else:
-                try:
-                    if text[self.pos] == '\n':
-                        # at EOL, reset state to 'root'
-                        self.pos += 1
-                        self.statestack = ['root']
-                        statetokens = tokendefs['root']
-                        yield self.pos, Text, u'\n'
-                        continue
-                    yield self.pos, Error, text[self.pos]
-                    self.pos += 1
-                except IndexError:
-                    break
-
-
-def main(fn, lexer=None, options={}):
-    if lexer is not None:
-        lx = get_lexer_by_name(lexer)
-    else:
-        try:
-            lx = get_lexer_for_filename(os.path.basename(fn), **options)
-        except ValueError:
-            try:
-                name, rest = fn.split('_', 1)
-                lx = get_lexer_by_name(name, **options)
-            except ValueError:
-                raise AssertionError('no lexer found for file %r' % fn)
-    debug_lexer = False
-    # does not work for e.g. ExtendedRegexLexers
-    if lx.__class__.__bases__ == (RegexLexer,):
-        lx.__class__.__bases__ = (DebuggingRegexLexer,)
-        debug_lexer = True
-    elif lx.__class__.__bases__ == (DebuggingRegexLexer,):
-        # already debugged before
-        debug_lexer = True
-    lno = 1
-    text = open(fn, 'U').read()
-    text = text.strip('\n') + '\n'
-    tokens = []
-    states = []
-
-    def show_token(tok, state):
-        reprs = map(repr, tok)
-        print('   ' + reprs[1] + ' ' + ' ' * (29-len(reprs[1])) + reprs[0], end=' ')
-        if debug_lexer:
-            print(' ' + ' ' * (29-len(reprs[0])) + repr(state), end=' ')
-        print()
-
-    for type, val in lx.get_tokens(text):
-        lno += val.count('\n')
-        if type == Error:
-            print('Error parsing', fn, 'on line', lno)
-            print('Previous tokens' + (debug_lexer and ' and states' or '') + ':')
-            if showall:
-                for tok, state in map(None, tokens, states):
-                    show_token(tok, state)
-            else:
-                for i in range(max(len(tokens) - num, 0), len(tokens)):
-                    show_token(tokens[i], states[i])
-            print('Error token:')
-            l = len(repr(val))
-            print('   ' + repr(val), end=' ')
-            if debug_lexer and hasattr(lx, 'statestack'):
-                print(' ' * (60-l) + repr(lx.statestack), end=' ')
-            print()
-            print()
-            return 1
-        tokens.append((type, val))
-        if debug_lexer:
-            if hasattr(lx, 'statestack'):
-                states.append(lx.statestack[:])
-            else:
-                states.append(None)
-    if showall:
-        for tok, state in map(None, tokens, states):
-            show_token(tok, state)
-    return 0
-
-
-num = 10
-showall = False
-lexer = None
-options = {}
-
-if __name__ == '__main__':
-    import getopt
-    opts, args = getopt.getopt(sys.argv[1:], 'n:l:aO:')
-    for opt, val in opts:
-        if opt == '-n':
-            num = int(val)
-        elif opt == '-a':
-            showall = True
-        elif opt == '-l':
-            lexer = val
-        elif opt == '-O':
-            options = _parse_options([val])
-    ret = 0
-    for f in args:
-        ret += main(f, lexer, options)
-    sys.exit(bool(ret))
+debug_lexer.py
\ No newline at end of file
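For context: the removed script took source files on the command line (its getopt loop defines -n for the amount of context, -a to show every token, -l to force a lexer by name and -O for lexer options) and printed the tokens leading up to the first Error token. Below is a minimal standalone sketch of that core loop using only the public Pygments API; the file path argument is a placeholder, and it deliberately omits the state-stack tracing that DebuggingRegexLexer added.

import sys
from pygments.lexers import get_lexer_for_filename
from pygments.token import Error

path = sys.argv[1]                      # file to inspect (placeholder argument)
lexer = get_lexer_for_filename(path)
text = open(path).read()

context = []                            # last few (token type, value) pairs seen
for ttype, value in lexer.get_tokens(text):
    if ttype is Error:
        print('Error token %r; preceding tokens:' % value)
        for tok in context:
            print('    %r' % (tok,))
        break
    context = (context + [(ttype, value)])[-10:]    # keep 10 tokens of context, matching the old default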