Diffstat (limited to 'scripts/find_error.py')
l---------[-rwxr-xr-x]  scripts/find_error.py | 174
1 file changed, 1 insertion(+), 173 deletions(-)
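The mode pair in the index line below, 100755..120000, records that the path went from an executable regular file to a symbolic link whose blob content is simply the link target, debug_lexer.py. A quick sanity check from a checkout, as a sketch (it assumes you run it from the repository root):

    import os

    # After this change the path is a symlink to the sibling script,
    # not a regular file with its own contents.
    path = os.path.join('scripts', 'find_error.py')
    print(os.path.islink(path))   # expected: True
    print(os.readlink(path))     # expected: debug_lexer.py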
diff --git a/scripts/find_error.py b/scripts/find_error.py
index 7aaa9bee..ba0b76f1 100755..120000
--- a/scripts/find_error.py
+++ b/scripts/find_error.py
@@ -1,173 +1 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-"""
- Lexing error finder
- ~~~~~~~~~~~~~~~~~~~
-
- For the source files given on the command line, display
- the text where Error tokens are being generated, along
- with some context.
-
- :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
- :license: BSD, see LICENSE for details.
-"""
-
-from __future__ import print_function
-
-import os
-import sys
-
-# always prefer Pygments from source if it exists
-srcpath = os.path.join(os.path.dirname(__file__), '..')
-if os.path.isdir(os.path.join(srcpath, 'pygments')):
- sys.path.insert(0, srcpath)
-
-
-from pygments.lexer import RegexLexer
-from pygments.lexers import get_lexer_for_filename, get_lexer_by_name
-from pygments.token import Error, Text, _TokenType
-from pygments.cmdline import _parse_options
-
-
-class DebuggingRegexLexer(RegexLexer):
- """Make the state stack, position and current match instance attributes."""
-
- def get_tokens_unprocessed(self, text, stack=('root',)):
- """
- Split ``text`` into (tokentype, text) pairs.
-
-        ``stack`` is the initial stack (default: ``['root']``)
- """
- self.pos = 0
- tokendefs = self._tokens
- self.statestack = list(stack)
- statetokens = tokendefs[self.statestack[-1]]
- while 1:
- for rexmatch, action, new_state in statetokens:
- self.m = m = rexmatch(text, self.pos)
- if m:
- if type(action) is _TokenType:
- yield self.pos, action, m.group()
- else:
- for item in action(self, m):
- yield item
- self.pos = m.end()
- if new_state is not None:
- # state transition
- if isinstance(new_state, tuple):
- for state in new_state:
- if state == '#pop':
- self.statestack.pop()
- elif state == '#push':
- self.statestack.append(self.statestack[-1])
- else:
- self.statestack.append(state)
- elif isinstance(new_state, int):
- # pop
- del self.statestack[new_state:]
- elif new_state == '#push':
- self.statestack.append(self.statestack[-1])
- else:
- assert False, 'wrong state def: %r' % new_state
- statetokens = tokendefs[self.statestack[-1]]
- break
- else:
- try:
- if text[self.pos] == '\n':
- # at EOL, reset state to 'root'
- self.pos += 1
- self.statestack = ['root']
- statetokens = tokendefs['root']
- yield self.pos, Text, u'\n'
- continue
- yield self.pos, Error, text[self.pos]
- self.pos += 1
- except IndexError:
- break
-
-
-def main(fn, lexer=None, options={}):
- if lexer is not None:
- lx = get_lexer_by_name(lexer)
- else:
- try:
- lx = get_lexer_for_filename(os.path.basename(fn), **options)
- except ValueError:
- try:
- name, rest = fn.split('_', 1)
- lx = get_lexer_by_name(name, **options)
- except ValueError:
- raise AssertionError('no lexer found for file %r' % fn)
- debug_lexer = False
- # does not work for e.g. ExtendedRegexLexers
- if lx.__class__.__bases__ == (RegexLexer,):
- lx.__class__.__bases__ = (DebuggingRegexLexer,)
- debug_lexer = True
- elif lx.__class__.__bases__ == (DebuggingRegexLexer,):
- # already debugged before
- debug_lexer = True
- lno = 1
- text = open(fn, 'U').read()
- text = text.strip('\n') + '\n'
- tokens = []
- states = []
-
- def show_token(tok, state):
- reprs = map(repr, tok)
- print(' ' + reprs[1] + ' ' + ' ' * (29-len(reprs[1])) + reprs[0], end=' ')
- if debug_lexer:
- print(' ' + ' ' * (29-len(reprs[0])) + repr(state), end=' ')
- print()
-
- for type, val in lx.get_tokens(text):
- lno += val.count('\n')
- if type == Error:
- print('Error parsing', fn, 'on line', lno)
- print('Previous tokens' + (debug_lexer and ' and states' or '') + ':')
- if showall:
- for tok, state in map(None, tokens, states):
- show_token(tok, state)
- else:
- for i in range(max(len(tokens) - num, 0), len(tokens)):
- show_token(tokens[i], states[i])
- print('Error token:')
- l = len(repr(val))
- print(' ' + repr(val), end=' ')
- if debug_lexer and hasattr(lx, 'statestack'):
- print(' ' * (60-l) + repr(lx.statestack), end=' ')
- print()
- print()
- return 1
- tokens.append((type, val))
- if debug_lexer:
- if hasattr(lx, 'statestack'):
- states.append(lx.statestack[:])
- else:
- states.append(None)
- if showall:
- for tok, state in map(None, tokens, states):
- show_token(tok, state)
- return 0
-
-
-num = 10
-showall = False
-lexer = None
-options = {}
-
-if __name__ == '__main__':
- import getopt
- opts, args = getopt.getopt(sys.argv[1:], 'n:l:aO:')
- for opt, val in opts:
- if opt == '-n':
- num = int(val)
- elif opt == '-a':
- showall = True
- elif opt == '-l':
- lexer = val
- elif opt == '-O':
- options = _parse_options([val])
- ret = 0
- for f in args:
- ret += main(f, lexer, options)
- sys.exit(bool(ret))
+debug_lexer.py
\ No newline at end of file
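For reference, the logic removed above lives on behind the symlink: stream tokens from a lexer, remember a window of recent tokens, and dump that context once an Error token shows up. A minimal self-contained sketch of that loop, assuming only that Pygments is installed (find_errors and its parameters are illustrative names, not part of any project API):

    from collections import deque

    from pygments.lexers import get_lexer_by_name
    from pygments.token import Error

    def find_errors(text, lexer_name, context=10):
        # Keep the last `context` (token type, value) pairs, mirroring
        # the removed script's default of ten previous tokens (-n 10).
        lx = get_lexer_by_name(lexer_name)
        recent = deque(maxlen=context)
        lno = 1
        for ttype, value in lx.get_tokens(text):
            lno += value.count('\n')
            if ttype is Error:
                print('Error on line %d; previous tokens:' % lno)
                for t, v in recent:
                    print('  %-30s %r' % (t, v))
                print('Error token: %r' % value)
                return 1
            recent.append((ttype, value))
        return 0

A call such as find_errors(open('example.py').read(), 'python') reproduces the core of what the removed script printed, minus the state-stack column that its DebuggingRegexLexer base-class swap added.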