path: root/scripts
diff options
Diffstat (limited to 'scripts')
5 files changed, 297 insertions, 222 deletions
diff --git a/scripts/ b/scripts/
index 94c2c15d..5f233887 100755
--- a/scripts/
+++ b/scripts/
@@ -23,8 +23,10 @@ from os.path import join, splitext, abspath
checkers = {}
def checker(*suffixes, **kwds):
only_pkg = kwds.pop('only_pkg', False)
def deco(func):
for suffix in suffixes:
checkers.setdefault(suffix, []).append(func)
@@ -38,8 +40,6 @@ copyright_re = re.compile(r'^ :copyright: Copyright 2006-2014 by '
r'the Pygments team, see AUTHORS\.$', re.UNICODE)
copyright_2_re = re.compile(r'^ %s(, %s)*[,.]$' %
(name_mail_re, name_mail_re), re.UNICODE)
-coding_re = re.compile(r'coding[:=]\s*([-\w.]+)')
-not_ix_re = re.compile(r'\bnot\s+\S+?\s+i[sn]\s\S+')
is_const_re = re.compile(r'if.*?==\s+(None|False|True)\b')
misspellings = ["developement", "adress", "verificate", # ALLOW-MISSPELLING
@@ -48,44 +48,30 @@ misspellings = ["developement", "adress", "verificate", # ALLOW-MISSPELLING
def check_syntax(fn, lines):
+ if '#!/' in lines[0]:
+ lines = lines[1:]
+ if 'coding:' in lines[0]:
+ lines = lines[1:]
- compile(''.join(lines), fn, "exec")
+ compile('\n'.join(lines), fn, "exec")
except SyntaxError as err:
yield 0, "not compilable: %s" % err
def check_style_and_encoding(fn, lines):
- encoding = 'ascii'
for lno, line in enumerate(lines):
- if len(line) > 90:
+ if len(line) > 110:
yield lno+1, "line too long"
- m =
- if m:
- yield lno+1, '"' + + '"'
yield lno+1, 'using == None/True/False'
- if lno < 2:
- co =
- if co:
- encoding =
- try:
- line.decode(encoding)
- except AttributeError:
- # Python 3 - encoding was already checked
- pass
- except UnicodeDecodeError as err:
- yield lno+1, "not decodable: %s\n Line: %r" % (err, line)
- except LookupError as err:
- yield 0, "unknown encoding: %s" % encoding
- encoding = 'latin1'
@checker('.py', only_pkg=True)
def check_fileheader(fn, lines):
# line number correction
c = 1
- if lines[0:1] == ['#!/usr/bin/env python\n']:
+ if lines[0:1] == ['#!/usr/bin/env python']:
lines = lines[1:]
c = 2
@@ -94,31 +80,28 @@ def check_fileheader(fn, lines):
for lno, l in enumerate(lines):
if lno == 0:
- if l == '# -*- coding: rot13 -*-\n':
- # special-case pony package
- return
- elif l != '# -*- coding: utf-8 -*-\n':
+ if l != '# -*- coding: utf-8 -*-':
yield 1, "missing coding declaration"
elif lno == 1:
- if l != '"""\n' and l != 'r"""\n':
+ if l != '"""' and l != 'r"""':
yield 2, 'missing docstring begin (""")'
docopen = True
elif docopen:
- if l == '"""\n':
+ if l == '"""':
# end of docstring
if lno <= 4:
yield lno+c, "missing module name in docstring"
- if l != "\n" and l[:4] != ' ' and docopen:
+ if l != "" and l[:4] != ' ' and docopen:
yield lno+c, "missing correct docstring indentation"
if lno == 2:
# if not in package, don't check the module name
modname = fn[:-3].replace('/', '.').replace('.__init__', '')
while modname:
- if l.lower()[4:-1] == modname:
+ if l.lower()[4:] == modname:
modname = '.'.join(modname.split('.')[1:])
@@ -133,7 +116,7 @@ def check_fileheader(fn, lines):
# check for copyright and license fields
license = llist[-2:-1]
- if license != [" :license: BSD, see LICENSE for details.\n"]:
+ if license != [" :license: BSD, see LICENSE for details."]:
yield 0, "no correct license info"
ci = -3
@@ -176,16 +159,19 @@ def main(argv):
for root, dirs, files in os.walk(path):
if '.hg' in dirs:
+ if 'examplefiles' in dirs:
+ dirs.remove('examplefiles')
if '-i' in opts and abspath(root) in opts['-i']:
del dirs[:]
# XXX: awkward: for the Makefile call: don't check non-package
# files for file headers
- in_pocoo_pkg = root.startswith('./pygments')
+ in_pygments_pkg = root.startswith('./pygments')
for fn in files:
fn = join(root, fn)
- if fn[:2] == './': fn = fn[2:]
+ if fn[:2] == './':
+ fn = fn[2:]
if '-i' in opts and abspath(fn) in opts['-i']:
@@ -199,15 +185,14 @@ def main(argv):
print("Checking %s..." % fn)
- f = open(fn, 'r')
- lines = list(f)
+ lines = open(fn, 'rb').read().decode('utf-8').splitlines()
except (IOError, OSError) as err:
print("%s: cannot open: %s" % (fn, err))
num += 1
for checker in checkerlist:
- if not in_pocoo_pkg and checker.only_pkg:
+ if not in_pygments_pkg and checker.only_pkg:
for lno, msg in checker(fn, lines):
print(u"%s:%d: %s" % (fn, lno, msg), file=out)
diff --git a/scripts/ b/scripts/
new file mode 100755
index 00000000..dfc28ce2
--- /dev/null
+++ b/scripts/
@@ -0,0 +1,233 @@
+# -*- coding: utf-8 -*-
+ Lexing error finder
+ ~~~~~~~~~~~~~~~~~~~
+ For the source files given on the command line, display
+ the text where Error tokens are being generated, along
+ with some context.
+ :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+from __future__ import print_function
+import os
+import sys
+# always prefer Pygments from the source checkout if it exists
+srcpath = os.path.join(os.path.dirname(__file__), '..')
+if os.path.isdir(os.path.join(srcpath, 'pygments')):
+ sys.path.insert(0, srcpath)
+from pygments.lexer import RegexLexer, ProfilingRegexLexer, ProfilingRegexLexerMeta
+from pygments.lexers import get_lexer_by_name, find_lexer_class, \
+ find_lexer_class_for_filename
+from pygments.token import Error, Text, _TokenType
+from pygments.cmdline import _parse_options
+class DebuggingRegexLexer(RegexLexer):
+ """Make the state stack, position and current match instance attributes."""
+ def get_tokens_unprocessed(self, text, stack=('root',)):
+ """
+ Split ``text`` into (index, tokentype, value) triples.
+ ``stack`` is the initial stack (default: ``('root',)``)
+ """
+ self.pos = 0
+ tokendefs = self._tokens
+ self.statestack = list(stack)
+ statetokens = tokendefs[self.statestack[-1]]
+ while 1:
+ for rexmatch, action, new_state in statetokens:
+ self.m = m = rexmatch(text, self.pos)
+ if m:
+ if action is not None:
+ if type(action) is _TokenType:
+ yield self.pos, action,
+ else:
+ for item in action(self, m):
+ yield item
+ self.pos = m.end()
+ if new_state is not None:
+ # state transition
+ if isinstance(new_state, tuple):
+ for state in new_state:
+ if state == '#pop':
+ self.statestack.pop()
+ elif state == '#push':
+ self.statestack.append(self.statestack[-1])
+ else:
+ self.statestack.append(state)
+ elif isinstance(new_state, int):
+ # pop
+ del self.statestack[new_state:]
+ elif new_state == '#push':
+ self.statestack.append(self.statestack[-1])
+ else:
+ assert False, 'wrong state def: %r' % new_state
+ statetokens = tokendefs[self.statestack[-1]]
+ break
+ else:
+ try:
+ if text[self.pos] == '\n':
+ # at EOL, reset state to 'root'
+ self.pos += 1
+ self.statestack = ['root']
+ statetokens = tokendefs['root']
+ yield self.pos, Text, u'\n'
+ continue
+ yield self.pos, Error, text[self.pos]
+ self.pos += 1
+ except IndexError:
+ break
+def main(fn, lexer=None, options={}):
+ if lexer is not None:
+ lxcls = get_lexer_by_name(lexer).__class__
+ else:
+ lxcls = find_lexer_class_for_filename(os.path.basename(fn))
+ if lxcls is None:
+ name, rest = fn.split('_', 1)
+ lxcls = find_lexer_class(name)
+ if lxcls is None:
+ raise AssertionError('no lexer found for file %r' % fn)
+ debug_lexer = False
+ if profile:
+ # does not work for e.g. ExtendedRegexLexers
+ if lxcls.__bases__ == (RegexLexer,):
+ # yes we can! (change the metaclass)
+ lxcls.__class__ = ProfilingRegexLexerMeta
+ lxcls.__bases__ = (ProfilingRegexLexer,)
+ lxcls._prof_sort_index = profsort
+ else:
+ if lxcls.__bases__ == (RegexLexer,):
+ lxcls.__bases__ = (DebuggingRegexLexer,)
+ debug_lexer = True
+ elif lxcls.__bases__ == (DebuggingRegexLexer,):
+ # already debugged before
+ debug_lexer = True
+ else:
+ # HACK: ExtendedRegexLexer subclasses will only partially work here.
+ lxcls.__bases__ = (DebuggingRegexLexer,)
+ debug_lexer = True
+ lx = lxcls(**options)
+ lno = 1
+ if fn == '-':
+ text =
+ else:
+ with open(fn, 'rb') as fp:
+ text ='utf-8')
+ text = text.strip('\n') + '\n'
+ tokens = []
+ states = []
+ def show_token(tok, state):
+ reprs = list(map(repr, tok))
+ print(' ' + reprs[1] + ' ' + ' ' * (29-len(reprs[1])) + reprs[0], end=' ')
+ if debug_lexer:
+ print(' ' + ' ' * (29-len(reprs[0])) + repr(state), end=' ')
+ print()
+ for type, val in lx.get_tokens(text):
+ lno += val.count('\n')
+ if type == Error:
+ print('Error parsing', fn, 'on line', lno)
+ print('Previous tokens' + (debug_lexer and ' and states' or '') + ':')
+ if showall:
+ for tok, state in map(None, tokens, states):
+ show_token(tok, state)
+ else:
+ for i in range(max(len(tokens) - num, 0), len(tokens)):
+ if debug_lexer:
+ show_token(tokens[i], states[i])
+ else:
+ show_token(tokens[i], None)
+ print('Error token:')
+ l = len(repr(val))
+ print(' ' + repr(val), end=' ')
+ if debug_lexer and hasattr(lx, 'statestack'):
+ print(' ' * (60-l) + repr(lx.statestack), end=' ')
+ print()
+ print()
+ return 1
+ tokens.append((type, val))
+ if debug_lexer:
+ if hasattr(lx, 'statestack'):
+ states.append(lx.statestack[:])
+ else:
+ states.append(None)
+ if showall:
+ for tok, state in zip(tokens, states):
+ show_token(tok, state)
+ return 0
+def print_help():
+ print('''\
+Pygments development helper to quickly debug lexers.
+ scripts/ [options] file ...
+Give one or more filenames to lex them and display possible error tokens
+and/or profiling info. Files are assumed to be encoded in UTF-8.
+Selecting lexer and options:
+ -l NAME use lexer named NAME (default is to guess from
+ the given filenames)
+ -O OPTIONSTR use lexer options parsed from OPTIONSTR
+Debugging lexing errors:
+ -n N show the last N tokens on error
+ -a always show all lexed tokens (default is only
+ to show them when an error occurs)
+ -p use the ProfilingRegexLexer to profile regexes
+ instead of the debugging lexer
+ -s N sort profiling output by column N (default is
+ column 4, the time per call)
+num = 10
+showall = False
+lexer = None
+options = {}
+profile = False
+profsort = 4
+if __name__ == '__main__':
+ import getopt
+ opts, args = getopt.getopt(sys.argv[1:], 'n:l:apO:s:h')
+ for opt, val in opts:
+ if opt == '-n':
+ num = int(val)
+ elif opt == '-a':
+ showall = True
+ elif opt == '-l':
+ lexer = val
+ elif opt == '-p':
+ profile = True
+ elif opt == '-s':
+ profsort = int(val)
+ elif opt == '-O':
+ options = _parse_options([val])
+ elif opt == '-h':
+ print_help()
+ sys.exit(0)
+ ret = 0
+ if not args:
+ print_help()
+ for f in args:
+ ret += main(f, lexer, options)
+ sys.exit(bool(ret))
diff --git a/scripts/ b/scripts/
index f8204e6e..5063f61f 100755
--- a/scripts/
+++ b/scripts/
@@ -39,12 +39,12 @@ def escape_html(text):
def process_file(store, filename):
- f = open(filename, 'r')
+ fp = open(filename, 'r')
except (IOError, OSError):
return False
llmatch = 0
- try:
- for lno, line in enumerate(f):
+ with fp:
+ for lno, line in enumerate(fp):
# just some random heuristics to filter out binary files
if lno < 100 and
return False
@@ -69,8 +69,6 @@ def process_file(store, filename):
llmatch = 0
return True
- finally:
- f.close()
def main():
@@ -198,13 +196,13 @@ td { padding: 2px 5px 2px 5px;
'<td class="tag %%(tag)s">%%(tag)s</td>'
'<td class="who">%%(who)s</td><td class="what">%%(what)s</td></tr>')
- f = open(output, 'w')
table = '\n'.join(TABLE % fname +
'\n'.join(TR % (no % 2,) % entry
for no, entry in enumerate(store[fname]))
for fname in sorted(store))
- f.write(HTML % (', '.join(map(abspath, args)), table))
- f.close()
+ with open(output, 'w') as fp:
+ fp.write(HTML % (', '.join(map(abspath, args)), table))
print("Report written to %s." % output)
return 0
diff --git a/scripts/ b/scripts/
index 7aaa9bee..ba0b76f1 100755..120000
--- a/scripts/
+++ b/scripts/
@@ -1,173 +1 @@
-# -*- coding: utf-8 -*-
- Lexing error finder
- ~~~~~~~~~~~~~~~~~~~
- For the source files given on the command line, display
- the text where Error tokens are being generated, along
- with some context.
- :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
- :license: BSD, see LICENSE for details.
-from __future__ import print_function
-import os
-import sys
-# always prefer Pygments from source if exists
-srcpath = os.path.join(os.path.dirname(__file__), '..')
-if os.path.isdir(os.path.join(srcpath, 'pygments')):
- sys.path.insert(0, srcpath)
-from pygments.lexer import RegexLexer
-from pygments.lexers import get_lexer_for_filename, get_lexer_by_name
-from pygments.token import Error, Text, _TokenType
-from pygments.cmdline import _parse_options
-class DebuggingRegexLexer(RegexLexer):
- """Make the state stack, position and current match instance attributes."""
- def get_tokens_unprocessed(self, text, stack=('root',)):
- """
- Split ``text`` into (tokentype, text) pairs.
- ``stack`` is the inital stack (default: ``['root']``)
- """
- self.pos = 0
- tokendefs = self._tokens
- self.statestack = list(stack)
- statetokens = tokendefs[self.statestack[-1]]
- while 1:
- for rexmatch, action, new_state in statetokens:
- self.m = m = rexmatch(text, self.pos)
- if m:
- if type(action) is _TokenType:
- yield self.pos, action,
- else:
- for item in action(self, m):
- yield item
- self.pos = m.end()
- if new_state is not None:
- # state transition
- if isinstance(new_state, tuple):
- for state in new_state:
- if state == '#pop':
- self.statestack.pop()
- elif state == '#push':
- self.statestack.append(self.statestack[-1])
- else:
- self.statestack.append(state)
- elif isinstance(new_state, int):
- # pop
- del self.statestack[new_state:]
- elif new_state == '#push':
- self.statestack.append(self.statestack[-1])
- else:
- assert False, 'wrong state def: %r' % new_state
- statetokens = tokendefs[self.statestack[-1]]
- break
- else:
- try:
- if text[self.pos] == '\n':
- # at EOL, reset state to 'root'
- self.pos += 1
- self.statestack = ['root']
- statetokens = tokendefs['root']
- yield self.pos, Text, u'\n'
- continue
- yield self.pos, Error, text[self.pos]
- self.pos += 1
- except IndexError:
- break
-def main(fn, lexer=None, options={}):
- if lexer is not None:
- lx = get_lexer_by_name(lexer)
- else:
- try:
- lx = get_lexer_for_filename(os.path.basename(fn), **options)
- except ValueError:
- try:
- name, rest = fn.split('_', 1)
- lx = get_lexer_by_name(name, **options)
- except ValueError:
- raise AssertionError('no lexer found for file %r' % fn)
- debug_lexer = False
- # does not work for e.g. ExtendedRegexLexers
- if lx.__class__.__bases__ == (RegexLexer,):
- lx.__class__.__bases__ = (DebuggingRegexLexer,)
- debug_lexer = True
- elif lx.__class__.__bases__ == (DebuggingRegexLexer,):
- # already debugged before
- debug_lexer = True
- lno = 1
- text = open(fn, 'U').read()
- text = text.strip('\n') + '\n'
- tokens = []
- states = []
- def show_token(tok, state):
- reprs = map(repr, tok)
- print(' ' + reprs[1] + ' ' + ' ' * (29-len(reprs[1])) + reprs[0], end=' ')
- if debug_lexer:
- print(' ' + ' ' * (29-len(reprs[0])) + repr(state), end=' ')
- print()
- for type, val in lx.get_tokens(text):
- lno += val.count('\n')
- if type == Error:
- print('Error parsing', fn, 'on line', lno)
- print('Previous tokens' + (debug_lexer and ' and states' or '') + ':')
- if showall:
- for tok, state in map(None, tokens, states):
- show_token(tok, state)
- else:
- for i in range(max(len(tokens) - num, 0), len(tokens)):
- show_token(tokens[i], states[i])
- print('Error token:')
- l = len(repr(val))
- print(' ' + repr(val), end=' ')
- if debug_lexer and hasattr(lx, 'statestack'):
- print(' ' * (60-l) + repr(lx.statestack), end=' ')
- print()
- print()
- return 1
- tokens.append((type, val))
- if debug_lexer:
- if hasattr(lx, 'statestack'):
- states.append(lx.statestack[:])
- else:
- states.append(None)
- if showall:
- for tok, state in map(None, tokens, states):
- show_token(tok, state)
- return 0
-num = 10
-showall = False
-lexer = None
-options = {}
-if __name__ == '__main__':
- import getopt
- opts, args = getopt.getopt(sys.argv[1:], 'n:l:aO:')
- for opt, val in opts:
- if opt == '-n':
- num = int(val)
- elif opt == '-a':
- showall = True
- elif opt == '-l':
- lexer = val
- elif opt == '-O':
- options = _parse_options([val])
- ret = 0
- for f in args:
- ret += main(f, lexer, options)
- sys.exit(bool(ret)) \ No newline at end of file
diff --git a/scripts/ b/scripts/
index 4ea302f4..fc4d5ec6 100644
--- a/scripts/
+++ b/scripts/
@@ -1,13 +1,42 @@
from __future__ import print_function
import re
+from pygments.util import format_lines
r_line = re.compile(r"^(syn keyword vimCommand contained|syn keyword vimOption "
r"contained|syn keyword vimAutoEvent contained)\s+(.*)")
r_item = re.compile(r"(\w+)(?:\[(\w+)\])?")
+HEADER = '''\
+# -*- coding: utf-8 -*-
+ pygments.lexers._vim_builtins
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ This file is autogenerated by scripts/
+ :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+# Split up in multiple functions so it's importable by jython, which has a
+# per-method size limit.
+METHOD = '''\
+def _get%(key)s():
+ return var
+%(key)s = _get%(key)s()
def getkw(input, output):
out = file(output, 'w')
+ # Write the module header (the HEADER template) first.
+ print(HEADER, file=out)
output_info = {'command': [], 'option': [], 'auto': []}
for line in file(input):
m = r_line.match(line)
@@ -29,9 +58,10 @@ def getkw(input, output):
- for a, b in output_info.items():
- b.sort()
- print('%s=[%s]' % (a, ','.join(b)), file=out)
+ for key, keywordlist in output_info.items():
+ keywordlist.sort()
+ body = format_lines('var', keywordlist, raw=True, indent_level=1)
+ print(METHOD % locals(), file=out)
def is_keyword(w, keywords):
for i in range(len(w), 0, -1):
@@ -40,4 +70,5 @@ def is_keyword(w, keywords):
return False
if __name__ == "__main__":
- getkw("/usr/share/vim/vim73/syntax/vim.vim", "")
+ getkw("/usr/share/vim/vim74/syntax/vim.vim",
+ "pygments/lexers/")